Fix PR82941 and PR82942 by adding proper vzeroupper generation on SKX.
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "memmodel.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "cfgloop.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "expmed.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic.h"
40 #include "cfgbuild.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "attribs.h"
44 #include "calls.h"
45 #include "stor-layout.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "reload.h"
57 #include "gimplify.h"
58 #include "dwarf2.h"
59 #include "tm-constrs.h"
60 #include "params.h"
61 #include "cselib.h"
62 #include "sched-int.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
71 #include "builtins.h"
72 #include "rtl-iter.h"
73 #include "tree-iterator.h"
74 #include "tree-chkp.h"
75 #include "rtl-chkp.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "regrename.h"
79 #include "dojump.h"
80 #include "fold-const-call.h"
81 #include "tree-vrp.h"
82 #include "tree-ssanames.h"
83 #include "selftest.h"
84 #include "selftest-rtl.h"
85 #include "print-rtl.h"
86 #include "intl.h"
87 #include "ifcvt.h"
88 #include "symbol-summary.h"
89 #include "ipa-prop.h"
90 #include "ipa-fnsummary.h"
92 /* This file should be included last. */
93 #include "target-def.h"
95 #include "x86-tune-costs.h"
97 static rtx legitimize_dllimport_symbol (rtx, bool);
98 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
99 static rtx legitimize_pe_coff_symbol (rtx, bool);
100 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
101 static bool ix86_save_reg (unsigned int, bool, bool);
102 static bool ix86_function_naked (const_tree);
103 static bool ix86_notrack_prefixed_insn_p (rtx);
104 static void ix86_emit_restore_reg_using_pop (rtx);
107 #ifndef CHECK_STACK_LIMIT
108 #define CHECK_STACK_LIMIT (-1)
109 #endif
111 /* Return index of given mode in mult and division cost tables. */
112 #define MODE_INDEX(mode) \
113 ((mode) == QImode ? 0 \
114 : (mode) == HImode ? 1 \
115 : (mode) == SImode ? 2 \
116 : (mode) == DImode ? 3 \
117 : 4)
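/* Example (illustrative): MODE_INDEX (SImode) == 2 selects the 32-bit
   entry of the mult and division cost tables; any mode other than
   QImode/HImode/SImode/DImode maps to the final index 4.  */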
120 /* Set by -mtune. */
121 const struct processor_costs *ix86_tune_cost = NULL;
123 /* Set by -mtune or -Os. */
124 const struct processor_costs *ix86_cost = NULL;
126 /* Processor feature/optimization bitmasks. */
127 #define m_386 (1U<<PROCESSOR_I386)
128 #define m_486 (1U<<PROCESSOR_I486)
129 #define m_PENT (1U<<PROCESSOR_PENTIUM)
130 #define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
131 #define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
132 #define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
133 #define m_NOCONA (1U<<PROCESSOR_NOCONA)
134 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
135 #define m_CORE2 (1U<<PROCESSOR_CORE2)
136 #define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
137 #define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
138 #define m_HASWELL (1U<<PROCESSOR_HASWELL)
139 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
140 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
141 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
142 #define m_KNL (1U<<PROCESSOR_KNL)
143 #define m_KNM (1U<<PROCESSOR_KNM)
144 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
145 #define m_INTEL (1U<<PROCESSOR_INTEL)
147 #define m_GEODE (1U<<PROCESSOR_GEODE)
148 #define m_K6 (1U<<PROCESSOR_K6)
149 #define m_K6_GEODE (m_K6 | m_GEODE)
150 #define m_K8 (1U<<PROCESSOR_K8)
151 #define m_ATHLON (1U<<PROCESSOR_ATHLON)
152 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
153 #define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
154 #define m_BDVER1 (1U<<PROCESSOR_BDVER1)
155 #define m_BDVER2 (1U<<PROCESSOR_BDVER2)
156 #define m_BDVER3 (1U<<PROCESSOR_BDVER3)
157 #define m_BDVER4 (1U<<PROCESSOR_BDVER4)
158 #define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
159 #define m_BTVER1 (1U<<PROCESSOR_BTVER1)
160 #define m_BTVER2 (1U<<PROCESSOR_BTVER2)
161 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
162 #define m_BTVER (m_BTVER1 | m_BTVER2)
163 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
164 | m_ZNVER1)
166 #define m_GENERIC (1U<<PROCESSOR_GENERIC)
168 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
169 #undef DEF_TUNE
170 #define DEF_TUNE(tune, name, selector) name,
171 #include "x86-tune.def"
172 #undef DEF_TUNE
175 /* Feature tests against the various tunings. */
176 unsigned char ix86_tune_features[X86_TUNE_LAST];
178 /* Feature tests against the various tunings used to create ix86_tune_features
179 based on the processor mask. */
180 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
181 #undef DEF_TUNE
182 #define DEF_TUNE(tune, name, selector) selector,
183 #include "x86-tune.def"
184 #undef DEF_TUNE
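/* Editorial note: x86-tune.def is included twice with different DEF_TUNE
   expansions (an X-macro).  The first inclusion above collects the feature
   names, this second inclusion collects the processor-mask selectors, so
   both arrays stay indexed by the same X86_TUNE_* enumerator.  */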
187 /* Feature tests against the various architecture variations. */
188 unsigned char ix86_arch_features[X86_ARCH_LAST];
190 /* Feature tests against the various architecture variations, used to create
191 ix86_arch_features based on the processor mask. */
192 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
193 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
194 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
196 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
197 ~m_386,
199 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
200 ~(m_386 | m_486),
202 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
203 ~m_386,
205 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
206 ~m_386,
 209 /* In case the average insn count for a single function invocation is
210 lower than this constant, emit fast (but longer) prologue and
211 epilogue code. */
212 #define FAST_PROLOGUE_INSN_COUNT 20
214 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
215 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
216 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
217 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
219 /* Array of the smallest class containing reg number REGNO, indexed by
220 REGNO. Used by REGNO_REG_CLASS in i386.h. */
222 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
224 /* ax, dx, cx, bx */
225 AREG, DREG, CREG, BREG,
226 /* si, di, bp, sp */
227 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
228 /* FP registers */
229 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
230 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
231 /* arg pointer */
232 NON_Q_REGS,
233 /* flags, fpsr, fpcr, frame */
234 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
235 /* SSE registers */
236 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
237 SSE_REGS, SSE_REGS,
238 /* MMX registers */
239 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
240 MMX_REGS, MMX_REGS,
241 /* REX registers */
242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
243 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
244 /* SSE REX registers */
245 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
246 SSE_REGS, SSE_REGS,
247 /* AVX-512 SSE registers */
248 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
249 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
250 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
251 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
252 /* Mask registers. */
253 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
254 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
255 /* MPX bound registers */
256 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
259 /* The "default" register map used in 32bit mode. */
261 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
263 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
264 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
265 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
266 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
267 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
268 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
269 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
270 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
271 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
272 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
273 101, 102, 103, 104, /* bound registers */
276 /* The "default" register map used in 64bit mode. */
278 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
280 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
281 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
282 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
283 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
284 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
285 8,9,10,11,12,13,14,15, /* extended integer registers */
286 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
287 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
288 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
289 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
290 126, 127, 128, 129, /* bound registers */
293 /* Define the register numbers to be used in Dwarf debugging information.
294 The SVR4 reference port C compiler uses the following register numbers
295 in its Dwarf output code:
296 0 for %eax (gcc regno = 0)
297 1 for %ecx (gcc regno = 2)
298 2 for %edx (gcc regno = 1)
299 3 for %ebx (gcc regno = 3)
300 4 for %esp (gcc regno = 7)
301 5 for %ebp (gcc regno = 6)
302 6 for %esi (gcc regno = 4)
303 7 for %edi (gcc regno = 5)
304 The following three DWARF register numbers are never generated by
305 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
 306 believes these numbers have these meanings.
307 8 for %eip (no gcc equivalent)
308 9 for %eflags (gcc regno = 17)
309 10 for %trapno (no gcc equivalent)
310 It is not at all clear how we should number the FP stack registers
311 for the x86 architecture. If the version of SDB on x86/svr4 were
312 a bit less brain dead with respect to floating-point then we would
313 have a precedent to follow with respect to DWARF register numbers
314 for x86 FP registers, but the SDB on x86/svr4 was so completely
315 broken with respect to FP registers that it is hardly worth thinking
316 of it as something to strive for compatibility with.
317 The version of x86/svr4 SDB I had does (partially)
318 seem to believe that DWARF register number 11 is associated with
319 the x86 register %st(0), but that's about all. Higher DWARF
320 register numbers don't seem to be associated with anything in
321 particular, and even for DWARF regno 11, SDB only seemed to under-
322 stand that it should say that a variable lives in %st(0) (when
323 asked via an `=' command) if we said it was in DWARF regno 11,
324 but SDB still printed garbage when asked for the value of the
325 variable in question (via a `/' command).
326 (Also note that the labels SDB printed for various FP stack regs
327 when doing an `x' command were all wrong.)
328 Note that these problems generally don't affect the native SVR4
329 C compiler because it doesn't allow the use of -O with -g and
330 because when it is *not* optimizing, it allocates a memory
331 location for each floating-point variable, and the memory
332 location is what gets described in the DWARF AT_location
333 attribute for the variable in question.
334 Regardless of the severe mental illness of the x86/svr4 SDB, we
335 do something sensible here and we use the following DWARF
336 register numbers. Note that these are all stack-top-relative
337 numbers.
338 11 for %st(0) (gcc regno = 8)
339 12 for %st(1) (gcc regno = 9)
340 13 for %st(2) (gcc regno = 10)
341 14 for %st(3) (gcc regno = 11)
342 15 for %st(4) (gcc regno = 12)
343 16 for %st(5) (gcc regno = 13)
344 17 for %st(6) (gcc regno = 14)
345 18 for %st(7) (gcc regno = 15)
347 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
349 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
350 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
351 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
352 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
353 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
354 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
355 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
356 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
357 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
358 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
359 101, 102, 103, 104, /* bound registers */
362 /* Define parameter passing and return registers. */
364 static int const x86_64_int_parameter_registers[6] =
366 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
369 static int const x86_64_ms_abi_int_parameter_registers[4] =
371 CX_REG, DX_REG, R8_REG, R9_REG
374 static int const x86_64_int_return_registers[4] =
376 AX_REG, DX_REG, DI_REG, SI_REG
379 /* Additional registers that are clobbered by SYSV calls. */
381 #define NUM_X86_64_MS_CLOBBERED_REGS 12
382 static int const x86_64_ms_sysv_extra_clobbered_registers
383 [NUM_X86_64_MS_CLOBBERED_REGS] =
385 SI_REG, DI_REG,
386 XMM6_REG, XMM7_REG,
387 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
388 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
391 enum xlogue_stub {
392 XLOGUE_STUB_SAVE,
393 XLOGUE_STUB_RESTORE,
394 XLOGUE_STUB_RESTORE_TAIL,
395 XLOGUE_STUB_SAVE_HFP,
396 XLOGUE_STUB_RESTORE_HFP,
397 XLOGUE_STUB_RESTORE_HFP_TAIL,
399 XLOGUE_STUB_COUNT
402 enum xlogue_stub_sets {
403 XLOGUE_SET_ALIGNED,
404 XLOGUE_SET_ALIGNED_PLUS_8,
405 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
406 XLOGUE_SET_HFP_ALIGNED_PLUS_8,
408 XLOGUE_SET_COUNT
411 /* Register save/restore layout used by out-of-line stubs. */
412 class xlogue_layout {
413 public:
414 struct reginfo
416 unsigned regno;
417 HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or
418 rsi) to where each register is stored. */
421 unsigned get_nregs () const {return m_nregs;}
422 HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}
424 const reginfo &get_reginfo (unsigned reg) const
426 gcc_assert (reg < m_nregs);
427 return m_regs[reg];
430 static const char *get_stub_name (enum xlogue_stub stub,
431 unsigned n_extra_args);
433 /* Returns an rtx for the stub's symbol based upon
434 1.) the specified stub (save, restore or restore_ret) and
435 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
 436 3.) whether or not stack alignment is being performed. */
437 static rtx get_stub_rtx (enum xlogue_stub stub);
439 /* Returns the amount of stack space (including padding) that the stub
440 needs to store registers based upon data in the machine_function. */
441 HOST_WIDE_INT get_stack_space_used () const
443 const struct machine_function *m = cfun->machine;
444 unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
446 gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
447 return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
450 /* Returns the offset for the base pointer used by the stub. */
451 HOST_WIDE_INT get_stub_ptr_offset () const
453 return STUB_INDEX_OFFSET + m_stack_align_off_in;
456 static const struct xlogue_layout &get_instance ();
457 static unsigned count_stub_managed_regs ();
458 static bool is_stub_managed_reg (unsigned regno, unsigned count);
460 static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
461 static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
462 static const unsigned MAX_REGS = 18;
463 static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
464 static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
465 static const unsigned STUB_NAME_MAX_LEN = 20;
466 static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
467 static const unsigned REG_ORDER[MAX_REGS];
468 static const unsigned REG_ORDER_REALIGN[MAX_REGS];
470 private:
471 xlogue_layout ();
472 xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
473 xlogue_layout (const xlogue_layout &);
475 /* True if hard frame pointer is used. */
476 bool m_hfp;
 478 /* Max number of registers this layout manages. */
479 unsigned m_nregs;
481 /* Incoming offset from 16-byte alignment. */
482 HOST_WIDE_INT m_stack_align_off_in;
484 /* Register order and offsets. */
485 struct reginfo m_regs[MAX_REGS];
487 /* Lazy-inited cache of symbol names for stubs. */
488 static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
489 [STUB_NAME_MAX_LEN];
491 static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
494 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
495 "savms64",
496 "resms64",
497 "resms64x",
498 "savms64f",
499 "resms64f",
500 "resms64fx"
503 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
504 /* The below offset values are where each register is stored for the layout
505 relative to incoming stack pointer. The value of each m_regs[].offset will
506 be relative to the incoming base pointer (rax or rsi) used by the stub.
508 s_instances: 0 1 2 3
509 Offset: realigned or aligned + 8
510 Register aligned aligned + 8 aligned w/HFP w/HFP */
511 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
512 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
513 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
514 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
515 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
516 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
517 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
518 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
519 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
520 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
521 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
522 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
523 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
524 BP_REG, /* 0xc0 0xc8 N/A N/A */
525 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
526 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
527 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
528 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
531 /* Instantiate static const values. */
532 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
533 const unsigned xlogue_layout::MIN_REGS;
534 const unsigned xlogue_layout::MAX_REGS;
535 const unsigned xlogue_layout::MAX_EXTRA_REGS;
536 const unsigned xlogue_layout::VARIANT_COUNT;
537 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
539 /* Initialize xlogue_layout::s_stub_names to zero. */
540 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
541 [STUB_NAME_MAX_LEN];
543 /* Instantiates all xlogue_layout instances. */
544 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
545 xlogue_layout (0, false),
546 xlogue_layout (8, false),
547 xlogue_layout (0, true),
548 xlogue_layout (8, true)
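/* Editorial note: these instances are indexed by enum xlogue_stub_sets, so
   XLOGUE_SET_ALIGNED is (0, false), XLOGUE_SET_ALIGNED_PLUS_8 is (8, false),
   XLOGUE_SET_HFP_ALIGNED_OR_REALIGN is (0, true) and
   XLOGUE_SET_HFP_ALIGNED_PLUS_8 is (8, true).  */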
551 /* Return an appropriate const instance of xlogue_layout based upon values
552 in cfun->machine and crtl. */
553 const struct xlogue_layout &
554 xlogue_layout::get_instance ()
556 enum xlogue_stub_sets stub_set;
557 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
559 if (stack_realign_fp)
560 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
561 else if (frame_pointer_needed)
562 stub_set = aligned_plus_8
563 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
564 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
565 else
566 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
568 return s_instances[stub_set];
571 /* Determine how many clobbered registers can be saved by the stub.
572 Returns the count of registers the stub will save and restore. */
573 unsigned
574 xlogue_layout::count_stub_managed_regs ()
576 bool hfp = frame_pointer_needed || stack_realign_fp;
577 unsigned i, count;
578 unsigned regno;
580 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
582 regno = REG_ORDER[i];
583 if (regno == BP_REG && hfp)
584 continue;
585 if (!ix86_save_reg (regno, false, false))
586 break;
587 ++count;
589 return count;
592 /* Determine if register REGNO is a stub managed register given the
593 total COUNT of stub managed registers. */
594 bool
595 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
597 bool hfp = frame_pointer_needed || stack_realign_fp;
598 unsigned i;
600 for (i = 0; i < count; ++i)
602 gcc_assert (i < MAX_REGS);
603 if (REG_ORDER[i] == BP_REG && hfp)
604 ++count;
605 else if (REG_ORDER[i] == regno)
606 return true;
608 return false;
611 /* Constructor for xlogue_layout. */
612 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
613 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
614 m_stack_align_off_in (stack_align_off_in)
616 HOST_WIDE_INT offset = stack_align_off_in;
617 unsigned i, j;
619 for (i = j = 0; i < MAX_REGS; ++i)
621 unsigned regno = REG_ORDER[i];
623 if (regno == BP_REG && hfp)
624 continue;
625 if (SSE_REGNO_P (regno))
627 offset += 16;
628 /* Verify that SSE regs are always aligned. */
629 gcc_assert (!((stack_align_off_in + offset) & 15));
631 else
632 offset += 8;
634 m_regs[j].regno = regno;
635 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
637 gcc_assert (j == m_nregs);
640 const char *
641 xlogue_layout::get_stub_name (enum xlogue_stub stub,
642 unsigned n_extra_regs)
644 const int have_avx = TARGET_AVX;
645 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
647 /* Lazy init */
648 if (!*name)
650 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
651 (have_avx ? "avx" : "sse"),
652 STUB_BASE_NAMES[stub],
653 MIN_REGS + n_extra_regs);
654 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
657 return name;
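/* Illustrative examples of the generated stub names: with AVX enabled and
   no extra registers the save stub is "__avx_savms64_12" (MIN_REGS is 12);
   a non-AVX tail-call restore with all six extra registers would be
   "__sse_resms64x_18".  */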
660 /* Return rtx of a symbol ref for the entry point (based upon
661 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
663 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
665 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
666 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
667 gcc_assert (stub < XLOGUE_STUB_COUNT);
668 gcc_assert (crtl->stack_realign_finalized);
670 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
673 /* Define the structure for the machine field in struct function. */
675 struct GTY(()) stack_local_entry {
676 unsigned short mode;
677 unsigned short n;
678 rtx rtl;
679 struct stack_local_entry *next;
682 /* Which cpu are we scheduling for. */
683 enum attr_cpu ix86_schedule;
685 /* Which cpu are we optimizing for. */
686 enum processor_type ix86_tune;
688 /* Which instruction set architecture to use. */
689 enum processor_type ix86_arch;
691 /* True if processor has SSE prefetch instruction. */
692 unsigned char x86_prefetch_sse;
694 /* -mstackrealign option */
695 static const char ix86_force_align_arg_pointer_string[]
696 = "force_align_arg_pointer";
698 static rtx (*ix86_gen_leave) (void);
699 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
700 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
701 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
702 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
703 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
704 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
705 static rtx (*ix86_gen_clzero) (rtx);
706 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
707 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
708 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
709 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
710 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
711 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
713 /* Preferred alignment for stack boundary in bits. */
714 unsigned int ix86_preferred_stack_boundary;
716 /* Alignment for incoming stack boundary in bits specified at
717 command line. */
718 static unsigned int ix86_user_incoming_stack_boundary;
720 /* Default alignment for incoming stack boundary in bits. */
721 static unsigned int ix86_default_incoming_stack_boundary;
723 /* Alignment for incoming stack boundary in bits. */
724 unsigned int ix86_incoming_stack_boundary;
726 /* Calling abi specific va_list type nodes. */
727 static GTY(()) tree sysv_va_list_type_node;
728 static GTY(()) tree ms_va_list_type_node;
730 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
731 char internal_label_prefix[16];
732 int internal_label_prefix_len;
734 /* Fence to use after loop using movnt. */
735 tree x86_mfence;
737 /* Register class used for passing given 64bit part of the argument.
738 These represent classes as documented by the PS ABI, with the exception
739 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
740 use SF or DFmode move instead of DImode to avoid reformatting penalties.
742 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
743 whenever possible (upper half does contain padding). */
744 enum x86_64_reg_class
746 X86_64_NO_CLASS,
747 X86_64_INTEGER_CLASS,
748 X86_64_INTEGERSI_CLASS,
749 X86_64_SSE_CLASS,
750 X86_64_SSESF_CLASS,
751 X86_64_SSEDF_CLASS,
752 X86_64_SSEUP_CLASS,
753 X86_64_X87_CLASS,
754 X86_64_X87UP_CLASS,
755 X86_64_COMPLEX_X87_CLASS,
756 X86_64_MEMORY_CLASS
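/* For illustration (not exhaustive): a 32-bit integer argument classifies
   as INTEGERSI, a 64-bit integer as INTEGER, a "float" as SSESF and a
   "double" as SSEDF; the SF/DF variants only affect which move mode gcc
   uses, as described above.  */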
759 #define MAX_CLASSES 8
761 /* Table of constants used by fldpi, fldln2, etc.... */
762 static REAL_VALUE_TYPE ext_80387_constants_table [5];
763 static bool ext_80387_constants_init;
766 static struct machine_function * ix86_init_machine_status (void);
767 static rtx ix86_function_value (const_tree, const_tree, bool);
768 static bool ix86_function_value_regno_p (const unsigned int);
769 static unsigned int ix86_function_arg_boundary (machine_mode,
770 const_tree);
771 static rtx ix86_static_chain (const_tree, bool);
772 static int ix86_function_regparm (const_tree, const_tree);
773 static void ix86_compute_frame_layout (void);
774 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
775 rtx, rtx, int);
776 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
777 static tree ix86_canonical_va_list_type (tree);
778 static void predict_jump (int);
779 static unsigned int split_stack_prologue_scratch_regno (void);
780 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
782 enum ix86_function_specific_strings
784 IX86_FUNCTION_SPECIFIC_ARCH,
785 IX86_FUNCTION_SPECIFIC_TUNE,
786 IX86_FUNCTION_SPECIFIC_MAX
789 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
790 const char *, const char *, enum fpmath_unit,
791 bool);
792 static void ix86_function_specific_save (struct cl_target_option *,
793 struct gcc_options *opts);
794 static void ix86_function_specific_restore (struct gcc_options *opts,
795 struct cl_target_option *);
796 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
797 static void ix86_function_specific_print (FILE *, int,
798 struct cl_target_option *);
799 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
800 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
801 struct gcc_options *,
802 struct gcc_options *,
803 struct gcc_options *);
804 static bool ix86_can_inline_p (tree, tree);
805 static void ix86_set_current_function (tree);
806 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
808 static enum calling_abi ix86_function_abi (const_tree);
811 #ifndef SUBTARGET32_DEFAULT_CPU
812 #define SUBTARGET32_DEFAULT_CPU "i386"
813 #endif
815 /* Whether -mtune= or -march= were specified */
816 static int ix86_tune_defaulted;
817 static int ix86_arch_specified;
819 /* Vectorization library interface and handlers. */
820 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
822 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
823 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
825 /* Processor target table, indexed by processor number */
826 struct ptt
828 const char *const name; /* processor name */
829 const struct processor_costs *cost; /* Processor costs */
830 const int align_loop; /* Default alignments. */
831 const int align_loop_max_skip;
832 const int align_jump;
833 const int align_jump_max_skip;
834 const int align_func;
837 /* This table must be in sync with enum processor_type in i386.h. */
838 static const struct ptt processor_target_table[PROCESSOR_max] =
840 {"generic", &generic_cost, 16, 10, 16, 10, 16},
841 {"i386", &i386_cost, 4, 3, 4, 3, 4},
842 {"i486", &i486_cost, 16, 15, 16, 15, 16},
843 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
844 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
845 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
846 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
847 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
848 {"core2", &core_cost, 16, 10, 16, 10, 16},
849 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
850 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
851 {"haswell", &core_cost, 16, 10, 16, 10, 16},
852 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
853 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
854 {"knl", &slm_cost, 16, 15, 16, 7, 16},
855 {"knm", &slm_cost, 16, 15, 16, 7, 16},
856 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
857 {"intel", &intel_cost, 16, 15, 16, 7, 16},
858 {"geode", &geode_cost, 0, 0, 0, 0, 0},
859 {"k6", &k6_cost, 32, 7, 32, 7, 32},
860 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
861 {"k8", &k8_cost, 16, 7, 16, 7, 16},
862 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
863 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
864 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
865 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
866 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
867 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
868 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
869 {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
872 static unsigned int
873 rest_of_handle_insert_vzeroupper (void)
875 int i;
877 /* vzeroupper instructions are inserted immediately after reload to
878 account for possible spills from 256bit or 512bit registers. The pass
 879 reuses the mode switching infrastructure by re-running the mode
 880 insertion pass, so disable entities that have already been processed. */
881 for (i = 0; i < MAX_386_ENTITIES; i++)
882 ix86_optimize_mode_switching[i] = 0;
884 ix86_optimize_mode_switching[AVX_U128] = 1;
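/* Editorial note: only the AVX_U128 entity is left enabled here, so the
   re-run of the mode switching pass tracks just the upper-128-bit state of
   the vector registers and places vzeroupper insns where that state has to
   become clean again (typically around calls and returns).  */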
886 /* Call optimize_mode_switching. */
887 g->get_passes ()->execute_pass_mode_switching ();
888 return 0;
891 /* Return 1 if INSN uses or defines a hard register.
892 Hard register uses in a memory address are ignored.
893 Clobbers and flags definitions are ignored. */
895 static bool
896 has_non_address_hard_reg (rtx_insn *insn)
898 df_ref ref;
899 FOR_EACH_INSN_DEF (ref, insn)
900 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
901 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
902 && DF_REF_REGNO (ref) != FLAGS_REG)
903 return true;
905 FOR_EACH_INSN_USE (ref, insn)
906 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
907 return true;
909 return false;
 912 /* Check if comparison INSN may be transformed
 913 into a vector comparison. Currently we transform
 914 only zero checks, which look like:
916 (set (reg:CCZ 17 flags)
917 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
918 (subreg:SI (reg:DI x) 0))
919 (const_int 0 [0]))) */
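/* Editorial note: in source terms this is a 64-bit zero test on a 32-bit
   target; the two SImode subregs are the low (byte 0) and high (byte 4)
   halves of the DImode register, or-ed together and compared with zero.  */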
921 static bool
922 convertible_comparison_p (rtx_insn *insn)
924 if (!TARGET_SSE4_1)
925 return false;
927 rtx def_set = single_set (insn);
929 gcc_assert (def_set);
931 rtx src = SET_SRC (def_set);
932 rtx dst = SET_DEST (def_set);
934 gcc_assert (GET_CODE (src) == COMPARE);
936 if (GET_CODE (dst) != REG
937 || REGNO (dst) != FLAGS_REG
938 || GET_MODE (dst) != CCZmode)
939 return false;
941 rtx op1 = XEXP (src, 0);
942 rtx op2 = XEXP (src, 1);
944 if (op2 != CONST0_RTX (GET_MODE (op2)))
945 return false;
947 if (GET_CODE (op1) != IOR)
948 return false;
950 op2 = XEXP (op1, 1);
951 op1 = XEXP (op1, 0);
953 if (!SUBREG_P (op1)
954 || !SUBREG_P (op2)
955 || GET_MODE (op1) != SImode
956 || GET_MODE (op2) != SImode
957 || ((SUBREG_BYTE (op1) != 0
958 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
959 && (SUBREG_BYTE (op2) != 0
960 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
961 return false;
963 op1 = SUBREG_REG (op1);
964 op2 = SUBREG_REG (op2);
966 if (op1 != op2
967 || !REG_P (op1)
968 || GET_MODE (op1) != DImode)
969 return false;
971 return true;
974 /* The DImode version of scalar_to_vector_candidate_p. */
976 static bool
977 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
979 rtx def_set = single_set (insn);
981 if (!def_set)
982 return false;
984 if (has_non_address_hard_reg (insn))
985 return false;
987 rtx src = SET_SRC (def_set);
988 rtx dst = SET_DEST (def_set);
990 if (GET_CODE (src) == COMPARE)
991 return convertible_comparison_p (insn);
993 /* We are interested in DImode promotion only. */
994 if ((GET_MODE (src) != DImode
995 && !CONST_INT_P (src))
996 || GET_MODE (dst) != DImode)
997 return false;
999 if (!REG_P (dst) && !MEM_P (dst))
1000 return false;
1002 switch (GET_CODE (src))
1004 case ASHIFTRT:
1005 if (!TARGET_AVX512VL)
1006 return false;
1007 /* FALLTHRU */
1009 case ASHIFT:
1010 case LSHIFTRT:
1011 if (!REG_P (XEXP (src, 1))
1012 && (!SUBREG_P (XEXP (src, 1))
1013 || SUBREG_BYTE (XEXP (src, 1)) != 0
1014 || !REG_P (SUBREG_REG (XEXP (src, 1))))
1015 && (!CONST_INT_P (XEXP (src, 1))
1016 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1017 return false;
1019 if (GET_MODE (XEXP (src, 1)) != QImode
1020 && !CONST_INT_P (XEXP (src, 1)))
1021 return false;
1022 break;
1024 case PLUS:
1025 case MINUS:
1026 case IOR:
1027 case XOR:
1028 case AND:
1029 if (!REG_P (XEXP (src, 1))
1030 && !MEM_P (XEXP (src, 1))
1031 && !CONST_INT_P (XEXP (src, 1)))
1032 return false;
1034 if (GET_MODE (XEXP (src, 1)) != DImode
1035 && !CONST_INT_P (XEXP (src, 1)))
1036 return false;
1037 break;
1039 case NEG:
1040 case NOT:
1041 break;
1043 case REG:
1044 return true;
1046 case MEM:
1047 case CONST_INT:
1048 return REG_P (dst);
1050 default:
1051 return false;
1054 if (!REG_P (XEXP (src, 0))
1055 && !MEM_P (XEXP (src, 0))
1056 && !CONST_INT_P (XEXP (src, 0))
1057 /* Check for andnot case. */
1058 && (GET_CODE (src) != AND
1059 || GET_CODE (XEXP (src, 0)) != NOT
1060 || !REG_P (XEXP (XEXP (src, 0), 0))))
1061 return false;
1063 if (GET_MODE (XEXP (src, 0)) != DImode
1064 && !CONST_INT_P (XEXP (src, 0)))
1065 return false;
1067 return true;
1070 /* The TImode version of scalar_to_vector_candidate_p. */
1072 static bool
1073 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1075 rtx def_set = single_set (insn);
1077 if (!def_set)
1078 return false;
1080 if (has_non_address_hard_reg (insn))
1081 return false;
1083 rtx src = SET_SRC (def_set);
1084 rtx dst = SET_DEST (def_set);
1086 /* Only TImode load and store are allowed. */
1087 if (GET_MODE (dst) != TImode)
1088 return false;
1090 if (MEM_P (dst))
 1092 /* Check for store. The memory must be aligned, or unaligned stores
 1093 must be optimal on the target. Only support a store from a register,
 1094 a standard SSE constant, or a CONST_WIDE_INT generated from a piecewise store.
1096 ??? Verify performance impact before enabling CONST_INT for
1097 __int128 store. */
1098 if (misaligned_operand (dst, TImode)
1099 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1100 return false;
1102 switch (GET_CODE (src))
1104 default:
1105 return false;
1107 case REG:
1108 case CONST_WIDE_INT:
1109 return true;
1111 case CONST_INT:
1112 return standard_sse_constant_p (src, TImode);
1115 else if (MEM_P (src))
 1117 /* Check for load. The memory must be aligned, or unaligned loads
 1118 must be optimal on the target. */
1119 return (REG_P (dst)
1120 && (!misaligned_operand (src, TImode)
1121 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1124 return false;
 1127 /* Return 1 if INSN may be converted into a vector
 1128 instruction. */
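/* Editorial note: the two variants below target different scalar widths:
   on 64-bit targets the pass looks for TImode (128-bit) loads and stores,
   while on 32-bit targets it looks for DImode (64-bit) operations that can
   be performed in vector registers instead.  */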
1130 static bool
1131 scalar_to_vector_candidate_p (rtx_insn *insn)
1133 if (TARGET_64BIT)
1134 return timode_scalar_to_vector_candidate_p (insn);
1135 else
1136 return dimode_scalar_to_vector_candidate_p (insn);
1139 /* The DImode version of remove_non_convertible_regs. */
1141 static void
1142 dimode_remove_non_convertible_regs (bitmap candidates)
1144 bitmap_iterator bi;
1145 unsigned id;
1146 bitmap regs = BITMAP_ALLOC (NULL);
1148 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1150 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1151 rtx reg = SET_DEST (def_set);
1153 if (!REG_P (reg)
1154 || bitmap_bit_p (regs, REGNO (reg))
1155 || HARD_REGISTER_P (reg))
1156 continue;
1158 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1159 def;
1160 def = DF_REF_NEXT_REG (def))
1162 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1164 if (dump_file)
1165 fprintf (dump_file,
1166 "r%d has non convertible definition in insn %d\n",
1167 REGNO (reg), DF_REF_INSN_UID (def));
1169 bitmap_set_bit (regs, REGNO (reg));
1170 break;
1175 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1177 for (df_ref def = DF_REG_DEF_CHAIN (id);
1178 def;
1179 def = DF_REF_NEXT_REG (def))
1180 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1182 if (dump_file)
1183 fprintf (dump_file, "Removing insn %d from candidates list\n",
1184 DF_REF_INSN_UID (def));
1186 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1190 BITMAP_FREE (regs);
1193 /* For a register REGNO, scan instructions for its defs and uses.
1194 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1196 static void
1197 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1198 unsigned int regno)
1200 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1201 def;
1202 def = DF_REF_NEXT_REG (def))
1204 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1206 if (dump_file)
1207 fprintf (dump_file,
1208 "r%d has non convertible def in insn %d\n",
1209 regno, DF_REF_INSN_UID (def));
1211 bitmap_set_bit (regs, regno);
1212 break;
1216 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1217 ref;
1218 ref = DF_REF_NEXT_REG (ref))
1220 /* Debug instructions are skipped. */
1221 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1222 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1224 if (dump_file)
1225 fprintf (dump_file,
1226 "r%d has non convertible use in insn %d\n",
1227 regno, DF_REF_INSN_UID (ref));
1229 bitmap_set_bit (regs, regno);
1230 break;
1235 /* The TImode version of remove_non_convertible_regs. */
1237 static void
1238 timode_remove_non_convertible_regs (bitmap candidates)
1240 bitmap_iterator bi;
1241 unsigned id;
1242 bitmap regs = BITMAP_ALLOC (NULL);
1244 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1246 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1247 rtx dest = SET_DEST (def_set);
1248 rtx src = SET_SRC (def_set);
1250 if ((!REG_P (dest)
1251 || bitmap_bit_p (regs, REGNO (dest))
1252 || HARD_REGISTER_P (dest))
1253 && (!REG_P (src)
1254 || bitmap_bit_p (regs, REGNO (src))
1255 || HARD_REGISTER_P (src)))
1256 continue;
1258 if (REG_P (dest))
1259 timode_check_non_convertible_regs (candidates, regs,
1260 REGNO (dest));
1262 if (REG_P (src))
1263 timode_check_non_convertible_regs (candidates, regs,
1264 REGNO (src));
1267 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1269 for (df_ref def = DF_REG_DEF_CHAIN (id);
1270 def;
1271 def = DF_REF_NEXT_REG (def))
1272 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1274 if (dump_file)
1275 fprintf (dump_file, "Removing insn %d from candidates list\n",
1276 DF_REF_INSN_UID (def));
1278 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1281 for (df_ref ref = DF_REG_USE_CHAIN (id);
1282 ref;
1283 ref = DF_REF_NEXT_REG (ref))
1284 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1286 if (dump_file)
1287 fprintf (dump_file, "Removing insn %d from candidates list\n",
1288 DF_REF_INSN_UID (ref));
1290 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1294 BITMAP_FREE (regs);
 1297 /* For a given bitmap of insn UIDs, scan all instructions and
 1298 remove an insn from CANDIDATES if it has both convertible
 1299 and non-convertible definitions.
1301 All insns in a bitmap are conversion candidates according to
1302 scalar_to_vector_candidate_p. Currently it implies all insns
1303 are single_set. */
1305 static void
1306 remove_non_convertible_regs (bitmap candidates)
1308 if (TARGET_64BIT)
1309 timode_remove_non_convertible_regs (candidates);
1310 else
1311 dimode_remove_non_convertible_regs (candidates);
1314 class scalar_chain
1316 public:
1317 scalar_chain ();
1318 virtual ~scalar_chain ();
1320 static unsigned max_id;
1322 /* ID of a chain. */
1323 unsigned int chain_id;
1324 /* A queue of instructions to be included into a chain. */
1325 bitmap queue;
1326 /* Instructions included into a chain. */
1327 bitmap insns;
1328 /* All registers defined by a chain. */
1329 bitmap defs;
 1330 /* Registers used in both vector and scalar modes. */
1331 bitmap defs_conv;
1333 void build (bitmap candidates, unsigned insn_uid);
1334 virtual int compute_convert_gain () = 0;
1335 int convert ();
1337 protected:
1338 void add_to_queue (unsigned insn_uid);
1339 void emit_conversion_insns (rtx insns, rtx_insn *pos);
1341 private:
1342 void add_insn (bitmap candidates, unsigned insn_uid);
1343 void analyze_register_chain (bitmap candidates, df_ref ref);
1344 virtual void mark_dual_mode_def (df_ref def) = 0;
1345 virtual void convert_insn (rtx_insn *insn) = 0;
1346 virtual void convert_registers () = 0;
1349 class dimode_scalar_chain : public scalar_chain
1351 public:
1352 int compute_convert_gain ();
1353 private:
1354 void mark_dual_mode_def (df_ref def);
1355 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1356 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1357 void convert_insn (rtx_insn *insn);
1358 void convert_op (rtx *op, rtx_insn *insn);
1359 void convert_reg (unsigned regno);
1360 void make_vector_copies (unsigned regno);
1361 void convert_registers ();
1362 int vector_const_cost (rtx exp);
1365 class timode_scalar_chain : public scalar_chain
1367 public:
 1368 /* Converting from TImode to V1TImode is always faster. */
1369 int compute_convert_gain () { return 1; }
1371 private:
1372 void mark_dual_mode_def (df_ref def);
1373 void fix_debug_reg_uses (rtx reg);
1374 void convert_insn (rtx_insn *insn);
 1375 /* We don't convert registers to a different size. */
1376 void convert_registers () {}
1379 unsigned scalar_chain::max_id = 0;
1381 /* Initialize new chain. */
1383 scalar_chain::scalar_chain ()
1385 chain_id = ++max_id;
1387 if (dump_file)
1388 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1390 bitmap_obstack_initialize (NULL);
1391 insns = BITMAP_ALLOC (NULL);
1392 defs = BITMAP_ALLOC (NULL);
1393 defs_conv = BITMAP_ALLOC (NULL);
1394 queue = NULL;
1397 /* Free chain's data. */
1399 scalar_chain::~scalar_chain ()
1401 BITMAP_FREE (insns);
1402 BITMAP_FREE (defs);
1403 BITMAP_FREE (defs_conv);
1404 bitmap_obstack_release (NULL);
1407 /* Add instruction into chains' queue. */
1409 void
1410 scalar_chain::add_to_queue (unsigned insn_uid)
1412 if (bitmap_bit_p (insns, insn_uid)
1413 || bitmap_bit_p (queue, insn_uid))
1414 return;
1416 if (dump_file)
1417 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
1418 insn_uid, chain_id);
1419 bitmap_set_bit (queue, insn_uid);
1422 /* For DImode conversion, mark register defined by DEF as requiring
1423 conversion. */
1425 void
1426 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1428 gcc_assert (DF_REF_REG_DEF_P (def));
1430 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1431 return;
1433 if (dump_file)
1434 fprintf (dump_file,
1435 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1436 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1438 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1441 /* For TImode conversion, it is unused. */
1443 void
1444 timode_scalar_chain::mark_dual_mode_def (df_ref)
1446 gcc_unreachable ();
1449 /* Check REF's chain to add new insns into a queue
1450 and find registers requiring conversion. */
1452 void
1453 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1455 df_link *chain;
1457 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1458 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1459 add_to_queue (DF_REF_INSN_UID (ref));
1461 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1463 unsigned uid = DF_REF_INSN_UID (chain->ref);
1465 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1466 continue;
1468 if (!DF_REF_REG_MEM_P (chain->ref))
1470 if (bitmap_bit_p (insns, uid))
1471 continue;
1473 if (bitmap_bit_p (candidates, uid))
1475 add_to_queue (uid);
1476 continue;
1480 if (DF_REF_REG_DEF_P (chain->ref))
1482 if (dump_file)
1483 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
1484 DF_REF_REGNO (chain->ref), uid);
1485 mark_dual_mode_def (chain->ref);
1487 else
1489 if (dump_file)
1490 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
1491 DF_REF_REGNO (chain->ref), uid);
1492 mark_dual_mode_def (ref);
1497 /* Add instruction into a chain. */
1499 void
1500 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1502 if (bitmap_bit_p (insns, insn_uid))
1503 return;
1505 if (dump_file)
1506 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
1508 bitmap_set_bit (insns, insn_uid);
1510 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1511 rtx def_set = single_set (insn);
1512 if (def_set && REG_P (SET_DEST (def_set))
1513 && !HARD_REGISTER_P (SET_DEST (def_set)))
1514 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1516 df_ref ref;
1517 df_ref def;
1518 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1519 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1520 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1521 def;
1522 def = DF_REF_NEXT_REG (def))
1523 analyze_register_chain (candidates, def);
1524 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1525 if (!DF_REF_REG_MEM_P (ref))
1526 analyze_register_chain (candidates, ref);
1529 /* Build new chain starting from insn INSN_UID recursively
1530 adding all dependent uses and definitions. */
1532 void
1533 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1535 queue = BITMAP_ALLOC (NULL);
1536 bitmap_set_bit (queue, insn_uid);
1538 if (dump_file)
1539 fprintf (dump_file, "Building chain #%d...\n", chain_id);
1541 while (!bitmap_empty_p (queue))
1543 insn_uid = bitmap_first_set_bit (queue);
1544 bitmap_clear_bit (queue, insn_uid);
1545 bitmap_clear_bit (candidates, insn_uid);
1546 add_insn (candidates, insn_uid);
1549 if (dump_file)
1551 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1552 fprintf (dump_file, " insns: ");
1553 dump_bitmap (dump_file, insns);
1554 if (!bitmap_empty_p (defs_conv))
1556 bitmap_iterator bi;
1557 unsigned id;
1558 const char *comma = "";
1559 fprintf (dump_file, " defs to convert: ");
1560 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1562 fprintf (dump_file, "%sr%d", comma, id);
1563 comma = ", ";
1565 fprintf (dump_file, "\n");
1569 BITMAP_FREE (queue);
 1572 /* Return the cost of building a vector constant
1573 instead of using a scalar one. */
1576 dimode_scalar_chain::vector_const_cost (rtx exp)
1578 gcc_assert (CONST_INT_P (exp));
1580 if (standard_sse_constant_p (exp, V2DImode))
1581 return COSTS_N_INSNS (1);
1582 return ix86_cost->sse_load[1];
1585 /* Compute a gain for chain conversion. */
1588 dimode_scalar_chain::compute_convert_gain ()
1590 bitmap_iterator bi;
1591 unsigned insn_uid;
1592 int gain = 0;
1593 int cost = 0;
1595 if (dump_file)
1596 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1598 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1600 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1601 rtx def_set = single_set (insn);
1602 rtx src = SET_SRC (def_set);
1603 rtx dst = SET_DEST (def_set);
1605 if (REG_P (src) && REG_P (dst))
1606 gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
1607 else if (REG_P (src) && MEM_P (dst))
1608 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1609 else if (MEM_P (src) && REG_P (dst))
1610 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1611 else if (GET_CODE (src) == ASHIFT
1612 || GET_CODE (src) == ASHIFTRT
1613 || GET_CODE (src) == LSHIFTRT)
1615 if (CONST_INT_P (XEXP (src, 0)))
1616 gain -= vector_const_cost (XEXP (src, 0));
1617 if (CONST_INT_P (XEXP (src, 1)))
1619 gain += ix86_cost->shift_const;
1620 if (INTVAL (XEXP (src, 1)) >= 32)
1621 gain -= COSTS_N_INSNS (1);
1623 else
1624 /* Additional gain for omitting two CMOVs. */
1625 gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1627 else if (GET_CODE (src) == PLUS
1628 || GET_CODE (src) == MINUS
1629 || GET_CODE (src) == IOR
1630 || GET_CODE (src) == XOR
1631 || GET_CODE (src) == AND)
1633 gain += ix86_cost->add;
1634 /* Additional gain for andnot for targets without BMI. */
1635 if (GET_CODE (XEXP (src, 0)) == NOT
1636 && !TARGET_BMI)
1637 gain += 2 * ix86_cost->add;
1639 if (CONST_INT_P (XEXP (src, 0)))
1640 gain -= vector_const_cost (XEXP (src, 0));
1641 if (CONST_INT_P (XEXP (src, 1)))
1642 gain -= vector_const_cost (XEXP (src, 1));
1644 else if (GET_CODE (src) == NEG
1645 || GET_CODE (src) == NOT)
1646 gain += ix86_cost->add - COSTS_N_INSNS (1);
1647 else if (GET_CODE (src) == COMPARE)
1649 /* Assume comparison cost is the same. */
1651 else if (CONST_INT_P (src))
1653 if (REG_P (dst))
1654 gain += COSTS_N_INSNS (2);
1655 else if (MEM_P (dst))
1656 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1657 gain -= vector_const_cost (src);
1659 else
1660 gcc_unreachable ();
1663 if (dump_file)
1664 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
1666 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1667 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1669 if (dump_file)
1670 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
1672 gain -= cost;
1674 if (dump_file)
1675 fprintf (dump_file, " Total gain: %d\n", gain);
1677 return gain;
1680 /* Replace REG in X with a V2DI subreg of NEW_REG. */
1683 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1685 if (x == reg)
1686 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1688 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1689 int i, j;
1690 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1692 if (fmt[i] == 'e')
1693 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1694 else if (fmt[i] == 'E')
1695 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1696 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1697 reg, new_reg);
1700 return x;
1703 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
1705 void
1706 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1707 rtx reg, rtx new_reg)
1709 replace_with_subreg (single_set (insn), reg, new_reg);
1712 /* Insert generated conversion instruction sequence INSNS
 1713 after instruction AFTER. A new BB may be required in case the
 1714 instruction has an EH region attached. */
1716 void
1717 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1719 if (!control_flow_insn_p (after))
1721 emit_insn_after (insns, after);
1722 return;
1725 basic_block bb = BLOCK_FOR_INSN (after);
1726 edge e = find_fallthru_edge (bb->succs);
1727 gcc_assert (e);
1729 basic_block new_bb = split_edge (e);
1730 emit_insn_after (insns, BB_HEAD (new_bb));
1733 /* Make vector copies for all register REGNO definitions
 1734 and replace their uses in a chain. */
1736 void
1737 dimode_scalar_chain::make_vector_copies (unsigned regno)
1739 rtx reg = regno_reg_rtx[regno];
1740 rtx vreg = gen_reg_rtx (DImode);
1741 bool count_reg = false;
1742 df_ref ref;
1744 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1745 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1747 df_ref use;
1749 /* Detect the count register of a shift instruction. */
1750 for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1751 if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1753 rtx_insn *insn = DF_REF_INSN (use);
1754 rtx def_set = single_set (insn);
1756 gcc_assert (def_set);
1758 rtx src = SET_SRC (def_set);
1760 if ((GET_CODE (src) == ASHIFT
1761 || GET_CODE (src) == ASHIFTRT
1762 || GET_CODE (src) == LSHIFTRT)
1763 && !CONST_INT_P (XEXP (src, 1))
1764 && reg_or_subregno (XEXP (src, 1)) == regno)
1765 count_reg = true;
1768 start_sequence ();
1769 if (count_reg)
1771 rtx qreg = gen_lowpart (QImode, reg);
1772 rtx tmp = gen_reg_rtx (SImode);
1774 if (TARGET_ZERO_EXTEND_WITH_AND
1775 && optimize_function_for_speed_p (cfun))
1777 emit_move_insn (tmp, const0_rtx);
1778 emit_insn (gen_movstrictqi
1779 (gen_lowpart (QImode, tmp), qreg));
1781 else
1782 emit_insn (gen_rtx_SET
1783 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1785 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1787 rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1788 emit_move_insn (slot, tmp);
1789 tmp = copy_rtx (slot);
1792 emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1794 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1796 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1797 emit_move_insn (adjust_address (tmp, SImode, 0),
1798 gen_rtx_SUBREG (SImode, reg, 0));
1799 emit_move_insn (adjust_address (tmp, SImode, 4),
1800 gen_rtx_SUBREG (SImode, reg, 4));
1801 emit_move_insn (vreg, tmp);
1803 else if (TARGET_SSE4_1)
1805 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1806 CONST0_RTX (V4SImode),
1807 gen_rtx_SUBREG (SImode, reg, 0)));
1808 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1809 gen_rtx_SUBREG (V4SImode, vreg, 0),
1810 gen_rtx_SUBREG (SImode, reg, 4),
1811 GEN_INT (2)));
1813 else
1815 rtx tmp = gen_reg_rtx (DImode);
1816 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1817 CONST0_RTX (V4SImode),
1818 gen_rtx_SUBREG (SImode, reg, 0)));
1819 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1820 CONST0_RTX (V4SImode),
1821 gen_rtx_SUBREG (SImode, reg, 4)));
1822 emit_insn (gen_vec_interleave_lowv4si
1823 (gen_rtx_SUBREG (V4SImode, vreg, 0),
1824 gen_rtx_SUBREG (V4SImode, vreg, 0),
1825 gen_rtx_SUBREG (V4SImode, tmp, 0)));
1827 rtx_insn *seq = get_insns ();
1828 end_sequence ();
1829 rtx_insn *insn = DF_REF_INSN (ref);
1830 emit_conversion_insns (seq, insn);
1832 if (dump_file)
1833 fprintf (dump_file,
1834 " Copied r%d to a vector register r%d for insn %d\n",
1835 regno, REGNO (vreg), INSN_UID (insn));
1838 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1839 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1841 rtx_insn *insn = DF_REF_INSN (ref);
1842 if (count_reg)
1844 rtx def_set = single_set (insn);
1845 gcc_assert (def_set);
1847 rtx src = SET_SRC (def_set);
1849 if ((GET_CODE (src) == ASHIFT
1850 || GET_CODE (src) == ASHIFTRT
1851 || GET_CODE (src) == LSHIFTRT)
1852 && !CONST_INT_P (XEXP (src, 1))
1853 && reg_or_subregno (XEXP (src, 1)) == regno)
1854 XEXP (src, 1) = vreg;
1856 else
1857 replace_with_subreg_in_insn (insn, reg, vreg);
1859 if (dump_file)
1860 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
1861 regno, REGNO (vreg), INSN_UID (insn));
1865 /* Convert all definitions of register REGNO
1866 and fix its uses. Scalar copies may be created
 1867 in case the register is used in a non-convertible insn. */
1869 void
1870 dimode_scalar_chain::convert_reg (unsigned regno)
1872 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1873 rtx reg = regno_reg_rtx[regno];
1874 rtx scopy = NULL_RTX;
1875 df_ref ref;
1876 bitmap conv;
1878 conv = BITMAP_ALLOC (NULL);
1879 bitmap_copy (conv, insns);
1881 if (scalar_copy)
1882 scopy = gen_reg_rtx (DImode);
1884 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1886 rtx_insn *insn = DF_REF_INSN (ref);
1887 rtx def_set = single_set (insn);
1888 rtx src = SET_SRC (def_set);
1889 rtx reg = DF_REF_REG (ref);
1891 if (!MEM_P (src))
1893 replace_with_subreg_in_insn (insn, reg, reg);
1894 bitmap_clear_bit (conv, INSN_UID (insn));
1897 if (scalar_copy)
1899 start_sequence ();
1900 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1902 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1903 emit_move_insn (tmp, reg);
1904 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1905 adjust_address (tmp, SImode, 0));
1906 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1907 adjust_address (tmp, SImode, 4));
1909 else if (TARGET_SSE4_1)
1911 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1912 emit_insn
1913 (gen_rtx_SET
1914 (gen_rtx_SUBREG (SImode, scopy, 0),
1915 gen_rtx_VEC_SELECT (SImode,
1916 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1918 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1919 emit_insn
1920 (gen_rtx_SET
1921 (gen_rtx_SUBREG (SImode, scopy, 4),
1922 gen_rtx_VEC_SELECT (SImode,
1923 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1925 else
1927 rtx vcopy = gen_reg_rtx (V2DImode);
1928 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1929 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1930 gen_rtx_SUBREG (SImode, vcopy, 0));
1931 emit_move_insn (vcopy,
1932 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1933 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1934 gen_rtx_SUBREG (SImode, vcopy, 0));
1936 rtx_insn *seq = get_insns ();
1937 end_sequence ();
1938 emit_conversion_insns (seq, insn);
1940 if (dump_file)
1941 fprintf (dump_file,
1942 " Copied r%d to a scalar register r%d for insn %d\n",
1943 regno, REGNO (scopy), INSN_UID (insn));
1947 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1948 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1950 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1952 rtx_insn *insn = DF_REF_INSN (ref);
1954 rtx def_set = single_set (insn);
1955 gcc_assert (def_set);
1957 rtx src = SET_SRC (def_set);
1958 rtx dst = SET_DEST (def_set);
1960 if ((GET_CODE (src) == ASHIFT
1961 || GET_CODE (src) == ASHIFTRT
1962 || GET_CODE (src) == LSHIFTRT)
1963 && !CONST_INT_P (XEXP (src, 1))
1964 && reg_or_subregno (XEXP (src, 1)) == regno)
1966 rtx tmp2 = gen_reg_rtx (V2DImode);
1968 start_sequence ();
1970 if (TARGET_SSE4_1)
1971 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1972 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1973 else
1975 rtx vec_cst
1976 = gen_rtx_CONST_VECTOR (V2DImode,
1977 gen_rtvec (2, GEN_INT (0xff),
1978 const0_rtx));
1979 vec_cst
1980 = validize_mem (force_const_mem (V2DImode, vec_cst));
1982 emit_insn (gen_rtx_SET
1983 (tmp2,
1984 gen_rtx_AND (V2DImode,
1985 gen_rtx_SUBREG (V2DImode, reg, 0),
1986 vec_cst)));
1988 rtx_insn *seq = get_insns ();
1989 end_sequence ();
1991 emit_insn_before (seq, insn);
1993 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
1995 else if (!MEM_P (dst) || !REG_P (src))
1996 replace_with_subreg_in_insn (insn, reg, reg);
1998 bitmap_clear_bit (conv, INSN_UID (insn));
2001 /* Skip debug insns and uninitialized uses. */
2002 else if (DF_REF_CHAIN (ref)
2003 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2005 gcc_assert (scopy);
2006 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2007 df_insn_rescan (DF_REF_INSN (ref));
2010 BITMAP_FREE (conv);
2013 /* Convert operand OP in INSN.  We should handle
2014 memory operands and uninitialized registers.
2015 All other register uses are converted during
2016 register conversion.  */
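/* In outline: a NOT is converted recursively and switched to V2DImode; a
   memory operand is preloaded into a fresh DImode register and replaced by
   a V2DImode subreg of it; a bare register use (only possible for an
   uninitialized register) is wrapped in a V2DImode subreg; a CONST_INT is
   turned into a V2DImode constant ({-1,-1} for -1, {op,0} otherwise), going
   through the constant pool when it is not a standard SSE constant.  */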
2018 void
2019 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2021 *op = copy_rtx_if_shared (*op);
2023 if (GET_CODE (*op) == NOT)
2025 convert_op (&XEXP (*op, 0), insn);
2026 PUT_MODE (*op, V2DImode);
2028 else if (MEM_P (*op))
2030 rtx tmp = gen_reg_rtx (DImode);
2032 emit_insn_before (gen_move_insn (tmp, *op), insn);
2033 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2035 if (dump_file)
2036 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2037 INSN_UID (insn), REGNO (tmp));
2039 else if (REG_P (*op))
2041 /* We may not have converted this register use in case
2042 the register has no definition.  Otherwise it
2043 should have been converted in convert_reg.  */
2044 df_ref ref;
2045 FOR_EACH_INSN_USE (ref, insn)
2046 if (DF_REF_REGNO (ref) == REGNO (*op))
2048 gcc_assert (!DF_REF_CHAIN (ref));
2049 break;
2051 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2053 else if (CONST_INT_P (*op))
2055 rtx vec_cst;
2056 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2058 /* Prefer all ones vector in case of -1. */
2059 if (constm1_operand (*op, GET_MODE (*op)))
2060 vec_cst = CONSTM1_RTX (V2DImode);
2061 else
2062 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2063 gen_rtvec (2, *op, const0_rtx));
2065 if (!standard_sse_constant_p (vec_cst, V2DImode))
2067 start_sequence ();
2068 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2069 rtx_insn *seq = get_insns ();
2070 end_sequence ();
2071 emit_insn_before (seq, insn);
2074 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2075 *op = tmp;
2077 else
2079 gcc_assert (SUBREG_P (*op));
2080 gcc_assert (GET_MODE (*op) == V2DImode);
2084 /* Convert INSN to vector mode. */
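/* In outline: the single SET of INSN is rewritten to compute in V2DImode.
   Shifts, arithmetic and logical operations have their operands converted
   and their mode changed; NEG and NOT are open-coded as a subtraction from
   zero and an XOR with all-ones; a comparison is turned into a ptest; plain
   register and memory moves only need their operands converted.  */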
2086 void
2087 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2089 rtx def_set = single_set (insn);
2090 rtx src = SET_SRC (def_set);
2091 rtx dst = SET_DEST (def_set);
2092 rtx subreg;
2094 if (MEM_P (dst) && !REG_P (src))
2096 /* Vector instructions cannot compute directly into a memory
2097 destination, so a temporary register is required.  */
2098 rtx tmp = gen_reg_rtx (DImode);
2099 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2100 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2103 switch (GET_CODE (src))
2105 case ASHIFT:
2106 case ASHIFTRT:
2107 case LSHIFTRT:
2108 convert_op (&XEXP (src, 0), insn);
2109 PUT_MODE (src, V2DImode);
2110 break;
2112 case PLUS:
2113 case MINUS:
2114 case IOR:
2115 case XOR:
2116 case AND:
2117 convert_op (&XEXP (src, 0), insn);
2118 convert_op (&XEXP (src, 1), insn);
2119 PUT_MODE (src, V2DImode);
2120 break;
2122 case NEG:
2123 src = XEXP (src, 0);
2124 convert_op (&src, insn);
2125 subreg = gen_reg_rtx (V2DImode);
2126 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2127 src = gen_rtx_MINUS (V2DImode, subreg, src);
2128 break;
2130 case NOT:
2131 src = XEXP (src, 0);
2132 convert_op (&src, insn);
2133 subreg = gen_reg_rtx (V2DImode);
2134 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2135 src = gen_rtx_XOR (V2DImode, src, subreg);
2136 break;
2138 case MEM:
2139 if (!REG_P (dst))
2140 convert_op (&src, insn);
2141 break;
2143 case REG:
2144 if (!MEM_P (dst))
2145 convert_op (&src, insn);
2146 break;
2148 case SUBREG:
2149 gcc_assert (GET_MODE (src) == V2DImode);
2150 break;
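/* A DImode comparison against zero: duplicate the low quadword with
   punpcklqdq so that both halves of the vector hold the value, then test
   the result against itself with ptest; ZF is then set exactly when the
   original DImode value is zero.  */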
2152 case COMPARE:
2153 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2155 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2156 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2158 if (REG_P (src))
2159 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2160 else
2161 subreg = copy_rtx_if_shared (src);
2162 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2163 copy_rtx_if_shared (subreg),
2164 copy_rtx_if_shared (subreg)),
2165 insn);
2166 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2167 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2168 copy_rtx_if_shared (src)),
2169 UNSPEC_PTEST);
2170 break;
2172 case CONST_INT:
2173 convert_op (&src, insn);
2174 break;
2176 default:
2177 gcc_unreachable ();
2180 SET_SRC (def_set) = src;
2181 SET_DEST (def_set) = dst;
2183 /* Drop possible dead definitions. */
2184 PATTERN (insn) = def_set;
2186 INSN_CODE (insn) = -1;
2187 recog_memoized (insn);
2188 df_insn_rescan (insn);
2191 /* Fix uses of converted REG in debug insns.  */
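/* Debug insns keep referring to the register in TImode after it has been
   switched to V1TImode; wrap such references in a TImode subreg so that
   variable tracking still works.  */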
2193 void
2194 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2196 if (!flag_var_tracking)
2197 return;
2199 df_ref ref, next;
2200 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2202 rtx_insn *insn = DF_REF_INSN (ref);
2203 /* Make sure the next ref is for a different instruction,
2204 so that we're not affected by the rescan. */
2205 next = DF_REF_NEXT_REG (ref);
2206 while (next && DF_REF_INSN (next) == insn)
2207 next = DF_REF_NEXT_REG (next);
2209 if (DEBUG_INSN_P (insn))
2211 /* It may be a debug insn with a TImode variable in
2212 a register.  */
2213 bool changed = false;
2214 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2216 rtx *loc = DF_REF_LOC (ref);
2217 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2219 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2220 changed = true;
2223 if (changed)
2224 df_insn_rescan (insn);
2229 /* Convert INSN from TImode to V1TImode.  */
2231 void
2232 timode_scalar_chain::convert_insn (rtx_insn *insn)
2234 rtx def_set = single_set (insn);
2235 rtx src = SET_SRC (def_set);
2236 rtx dst = SET_DEST (def_set);
2238 switch (GET_CODE (dst))
2240 case REG:
2242 rtx tmp = find_reg_equal_equiv_note (insn);
2243 if (tmp)
2244 PUT_MODE (XEXP (tmp, 0), V1TImode);
2245 PUT_MODE (dst, V1TImode);
2246 fix_debug_reg_uses (dst);
2248 break;
2249 case MEM:
2250 PUT_MODE (dst, V1TImode);
2251 break;
2253 default:
2254 gcc_unreachable ();
2257 switch (GET_CODE (src))
2259 case REG:
2260 PUT_MODE (src, V1TImode);
2261 /* Call fix_debug_reg_uses only if SRC is never defined. */
2262 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2263 fix_debug_reg_uses (src);
2264 break;
2266 case MEM:
2267 PUT_MODE (src, V1TImode);
2268 break;
2270 case CONST_WIDE_INT:
2271 if (NONDEBUG_INSN_P (insn))
2273 /* Since there are no instructions to store a 128-bit constant,
2274 temporary register usage is required.  */
2275 rtx tmp = gen_reg_rtx (V1TImode);
2276 start_sequence ();
2277 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2278 src = validize_mem (force_const_mem (V1TImode, src));
2279 rtx_insn *seq = get_insns ();
2280 end_sequence ();
2281 if (seq)
2282 emit_insn_before (seq, insn);
2283 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2284 dst = tmp;
2286 break;
2288 case CONST_INT:
2289 switch (standard_sse_constant_p (src, TImode))
2291 case 1:
2292 src = CONST0_RTX (GET_MODE (dst));
2293 break;
2294 case 2:
2295 src = CONSTM1_RTX (GET_MODE (dst));
2296 break;
2297 default:
2298 gcc_unreachable ();
2300 if (NONDEBUG_INSN_P (insn))
2302 rtx tmp = gen_reg_rtx (V1TImode);
2303 /* Since there are no instructions to store a standard SSE
2304 constant, temporary register usage is required.  */
2305 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2306 dst = tmp;
2308 break;
2310 default:
2311 gcc_unreachable ();
2314 SET_SRC (def_set) = src;
2315 SET_DEST (def_set) = dst;
2317 /* Drop possible dead definitions. */
2318 PATTERN (insn) = def_set;
2320 INSN_CODE (insn) = -1;
2321 recog_memoized (insn);
2322 df_insn_rescan (insn);
2325 void
2326 dimode_scalar_chain::convert_registers ()
2328 bitmap_iterator bi;
2329 unsigned id;
2331 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2332 convert_reg (id);
2334 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2335 make_vector_copies (id);
2338 /* Convert the whole chain, creating the required register
2339 conversions and copies.  */
2342 scalar_chain::convert ()
2344 bitmap_iterator bi;
2345 unsigned id;
2346 int converted_insns = 0;
2348 if (!dbg_cnt (stv_conversion))
2349 return 0;
2351 if (dump_file)
2352 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2354 convert_registers ();
2356 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2358 convert_insn (DF_INSN_UID_GET (id)->insn);
2359 converted_insns++;
2362 return converted_insns;
2365 /* Main STV pass function. Find and convert scalar
2366 instructions into vector mode when profitable. */
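/* In outline: candidate insns are collected per basic block, registers that
   would leak into non-convertible insns are pruned from the candidate set,
   and the remaining candidates are grouped into chains through their
   def-use chains.  Each chain is converted only when its estimated gain is
   positive.  */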
2368 static unsigned int
2369 convert_scalars_to_vector ()
2371 basic_block bb;
2372 bitmap candidates;
2373 int converted_insns = 0;
2375 bitmap_obstack_initialize (NULL);
2376 candidates = BITMAP_ALLOC (NULL);
2378 calculate_dominance_info (CDI_DOMINATORS);
2379 df_set_flags (DF_DEFER_INSN_RESCAN);
2380 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2381 df_md_add_problem ();
2382 df_analyze ();
2384 /* Find all instructions we want to convert into vector mode. */
2385 if (dump_file)
2386 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2388 FOR_EACH_BB_FN (bb, cfun)
2390 rtx_insn *insn;
2391 FOR_BB_INSNS (bb, insn)
2392 if (scalar_to_vector_candidate_p (insn))
2394 if (dump_file)
2395 fprintf (dump_file, " insn %d is marked as a candidate\n",
2396 INSN_UID (insn));
2398 bitmap_set_bit (candidates, INSN_UID (insn));
2402 remove_non_convertible_regs (candidates);
2404 if (bitmap_empty_p (candidates))
2405 if (dump_file)
2406 fprintf (dump_file, "There are no candidates for optimization.\n");
2408 while (!bitmap_empty_p (candidates))
2410 unsigned uid = bitmap_first_set_bit (candidates);
2411 scalar_chain *chain;
2413 if (TARGET_64BIT)
2414 chain = new timode_scalar_chain;
2415 else
2416 chain = new dimode_scalar_chain;
2418 /* Find instructions chain we want to convert to vector mode.
2419 Check all uses and definitions to estimate all required
2420 conversions. */
2421 chain->build (candidates, uid);
2423 if (chain->compute_convert_gain () > 0)
2424 converted_insns += chain->convert ();
2425 else
2426 if (dump_file)
2427 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2428 chain->chain_id);
2430 delete chain;
2433 if (dump_file)
2434 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2436 BITMAP_FREE (candidates);
2437 bitmap_obstack_release (NULL);
2438 df_process_deferred_rescans ();
2440 /* Conversion means we may have 128bit register spills/fills
2441 which require aligned stack. */
2442 if (converted_insns)
2444 if (crtl->stack_alignment_needed < 128)
2445 crtl->stack_alignment_needed = 128;
2446 if (crtl->stack_alignment_estimated < 128)
2447 crtl->stack_alignment_estimated = 128;
2448 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2449 if (TARGET_64BIT)
2450 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2451 parm; parm = DECL_CHAIN (parm))
2453 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2454 continue;
2455 if (DECL_RTL_SET_P (parm)
2456 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2458 rtx r = DECL_RTL (parm);
2459 if (REG_P (r))
2460 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2462 if (DECL_INCOMING_RTL (parm)
2463 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2465 rtx r = DECL_INCOMING_RTL (parm);
2466 if (REG_P (r))
2467 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2472 return 0;
2475 namespace {
2477 const pass_data pass_data_insert_vzeroupper =
2479 RTL_PASS, /* type */
2480 "vzeroupper", /* name */
2481 OPTGROUP_NONE, /* optinfo_flags */
2482 TV_MACH_DEP, /* tv_id */
2483 0, /* properties_required */
2484 0, /* properties_provided */
2485 0, /* properties_destroyed */
2486 0, /* todo_flags_start */
2487 TODO_df_finish, /* todo_flags_finish */
2490 class pass_insert_vzeroupper : public rtl_opt_pass
2492 public:
2493 pass_insert_vzeroupper(gcc::context *ctxt)
2494 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2497 /* opt_pass methods: */
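/* Run on every AVX target except those with AVX512ER (Xeon Phi), which
   presumably does not pay the SSE<->AVX transition penalty that
   vzeroupper is meant to avoid.  */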
2498 virtual bool gate (function *)
2500 return TARGET_AVX && !TARGET_AVX512ER
2501 && TARGET_VZEROUPPER && flag_expensive_optimizations
2502 && !optimize_size;
2505 virtual unsigned int execute (function *)
2507 return rest_of_handle_insert_vzeroupper ();
2510 }; // class pass_insert_vzeroupper
2512 const pass_data pass_data_stv =
2514 RTL_PASS, /* type */
2515 "stv", /* name */
2516 OPTGROUP_NONE, /* optinfo_flags */
2517 TV_MACH_DEP, /* tv_id */
2518 0, /* properties_required */
2519 0, /* properties_provided */
2520 0, /* properties_destroyed */
2521 0, /* todo_flags_start */
2522 TODO_df_finish, /* todo_flags_finish */
2525 class pass_stv : public rtl_opt_pass
2527 public:
2528 pass_stv (gcc::context *ctxt)
2529 : rtl_opt_pass (pass_data_stv, ctxt),
2530 timode_p (false)
2533 /* opt_pass methods: */
2534 virtual bool gate (function *)
2536 return (timode_p == !!TARGET_64BIT
2537 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2540 virtual unsigned int execute (function *)
2542 return convert_scalars_to_vector ();
2545 opt_pass *clone ()
2547 return new pass_stv (m_ctxt);
2550 void set_pass_param (unsigned int n, bool param)
2552 gcc_assert (n == 0);
2553 timode_p = param;
2556 private:
2557 bool timode_p;
2558 }; // class pass_stv
2560 } // anon namespace
2562 rtl_opt_pass *
2563 make_pass_insert_vzeroupper (gcc::context *ctxt)
2565 return new pass_insert_vzeroupper (ctxt);
2568 rtl_opt_pass *
2569 make_pass_stv (gcc::context *ctxt)
2571 return new pass_stv (ctxt);
2574 /* Inserting ENDBRANCH instructions. */
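/* An ENDBR is emitted at the start of every function that may be reached
   indirectly, after calls that can return more than once (setjmp-like
   functions), at the targets of switch jump tables when -mcet-switch is
   used, and after labels whose address is preserved or taken.  */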
2576 static unsigned int
2577 rest_of_insert_endbranch (void)
2579 timevar_push (TV_MACH_DEP);
2581 rtx cet_eb;
2582 rtx_insn *insn;
2583 basic_block bb;
2585 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
2586 absent among the function attributes.  Later an optimization will be
2587 introduced to analyze whether the address of a static function is
2588 taken.  A static function whose address is not taken will get a
2589 nocf_check attribute.  This will allow reducing the number of EBs.  */
2591 if (!lookup_attribute ("nocf_check",
2592 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2593 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2595 cet_eb = gen_nop_endbr ();
2597 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2598 insn = BB_HEAD (bb);
2599 emit_insn_before (cet_eb, insn);
2602 bb = 0;
2603 FOR_EACH_BB_FN (bb, cfun)
2605 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2606 insn = NEXT_INSN (insn))
2608 if (INSN_P (insn) && GET_CODE (insn) == CALL_INSN)
2610 rtx_insn *next_insn = insn;
2612 while ((next_insn != BB_END (bb))
2613 && (DEBUG_INSN_P (NEXT_INSN (next_insn))
2614 || NOTE_P (NEXT_INSN (next_insn))
2615 || BARRIER_P (NEXT_INSN (next_insn))))
2616 next_insn = NEXT_INSN (next_insn);
2618 /* Generate ENDBRANCH after calls that can return more than
2619 once (setjmp-like functions).  */
2620 if (find_reg_note (insn, REG_SETJMP, NULL) != NULL)
2622 cet_eb = gen_nop_endbr ();
2623 emit_insn_after (cet_eb, next_insn);
2625 continue;
2628 if (INSN_P (insn) && JUMP_P (insn) && flag_cet_switch)
2630 rtx target = JUMP_LABEL (insn);
2631 if (target == NULL_RTX || ANY_RETURN_P (target))
2632 continue;
2634 /* Check that the jump goes through a switch jump table.  */
2635 rtx_insn *label = as_a<rtx_insn *> (target);
2636 rtx_insn *table = next_insn (label);
2637 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2638 continue;
2640 /* For the indirect jump, find all the places it can jump to and insert
2641 ENDBRANCH there.  This is done under a special flag to
2642 control ENDBRANCH generation for switch stmts.  */
2643 edge_iterator ei;
2644 edge e;
2645 basic_block dest_blk;
2647 FOR_EACH_EDGE (e, ei, bb->succs)
2649 rtx_insn *insn;
2651 dest_blk = e->dest;
2652 insn = BB_HEAD (dest_blk);
2653 gcc_assert (LABEL_P (insn));
2654 cet_eb = gen_nop_endbr ();
2655 emit_insn_after (cet_eb, insn);
2657 continue;
2660 if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2661 || (NOTE_P (insn)
2662 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2663 /* TODO. Check /s bit also. */
2665 cet_eb = gen_nop_endbr ();
2666 emit_insn_after (cet_eb, insn);
2667 continue;
2672 timevar_pop (TV_MACH_DEP);
2673 return 0;
2676 namespace {
2678 const pass_data pass_data_insert_endbranch =
2680 RTL_PASS, /* type. */
2681 "cet", /* name. */
2682 OPTGROUP_NONE, /* optinfo_flags. */
2683 TV_MACH_DEP, /* tv_id. */
2684 0, /* properties_required. */
2685 0, /* properties_provided. */
2686 0, /* properties_destroyed. */
2687 0, /* todo_flags_start. */
2688 0, /* todo_flags_finish. */
2691 class pass_insert_endbranch : public rtl_opt_pass
2693 public:
2694 pass_insert_endbranch (gcc::context *ctxt)
2695 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2698 /* opt_pass methods: */
2699 virtual bool gate (function *)
2701 return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
2704 virtual unsigned int execute (function *)
2706 return rest_of_insert_endbranch ();
2709 }; // class pass_insert_endbranch
2711 } // anon namespace
2713 rtl_opt_pass *
2714 make_pass_insert_endbranch (gcc::context *ctxt)
2716 return new pass_insert_endbranch (ctxt);
2719 /* Return true if a red-zone is in use. */
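/* The 128-byte red zone below the stack pointer is defined by the 64-bit
   SysV psABI but not by the Microsoft x64 ABI, hence the
   TARGET_64BIT_MS_ABI check.  */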
2721 bool
2722 ix86_using_red_zone (void)
2724 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2727 /* Return a string that documents the current -m options. The caller is
2728 responsible for freeing the string. */
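/* The result is a single space-separated string of the recognized options;
   when ADD_NL_P is true, lines longer than roughly 70 characters are broken
   with a backslash-newline and any bits with no known option string are
   summarized as "(other ...)" entries.  */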
2730 static char *
2731 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2732 int flags, int flags2,
2733 const char *arch, const char *tune,
2734 enum fpmath_unit fpmath, bool add_nl_p)
2736 struct ix86_target_opts
2738 const char *option; /* option string */
2739 HOST_WIDE_INT mask; /* isa mask options */
2742 /* This table is ordered so that options like -msse4.2 that imply other
2743 ISAs come first. Target string will be displayed in the same order. */
2744 static struct ix86_target_opts isa2_opts[] =
2746 { "-mmpx", OPTION_MASK_ISA_MPX },
2747 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2748 { "-msgx", OPTION_MASK_ISA_SGX },
2749 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2750 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2751 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2752 { "-mibt", OPTION_MASK_ISA_IBT },
2753 { "-mshstk", OPTION_MASK_ISA_SHSTK }
2755 static struct ix86_target_opts isa_opts[] =
2757 { "-mgfni", OPTION_MASK_ISA_GFNI },
2758 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2759 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2760 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2761 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2762 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2763 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2764 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2765 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2766 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2767 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2768 { "-mfma", OPTION_MASK_ISA_FMA },
2769 { "-mxop", OPTION_MASK_ISA_XOP },
2770 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2771 { "-mf16c", OPTION_MASK_ISA_F16C },
2772 { "-mavx", OPTION_MASK_ISA_AVX },
2773 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2774 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2775 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2776 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2777 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2778 { "-msse3", OPTION_MASK_ISA_SSE3 },
2779 { "-maes", OPTION_MASK_ISA_AES },
2780 { "-msha", OPTION_MASK_ISA_SHA },
2781 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2782 { "-msse2", OPTION_MASK_ISA_SSE2 },
2783 { "-msse", OPTION_MASK_ISA_SSE },
2784 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2785 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2786 { "-mmmx", OPTION_MASK_ISA_MMX },
2787 { "-mrtm", OPTION_MASK_ISA_RTM },
2788 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2789 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2790 { "-madx", OPTION_MASK_ISA_ADX },
2791 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2792 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2793 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2794 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2795 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2796 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2797 { "-mabm", OPTION_MASK_ISA_ABM },
2798 { "-mbmi", OPTION_MASK_ISA_BMI },
2799 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2800 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2801 { "-mtbm", OPTION_MASK_ISA_TBM },
2802 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2803 { "-mcx16", OPTION_MASK_ISA_CX16 },
2804 { "-msahf", OPTION_MASK_ISA_SAHF },
2805 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2806 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2807 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2808 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2809 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2810 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2811 { "-mpku", OPTION_MASK_ISA_PKU },
2812 { "-mlwp", OPTION_MASK_ISA_LWP },
2813 { "-mhle", OPTION_MASK_ISA_HLE },
2814 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2815 { "-mclwb", OPTION_MASK_ISA_CLWB }
2818 /* Flag options. */
2819 static struct ix86_target_opts flag_opts[] =
2821 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2822 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2823 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2824 { "-m80387", MASK_80387 },
2825 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2826 { "-malign-double", MASK_ALIGN_DOUBLE },
2827 { "-mcld", MASK_CLD },
2828 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2829 { "-mieee-fp", MASK_IEEE_FP },
2830 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2831 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2832 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2833 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2834 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2835 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2836 { "-mno-red-zone", MASK_NO_RED_ZONE },
2837 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2838 { "-mrecip", MASK_RECIP },
2839 { "-mrtd", MASK_RTD },
2840 { "-msseregparm", MASK_SSEREGPARM },
2841 { "-mstack-arg-probe", MASK_STACK_PROBE },
2842 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2843 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2844 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2845 { "-mvzeroupper", MASK_VZEROUPPER },
2846 { "-mstv", MASK_STV },
2847 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2848 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2849 { "-mprefer-avx128", MASK_PREFER_AVX128 },
2850 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2853 /* Additional flag options. */
2854 static struct ix86_target_opts flag2_opts[] =
2856 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY },
2857 { "-mprefer-avx256", OPTION_MASK_PREFER_AVX256 },
2860 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2861 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2863 char isa_other[40];
2864 char isa2_other[40];
2865 char flags_other[40];
2866 char flags2_other[40];
2867 unsigned num = 0;
2868 unsigned i, j;
2869 char *ret;
2870 char *ptr;
2871 size_t len;
2872 size_t line_len;
2873 size_t sep_len;
2874 const char *abi;
2876 memset (opts, '\0', sizeof (opts));
2878 /* Add -march= option. */
2879 if (arch)
2881 opts[num][0] = "-march=";
2882 opts[num++][1] = arch;
2885 /* Add -mtune= option. */
2886 if (tune)
2888 opts[num][0] = "-mtune=";
2889 opts[num++][1] = tune;
2892 /* Add -m32/-m64/-mx32. */
2893 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2895 if ((isa & OPTION_MASK_ABI_64) != 0)
2896 abi = "-m64";
2897 else
2898 abi = "-mx32";
2899 isa &= ~ (OPTION_MASK_ISA_64BIT
2900 | OPTION_MASK_ABI_64
2901 | OPTION_MASK_ABI_X32);
2903 else
2904 abi = "-m32";
2905 opts[num++][0] = abi;
2907 /* Pick out the options set in isa2.  */
2908 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2910 if ((isa2 & isa2_opts[i].mask) != 0)
2912 opts[num++][0] = isa2_opts[i].option;
2913 isa2 &= ~ isa2_opts[i].mask;
2917 if (isa2 && add_nl_p)
2919 opts[num++][0] = isa2_other;
2920 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2923 /* Pick out the options set in isa.  */
2924 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2926 if ((isa & isa_opts[i].mask) != 0)
2928 opts[num++][0] = isa_opts[i].option;
2929 isa &= ~ isa_opts[i].mask;
2933 if (isa && add_nl_p)
2935 opts[num++][0] = isa_other;
2936 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2939 /* Add flag options. */
2940 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2942 if ((flags & flag_opts[i].mask) != 0)
2944 opts[num++][0] = flag_opts[i].option;
2945 flags &= ~ flag_opts[i].mask;
2949 if (flags && add_nl_p)
2951 opts[num++][0] = flags_other;
2952 sprintf (flags_other, "(other flags: %#x)", flags);
2955 /* Add additional flag options. */
2956 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2958 if ((flags2 & flag2_opts[i].mask) != 0)
2960 opts[num++][0] = flag2_opts[i].option;
2961 flags2 &= ~ flag2_opts[i].mask;
2965 if (flags2 && add_nl_p)
2967 opts[num++][0] = flags2_other;
2968 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2971 /* Add -fpmath= option. */
2972 if (fpmath)
2974 opts[num][0] = "-mfpmath=";
2975 switch ((int) fpmath)
2977 case FPMATH_387:
2978 opts[num++][1] = "387";
2979 break;
2981 case FPMATH_SSE:
2982 opts[num++][1] = "sse";
2983 break;
2985 case FPMATH_387 | FPMATH_SSE:
2986 opts[num++][1] = "sse+387";
2987 break;
2989 default:
2990 gcc_unreachable ();
2994 /* Any options? */
2995 if (num == 0)
2996 return NULL;
2998 gcc_assert (num < ARRAY_SIZE (opts));
3000 /* Size the string. */
3001 len = 0;
3002 sep_len = (add_nl_p) ? 3 : 1;
3003 for (i = 0; i < num; i++)
3005 len += sep_len;
3006 for (j = 0; j < 2; j++)
3007 if (opts[i][j])
3008 len += strlen (opts[i][j]);
3011 /* Build the string. */
3012 ret = ptr = (char *) xmalloc (len);
3013 line_len = 0;
3015 for (i = 0; i < num; i++)
3017 size_t len2[2];
3019 for (j = 0; j < 2; j++)
3020 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3022 if (i != 0)
3024 *ptr++ = ' ';
3025 line_len++;
3027 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3029 *ptr++ = '\\';
3030 *ptr++ = '\n';
3031 line_len = 0;
3035 for (j = 0; j < 2; j++)
3036 if (opts[i][j])
3038 memcpy (ptr, opts[i][j], len2[j]);
3039 ptr += len2[j];
3040 line_len += len2[j];
3044 *ptr = '\0';
3045 gcc_assert (ret + len >= ptr);
3047 return ret;
3050 /* Return true if profiling code should be emitted before the
3051 prologue, and false otherwise.
3052 Note: for x86 with "hotfix" prologues a sorry () is issued.  */
3053 static bool
3054 ix86_profile_before_prologue (void)
3056 return flag_fentry != 0;
3059 /* Function that is callable from the debugger to print the current
3060 options. */
3061 void ATTRIBUTE_UNUSED
3062 ix86_debug_options (void)
3064 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3065 target_flags, ix86_target_flags,
3066 ix86_arch_string,ix86_tune_string,
3067 ix86_fpmath, true);
3069 if (opts)
3071 fprintf (stderr, "%s\n\n", opts);
3072 free (opts);
3074 else
3075 fputs ("<no options>\n\n", stderr);
3077 return;
3080 /* Return true if T is one of the bytes we should avoid with
3081 -fmitigate-rop. */
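/* These are the near and far RET opcodes (0xc3/0xc2 and 0xcb/0xca), the
   usual gadget terminators in return-oriented programming.  */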
3083 static bool
3084 ix86_rop_should_change_byte_p (int t)
3086 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3089 static const char *stringop_alg_names[] = {
3090 #define DEF_ENUM
3091 #define DEF_ALG(alg, name) #name,
3092 #include "stringop.def"
3093 #undef DEF_ENUM
3094 #undef DEF_ALG
3097 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3098 The string is of the following form (or a comma-separated list of such entries):
3100 strategy_alg:max_size:[align|noalign]
3102 where the full size range for the strategy is either [0, max_size] or
3103 [min_size, max_size], in which min_size is the max_size + 1 of the
3104 preceding range. The last size range must have max_size == -1.
3106 Examples:
3109 -mmemcpy-strategy=libcall:-1:noalign
3111 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3115 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3117 This is to tell the compiler to use the following strategy for memset
3118 1) when the expected size is between [1, 16], use rep_8byte strategy;
3119 2) when the size is between [17, 2048], use vector_loop;
3120 3) when the size is > 2048, use libcall. */
3122 struct stringop_size_range
3124 int max;
3125 stringop_alg alg;
3126 bool noalign;
3129 static void
3130 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3132 const struct stringop_algs *default_algs;
3133 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3134 char *curr_range_str, *next_range_str;
3135 const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
3136 int i = 0, n = 0;
3138 if (is_memset)
3139 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3140 else
3141 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3143 curr_range_str = strategy_str;
3147 int maxs;
3148 char alg_name[128];
3149 char align[16];
3150 next_range_str = strchr (curr_range_str, ',');
3151 if (next_range_str)
3152 *next_range_str++ = '\0';
3154 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3155 alg_name, &maxs, align))
3157 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3158 return;
3161 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3163 error ("size ranges of option %qs should be increasing", opt);
3164 return;
3167 for (i = 0; i < last_alg; i++)
3168 if (!strcmp (alg_name, stringop_alg_names[i]))
3169 break;
3171 if (i == last_alg)
3173 error ("wrong strategy name %qs specified for option %qs",
3174 alg_name, opt);
3176 auto_vec <const char *> candidates;
3177 for (i = 0; i < last_alg; i++)
3178 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3179 candidates.safe_push (stringop_alg_names[i]);
3181 char *s;
3182 const char *hint
3183 = candidates_list_and_hint (alg_name, s, candidates);
3184 if (hint)
3185 inform (input_location,
3186 "valid arguments to %qs are: %s; did you mean %qs?",
3187 opt, s, hint);
3188 else
3189 inform (input_location, "valid arguments to %qs are: %s",
3190 opt, s);
3191 XDELETEVEC (s);
3192 return;
3195 if ((stringop_alg) i == rep_prefix_8_byte
3196 && !TARGET_64BIT)
3198 /* rep; movq isn't available in 32-bit code. */
3199 error ("strategy name %qs specified for option %qs "
3200 "not supported for 32-bit code", alg_name, opt);
3201 return;
3204 input_ranges[n].max = maxs;
3205 input_ranges[n].alg = (stringop_alg) i;
3206 if (!strcmp (align, "align"))
3207 input_ranges[n].noalign = false;
3208 else if (!strcmp (align, "noalign"))
3209 input_ranges[n].noalign = true;
3210 else
3212 error ("unknown alignment %qs specified for option %qs", align, opt);
3213 return;
3215 n++;
3216 curr_range_str = next_range_str;
3218 while (curr_range_str);
3220 if (input_ranges[n - 1].max != -1)
3222 error ("the max value for the last size range should be -1"
3223 " for option %qs", opt);
3224 return;
3227 if (n > MAX_STRINGOP_ALGS)
3229 error ("too many size ranges specified in option %qs", opt);
3230 return;
3233 /* Now override the default algs array. */
3234 for (i = 0; i < n; i++)
3236 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3237 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3238 = input_ranges[i].alg;
3239 *const_cast<int *>(&default_algs->size[i].noalign)
3240 = input_ranges[i].noalign;
3245 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3246 print the features that are explicitly set.  */
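/* The argument is a comma-separated list of feature names taken from
   x86-tune.def; prefixing a name with '^' clears the feature instead of
   setting it.  */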
3248 static void
3249 parse_mtune_ctrl_str (bool dump)
3251 if (!ix86_tune_ctrl_string)
3252 return;
3254 char *next_feature_string = NULL;
3255 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3256 char *orig = curr_feature_string;
3257 int i;
3260 bool clear = false;
3262 next_feature_string = strchr (curr_feature_string, ',');
3263 if (next_feature_string)
3264 *next_feature_string++ = '\0';
3265 if (*curr_feature_string == '^')
3267 curr_feature_string++;
3268 clear = true;
3270 for (i = 0; i < X86_TUNE_LAST; i++)
3272 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3274 ix86_tune_features[i] = !clear;
3275 if (dump)
3276 fprintf (stderr, "Explicitly %s feature %s\n",
3277 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3278 break;
3281 if (i == X86_TUNE_LAST)
3282 error ("Unknown parameter to option -mtune-ctrl: %s",
3283 clear ? curr_feature_string - 1 : curr_feature_string);
3284 curr_feature_string = next_feature_string;
3286 while (curr_feature_string);
3287 free (orig);
3290 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3291 processor type. */
3293 static void
3294 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3296 unsigned int ix86_tune_mask = 1u << ix86_tune;
3297 int i;
3299 for (i = 0; i < X86_TUNE_LAST; ++i)
3301 if (ix86_tune_no_default)
3302 ix86_tune_features[i] = 0;
3303 else
3304 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3307 if (dump)
3309 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3310 for (i = 0; i < X86_TUNE_LAST; i++)
3311 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3312 ix86_tune_features[i] ? "on" : "off");
3315 parse_mtune_ctrl_str (dump);
3319 /* Default align_* from the processor table. */
3321 static void
3322 ix86_default_align (struct gcc_options *opts)
3324 if (opts->x_align_loops == 0)
3326 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3327 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3329 if (opts->x_align_jumps == 0)
3331 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3332 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3334 if (opts->x_align_functions == 0)
3336 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3340 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3342 static void
3343 ix86_override_options_after_change (void)
3345 ix86_default_align (&global_options);
3348 /* Override various settings based on options. If MAIN_ARGS_P, the
3349 options are from the command line, otherwise they are from
3350 attributes. Return true if there's an error related to march
3351 option. */
3353 static bool
3354 ix86_option_override_internal (bool main_args_p,
3355 struct gcc_options *opts,
3356 struct gcc_options *opts_set)
3358 int i;
3359 unsigned int ix86_arch_mask;
3360 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3362 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3363 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3364 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3365 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3366 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3367 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3368 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3369 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3370 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3371 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3372 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3373 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3374 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3375 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3376 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3377 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3378 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3379 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3380 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3381 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3382 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3383 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3384 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3385 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3386 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3387 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3388 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3389 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3390 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3391 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3392 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3393 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3394 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3395 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3396 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3397 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3398 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3399 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3400 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3401 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3402 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3403 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3404 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3405 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3406 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3407 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3408 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3409 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3410 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3411 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3412 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3413 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3414 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3415 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3416 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3417 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3418 #define PTA_MWAITX (HOST_WIDE_INT_1 << 56)
3419 #define PTA_CLZERO (HOST_WIDE_INT_1 << 57)
3420 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 58)
3421 #define PTA_PKU (HOST_WIDE_INT_1 << 59)
3422 #define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60)
3423 #define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61)
3424 #define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62)
3425 #define PTA_SGX (HOST_WIDE_INT_1 << 63)
3427 #define PTA_CORE2 \
3428 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3429 | PTA_CX16 | PTA_FXSR)
3430 #define PTA_NEHALEM \
3431 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3432 #define PTA_WESTMERE \
3433 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3434 #define PTA_SANDYBRIDGE \
3435 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3436 #define PTA_IVYBRIDGE \
3437 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3438 #define PTA_HASWELL \
3439 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3440 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3441 #define PTA_BROADWELL \
3442 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3443 #define PTA_SKYLAKE \
3444 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3445 #define PTA_SKYLAKE_AVX512 \
3446 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
3447 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
3448 #define PTA_KNL \
3449 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3450 #define PTA_BONNELL \
3451 (PTA_CORE2 | PTA_MOVBE)
3452 #define PTA_SILVERMONT \
3453 (PTA_WESTMERE | PTA_MOVBE)
3454 #define PTA_KNM \
3455 (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
3457 /* If this reaches 64, the flags field in struct pta below needs to be widened.  */
3459 static struct pta
3461 const char *const name; /* processor name or nickname. */
3462 const enum processor_type processor;
3463 const enum attr_cpu schedule;
3464 const unsigned HOST_WIDE_INT flags;
3466 const processor_alias_table[] =
3468 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3469 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3470 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3471 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3472 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3473 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3474 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3475 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3476 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3477 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3478 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3479 PTA_MMX | PTA_SSE | PTA_FXSR},
3480 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3481 PTA_MMX | PTA_SSE | PTA_FXSR},
3482 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3483 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3484 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3485 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3486 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3487 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3488 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3489 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3490 PTA_MMX | PTA_SSE | PTA_FXSR},
3491 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3492 PTA_MMX | PTA_SSE | PTA_FXSR},
3493 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3494 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3495 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3496 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3497 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3498 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3499 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3500 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3501 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3502 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3503 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3504 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3505 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3506 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3507 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3508 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3509 PTA_SANDYBRIDGE},
3510 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3511 PTA_SANDYBRIDGE},
3512 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3513 PTA_IVYBRIDGE},
3514 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3515 PTA_IVYBRIDGE},
3516 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3517 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3518 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3519 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3520 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
3521 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3522 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3523 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3524 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3525 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3526 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3527 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3528 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3529 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3530 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3531 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3532 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3533 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3534 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3535 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3536 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3537 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3538 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3539 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3540 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3541 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3542 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3543 {"x86-64", PROCESSOR_K8, CPU_K8,
3544 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3545 {"eden-x2", PROCESSOR_K8, CPU_K8,
3546 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3547 {"nano", PROCESSOR_K8, CPU_K8,
3548 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3549 | PTA_SSSE3 | PTA_FXSR},
3550 {"nano-1000", PROCESSOR_K8, CPU_K8,
3551 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3552 | PTA_SSSE3 | PTA_FXSR},
3553 {"nano-2000", PROCESSOR_K8, CPU_K8,
3554 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3555 | PTA_SSSE3 | PTA_FXSR},
3556 {"nano-3000", PROCESSOR_K8, CPU_K8,
3557 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3558 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3559 {"nano-x2", PROCESSOR_K8, CPU_K8,
3560 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3561 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3562 {"eden-x4", PROCESSOR_K8, CPU_K8,
3563 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3564 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3565 {"nano-x4", PROCESSOR_K8, CPU_K8,
3566 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3567 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3568 {"k8", PROCESSOR_K8, CPU_K8,
3569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3570 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3571 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3573 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3574 {"opteron", PROCESSOR_K8, CPU_K8,
3575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3576 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3577 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3578 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3579 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3580 {"athlon64", PROCESSOR_K8, CPU_K8,
3581 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3582 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3583 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3584 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3585 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3586 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3587 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3588 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3589 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3590 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3591 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3592 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3593 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3594 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3595 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3596 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3597 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3598 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3599 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3600 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3601 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3602 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3603 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3604 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3605 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3606 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3607 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3608 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3609 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3610 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3611 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3612 | PTA_XSAVEOPT | PTA_FSGSBASE},
3613 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3614 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3615 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3616 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3617 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3618 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3619 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3620 | PTA_MOVBE | PTA_MWAITX},
3621 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3622 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3623 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3624 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3625 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3626 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3627 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3628 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3629 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3630 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3631 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3632 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3633 | PTA_FXSR | PTA_XSAVE},
3634 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3635 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3636 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3637 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3638 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3639 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3641 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3642 PTA_64BIT
3643 | PTA_HLE /* flags are only used for -march switch. */ },
3646 /* -mrecip options. */
3647 static struct
3649 const char *string; /* option name */
3650 unsigned int mask; /* mask bits to set */
3652 const recip_options[] =
3654 { "all", RECIP_MASK_ALL },
3655 { "none", RECIP_MASK_NONE },
3656 { "div", RECIP_MASK_DIV },
3657 { "sqrt", RECIP_MASK_SQRT },
3658 { "vec-div", RECIP_MASK_VEC_DIV },
3659 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3662 int const pta_size = ARRAY_SIZE (processor_alias_table);
3664 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3665 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3666 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3667 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3668 #ifdef TARGET_BI_ARCH
3669 else
3671 #if TARGET_BI_ARCH == 1
3672 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3673 is on and OPTION_MASK_ABI_X32 is off. We turn off
3674 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3675 -mx32. */
3676 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3677 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3678 #else
3679 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3680 on and OPTION_MASK_ABI_64 is off. We turn off
3681 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3682 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3683 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3684 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3685 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3686 #endif
3687 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3688 && TARGET_IAMCU_P (opts->x_target_flags))
3689 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3690 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3692 #endif
3694 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3696 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3697 OPTION_MASK_ABI_64 for TARGET_X32. */
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3699 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3701 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3702 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3703 | OPTION_MASK_ABI_X32
3704 | OPTION_MASK_ABI_64);
3705 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3707 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3708 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3710 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3713 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3714 SUBTARGET_OVERRIDE_OPTIONS;
3715 #endif
3717 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3718 SUBSUBTARGET_OVERRIDE_OPTIONS;
3719 #endif
3721 /* -fPIC is the default for x86_64. */
3722 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3723 opts->x_flag_pic = 2;
3725 /* Need to check -mtune=generic first. */
3726 if (opts->x_ix86_tune_string)
3728 /* As special support for cross compilers we read -mtune=native
3729 as -mtune=generic. With native compilers we won't see the
3730 -mtune=native, as it was changed by the driver. */
3731 if (!strcmp (opts->x_ix86_tune_string, "native"))
3733 opts->x_ix86_tune_string = "generic";
3735 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3736 warning (OPT_Wdeprecated,
3737 main_args_p
3738 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3739 "or %<-mtune=generic%> instead as appropriate")
3740 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3741 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3742 " instead as appropriate"));
3744 else
3746 if (opts->x_ix86_arch_string)
3747 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3748 if (!opts->x_ix86_tune_string)
3750 opts->x_ix86_tune_string
3751 = processor_target_table[TARGET_CPU_DEFAULT].name;
3752 ix86_tune_defaulted = 1;
3755 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3756 or defaulted. We need to use a sensible tune option. */
3757 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3759 opts->x_ix86_tune_string = "generic";
3763 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3764 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3766 /* rep; movq isn't available in 32-bit code. */
3767 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3768 opts->x_ix86_stringop_alg = no_stringop;
3771 if (!opts->x_ix86_arch_string)
3772 opts->x_ix86_arch_string
3773 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3774 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3775 else
3776 ix86_arch_specified = 1;
3778 if (opts_set->x_ix86_pmode)
3780 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3781 && opts->x_ix86_pmode == PMODE_SI)
3782 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3783 && opts->x_ix86_pmode == PMODE_DI))
3784 error ("address mode %qs not supported in the %s bit mode",
3785 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3786 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3788 else
3789 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3790 ? PMODE_DI : PMODE_SI;
3792 if (!opts_set->x_ix86_abi)
3793 opts->x_ix86_abi = DEFAULT_ABI;
3795 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3796 error ("-mabi=ms not supported with X32 ABI");
3797 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3799 /* For targets using the MS ABI enable ms-extensions, if not
3800 explicitly turned off.  For the non-MS ABI we turn off this
3801 option.  */
3802 if (!opts_set->x_flag_ms_extensions)
3803 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3805 if (opts_set->x_ix86_cmodel)
3807 switch (opts->x_ix86_cmodel)
3809 case CM_SMALL:
3810 case CM_SMALL_PIC:
3811 if (opts->x_flag_pic)
3812 opts->x_ix86_cmodel = CM_SMALL_PIC;
3813 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3814 error ("code model %qs not supported in the %s bit mode",
3815 "small", "32");
3816 break;
3818 case CM_MEDIUM:
3819 case CM_MEDIUM_PIC:
3820 if (opts->x_flag_pic)
3821 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3822 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3823 error ("code model %qs not supported in the %s bit mode",
3824 "medium", "32");
3825 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3826 error ("code model %qs not supported in x32 mode",
3827 "medium");
3828 break;
3830 case CM_LARGE:
3831 case CM_LARGE_PIC:
3832 if (opts->x_flag_pic)
3833 opts->x_ix86_cmodel = CM_LARGE_PIC;
3834 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3835 error ("code model %qs not supported in the %s bit mode",
3836 "large", "32");
3837 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3838 error ("code model %qs not supported in x32 mode",
3839 "large");
3840 break;
3842 case CM_32:
3843 if (opts->x_flag_pic)
3844 error ("code model %s does not support PIC mode", "32");
3845 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3846 error ("code model %qs not supported in the %s bit mode",
3847 "32", "64");
3848 break;
3850 case CM_KERNEL:
3851 if (opts->x_flag_pic)
3853 error ("code model %s does not support PIC mode", "kernel");
3854 opts->x_ix86_cmodel = CM_32;
3856 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3857 error ("code model %qs not supported in the %s bit mode",
3858 "kernel", "32");
3859 break;
3861 default:
3862 gcc_unreachable ();
3865 else
3867 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3868 use of rip-relative addressing. This eliminates fixups that
3869 would otherwise be needed if this object is to be placed in a
3870 DLL, and is essentially just as efficient as direct addressing. */
3871 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3872 && (TARGET_RDOS || TARGET_PECOFF))
3873 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3874 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3875 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3876 else
3877 opts->x_ix86_cmodel = CM_32;
3879 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3881 error ("-masm=intel not supported in this configuration");
3882 opts->x_ix86_asm_dialect = ASM_ATT;
3884 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3885 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3886 sorry ("%i-bit mode not compiled in",
3887 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3889 for (i = 0; i < pta_size; i++)
3890 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3892 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3894 error (main_args_p
3895 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3896 "switch")
3897 : G_("%<generic%> CPU can be used only for "
3898 "%<target(\"tune=\")%> attribute"));
3899 return false;
3901 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3903 error (main_args_p
3904 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3905 "switch")
3906 : G_("%<intel%> CPU can be used only for "
3907 "%<target(\"tune=\")%> attribute"));
3908 return false;
3911 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3912 && !(processor_alias_table[i].flags & PTA_64BIT))
3914 error ("CPU you selected does not support x86-64 "
3915 "instruction set");
3916 return false;
3919 ix86_schedule = processor_alias_table[i].schedule;
3920 ix86_arch = processor_alias_table[i].processor;
3921 /* Default cpu tuning to the architecture. */
3922 ix86_tune = ix86_arch;
3924 if (processor_alias_table[i].flags & PTA_MMX
3925 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3926 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3927 if (processor_alias_table[i].flags & PTA_3DNOW
3928 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3929 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3930 if (processor_alias_table[i].flags & PTA_3DNOW_A
3931 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3932 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3933 if (processor_alias_table[i].flags & PTA_SSE
3934 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3935 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3936 if (processor_alias_table[i].flags & PTA_SSE2
3937 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3938 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3939 if (processor_alias_table[i].flags & PTA_SSE3
3940 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3941 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3942 if (processor_alias_table[i].flags & PTA_SSSE3
3943 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3944 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3945 if (processor_alias_table[i].flags & PTA_SSE4_1
3946 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3947 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3948 if (processor_alias_table[i].flags & PTA_SSE4_2
3949 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3950 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3951 if (processor_alias_table[i].flags & PTA_AVX
3952 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3953 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3954 if (processor_alias_table[i].flags & PTA_AVX2
3955 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3956 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3957 if (processor_alias_table[i].flags & PTA_FMA
3958 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3959 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3960 if (processor_alias_table[i].flags & PTA_SSE4A
3961 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3962 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3963 if (processor_alias_table[i].flags & PTA_FMA4
3964 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3965 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3966 if (processor_alias_table[i].flags & PTA_XOP
3967 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3968 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3969 if (processor_alias_table[i].flags & PTA_LWP
3970 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3971 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3972 if (processor_alias_table[i].flags & PTA_ABM
3973 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3974 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3975 if (processor_alias_table[i].flags & PTA_BMI
3976 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3977 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3978 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3979 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3980 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3981 if (processor_alias_table[i].flags & PTA_TBM
3982 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3983 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3984 if (processor_alias_table[i].flags & PTA_BMI2
3985 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3986 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3987 if (processor_alias_table[i].flags & PTA_CX16
3988 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3989 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3990 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3991 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3992 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3993 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3994 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3995 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3996 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3997 if (processor_alias_table[i].flags & PTA_MOVBE
3998 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3999 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4000 if (processor_alias_table[i].flags & PTA_AES
4001 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4002 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
4003 if (processor_alias_table[i].flags & PTA_SHA
4004 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4005 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4006 if (processor_alias_table[i].flags & PTA_PCLMUL
4007 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4008 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4009 if (processor_alias_table[i].flags & PTA_FSGSBASE
4010 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4011 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4012 if (processor_alias_table[i].flags & PTA_RDRND
4013 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4014 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4015 if (processor_alias_table[i].flags & PTA_F16C
4016 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4017 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4018 if (processor_alias_table[i].flags & PTA_RTM
4019 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4020 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4021 if (processor_alias_table[i].flags & PTA_HLE
4022 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4023 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4024 if (processor_alias_table[i].flags & PTA_PRFCHW
4025 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4026 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4027 if (processor_alias_table[i].flags & PTA_RDSEED
4028 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4029 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4030 if (processor_alias_table[i].flags & PTA_ADX
4031 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4032 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4033 if (processor_alias_table[i].flags & PTA_FXSR
4034 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4035 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4036 if (processor_alias_table[i].flags & PTA_XSAVE
4037 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4038 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4039 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4040 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4041 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4042 if (processor_alias_table[i].flags & PTA_AVX512F
4043 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4044 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4045 if (processor_alias_table[i].flags & PTA_AVX512ER
4046 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4047 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4048 if (processor_alias_table[i].flags & PTA_AVX512PF
4049 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4050 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4051 if (processor_alias_table[i].flags & PTA_AVX512CD
4052 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4053 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4054 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4055 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4056 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4057 if (processor_alias_table[i].flags & PTA_CLWB
4058 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4059 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4060 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4061 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4062 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4063 if (processor_alias_table[i].flags & PTA_CLZERO
4064 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4065 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4066 if (processor_alias_table[i].flags & PTA_XSAVEC
4067 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4068 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4069 if (processor_alias_table[i].flags & PTA_XSAVES
4070 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4071 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4072 if (processor_alias_table[i].flags & PTA_AVX512DQ
4073 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4074 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4075 if (processor_alias_table[i].flags & PTA_AVX512BW
4076 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4077 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4078 if (processor_alias_table[i].flags & PTA_AVX512VL
4079 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4080 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4081 if (processor_alias_table[i].flags & PTA_MPX
4082 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
4083 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
4084 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4085 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4086 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4087 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4088 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4089 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4091 if (processor_alias_table[i].flags & PTA_AVX5124VNNIW
4092 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124VNNIW))
4093 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
4094 if (processor_alias_table[i].flags & PTA_AVX5124FMAPS
4095 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124FMAPS))
4096 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
4097 if (processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ
4098 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
4099 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
4100 if (processor_alias_table[i].flags & PTA_SGX
4101 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
4102 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
4104 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4105 x86_prefetch_sse = true;
4106 if (processor_alias_table[i].flags & PTA_MWAITX
4107 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4108 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4109 if (processor_alias_table[i].flags & PTA_PKU
4110 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4111 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4113 /* Don't enable x87 instructions if only
4114 general registers are allowed. */
4115 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
4116 && !(opts_set->x_target_flags & MASK_80387))
4118 if (processor_alias_table[i].flags & PTA_NO_80387)
4119 opts->x_target_flags &= ~MASK_80387;
4120 else
4121 opts->x_target_flags |= MASK_80387;
4123 break;
4126 if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4127 error ("Intel MPX does not support x32");
4132 if (i == pta_size)
4134 error (main_args_p
4135 ? G_("bad value (%qs) for %<-march=%> switch")
4136 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4137 opts->x_ix86_arch_string);
4139 auto_vec <const char *> candidates;
4140 for (i = 0; i < pta_size; i++)
4141 if (strcmp (processor_alias_table[i].name, "generic")
4142 && strcmp (processor_alias_table[i].name, "intel")
4143 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4144 || (processor_alias_table[i].flags & PTA_64BIT)))
4145 candidates.safe_push (processor_alias_table[i].name);
4147 char *s;
4148 const char *hint
4149 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4150 if (hint)
4151 inform (input_location,
4152 main_args_p
4153 ? G_("valid arguments to %<-march=%> switch are: "
4154 "%s; did you mean %qs?")
4155 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4156 "%s; did you mean %qs?"), s, hint);
4157 else
4158 inform (input_location,
4159 main_args_p
4160 ? G_("valid arguments to %<-march=%> switch are: %s")
4161 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4162 "are: %s"), s);
4163 XDELETEVEC (s);
4166 ix86_arch_mask = 1u << ix86_arch;
4167 for (i = 0; i < X86_ARCH_LAST; ++i)
4168 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4170 for (i = 0; i < pta_size; i++)
4171 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4173 ix86_schedule = processor_alias_table[i].schedule;
4174 ix86_tune = processor_alias_table[i].processor;
4175 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4177 if (!(processor_alias_table[i].flags & PTA_64BIT))
4179 if (ix86_tune_defaulted)
4181 opts->x_ix86_tune_string = "x86-64";
4182 for (i = 0; i < pta_size; i++)
4183 if (! strcmp (opts->x_ix86_tune_string,
4184 processor_alias_table[i].name))
4185 break;
4186 ix86_schedule = processor_alias_table[i].schedule;
4187 ix86_tune = processor_alias_table[i].processor;
4189 else
4190 error ("CPU you selected does not support x86-64 "
4191 "instruction set");
4194 /* Intel CPUs have always interpreted SSE prefetch instructions as
4195 NOPs; so, we can enable SSE prefetch instructions even when
4196 -mtune (rather than -march) points us to a processor that has them.
4197 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4198 higher processors. */
4199 if (TARGET_CMOV
4200 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
4201 x86_prefetch_sse = true;
4202 break;
4205 if (ix86_tune_specified && i == pta_size)
4207 error (main_args_p
4208 ? G_("bad value (%qs) for %<-mtune=%> switch")
4209 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4210 opts->x_ix86_tune_string);
4212 auto_vec <const char *> candidates;
4213 for (i = 0; i < pta_size; i++)
4214 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4215 || (processor_alias_table[i].flags & PTA_64BIT))
4216 candidates.safe_push (processor_alias_table[i].name);
4218 char *s;
4219 const char *hint
4220 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4221 if (hint)
4222 inform (input_location,
4223 main_args_p
4224 ? G_("valid arguments to %<-mtune=%> switch are: "
4225 "%s; did you mean %qs?")
4226 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4227 "%s; did you mean %qs?"), s, hint);
4228 else
4229 inform (input_location,
4230 main_args_p
4231 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4232 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4233 "are: %s"), s);
4234 XDELETEVEC (s);
4237 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4239 #ifndef USE_IX86_FRAME_POINTER
4240 #define USE_IX86_FRAME_POINTER 0
4241 #endif
4243 #ifndef USE_X86_64_FRAME_POINTER
4244 #define USE_X86_64_FRAME_POINTER 0
4245 #endif
4247 /* Set the default values for switches whose default depends on TARGET_64BIT
4248 in case they weren't overwritten by command line options. */
4249 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4251 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4252 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4253 if (opts->x_flag_asynchronous_unwind_tables
4254 && !opts_set->x_flag_unwind_tables
4255 && TARGET_64BIT_MS_ABI)
4256 opts->x_flag_unwind_tables = 1;
4257 if (opts->x_flag_asynchronous_unwind_tables == 2)
4258 opts->x_flag_unwind_tables
4259 = opts->x_flag_asynchronous_unwind_tables = 1;
4260 if (opts->x_flag_pcc_struct_return == 2)
4261 opts->x_flag_pcc_struct_return = 0;
4263 else
4265 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4266 opts->x_flag_omit_frame_pointer
4267 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4268 if (opts->x_flag_asynchronous_unwind_tables == 2)
4269 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4270 if (opts->x_flag_pcc_struct_return == 2)
4272 /* Intel MCU psABI specifies that -freg-struct-return should
4273 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4274 we check -miamcu so that -freg-struct-return is always
4275 turned on if -miamcu is used. */
4276 if (TARGET_IAMCU_P (opts->x_target_flags))
4277 opts->x_flag_pcc_struct_return = 0;
4278 else
4279 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4283 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4284 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4285 so that for cold code we use size_cost even in !optimize_size compilations. */
4286 if (opts->x_optimize_size)
4287 ix86_cost = &ix86_size_cost;
4288 else
4289 ix86_cost = ix86_tune_cost;
4291 /* Arrange to set up i386_stack_locals for all functions. */
4292 init_machine_status = ix86_init_machine_status;
4294 /* Validate -mregparm= value. */
4295 if (opts_set->x_ix86_regparm)
4297 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4298 warning (0, "-mregparm is ignored in 64-bit mode");
4299 else if (TARGET_IAMCU_P (opts->x_target_flags))
4300 warning (0, "-mregparm is ignored for Intel MCU psABI");
4301 if (opts->x_ix86_regparm > REGPARM_MAX)
4303 error ("-mregparm=%d is not between 0 and %d",
4304 opts->x_ix86_regparm, REGPARM_MAX);
4305 opts->x_ix86_regparm = 0;
4308 if (TARGET_IAMCU_P (opts->x_target_flags)
4309 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4310 opts->x_ix86_regparm = REGPARM_MAX;
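/* For example, -mregparm=3 on 32-bit x86 requests that up to three integer
   arguments be passed in registers; values above REGPARM_MAX are rejected
   above (assuming the conventional REGPARM_MAX of 3 for the 32-bit ABI),
   and for 64-bit or IAMCU targets the option is ignored with a warning and
   regparm is pinned to REGPARM_MAX here.  */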
4312 /* Default align_* from the processor table. */
4313 ix86_default_align (opts);
4315 /* Provide default for -mbranch-cost= value. */
4316 if (!opts_set->x_ix86_branch_cost)
4317 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4319 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4321 opts->x_target_flags
4322 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4324 /* Enable by default the SSE and MMX builtins. Do allow the user to
4325 explicitly disable any of these. In particular, disabling SSE and
4326 MMX for kernel code is extremely useful. */
4327 if (!ix86_arch_specified)
4328 opts->x_ix86_isa_flags
4329 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4330 | TARGET_SUBTARGET64_ISA_DEFAULT)
4331 & ~opts->x_ix86_isa_flags_explicit);
4333 if (TARGET_RTD_P (opts->x_target_flags))
4334 warning (0,
4335 main_args_p
4336 ? G_("%<-mrtd%> is ignored in 64bit mode")
4337 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4339 else
4341 opts->x_target_flags
4342 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4344 if (!ix86_arch_specified)
4345 opts->x_ix86_isa_flags
4346 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4348 /* The i386 ABI does not specify a red zone. It still makes sense to use
4349 one when the programmer takes care to prevent the stack from being destroyed. */
4350 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4351 opts->x_target_flags |= MASK_NO_RED_ZONE;
4354 /* Keep nonleaf frame pointers. */
4355 if (opts->x_flag_omit_frame_pointer)
4356 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4357 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4358 opts->x_flag_omit_frame_pointer = 1;
4360 /* If we're doing fast math, we don't care about comparison order
4361 wrt NaNs. This lets us use a shorter comparison sequence. */
4362 if (opts->x_flag_finite_math_only)
4363 opts->x_target_flags &= ~MASK_IEEE_FP;
4365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4366 since the insns won't need emulation. */
4367 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4368 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4370 /* Likewise, if the target doesn't have a 387, or we've specified
4371 software floating point, don't use 387 inline intrinsics. */
4372 if (!TARGET_80387_P (opts->x_target_flags))
4373 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4375 /* Turn on MMX builtins for -msse. */
4376 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4377 opts->x_ix86_isa_flags
4378 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4380 /* Enable SSE prefetch. */
4381 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4382 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4383 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4384 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4385 x86_prefetch_sse = true;
4387 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4388 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4389 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4390 opts->x_ix86_isa_flags
4391 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4393 /* Enable lzcnt instruction for -mabm. */
4394 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4395 opts->x_ix86_isa_flags
4396 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4398 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4399 if (TARGET_16BIT_P(opts->x_ix86_isa_flags))
4400 opts->x_ix86_isa_flags
4401 &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4402 & ~opts->x_ix86_isa_flags_explicit);
4404 /* Validate -mpreferred-stack-boundary= value or default it to
4405 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4406 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4407 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4409 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2;
4410 int max = TARGET_SEH ? 4 : 12;
4412 if (opts->x_ix86_preferred_stack_boundary_arg < min
4413 || opts->x_ix86_preferred_stack_boundary_arg > max)
4415 if (min == max)
4416 error ("-mpreferred-stack-boundary is not supported "
4417 "for this target");
4418 else
4419 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4420 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4422 else
4423 ix86_preferred_stack_boundary
4424 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
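/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte preferred stack
   alignment.  The accepted argument range above is 2..12 for 32-bit,
   3..12 for 64-bit, and at most 4 under SEH.  */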
4427 /* Set the default value for -mstackrealign. */
4428 if (!opts_set->x_ix86_force_align_arg_pointer)
4429 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4431 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4433 /* Validate -mincoming-stack-boundary= value or default it to
4434 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4435 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4436 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4438 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4440 if (opts->x_ix86_incoming_stack_boundary_arg < min
4441 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4442 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4443 opts->x_ix86_incoming_stack_boundary_arg, min);
4444 else
4446 ix86_user_incoming_stack_boundary
4447 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4448 ix86_incoming_stack_boundary
4449 = ix86_user_incoming_stack_boundary;
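/* Analogously, -mincoming-stack-boundary=3 declares that only
   (1 << 3) * BITS_PER_UNIT = 64 bits (8 bytes) of stack alignment may be
   assumed on function entry, so prologues may have to realign the stack
   when more alignment is required.  */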
4453 #ifndef NO_PROFILE_COUNTERS
4454 if (flag_nop_mcount)
4455 error ("-mnop-mcount is not compatible with this target");
4456 #endif
4457 if (flag_nop_mcount && flag_pic)
4458 error ("-mnop-mcount is not implemented for -fPIC");
4460 /* Accept -msseregparm only if at least SSE support is enabled. */
4461 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4462 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4463 error (main_args_p
4464 ? G_("%<-msseregparm%> used without SSE enabled")
4465 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4467 if (opts_set->x_ix86_fpmath)
4469 if (opts->x_ix86_fpmath & FPMATH_SSE)
4471 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4473 if (TARGET_80387_P (opts->x_target_flags))
4475 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4476 opts->x_ix86_fpmath = FPMATH_387;
4479 else if ((opts->x_ix86_fpmath & FPMATH_387)
4480 && !TARGET_80387_P (opts->x_target_flags))
4482 warning (0, "387 instruction set disabled, using SSE arithmetics");
4483 opts->x_ix86_fpmath = FPMATH_SSE;
4487 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4488 -mfpmath=387. The latter is nevertheless the default on many targets,
4489 since the extra 80-bit precision of temporaries is considered part of
4490 the ABI. Override the default at least for -ffast-math.
4491 TODO: -mfpmath=both seems to produce equally performing code with
4492 slightly smaller binaries. It is, however, not clear whether register
4493 allocation is ready for this setting.
4494 Also, -mfpmath=387 is overall considerably more compact (about 4-5%)
4495 than SSE codegen. We may switch to 387 with -ffast-math for
4496 size-optimized functions. */
4497 else if (fast_math_flags_set_p (&global_options)
4498 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4499 opts->x_ix86_fpmath = FPMATH_SSE;
4500 else
4501 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4503 /* Use external vectorized library in vectorizing intrinsics. */
4504 if (opts_set->x_ix86_veclibabi_type)
4505 switch (opts->x_ix86_veclibabi_type)
4507 case ix86_veclibabi_type_svml:
4508 ix86_veclib_handler = ix86_veclibabi_svml;
4509 break;
4511 case ix86_veclibabi_type_acml:
4512 ix86_veclib_handler = ix86_veclibabi_acml;
4513 break;
4515 default:
4516 gcc_unreachable ();
4519 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4520 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4521 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4523 /* If stack probes are required, the space used for large function
4524 arguments on the stack must also be probed, so enable
4525 -maccumulate-outgoing-args so this happens in the prologue. */
4526 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4527 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4529 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4530 warning (0,
4531 main_args_p
4532 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4533 "for correctness")
4534 : G_("stack probing requires "
4535 "%<target(\"accumulate-outgoing-args\")%> for "
4536 "correctness"));
4537 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4540 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4541 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4542 if (fixed_regs[BP_REG]
4543 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4545 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4546 warning (0,
4547 main_args_p
4548 ? G_("fixed ebp register requires "
4549 "%<-maccumulate-outgoing-args%>")
4550 : G_("fixed ebp register requires "
4551 "%<target(\"accumulate-outgoing-args\")%>"));
4552 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4555 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4557 char *p;
4558 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4559 p = strchr (internal_label_prefix, 'X');
4560 internal_label_prefix_len = p - internal_label_prefix;
4561 *p = '\0';
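/* For instance, on a typical ELF configuration the generated label looks
   like "<local-label-prefix>LX0", so everything up to the 'X' (e.g. a
   ".L"-style prefix) is recorded as the internal label prefix together
   with its length; the exact spelling is target-dependent and this is
   only an illustration.  */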
4564 /* When a scheduling description is not available, disable the scheduler
4565 passes so they won't slow down compilation or make x87 code slower. */
4566 if (!TARGET_SCHEDULE)
4567 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4569 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4570 ix86_tune_cost->simultaneous_prefetches,
4571 opts->x_param_values,
4572 opts_set->x_param_values);
4573 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4574 ix86_tune_cost->prefetch_block,
4575 opts->x_param_values,
4576 opts_set->x_param_values);
4577 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4578 ix86_tune_cost->l1_cache_size,
4579 opts->x_param_values,
4580 opts_set->x_param_values);
4581 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4582 ix86_tune_cost->l2_cache_size,
4583 opts->x_param_values,
4584 opts_set->x_param_values);
4586 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4587 if (opts->x_flag_prefetch_loop_arrays < 0
4588 && HAVE_prefetch
4589 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4590 && !opts->x_optimize_size
4591 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4592 opts->x_flag_prefetch_loop_arrays = 1;
4594 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4595 can be optimized to ap = __builtin_next_arg (0). */
4596 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4597 targetm.expand_builtin_va_start = NULL;
4599 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4601 ix86_gen_leave = gen_leave_rex64;
4602 if (Pmode == DImode)
4604 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4605 ix86_gen_tls_local_dynamic_base_64
4606 = gen_tls_local_dynamic_base_64_di;
4608 else
4610 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4611 ix86_gen_tls_local_dynamic_base_64
4612 = gen_tls_local_dynamic_base_64_si;
4615 else
4616 ix86_gen_leave = gen_leave;
4618 if (Pmode == DImode)
4620 ix86_gen_add3 = gen_adddi3;
4621 ix86_gen_sub3 = gen_subdi3;
4622 ix86_gen_sub3_carry = gen_subdi3_carry;
4623 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4624 ix86_gen_andsp = gen_anddi3;
4625 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4626 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4627 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4628 ix86_gen_monitor = gen_sse3_monitor_di;
4629 ix86_gen_monitorx = gen_monitorx_di;
4630 ix86_gen_clzero = gen_clzero_di;
4632 else
4634 ix86_gen_add3 = gen_addsi3;
4635 ix86_gen_sub3 = gen_subsi3;
4636 ix86_gen_sub3_carry = gen_subsi3_carry;
4637 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4638 ix86_gen_andsp = gen_andsi3;
4639 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4640 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4641 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4642 ix86_gen_monitor = gen_sse3_monitor_si;
4643 ix86_gen_monitorx = gen_monitorx_si;
4644 ix86_gen_clzero = gen_clzero_si;
4647 #ifdef USE_IX86_CLD
4648 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4649 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4650 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4651 #endif
4653 /* Set the default value for -mfentry. */
4654 if (!opts_set->x_flag_fentry)
4655 opts->x_flag_fentry = TARGET_SEH;
4656 else
4658 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4659 && opts->x_flag_fentry)
4660 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4661 "with -fpic");
4662 else if (TARGET_SEH && !opts->x_flag_fentry)
4663 sorry ("-mno-fentry isn%'t compatible with SEH");
4666 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4667 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4669 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4670 opts->x_target_flags |= MASK_VZEROUPPER;
4671 if (!(opts_set->x_target_flags & MASK_STV))
4672 opts->x_target_flags |= MASK_STV;
4673 /* Disable STV if -mpreferred-stack-boundary={2,3} or
4674 -mincoming-stack-boundary={2,3} or -mstackrealign is given - the
4675 required stack realignment is an extra cost the pass doesn't take
4676 into account, and the pass can't realign the stack. */
4677 if (ix86_preferred_stack_boundary < 128
4678 || ix86_incoming_stack_boundary < 128
4679 || opts->x_ix86_force_align_arg_pointer)
4680 opts->x_target_flags &= ~MASK_STV;
4681 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4682 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4683 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4684 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4685 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4686 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4687 /* Enable 128-bit AVX instruction generation
4688 for the auto-vectorizer. */
4689 if (TARGET_AVX128_OPTIMAL
4690 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4691 opts->x_target_flags |= MASK_PREFER_AVX128;
4693 if (opts->x_ix86_recip_name)
4695 char *p = ASTRDUP (opts->x_ix86_recip_name);
4696 char *q;
4697 unsigned int mask, i;
4698 bool invert;
4700 while ((q = strtok (p, ",")) != NULL)
4702 p = NULL;
4703 if (*q == '!')
4705 invert = true;
4706 q++;
4708 else
4709 invert = false;
4711 if (!strcmp (q, "default"))
4712 mask = RECIP_MASK_ALL;
4713 else
4715 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4716 if (!strcmp (q, recip_options[i].string))
4718 mask = recip_options[i].mask;
4719 break;
4722 if (i == ARRAY_SIZE (recip_options))
4724 error ("unknown option for -mrecip=%s", q);
4725 invert = false;
4726 mask = RECIP_MASK_NONE;
4730 opts->x_recip_mask_explicit |= mask;
4731 if (invert)
4732 opts->x_recip_mask &= ~mask;
4733 else
4734 opts->x_recip_mask |= mask;
4738 if (TARGET_RECIP_P (opts->x_target_flags))
4739 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4740 else if (opts_set->x_target_flags & MASK_RECIP)
4741 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
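/* For example (assuming the documented option names such as "all" and
   "sqrt" appear in recip_options): -mrecip=all,!sqrt first sets every bit
   of the mask and then clears the sqrt bit via the '!' inversion handled
   above, while a bare -mrecip enables every approximation not explicitly
   listed.  */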
4743 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4744 for 64-bit Bionic. Also default long double to 64-bit for Intel
4745 MCU psABI. */
4746 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4747 && !(opts_set->x_target_flags
4748 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4749 opts->x_target_flags |= (TARGET_64BIT
4750 ? MASK_LONG_DOUBLE_128
4751 : MASK_LONG_DOUBLE_64);
4753 /* Only one of them can be active. */
4754 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4755 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4757 /* Handle stack protector */
4758 if (!opts_set->x_ix86_stack_protector_guard)
4759 opts->x_ix86_stack_protector_guard
4760 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4762 #ifdef TARGET_THREAD_SSP_OFFSET
4763 ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4764 #endif
4766 if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4768 char *endp;
4769 const char *str = ix86_stack_protector_guard_offset_str;
4771 errno = 0;
4772 int64_t offset;
4774 #if defined(INT64_T_IS_LONG)
4775 offset = strtol (str, &endp, 0);
4776 #else
4777 offset = strtoll (str, &endp, 0);
4778 #endif
4780 if (!*str || *endp || errno)
4781 error ("%qs is not a valid number "
4782 "in -mstack-protector-guard-offset=", str);
4784 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4785 HOST_WIDE_INT_C (0x7fffffff)))
4786 error ("%qs is not a valid offset "
4787 "in -mstack-protector-guard-offset=", str);
4789 ix86_stack_protector_guard_offset = offset;
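/* For example, -mstack-protector-guard-offset=0x28 parses to 40 here and,
   combined with the guard segment register chosen below, yields a canary
   access such as %fs:0x28.  0x28 is only an illustrative value matching
   common glibc TLS layouts; the range check above merely limits the
   offset to a signed 32-bit value.  */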
4792 ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4794 /* The kernel uses a different segment register for performance
4795 reasons; a system call would not have to trash the userspace
4796 segment register, which would be expensive. */
4797 if (ix86_cmodel == CM_KERNEL)
4798 ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4800 if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4802 const char *str = ix86_stack_protector_guard_reg_str;
4803 addr_space_t seg = ADDR_SPACE_GENERIC;
4805 /* Discard optional register prefix. */
4806 if (str[0] == '%')
4807 str++;
4809 if (strlen (str) == 2 && str[1] == 's')
4811 if (str[0] == 'f')
4812 seg = ADDR_SPACE_SEG_FS;
4813 else if (str[0] == 'g')
4814 seg = ADDR_SPACE_SEG_GS;
4817 if (seg == ADDR_SPACE_GENERIC)
4818 error ("%qs is not a valid base register "
4819 "in -mstack-protector-guard-reg=",
4820 ix86_stack_protector_guard_reg_str);
4822 ix86_stack_protector_guard_reg = seg;
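/* For example, -mstack-protector-guard-reg=gs (a leading '%' is also
   accepted) selects ADDR_SPACE_SEG_GS here; anything other than "fs" or
   "gs" is rejected by the error above.  */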
4825 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4826 if (opts->x_ix86_tune_memcpy_strategy)
4828 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4829 ix86_parse_stringop_strategy_string (str, false);
4830 free (str);
4833 if (opts->x_ix86_tune_memset_strategy)
4835 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4836 ix86_parse_stringop_strategy_string (str, true);
4837 free (str);
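/* A hedged example of the expected syntax (see the GCC manual for the
   authoritative form): -mmemcpy-strategy=rep_byte:16:noalign,libcall:-1:noalign
   would request rep-prefixed byte copies up to 16 bytes and a library call
   for anything larger; ix86_parse_stringop_strategy_string decodes such
   alg:max_size:dest_align triplets.  */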
4840 /* Save the initial options in case the user does function specific
4841 options. */
4842 if (main_args_p)
4843 target_option_default_node = target_option_current_node
4844 = build_target_option_node (opts);
4846 /* Do not support control flow instrumentation if CET is not enabled. */
4847 if (opts->x_flag_cf_protection != CF_NONE)
4849 if (!(TARGET_IBT_P (opts->x_ix86_isa_flags2)
4850 || TARGET_SHSTK_P (opts->x_ix86_isa_flags2)))
4852 if (flag_cf_protection == CF_FULL)
4854 error ("%<-fcf-protection=full%> requires CET support "
4855 "on this target. Use -mcet or one of -mibt, "
4856 "-mshstk options to enable CET");
4858 else if (flag_cf_protection == CF_BRANCH)
4860 error ("%<-fcf-protection=branch%> requires CET support "
4861 "on this target. Use -mcet or one of -mibt, "
4862 "-mshstk options to enable CET");
4864 else if (flag_cf_protection == CF_RETURN)
4866 error ("%<-fcf-protection=return%> requires CET support "
4867 "on this target. Use -mcet or one of -mibt, "
4868 "-mshstk options to enable CET");
4870 flag_cf_protection = CF_NONE;
4871 return false;
4873 opts->x_flag_cf_protection =
4874 (cf_protection_level) (opts->x_flag_cf_protection | CF_SET);
4877 return true;
4880 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4882 static void
4883 ix86_option_override (void)
4885 ix86_option_override_internal (true, &global_options, &global_options_set);
4888 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4889 static char *
4890 ix86_offload_options (void)
4892 if (TARGET_LP64)
4893 return xstrdup ("-foffload-abi=lp64");
4894 return xstrdup ("-foffload-abi=ilp32");
4897 /* Update register usage after having seen the compiler flags. */
4899 static void
4900 ix86_conditional_register_usage (void)
4902 int i, c_mask;
4904 /* If there are no caller-saved registers, preserve all registers
4905 except fixed_regs and registers used for the function return value,
4906 since aggregate_value_p checks call_used_regs[regno] on the return
4907 value. */
4908 if (cfun && cfun->machine->no_caller_saved_registers)
4909 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4910 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4911 call_used_regs[i] = 0;
4913 /* For 32-bit targets, squash the REX registers. */
4914 if (! TARGET_64BIT)
4916 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4917 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4918 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4919 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4920 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4921 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4924 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4925 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
4927 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4929 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4931 /* Set/reset conditionally defined registers from
4932 CALL_USED_REGISTERS initializer. */
4933 if (call_used_regs[i] > 1)
4934 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4936 /* Calculate registers of CLOBBERED_REGS register set
4937 as call used registers from GENERAL_REGS register set. */
4938 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4939 && call_used_regs[i])
4940 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4943 /* If MMX is disabled, squash the registers. */
4944 if (! TARGET_MMX)
4945 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4946 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4947 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4949 /* If SSE is disabled, squash the registers. */
4950 if (! TARGET_SSE)
4951 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4952 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4953 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4955 /* If the FPU is disabled, squash the registers. */
4956 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4957 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4958 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4959 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4961 /* If AVX512F is disabled, squash the registers. */
4962 if (! TARGET_AVX512F)
4964 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4965 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4967 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4968 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4971 /* If MPX is disabled, squash the registers. */
4972 if (! TARGET_MPX)
4973 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4974 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4977 /* Canonicalize a comparison from one we don't have to one we do have. */
4979 static void
4980 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4981 bool op0_preserve_value)
4983 /* The order of operands in an x87 ficom compare is forced by combine in
4984 the simplify_comparison () function. The FLOAT operator is treated as
4985 RTX_OBJ with precedence over other operators and is always placed
4986 first. Swap the condition and operands to match the ficom instruction. */
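/* For example, a compare of the form (lt (float (mem)) (reg)) is rewritten
   to (gt (reg) (float (mem))) -- swap_condition turns LT into GT -- so that
   it can match the memory-operand form of the ficom pattern.  */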
4987 if (!op0_preserve_value
4988 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
4990 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
4992 /* We are called only for compares that are split to SAHF instruction.
4993 Ensure that we have setcc/jcc insn for the swapped condition. */
4994 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
4996 std::swap (*op0, *op1);
4997 *code = (int) scode;
5002 /* Save the current options */
5004 static void
5005 ix86_function_specific_save (struct cl_target_option *ptr,
5006 struct gcc_options *opts)
5008 ptr->arch = ix86_arch;
5009 ptr->schedule = ix86_schedule;
5010 ptr->prefetch_sse = x86_prefetch_sse;
5011 ptr->tune = ix86_tune;
5012 ptr->branch_cost = ix86_branch_cost;
5013 ptr->tune_defaulted = ix86_tune_defaulted;
5014 ptr->arch_specified = ix86_arch_specified;
5015 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5016 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
5017 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5018 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5019 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5020 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5021 ptr->x_ix86_abi = opts->x_ix86_abi;
5022 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5023 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5024 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5025 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5026 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5027 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5028 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5029 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5030 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5031 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5032 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5033 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5034 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5035 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5036 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5037 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5038 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5039 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5040 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5041 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5043 /* The fields are char but the variables are not; make sure the
5044 values fit in the fields. */
5045 gcc_assert (ptr->arch == ix86_arch);
5046 gcc_assert (ptr->schedule == ix86_schedule);
5047 gcc_assert (ptr->tune == ix86_tune);
5048 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5051 /* Restore the current options */
5053 static void
5054 ix86_function_specific_restore (struct gcc_options *opts,
5055 struct cl_target_option *ptr)
5057 enum processor_type old_tune = ix86_tune;
5058 enum processor_type old_arch = ix86_arch;
5059 unsigned int ix86_arch_mask;
5060 int i;
5062 /* We don't change -fPIC. */
5063 opts->x_flag_pic = flag_pic;
5065 ix86_arch = (enum processor_type) ptr->arch;
5066 ix86_schedule = (enum attr_cpu) ptr->schedule;
5067 ix86_tune = (enum processor_type) ptr->tune;
5068 x86_prefetch_sse = ptr->prefetch_sse;
5069 opts->x_ix86_branch_cost = ptr->branch_cost;
5070 ix86_tune_defaulted = ptr->tune_defaulted;
5071 ix86_arch_specified = ptr->arch_specified;
5072 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5073 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
5074 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5075 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5076 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5077 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5078 opts->x_ix86_abi = ptr->x_ix86_abi;
5079 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5080 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5081 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5082 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5083 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5084 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5085 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5086 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5087 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5088 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5089 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5090 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5091 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5092 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5093 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5094 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5095 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5096 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5097 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5098 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5099 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5100 /* TODO: ix86_cost should be chosen at instruction or function granularity,
5101 so that for cold code we use size_cost even in !optimize_size compilations. */
5102 if (opts->x_optimize_size)
5103 ix86_cost = &ix86_size_cost;
5104 else
5105 ix86_cost = ix86_tune_cost;
5107 /* Recreate the arch feature tests if the arch changed */
5108 if (old_arch != ix86_arch)
5110 ix86_arch_mask = 1u << ix86_arch;
5111 for (i = 0; i < X86_ARCH_LAST; ++i)
5112 ix86_arch_features[i]
5113 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5116 /* Recreate the tune optimization tests */
5117 if (old_tune != ix86_tune)
5118 set_ix86_tune_features (ix86_tune, false);
5121 /* Adjust target options after streaming them in. This is mainly about
5122 reconciling them with global options. */
5124 static void
5125 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5127 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5128 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5129 for PIC, or error out. */
5130 if (flag_pic)
5131 switch (ptr->x_ix86_cmodel)
5133 case CM_SMALL:
5134 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5135 break;
5137 case CM_MEDIUM:
5138 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5139 break;
5141 case CM_LARGE:
5142 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5143 break;
5145 case CM_KERNEL:
5146 error ("code model %s does not support PIC mode", "kernel");
5147 break;
5149 default:
5150 break;
5152 else
5153 switch (ptr->x_ix86_cmodel)
5155 case CM_SMALL_PIC:
5156 ptr->x_ix86_cmodel = CM_SMALL;
5157 break;
5159 case CM_MEDIUM_PIC:
5160 ptr->x_ix86_cmodel = CM_MEDIUM;
5161 break;
5163 case CM_LARGE_PIC:
5164 ptr->x_ix86_cmodel = CM_LARGE;
5165 break;
5167 default:
5168 break;
5172 /* Print the current options */
5174 static void
5175 ix86_function_specific_print (FILE *file, int indent,
5176 struct cl_target_option *ptr)
5178 char *target_string
5179 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5180 ptr->x_target_flags, ptr->x_ix86_target_flags,
5181 NULL, NULL, ptr->x_ix86_fpmath, false);
5183 gcc_assert (ptr->arch < PROCESSOR_max);
5184 fprintf (file, "%*sarch = %d (%s)\n",
5185 indent, "",
5186 ptr->arch, processor_target_table[ptr->arch].name);
5188 gcc_assert (ptr->tune < PROCESSOR_max);
5189 fprintf (file, "%*stune = %d (%s)\n",
5190 indent, "",
5191 ptr->tune, processor_target_table[ptr->tune].name);
5193 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5195 if (target_string)
5197 fprintf (file, "%*s%s\n", indent, "", target_string);
5198 free (target_string);
5203 /* Inner function to process the attribute((target(...))), take an argument and
5204 set the current options from the argument. If we have a list, recursively go
5205 over the list. */
5207 static bool
5208 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5209 struct gcc_options *opts,
5210 struct gcc_options *opts_set,
5211 struct gcc_options *enum_opts_set)
5213 char *next_optstr;
5214 bool ret = true;
5216 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5217 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5218 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5219 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5220 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5222 enum ix86_opt_type
5224 ix86_opt_unknown,
5225 ix86_opt_yes,
5226 ix86_opt_no,
5227 ix86_opt_str,
5228 ix86_opt_enum,
5229 ix86_opt_isa
5232 static const struct
5234 const char *string;
5235 size_t len;
5236 enum ix86_opt_type type;
5237 int opt;
5238 int mask;
5239 } attrs[] = {
5240 /* isa options */
5241 IX86_ATTR_ISA ("sgx", OPT_msgx),
5242 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5243 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5244 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5246 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5247 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5248 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5249 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5250 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5251 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5252 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5253 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5254 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5255 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5256 IX86_ATTR_ISA ("fma", OPT_mfma),
5257 IX86_ATTR_ISA ("xop", OPT_mxop),
5258 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5259 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5260 IX86_ATTR_ISA ("avx", OPT_mavx),
5261 IX86_ATTR_ISA ("sse4", OPT_msse4),
5262 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5263 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5264 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5265 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5266 IX86_ATTR_ISA ("sse3", OPT_msse3),
5267 IX86_ATTR_ISA ("aes", OPT_maes),
5268 IX86_ATTR_ISA ("sha", OPT_msha),
5269 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5270 IX86_ATTR_ISA ("sse2", OPT_msse2),
5271 IX86_ATTR_ISA ("sse", OPT_msse),
5272 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5273 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5274 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5275 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5276 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5277 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5278 IX86_ATTR_ISA ("adx", OPT_madx),
5279 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5280 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5281 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5282 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5283 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5284 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5285 IX86_ATTR_ISA ("abm", OPT_mabm),
5286 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5287 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5288 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5289 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5290 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5291 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5292 IX86_ATTR_ISA ("sahf", OPT_msahf),
5293 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5294 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5295 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5296 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5297 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5298 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5299 IX86_ATTR_ISA ("pku", OPT_mpku),
5300 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5301 IX86_ATTR_ISA ("hle", OPT_mhle),
5302 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5303 IX86_ATTR_ISA ("mpx", OPT_mmpx),
5304 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5305 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5306 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5307 IX86_ATTR_ISA ("ibt", OPT_mibt),
5308 IX86_ATTR_ISA ("shstk", OPT_mshstk),
5310 /* enum options */
5311 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5313 /* string options */
5314 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5315 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5317 /* flag options */
5318 IX86_ATTR_YES ("cld",
5319 OPT_mcld,
5320 MASK_CLD),
5322 IX86_ATTR_NO ("fancy-math-387",
5323 OPT_mfancy_math_387,
5324 MASK_NO_FANCY_MATH_387),
5326 IX86_ATTR_YES ("ieee-fp",
5327 OPT_mieee_fp,
5328 MASK_IEEE_FP),
5330 IX86_ATTR_YES ("inline-all-stringops",
5331 OPT_minline_all_stringops,
5332 MASK_INLINE_ALL_STRINGOPS),
5334 IX86_ATTR_YES ("inline-stringops-dynamically",
5335 OPT_minline_stringops_dynamically,
5336 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5338 IX86_ATTR_NO ("align-stringops",
5339 OPT_mno_align_stringops,
5340 MASK_NO_ALIGN_STRINGOPS),
5342 IX86_ATTR_YES ("recip",
5343 OPT_mrecip,
5344 MASK_RECIP),
5348 /* If this is a list, recurse to get the options. */
5349 if (TREE_CODE (args) == TREE_LIST)
5351 bool ret = true;
5353 for (; args; args = TREE_CHAIN (args))
5354 if (TREE_VALUE (args)
5355 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5356 p_strings, opts, opts_set,
5357 enum_opts_set))
5358 ret = false;
5360 return ret;
5363 else if (TREE_CODE (args) != STRING_CST)
5365 error ("attribute %<target%> argument not a string");
5366 return false;
5369 /* Handle multiple arguments separated by commas. */
5370 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5372 while (next_optstr && *next_optstr != '\0')
5374 char *p = next_optstr;
5375 char *orig_p = p;
5376 char *comma = strchr (next_optstr, ',');
5377 const char *opt_string;
5378 size_t len, opt_len;
5379 int opt;
5380 bool opt_set_p;
5381 char ch;
5382 unsigned i;
5383 enum ix86_opt_type type = ix86_opt_unknown;
5384 int mask = 0;
5386 if (comma)
5388 *comma = '\0';
5389 len = comma - next_optstr;
5390 next_optstr = comma + 1;
5392 else
5394 len = strlen (p);
5395 next_optstr = NULL;
5398 /* Recognize no-xxx. */
5399 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5401 opt_set_p = false;
5402 p += 3;
5403 len -= 3;
5405 else
5406 opt_set_p = true;
5408 /* Find the option. */
5409 ch = *p;
5410 opt = N_OPTS;
5411 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5413 type = attrs[i].type;
5414 opt_len = attrs[i].len;
5415 if (ch == attrs[i].string[0]
5416 && ((type != ix86_opt_str && type != ix86_opt_enum)
5417 ? len == opt_len
5418 : len > opt_len)
5419 && memcmp (p, attrs[i].string, opt_len) == 0)
5421 opt = attrs[i].opt;
5422 mask = attrs[i].mask;
5423 opt_string = attrs[i].string;
5424 break;
5428 /* Process the option. */
5429 if (opt == N_OPTS)
5431 error ("attribute(target(\"%s\")) is unknown", orig_p);
5432 ret = false;
5435 else if (type == ix86_opt_isa)
5437 struct cl_decoded_option decoded;
5439 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5440 ix86_handle_option (opts, opts_set,
5441 &decoded, input_location);
5444 else if (type == ix86_opt_yes || type == ix86_opt_no)
5446 if (type == ix86_opt_no)
5447 opt_set_p = !opt_set_p;
5449 if (opt_set_p)
5450 opts->x_target_flags |= mask;
5451 else
5452 opts->x_target_flags &= ~mask;
5455 else if (type == ix86_opt_str)
5457 if (p_strings[opt])
5459 error ("option(\"%s\") was already specified", opt_string);
5460 ret = false;
5462 else
5463 p_strings[opt] = xstrdup (p + opt_len);
5466 else if (type == ix86_opt_enum)
5468 bool arg_ok;
5469 int value;
5471 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5472 if (arg_ok)
5473 set_option (opts, enum_opts_set, opt, value,
5474 p + opt_len, DK_UNSPECIFIED, input_location,
5475 global_dc);
5476 else
5478 error ("attribute(target(\"%s\")) is unknown", orig_p);
5479 ret = false;
5483 else
5484 gcc_unreachable ();
5487 return ret;
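/* Illustrative sketch (editorial, not part of this file): one way the
   comma-separated attribute string parsed above can look in user code.
   Function and file names are hypothetical; the block is guarded out so it
   does not affect this translation unit.  A "no-" prefix clears an ISA
   flag, while "arch=", "tune=" and "fpmath=" are the string/enum options.  */
#if 0
__attribute__((target ("avx2,no-avx512f,arch=haswell,fpmath=sse")))
static double
scale_sum_hypothetical (const double *a, int n)
{
  double s = 0.0;
  for (int i = 0; i < n; i++)
    s += a[i];
  return s;
}
#endif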
5490 /* Release allocated strings. */
5491 static void
5492 release_options_strings (char **option_strings)
5494 /* Free up memory allocated to hold the strings */
5495 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5496 free (option_strings[i]);
5499 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5501 tree
5502 ix86_valid_target_attribute_tree (tree args,
5503 struct gcc_options *opts,
5504 struct gcc_options *opts_set)
5506 const char *orig_arch_string = opts->x_ix86_arch_string;
5507 const char *orig_tune_string = opts->x_ix86_tune_string;
5508 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5509 int orig_tune_defaulted = ix86_tune_defaulted;
5510 int orig_arch_specified = ix86_arch_specified;
5511 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5512 tree t = NULL_TREE;
5513 struct cl_target_option *def
5514 = TREE_TARGET_OPTION (target_option_default_node);
5515 struct gcc_options enum_opts_set;
5517 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5519 /* Process each of the options on the chain. */
5520 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5521 opts_set, &enum_opts_set))
5522 return error_mark_node;
5524 /* If the changed options are different from the default, rerun
5525 ix86_option_override_internal, and then save the options away.
5526 The string options are attribute options, and will be undone
5527 when we copy the save structure. */
5528 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5529 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5530 || opts->x_target_flags != def->x_target_flags
5531 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5532 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5533 || enum_opts_set.x_ix86_fpmath)
5535 /* If we are using the default tune= or arch=, undo the string assigned,
5536 and use the default. */
5537 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5539 opts->x_ix86_arch_string
5540 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5542 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5543 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5544 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5545 | OPTION_MASK_ABI_64
5546 | OPTION_MASK_ABI_X32
5547 | OPTION_MASK_CODE16);
5548 opts->x_ix86_isa_flags2 = 0;
5550 else if (!orig_arch_specified)
5551 opts->x_ix86_arch_string = NULL;
5553 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5554 opts->x_ix86_tune_string
5555 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5556 else if (orig_tune_defaulted)
5557 opts->x_ix86_tune_string = NULL;
5559 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5560 if (enum_opts_set.x_ix86_fpmath)
5561 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5563 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5564 bool r = ix86_option_override_internal (false, opts, opts_set);
5565 if (!r)
5567 release_options_strings (option_strings);
5568 return error_mark_node;
5571 /* Add any builtin functions with the new isa if any. */
5572 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5574 /* Save the current options unless we are validating options for
5575 #pragma. */
5576 t = build_target_option_node (opts);
5578 opts->x_ix86_arch_string = orig_arch_string;
5579 opts->x_ix86_tune_string = orig_tune_string;
5580 opts_set->x_ix86_fpmath = orig_fpmath_set;
5582 release_options_strings (option_strings);
5585 return t;
5588 /* Hook to validate attribute((target("string"))). */
5590 static bool
5591 ix86_valid_target_attribute_p (tree fndecl,
5592 tree ARG_UNUSED (name),
5593 tree args,
5594 int ARG_UNUSED (flags))
5596 struct gcc_options func_options;
5597 tree new_target, new_optimize;
5598 bool ret = true;
5600 /* attribute((target("default"))) does nothing, beyond
5601 affecting multi-versioning. */
5602 if (TREE_VALUE (args)
5603 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5604 && TREE_CHAIN (args) == NULL_TREE
5605 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5606 return true;
5608 tree old_optimize = build_optimization_node (&global_options);
5610 /* Get the optimization options of the current function. */
5611 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5613 if (!func_optimize)
5614 func_optimize = old_optimize;
5616 /* Init func_options. */
5617 memset (&func_options, 0, sizeof (func_options));
5618 init_options_struct (&func_options, NULL);
5619 lang_hooks.init_options_struct (&func_options);
5621 cl_optimization_restore (&func_options,
5622 TREE_OPTIMIZATION (func_optimize));
5624 /* Initialize func_options to the default before its target options can
5625 be set. */
5626 cl_target_option_restore (&func_options,
5627 TREE_TARGET_OPTION (target_option_default_node));
5629 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5630 &global_options_set);
5632 new_optimize = build_optimization_node (&func_options);
5634 if (new_target == error_mark_node)
5635 ret = false;
5637 else if (fndecl && new_target)
5639 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5641 if (old_optimize != new_optimize)
5642 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5645 finalize_options_struct (&func_options);
5647 return ret;
5651 /* Hook to determine if one function can safely inline another. */
5653 static bool
5654 ix86_can_inline_p (tree caller, tree callee)
5656 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5657 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5658 if (!callee_tree)
5659 callee_tree = target_option_default_node;
5660 if (!caller_tree)
5661 caller_tree = target_option_default_node;
5662 if (callee_tree == caller_tree)
5663 return true;
5665 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5666 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5667 bool ret = false;
5669   /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5670      function can inline an SSE2 function, but an SSE2 function can't inline
5671      an SSE4 function.  */
5672 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5673 != callee_opts->x_ix86_isa_flags)
5674 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5675 != callee_opts->x_ix86_isa_flags2))
5676 ret = false;
5678 /* See if we have the same non-isa options. */
5679 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5680 ret = false;
5682 /* See if arch, tune, etc. are the same. */
5683 else if (caller_opts->arch != callee_opts->arch)
5684 ret = false;
5686 else if (caller_opts->tune != callee_opts->tune)
5687 ret = false;
5689 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5690	   /* If the callee doesn't use FP expressions, differences in
5691	      ix86_fpmath can be ignored.  We are called from FEs
5692 for multi-versioning call optimization, so beware of
5693 ipa_fn_summaries not available. */
5694 && (! ipa_fn_summaries
5695 || ipa_fn_summaries->get
5696 (cgraph_node::get (callee))->fp_expressions))
5697 ret = false;
5699 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5700 ret = false;
5702 else
5703 ret = true;
5705 return ret;
5709 /* Remember the last target of ix86_set_current_function. */
5710 static GTY(()) tree ix86_previous_fndecl;
5712 /* Set targets globals to the default (or current #pragma GCC target
5713 if active). Invalidate ix86_previous_fndecl cache. */
5715 void
5716 ix86_reset_previous_fndecl (void)
5718 tree new_tree = target_option_current_node;
5719 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5720 if (TREE_TARGET_GLOBALS (new_tree))
5721 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5722 else if (new_tree == target_option_default_node)
5723 restore_target_globals (&default_target_globals);
5724 else
5725 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5726 ix86_previous_fndecl = NULL_TREE;
5729 /* Set the func_type field from the function FNDECL. */
5731 static void
5732 ix86_set_func_type (tree fndecl)
5734 if (cfun->machine->func_type == TYPE_UNKNOWN)
5736 if (lookup_attribute ("interrupt",
5737 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5739 if (ix86_function_naked (fndecl))
5740 error_at (DECL_SOURCE_LOCATION (fndecl),
5741 "interrupt and naked attributes are not compatible");
5743 int nargs = 0;
5744 for (tree arg = DECL_ARGUMENTS (fndecl);
5745 arg;
5746 arg = TREE_CHAIN (arg))
5747 nargs++;
5748 cfun->machine->no_caller_saved_registers = true;
5749 cfun->machine->func_type
5750 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5752 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5754 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
5755 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5756 	sorry ("only DWARF debug format is supported for interrupt "
5757 	       "service routine");
5759 else
5761 cfun->machine->func_type = TYPE_NORMAL;
5762 if (lookup_attribute ("no_caller_saved_registers",
5763 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5764 cfun->machine->no_caller_saved_registers = true;
5769 /* Establish appropriate back-end context for processing the function
5770 FNDECL. The argument might be NULL to indicate processing at top
5771 level, outside of any function scope. */
5772 static void
5773 ix86_set_current_function (tree fndecl)
5775 /* Only change the context if the function changes. This hook is called
5776 several times in the course of compiling a function, and we don't want to
5777 slow things down too much or call target_reinit when it isn't safe. */
5778 if (fndecl == ix86_previous_fndecl)
5780 /* There may be 2 function bodies for the same function FNDECL,
5781 one is extern inline and one isn't. Call ix86_set_func_type
5782 to set the func_type field. */
5783 if (fndecl != NULL_TREE)
5784 ix86_set_func_type (fndecl);
5785 return;
5788 tree old_tree;
5789 if (ix86_previous_fndecl == NULL_TREE)
5790 old_tree = target_option_current_node;
5791 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5792 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5793 else
5794 old_tree = target_option_default_node;
5796 if (fndecl == NULL_TREE)
5798 if (old_tree != target_option_current_node)
5799 ix86_reset_previous_fndecl ();
5800 return;
5803 ix86_set_func_type (fndecl);
5805 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5806 if (new_tree == NULL_TREE)
5807 new_tree = target_option_default_node;
5809 if (old_tree != new_tree)
5811 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5812 if (TREE_TARGET_GLOBALS (new_tree))
5813 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5814 else if (new_tree == target_option_default_node)
5815 restore_target_globals (&default_target_globals);
5816 else
5817 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5819 ix86_previous_fndecl = fndecl;
5821 static bool prev_no_caller_saved_registers;
5823   /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5824      Avoid expensive re-initialization of init_regs each time we switch
5825      function context.  */
5826 if (TARGET_64BIT
5827 && (call_used_regs[SI_REG]
5828 == (cfun->machine->call_abi == MS_ABI)))
5829 reinit_regs ();
5830 /* Need to re-initialize init_regs if caller-saved registers are
5831 changed. */
5832 else if (prev_no_caller_saved_registers
5833 != cfun->machine->no_caller_saved_registers)
5834 reinit_regs ();
5836 if (cfun->machine->func_type != TYPE_NORMAL
5837 || cfun->machine->no_caller_saved_registers)
5839       /* Don't allow MPX, SSE, MMX or x87 instructions since they
5840	  may change the processor state.  */
5841 const char *isa;
5842 if (TARGET_MPX)
5843 isa = "MPX";
5844 else if (TARGET_SSE)
5845 isa = "SSE";
5846 else if (TARGET_MMX)
5847 isa = "MMX/3Dnow";
5848 else if (TARGET_80387)
5849 isa = "80387";
5850 else
5851 isa = NULL;
5852 if (isa != NULL)
5854 if (cfun->machine->func_type != TYPE_NORMAL)
5855 sorry ("%s instructions aren't allowed in %s service routine",
5856 isa, (cfun->machine->func_type == TYPE_EXCEPTION
5857 ? "exception" : "interrupt"));
5858 else
5859 sorry ("%s instructions aren't allowed in function with "
5860 "no_caller_saved_registers attribute", isa);
5861 /* Don't issue the same error twice. */
5862 cfun->machine->func_type = TYPE_NORMAL;
5863 cfun->machine->no_caller_saved_registers = false;
5867 prev_no_caller_saved_registers
5868 = cfun->machine->no_caller_saved_registers;
5872 /* Return true if this goes in large data/bss. */
5874 static bool
5875 ix86_in_large_data_p (tree exp)
5877 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5878 return false;
5880 if (exp == NULL_TREE)
5881 return false;
5883 /* Functions are never large data. */
5884 if (TREE_CODE (exp) == FUNCTION_DECL)
5885 return false;
5887 /* Automatic variables are never large data. */
5888 if (VAR_P (exp) && !is_global_var (exp))
5889 return false;
5891 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
5893 const char *section = DECL_SECTION_NAME (exp);
5894 if (strcmp (section, ".ldata") == 0
5895 || strcmp (section, ".lbss") == 0)
5896 return true;
5897 return false;
5899 else
5901 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5903 /* If this is an incomplete type with size 0, then we can't put it
5904 in data because it might be too big when completed. Also,
5905	 int_size_in_bytes returns -1 if the size can vary or is larger than
5906	 an integer, in which case it is also safer to assume that it goes in
5907	 large data.  */
5908 if (size <= 0 || size > ix86_section_threshold)
5909 return true;
5912 return false;
5915 /* i386-specific section flag to mark large sections. */
5916 #define SECTION_LARGE SECTION_MACH_DEP
5918 /* Switch to the appropriate section for output of DECL.
5919 DECL is either a `VAR_DECL' node or a constant of some sort.
5920 RELOC indicates whether forming the initial value of DECL requires
5921 link-time relocations. */
5923 ATTRIBUTE_UNUSED static section *
5924 x86_64_elf_select_section (tree decl, int reloc,
5925 unsigned HOST_WIDE_INT align)
5927 if (ix86_in_large_data_p (decl))
5929 const char *sname = NULL;
5930 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
5931 switch (categorize_decl_for_section (decl, reloc))
5933 case SECCAT_DATA:
5934 sname = ".ldata";
5935 break;
5936 case SECCAT_DATA_REL:
5937 sname = ".ldata.rel";
5938 break;
5939 case SECCAT_DATA_REL_LOCAL:
5940 sname = ".ldata.rel.local";
5941 break;
5942 case SECCAT_DATA_REL_RO:
5943 sname = ".ldata.rel.ro";
5944 break;
5945 case SECCAT_DATA_REL_RO_LOCAL:
5946 sname = ".ldata.rel.ro.local";
5947 break;
5948 case SECCAT_BSS:
5949 sname = ".lbss";
5950 flags |= SECTION_BSS;
5951 break;
5952 case SECCAT_RODATA:
5953 case SECCAT_RODATA_MERGE_STR:
5954 case SECCAT_RODATA_MERGE_STR_INIT:
5955 case SECCAT_RODATA_MERGE_CONST:
5956 sname = ".lrodata";
5957 flags &= ~SECTION_WRITE;
5958 break;
5959 case SECCAT_SRODATA:
5960 case SECCAT_SDATA:
5961 case SECCAT_SBSS:
5962 gcc_unreachable ();
5963 case SECCAT_TEXT:
5964 case SECCAT_TDATA:
5965 case SECCAT_TBSS:
5966	/* We don't split these for the medium model.  Place them into
5967	   default sections and hope for the best.  */
5968 break;
5970 if (sname)
5972 /* We might get called with string constants, but get_named_section
5973 doesn't like them as they are not DECLs. Also, we need to set
5974 flags in that case. */
5975 if (!DECL_P (decl))
5976 return get_section (sname, flags, NULL);
5977 return get_named_section (decl, sname, reloc);
5980 return default_elf_select_section (decl, reloc, align);
5983 /* Select a set of attributes for section NAME based on the properties
5984 of DECL and whether or not RELOC indicates that DECL's initializer
5985 might contain runtime relocations. */
5987 static unsigned int ATTRIBUTE_UNUSED
5988 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5990 unsigned int flags = default_section_type_flags (decl, name, reloc);
5992 if (ix86_in_large_data_p (decl))
5993 flags |= SECTION_LARGE;
5995 if (decl == NULL_TREE
5996 && (strcmp (name, ".ldata.rel.ro") == 0
5997 || strcmp (name, ".ldata.rel.ro.local") == 0))
5998 flags |= SECTION_RELRO;
6000 if (strcmp (name, ".lbss") == 0
6001 || strncmp (name, ".lbss.", 5) == 0
6002 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6003 flags |= SECTION_BSS;
6005 return flags;
6008 /* Build up a unique section name, expressed as a
6009 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6010 RELOC indicates whether the initial value of EXP requires
6011 link-time relocations. */
6013 static void ATTRIBUTE_UNUSED
6014 x86_64_elf_unique_section (tree decl, int reloc)
6016 if (ix86_in_large_data_p (decl))
6018 const char *prefix = NULL;
6019 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6020 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6022 switch (categorize_decl_for_section (decl, reloc))
6024 case SECCAT_DATA:
6025 case SECCAT_DATA_REL:
6026 case SECCAT_DATA_REL_LOCAL:
6027 case SECCAT_DATA_REL_RO:
6028 case SECCAT_DATA_REL_RO_LOCAL:
6029 prefix = one_only ? ".ld" : ".ldata";
6030 break;
6031 case SECCAT_BSS:
6032 prefix = one_only ? ".lb" : ".lbss";
6033 break;
6034 case SECCAT_RODATA:
6035 case SECCAT_RODATA_MERGE_STR:
6036 case SECCAT_RODATA_MERGE_STR_INIT:
6037 case SECCAT_RODATA_MERGE_CONST:
6038 prefix = one_only ? ".lr" : ".lrodata";
6039 break;
6040 case SECCAT_SRODATA:
6041 case SECCAT_SDATA:
6042 case SECCAT_SBSS:
6043 gcc_unreachable ();
6044 case SECCAT_TEXT:
6045 case SECCAT_TDATA:
6046 case SECCAT_TBSS:
6047	/* We don't split these for the medium model.  Place them into
6048	   default sections and hope for the best.  */
6049 break;
6051 if (prefix)
6053 const char *name, *linkonce;
6054 char *string;
6056 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6057 name = targetm.strip_name_encoding (name);
6059 /* If we're using one_only, then there needs to be a .gnu.linkonce
6060 prefix to the section name. */
6061 linkonce = one_only ? ".gnu.linkonce" : "";
6063 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6065 set_decl_section_name (decl, string);
6066 return;
6069 default_unique_section (decl, reloc);
6072 #ifdef COMMON_ASM_OP
6074 #ifndef LARGECOMM_SECTION_ASM_OP
6075 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
6076 #endif
6078 /* This says how to output assembler code to declare an
6079 uninitialized external linkage data object.
6081    For the x86-64 medium model we need to use the LARGECOMM_SECTION_ASM_OP
6082    directive for large objects.  */
6083 void
6084 x86_elf_aligned_decl_common (FILE *file, tree decl,
6085 const char *name, unsigned HOST_WIDE_INT size,
6086 int align)
6088 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6089 && size > (unsigned int)ix86_section_threshold)
6091 switch_to_section (get_named_section (decl, ".lbss", 0));
6092 fputs (LARGECOMM_SECTION_ASM_OP, file);
6094 else
6095 fputs (COMMON_ASM_OP, file);
6096 assemble_name (file, name);
6097 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6098 size, align / BITS_PER_UNIT);
6100 #endif
6102 /* Utility function for targets to use in implementing
6103 ASM_OUTPUT_ALIGNED_BSS. */
6105 void
6106 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6107 unsigned HOST_WIDE_INT size, int align)
6109 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6110 && size > (unsigned int)ix86_section_threshold)
6111 switch_to_section (get_named_section (decl, ".lbss", 0));
6112 else
6113 switch_to_section (bss_section);
6114 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6115 #ifdef ASM_DECLARE_OBJECT_NAME
6116 last_assemble_variable_decl = decl;
6117 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6118 #else
6119 /* Standard thing is just output label for the object. */
6120 ASM_OUTPUT_LABEL (file, name);
6121 #endif /* ASM_DECLARE_OBJECT_NAME */
6122 ASM_OUTPUT_SKIP (file, size ? size : 1);
6125 /* Decide whether we must probe the stack before any space allocation
6126 on this target. It's essentially TARGET_STACK_PROBE except when
6127 -fstack-check causes the stack to be already probed differently. */
6129 bool
6130 ix86_target_stack_probe (void)
6132 /* Do not probe the stack twice if static stack checking is enabled. */
6133 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6134 return false;
6136 return TARGET_STACK_PROBE;
6139 /* Decide whether we can make a sibling call to a function. DECL is the
6140 declaration of the function being targeted by the call and EXP is the
6141 CALL_EXPR representing the call. */
6143 static bool
6144 ix86_function_ok_for_sibcall (tree decl, tree exp)
6146 tree type, decl_or_type;
6147 rtx a, b;
6148 bool bind_global = decl && !targetm.binds_local_p (decl);
6150 if (ix86_function_naked (current_function_decl))
6151 return false;
6153 /* Sibling call isn't OK if there are no caller-saved registers
6154 since all registers must be preserved before return. */
6155 if (cfun->machine->no_caller_saved_registers)
6156 return false;
6158 /* If we are generating position-independent code, we cannot sibcall
6159 optimize direct calls to global functions, as the PLT requires
6160 %ebx be live. (Darwin does not have a PLT.) */
6161 if (!TARGET_MACHO
6162 && !TARGET_64BIT
6163 && flag_pic
6164 && flag_plt
6165 && bind_global)
6166 return false;
6168 /* If we need to align the outgoing stack, then sibcalling would
6169 unalign the stack, which may break the called function. */
6170 if (ix86_minimum_incoming_stack_boundary (true)
6171 < PREFERRED_STACK_BOUNDARY)
6172 return false;
6174 if (decl)
6176 decl_or_type = decl;
6177 type = TREE_TYPE (decl);
6179 else
6181 /* We're looking at the CALL_EXPR, we need the type of the function. */
6182 type = CALL_EXPR_FN (exp); /* pointer expression */
6183 type = TREE_TYPE (type); /* pointer type */
6184 type = TREE_TYPE (type); /* function type */
6185 decl_or_type = type;
6188   /* Check that the return value locations are the same.  For example,
6189 if we are returning floats on the 80387 register stack, we cannot
6190 make a sibcall from a function that doesn't return a float to a
6191 function that does or, conversely, from a function that does return
6192 a float to a function that doesn't; the necessary stack adjustment
6193 would not be executed. This is also the place we notice
6194 differences in the return value ABI. Note that it is ok for one
6195 of the functions to have void return type as long as the return
6196 value of the other is passed in a register. */
6197 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6198 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6199 cfun->decl, false);
6200 if (STACK_REG_P (a) || STACK_REG_P (b))
6202 if (!rtx_equal_p (a, b))
6203 return false;
6205 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6207 else if (!rtx_equal_p (a, b))
6208 return false;
6210 if (TARGET_64BIT)
6212 /* The SYSV ABI has more call-clobbered registers;
6213 disallow sibcalls from MS to SYSV. */
6214 if (cfun->machine->call_abi == MS_ABI
6215 && ix86_function_type_abi (type) == SYSV_ABI)
6216 return false;
6218 else
6220 /* If this call is indirect, we'll need to be able to use a
6221 call-clobbered register for the address of the target function.
6222 Make sure that all such registers are not used for passing
6223	 parameters.  Note that DLLIMPORT functions and calls to global
6224	 functions via the GOT slot are indirect.  */
6225 if (!decl
6226 || (bind_global && flag_pic && !flag_plt)
6227 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6229 /* Check if regparm >= 3 since arg_reg_available is set to
6230 false if regparm == 0. If regparm is 1 or 2, there is
6231 always a call-clobbered register available.
6233 ??? The symbol indirect call doesn't need a call-clobbered
6234 register. But we don't know if this is a symbol indirect
6235 call or not here. */
6236 if (ix86_function_regparm (type, NULL) >= 3
6237 && !cfun->machine->arg_reg_available)
6238 return false;
6242 /* Otherwise okay. That also includes certain types of indirect calls. */
6243 return true;
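/* Illustrative sketch (editorial, hypothetical names, guarded out of the
   build): the first tail call below is a normal sibcall candidate, while the
   second is rejected by the no_caller_saved_registers check above because
   every register must be preserved before return.  */
#if 0
extern int helper_hypothetical (int);

int
tail_call_ok_hypothetical (int x)
{
  return helper_hypothetical (x + 1);	/* candidate for sibcall (jmp) */
}

__attribute__((no_caller_saved_registers)) int
tail_call_rejected_hypothetical (int x)
{
  return helper_hypothetical (x + 1);	/* emitted as a normal call */
}
#endif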
6246 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6247 and "sseregparm" calling convention attributes;
6248 arguments as in struct attribute_spec.handler. */
6250 static tree
6251 ix86_handle_cconv_attribute (tree *node, tree name,
6252 tree args,
6253 int,
6254 bool *no_add_attrs)
6256 if (TREE_CODE (*node) != FUNCTION_TYPE
6257 && TREE_CODE (*node) != METHOD_TYPE
6258 && TREE_CODE (*node) != FIELD_DECL
6259 && TREE_CODE (*node) != TYPE_DECL)
6261 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6262 name);
6263 *no_add_attrs = true;
6264 return NULL_TREE;
6267 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6268 if (is_attribute_p ("regparm", name))
6270 tree cst;
6272 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6274 error ("fastcall and regparm attributes are not compatible");
6277 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6279 	  error ("regparm and thiscall attributes are not compatible");
6282 cst = TREE_VALUE (args);
6283 if (TREE_CODE (cst) != INTEGER_CST)
6285 warning (OPT_Wattributes,
6286 "%qE attribute requires an integer constant argument",
6287 name);
6288 *no_add_attrs = true;
6290 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6292 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6293 name, REGPARM_MAX);
6294 *no_add_attrs = true;
6297 return NULL_TREE;
6300 if (TARGET_64BIT)
6302 /* Do not warn when emulating the MS ABI. */
6303 if ((TREE_CODE (*node) != FUNCTION_TYPE
6304 && TREE_CODE (*node) != METHOD_TYPE)
6305 || ix86_function_type_abi (*node) != MS_ABI)
6306 warning (OPT_Wattributes, "%qE attribute ignored",
6307 name);
6308 *no_add_attrs = true;
6309 return NULL_TREE;
6312 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6313 if (is_attribute_p ("fastcall", name))
6315 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6317 error ("fastcall and cdecl attributes are not compatible");
6319 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6321 error ("fastcall and stdcall attributes are not compatible");
6323 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6325 error ("fastcall and regparm attributes are not compatible");
6327 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6329 error ("fastcall and thiscall attributes are not compatible");
6333 /* Can combine stdcall with fastcall (redundant), regparm and
6334 sseregparm. */
6335 else if (is_attribute_p ("stdcall", name))
6337 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6339 error ("stdcall and cdecl attributes are not compatible");
6341 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6343 error ("stdcall and fastcall attributes are not compatible");
6345 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6347 error ("stdcall and thiscall attributes are not compatible");
6351 /* Can combine cdecl with regparm and sseregparm. */
6352 else if (is_attribute_p ("cdecl", name))
6354 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6356 error ("stdcall and cdecl attributes are not compatible");
6358 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6360 error ("fastcall and cdecl attributes are not compatible");
6362 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6364 error ("cdecl and thiscall attributes are not compatible");
6367 else if (is_attribute_p ("thiscall", name))
6369 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6370 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6371 name);
6372 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6374 error ("stdcall and thiscall attributes are not compatible");
6376 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6378 error ("fastcall and thiscall attributes are not compatible");
6380 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6382 error ("cdecl and thiscall attributes are not compatible");
6386 /* Can combine sseregparm with all attributes. */
6388 return NULL_TREE;
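/* Illustrative sketch (editorial, 32-bit only, hypothetical names, guarded
   out of the build): regparm combines with cdecl and stdcall, while the
   handler above reports an error for fastcall + regparm.  */
#if 0
int __attribute__((regparm (2), stdcall)) ok_combination_hypothetical (int a, int b);
int __attribute__((fastcall, regparm (2))) rejected_combination_hypothetical (int a, int b);
#endif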
6391 /* The transactional memory builtins are implicitly regparm or fastcall
6392 depending on the ABI. Override the generic do-nothing attribute that
6393 these builtins were declared with, and replace it with one of the two
6394 attributes that we expect elsewhere. */
6396 static tree
6397 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6398 int flags, bool *no_add_attrs)
6400 tree alt;
6402 /* In no case do we want to add the placeholder attribute. */
6403 *no_add_attrs = true;
6405 /* The 64-bit ABI is unchanged for transactional memory. */
6406 if (TARGET_64BIT)
6407 return NULL_TREE;
6409 /* ??? Is there a better way to validate 32-bit windows? We have
6410 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6411 if (CHECK_STACK_LIMIT > 0)
6412 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6413 else
6415 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6416 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6418 decl_attributes (node, alt, flags);
6420 return NULL_TREE;
6423 /* This function determines from TYPE the calling-convention. */
6425 unsigned int
6426 ix86_get_callcvt (const_tree type)
6428 unsigned int ret = 0;
6429 bool is_stdarg;
6430 tree attrs;
6432 if (TARGET_64BIT)
6433 return IX86_CALLCVT_CDECL;
6435 attrs = TYPE_ATTRIBUTES (type);
6436 if (attrs != NULL_TREE)
6438 if (lookup_attribute ("cdecl", attrs))
6439 ret |= IX86_CALLCVT_CDECL;
6440 else if (lookup_attribute ("stdcall", attrs))
6441 ret |= IX86_CALLCVT_STDCALL;
6442 else if (lookup_attribute ("fastcall", attrs))
6443 ret |= IX86_CALLCVT_FASTCALL;
6444 else if (lookup_attribute ("thiscall", attrs))
6445 ret |= IX86_CALLCVT_THISCALL;
6447       /* Regparm isn't allowed for thiscall and fastcall.  */
6448 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6450 if (lookup_attribute ("regparm", attrs))
6451 ret |= IX86_CALLCVT_REGPARM;
6452 if (lookup_attribute ("sseregparm", attrs))
6453 ret |= IX86_CALLCVT_SSEREGPARM;
6456 if (IX86_BASE_CALLCVT(ret) != 0)
6457 return ret;
6460 is_stdarg = stdarg_p (type);
6461 if (TARGET_RTD && !is_stdarg)
6462 return IX86_CALLCVT_STDCALL | ret;
6464 if (ret != 0
6465 || is_stdarg
6466 || TREE_CODE (type) != METHOD_TYPE
6467 || ix86_function_type_abi (type) != MS_ABI)
6468 return IX86_CALLCVT_CDECL | ret;
6470 return IX86_CALLCVT_THISCALL;
6473 /* Return 0 if the attributes for two types are incompatible, 1 if they
6474 are compatible, and 2 if they are nearly compatible (which causes a
6475 warning to be generated). */
6477 static int
6478 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6480 unsigned int ccvt1, ccvt2;
6482 if (TREE_CODE (type1) != FUNCTION_TYPE
6483 && TREE_CODE (type1) != METHOD_TYPE)
6484 return 1;
6486 ccvt1 = ix86_get_callcvt (type1);
6487 ccvt2 = ix86_get_callcvt (type2);
6488 if (ccvt1 != ccvt2)
6489 return 0;
6490 if (ix86_function_regparm (type1, NULL)
6491 != ix86_function_regparm (type2, NULL))
6492 return 0;
6494 return 1;
6497 /* Return the regparm value for a function with the indicated TYPE and DECL.
6498 DECL may be NULL when calling function indirectly
6499 or considering a libcall. */
6501 static int
6502 ix86_function_regparm (const_tree type, const_tree decl)
6504 tree attr;
6505 int regparm;
6506 unsigned int ccvt;
6508 if (TARGET_64BIT)
6509 return (ix86_function_type_abi (type) == SYSV_ABI
6510 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6511 ccvt = ix86_get_callcvt (type);
6512 regparm = ix86_regparm;
6514 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6516 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6517 if (attr)
6519 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6520 return regparm;
6523 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6524 return 2;
6525 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6526 return 1;
6528 /* Use register calling convention for local functions when possible. */
6529 if (decl
6530 && TREE_CODE (decl) == FUNCTION_DECL)
6532 cgraph_node *target = cgraph_node::get (decl);
6533 if (target)
6534 target = target->function_symbol ();
6536      /* Caller and callee must agree on the calling convention, so
6537         checking just the optimize attribute here would mean that with
6538         __attribute__((optimize (...))) the caller could use the regparm
6539         convention and the callee not, or vice versa.  Instead look at
6540         whether the callee itself is optimized.  */
6541 if (target && opt_for_fn (target->decl, optimize)
6542 && !(profile_flag && !flag_fentry))
6544 cgraph_local_info *i = &target->local;
6545 if (i && i->local && i->can_change_signature)
6547 int local_regparm, globals = 0, regno;
6549 /* Make sure no regparm register is taken by a
6550 fixed register variable. */
6551 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6552 local_regparm++)
6553 if (fixed_regs[local_regparm])
6554 break;
6556 /* We don't want to use regparm(3) for nested functions as
6557 these use a static chain pointer in the third argument. */
6558 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6559 local_regparm = 2;
6561 /* Save a register for the split stack. */
6562 if (flag_split_stack)
6564 if (local_regparm == 3)
6565 local_regparm = 2;
6566 else if (local_regparm == 2
6567 && DECL_STATIC_CHAIN (target->decl))
6568 local_regparm = 1;
6571	      /* Each fixed register usage increases register pressure,
6572		 so fewer registers should be used for argument passing.
6573		 This functionality can be overridden by an explicit
6574		 regparm value.  */
6575 for (regno = AX_REG; regno <= DI_REG; regno++)
6576 if (fixed_regs[regno])
6577 globals++;
6579 local_regparm
6580 = globals < local_regparm ? local_regparm - globals : 0;
6582 if (local_regparm > regparm)
6583 regparm = local_regparm;
6588 return regparm;
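/* Illustrative sketch (editorial, hypothetical name, guarded out of the
   build): an explicit regparm(3) asks for the first three integer arguments
   in registers; the loop above may choose a similar value automatically for
   local, optimized functions.  */
#if 0
__attribute__((regparm (3))) static int
add3_hypothetical (int a, int b, int c)
{
  return a + b + c;
}
#endif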
6591 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6592 DFmode (2) arguments in SSE registers for a function with the
6593 indicated TYPE and DECL. DECL may be NULL when calling function
6594 indirectly or considering a libcall. Return -1 if any FP parameter
6595    should be rejected by error.  This is used in situations where we imply the
6596    SSE calling convention but the function is called from another function with
6597    SSE disabled.  Otherwise return 0.  */
6599 static int
6600 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6602 gcc_assert (!TARGET_64BIT);
6604 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6605 by the sseregparm attribute. */
6606 if (TARGET_SSEREGPARM
6607 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6609 if (!TARGET_SSE)
6611 if (warn)
6613 if (decl)
6614 error ("calling %qD with attribute sseregparm without "
6615 "SSE/SSE2 enabled", decl);
6616 else
6617 error ("calling %qT with attribute sseregparm without "
6618 "SSE/SSE2 enabled", type);
6620 return 0;
6623 return 2;
6626 if (!decl)
6627 return 0;
6629 cgraph_node *target = cgraph_node::get (decl);
6630 if (target)
6631 target = target->function_symbol ();
6633 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6634 (and DFmode for SSE2) arguments in SSE registers. */
6635 if (target
6636 /* TARGET_SSE_MATH */
6637 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6638 && opt_for_fn (target->decl, optimize)
6639 && !(profile_flag && !flag_fentry))
6641 cgraph_local_info *i = &target->local;
6642 if (i && i->local && i->can_change_signature)
6644	  /* Refuse to produce wrong code when a local function with SSE enabled
6645	     is called from an SSE-disabled function.
6646	     FIXME: We need a way to detect these cases across ltrans partitions
6647	     and avoid using SSE calling conventions on local functions called
6648	     from functions with SSE disabled.  For now at least delay the
6649	     warning until we know we are going to produce wrong code.
6650	     See PR66047.  */
6651 if (!TARGET_SSE && warn)
6652 return -1;
6653 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6654 ->x_ix86_isa_flags) ? 2 : 1;
6658 return 0;
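/* Illustrative sketch (editorial, 32-bit with SSE enabled, hypothetical name,
   guarded out of the build): sseregparm requests SFmode/DFmode arguments in
   SSE registers; without SSE the function above reports an error instead.  */
#if 0
__attribute__((sseregparm)) static double
scale_hypothetical (double x, double y)
{
  return x * y;
}
#endif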
6661 /* Return true if EAX is live at the start of the function. Used by
6662 ix86_expand_prologue to determine if we need special help before
6663 calling allocate_stack_worker. */
6665 static bool
6666 ix86_eax_live_at_start_p (void)
6668 /* Cheat. Don't bother working forward from ix86_function_regparm
6669 to the function type to whether an actual argument is located in
6670 eax. Instead just look at cfg info, which is still close enough
6671 to correct at this point. This gives false positives for broken
6672 functions that might use uninitialized data that happens to be
6673 allocated in eax, but who cares? */
6674 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6677 static bool
6678 ix86_keep_aggregate_return_pointer (tree fntype)
6680 tree attr;
6682 if (!TARGET_64BIT)
6684 attr = lookup_attribute ("callee_pop_aggregate_return",
6685 TYPE_ATTRIBUTES (fntype));
6686 if (attr)
6687 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6689 /* For 32-bit MS-ABI the default is to keep aggregate
6690 return pointer. */
6691 if (ix86_function_type_abi (fntype) == MS_ABI)
6692 return true;
6694 return KEEP_AGGREGATE_RETURN_POINTER != 0;
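/* Illustrative sketch (editorial, 32-bit, hypothetical names, guarded out of
   the build): a value of 0 asks the callee not to pop the hidden
   aggregate-return pointer, a value of 1 enables the pop, which is what the
   attribute lookup above decodes.  */
#if 0
struct big_hypothetical { int v[8]; };
struct big_hypothetical
  __attribute__((callee_pop_aggregate_return (0)))
  make_big_hypothetical (void);
#endif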
6697 /* Value is the number of bytes of arguments automatically
6698 popped when returning from a subroutine call.
6699 FUNDECL is the declaration node of the function (as a tree),
6700 FUNTYPE is the data type of the function (as a tree),
6701 or for a library call it is an identifier node for the subroutine name.
6702 SIZE is the number of bytes of arguments passed on the stack.
6704 On the 80386, the RTD insn may be used to pop them if the number
6705 of args is fixed, but if the number is variable then the caller
6706 must pop them all. RTD can't be used for library calls now
6707 because the library is compiled with the Unix compiler.
6708 Use of RTD is a selectable option, since it is incompatible with
6709 standard Unix calling sequences. If the option is not selected,
6710 the caller must always pop the args.
6712 The attribute stdcall is equivalent to RTD on a per module basis. */
6714 static int
6715 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6717 unsigned int ccvt;
6719 /* None of the 64-bit ABIs pop arguments. */
6720 if (TARGET_64BIT)
6721 return 0;
6723 ccvt = ix86_get_callcvt (funtype);
6725 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6726 | IX86_CALLCVT_THISCALL)) != 0
6727 && ! stdarg_p (funtype))
6728 return size;
6730 /* Lose any fake structure return argument if it is passed on the stack. */
6731 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6732 && !ix86_keep_aggregate_return_pointer (funtype))
6734 int nregs = ix86_function_regparm (funtype, fundecl);
6735 if (nregs == 0)
6736 return GET_MODE_SIZE (Pmode);
6739 return 0;
6742 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6744 static bool
6745 ix86_legitimate_combined_insn (rtx_insn *insn)
6747 int i;
6749 /* Check operand constraints in case hard registers were propagated
6750 into insn pattern. This check prevents combine pass from
6751 generating insn patterns with invalid hard register operands.
6752 These invalid insns can eventually confuse reload to error out
6753 with a spill failure. See also PRs 46829 and 46843. */
6755 gcc_assert (INSN_CODE (insn) >= 0);
6757 extract_insn (insn);
6758 preprocess_constraints (insn);
6760 int n_operands = recog_data.n_operands;
6761 int n_alternatives = recog_data.n_alternatives;
6762 for (i = 0; i < n_operands; i++)
6764 rtx op = recog_data.operand[i];
6765 machine_mode mode = GET_MODE (op);
6766 const operand_alternative *op_alt;
6767 int offset = 0;
6768 bool win;
6769 int j;
6771 /* A unary operator may be accepted by the predicate, but it
6772 is irrelevant for matching constraints. */
6773 if (UNARY_P (op))
6774 op = XEXP (op, 0);
6776 if (SUBREG_P (op))
6778 if (REG_P (SUBREG_REG (op))
6779 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6780 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6781 GET_MODE (SUBREG_REG (op)),
6782 SUBREG_BYTE (op),
6783 GET_MODE (op));
6784 op = SUBREG_REG (op);
6787 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6788 continue;
6790 op_alt = recog_op_alt;
6792 /* Operand has no constraints, anything is OK. */
6793 win = !n_alternatives;
6795 alternative_mask preferred = get_preferred_alternatives (insn);
6796 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6798 if (!TEST_BIT (preferred, j))
6799 continue;
6800 if (op_alt[i].anything_ok
6801 || (op_alt[i].matches != -1
6802 && operands_match_p
6803 (recog_data.operand[i],
6804 recog_data.operand[op_alt[i].matches]))
6805 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6807 win = true;
6808 break;
6812 if (!win)
6813 return false;
6816 return true;
6819 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6821 static unsigned HOST_WIDE_INT
6822 ix86_asan_shadow_offset (void)
6824 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6825 : HOST_WIDE_INT_C (0x7fff8000))
6826 : (HOST_WIDE_INT_1 << 29);
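/* Illustrative sketch (editorial, hypothetical name, guarded out of the
   build): AddressSanitizer maps every 8 application bytes to 1 shadow byte,
   so a shadow address is (addr >> 3) + offset, with the offset supplied by
   the hook above (e.g. 0x7fff8000 for Linux/LP64).  */
#if 0
static unsigned long long
asan_shadow_addr_sketch (unsigned long long addr, unsigned long long offset)
{
  return (addr >> 3) + offset;
}
#endif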
6829 /* Argument support functions. */
6831 /* Return true when register may be used to pass function parameters. */
6832 bool
6833 ix86_function_arg_regno_p (int regno)
6835 int i;
6836 enum calling_abi call_abi;
6837 const int *parm_regs;
6839 if (TARGET_MPX && BND_REGNO_P (regno))
6840 return true;
6842 if (!TARGET_64BIT)
6844 if (TARGET_MACHO)
6845 return (regno < REGPARM_MAX
6846 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6847 else
6848 return (regno < REGPARM_MAX
6849 || (TARGET_MMX && MMX_REGNO_P (regno)
6850 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6851 || (TARGET_SSE && SSE_REGNO_P (regno)
6852 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6855 if (TARGET_SSE && SSE_REGNO_P (regno)
6856 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6857 return true;
6859   /* TODO: The function should depend on the current function's ABI, but
6860      builtins.c would then need updating.  Therefore we use the
6861      default ABI.  */
6862 call_abi = ix86_cfun_abi ();
6864 /* RAX is used as hidden argument to va_arg functions. */
6865 if (call_abi == SYSV_ABI && regno == AX_REG)
6866 return true;
6868 if (call_abi == MS_ABI)
6869 parm_regs = x86_64_ms_abi_int_parameter_registers;
6870 else
6871 parm_regs = x86_64_int_parameter_registers;
6873 for (i = 0; i < (call_abi == MS_ABI
6874 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6875 if (regno == parm_regs[i])
6876 return true;
6877 return false;
6880 /* Return true if we do not know how to pass TYPE solely in registers.  */
6882 static bool
6883 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6885 if (must_pass_in_stack_var_size_or_pad (mode, type))
6886 return true;
6888 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6889 The layout_type routine is crafty and tries to trick us into passing
6890 currently unsupported vector types on the stack by using TImode. */
6891 return (!TARGET_64BIT && mode == TImode
6892 && type && TREE_CODE (type) != VECTOR_TYPE);
6895 /* Return the size, in bytes, of the area reserved for arguments passed
6896    in registers for the function represented by FNDECL, depending on the
6897    ABI format used.  */
6899 ix86_reg_parm_stack_space (const_tree fndecl)
6901 enum calling_abi call_abi = SYSV_ABI;
6902 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6903 call_abi = ix86_function_abi (fndecl);
6904 else
6905 call_abi = ix86_function_type_abi (fndecl);
6906 if (TARGET_64BIT && call_abi == MS_ABI)
6907 return 32;
6908 return 0;
6911 /* We add this as a workaround in order to use libc_has_function
6912 hook in i386.md. */
6913 bool
6914 ix86_libc_has_function (enum function_class fn_class)
6916 return targetm.libc_has_function (fn_class);
6919 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6920    specifying the call ABI used.  */
6921 enum calling_abi
6922 ix86_function_type_abi (const_tree fntype)
6924 enum calling_abi abi = ix86_abi;
6926 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6927 return abi;
6929 if (abi == SYSV_ABI
6930 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6932 static int warned;
6933 if (TARGET_X32 && !warned)
6935 error ("X32 does not support ms_abi attribute");
6936 warned = 1;
6939 abi = MS_ABI;
6941 else if (abi == MS_ABI
6942 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6943 abi = SYSV_ABI;
6945 return abi;
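/* Illustrative sketch (editorial, 64-bit, hypothetical names, guarded out of
   the build): the attributes below override the default calling ABI for a
   single function type, which is what ix86_function_type_abi detects above.  */
#if 0
int __attribute__((ms_abi)) win_style_hypothetical (int a, int b);
int __attribute__((sysv_abi)) sysv_style_hypothetical (int a, int b);
#endif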
6948 static enum calling_abi
6949 ix86_function_abi (const_tree fndecl)
6951 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6954 /* Return SYSV_ABI or MS_ABI, depending on CFUN,
6955    specifying the call ABI used.  */
6956 enum calling_abi
6957 ix86_cfun_abi (void)
6959 return cfun ? cfun->machine->call_abi : ix86_abi;
6962 static bool
6963 ix86_function_ms_hook_prologue (const_tree fn)
6965 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6967 if (decl_function_context (fn) != NULL_TREE)
6968 error_at (DECL_SOURCE_LOCATION (fn),
6969 "ms_hook_prologue is not compatible with nested function");
6970 else
6971 return true;
6973 return false;
6976 static bool
6977 ix86_function_naked (const_tree fn)
6979 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
6980 return true;
6982 return false;
6985 /* Write the extra assembler code needed to declare a function properly. */
6987 void
6988 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6989 tree decl)
6991 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6993 if (is_ms_hook)
6995 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6996 unsigned int filler_cc = 0xcccccccc;
6998 for (i = 0; i < filler_count; i += 4)
6999 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7002 #ifdef SUBTARGET_ASM_UNWIND_INIT
7003 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7004 #endif
7006 ASM_OUTPUT_LABEL (asm_out_file, fname);
7008 /* Output magic byte marker, if hot-patch attribute is set. */
7009 if (is_ms_hook)
7011 if (TARGET_64BIT)
7013 /* leaq [%rsp + 0], %rsp */
7014 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7015 asm_out_file);
7017 else
7019 /* movl.s %edi, %edi
7020 push %ebp
7021 movl.s %esp, %ebp */
7022 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
7027 /* Implementation of call abi switching target hook. Specific to FNDECL
7028 the specific call register sets are set. See also
7029 ix86_conditional_register_usage for more details. */
7030 void
7031 ix86_call_abi_override (const_tree fndecl)
7033 cfun->machine->call_abi = ix86_function_abi (fndecl);
7036 /* Return 1 if pseudo register should be created and used to hold
7037 GOT address for PIC code. */
7038 bool
7039 ix86_use_pseudo_pic_reg (void)
7041 if ((TARGET_64BIT
7042 && (ix86_cmodel == CM_SMALL_PIC
7043 || TARGET_PECOFF))
7044 || !flag_pic)
7045 return false;
7046 return true;
7049 /* Initialize large model PIC register. */
7051 static void
7052 ix86_init_large_pic_reg (unsigned int tmp_regno)
7054 rtx_code_label *label;
7055 rtx tmp_reg;
7057 gcc_assert (Pmode == DImode);
7058 label = gen_label_rtx ();
7059 emit_label (label);
7060 LABEL_PRESERVE_P (label) = 1;
7061 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7062 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7063 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7064 label));
7065 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7066 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7067 pic_offset_table_rtx, tmp_reg));
7068 const char *name = LABEL_NAME (label);
7069 PUT_CODE (label, NOTE);
7070 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7071 NOTE_DELETED_LABEL_NAME (label) = name;
7074 /* Create and initialize PIC register if required. */
7075 static void
7076 ix86_init_pic_reg (void)
7078 edge entry_edge;
7079 rtx_insn *seq;
7081 if (!ix86_use_pseudo_pic_reg ())
7082 return;
7084 start_sequence ();
7086 if (TARGET_64BIT)
7088 if (ix86_cmodel == CM_LARGE_PIC)
7089 ix86_init_large_pic_reg (R11_REG);
7090 else
7091 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7093 else
7095       /* If there is a future mcount call in the function, it is more profitable
7096	  to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
7097 rtx reg = crtl->profile
7098 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7099 : pic_offset_table_rtx;
7100 rtx_insn *insn = emit_insn (gen_set_got (reg));
7101 RTX_FRAME_RELATED_P (insn) = 1;
7102 if (crtl->profile)
7103 emit_move_insn (pic_offset_table_rtx, reg);
7104 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7107 seq = get_insns ();
7108 end_sequence ();
7110 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7111 insert_insn_on_edge (seq, entry_edge);
7112 commit_one_edge_insertion (entry_edge);
7115 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7116 for a call to a function whose data type is FNTYPE.
7117 For a library call, FNTYPE is 0. */
7119 void
7120 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7121 tree fntype, /* tree ptr for function decl */
7122 rtx libname, /* SYMBOL_REF of library name or 0 */
7123 tree fndecl,
7124 int caller)
7126 struct cgraph_local_info *i = NULL;
7127 struct cgraph_node *target = NULL;
7129 memset (cum, 0, sizeof (*cum));
7131 if (fndecl)
7133 target = cgraph_node::get (fndecl);
7134 if (target)
7136 target = target->function_symbol ();
7137 i = cgraph_node::local_info (target->decl);
7138 cum->call_abi = ix86_function_abi (target->decl);
7140 else
7141 cum->call_abi = ix86_function_abi (fndecl);
7143 else
7144 cum->call_abi = ix86_function_type_abi (fntype);
7146 cum->caller = caller;
7148 /* Set up the number of registers to use for passing arguments. */
7149 cum->nregs = ix86_regparm;
7150 if (TARGET_64BIT)
7152 cum->nregs = (cum->call_abi == SYSV_ABI
7153 ? X86_64_REGPARM_MAX
7154 : X86_64_MS_REGPARM_MAX);
7156 if (TARGET_SSE)
7158 cum->sse_nregs = SSE_REGPARM_MAX;
7159 if (TARGET_64BIT)
7161 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7162 ? X86_64_SSE_REGPARM_MAX
7163 : X86_64_MS_SSE_REGPARM_MAX);
7166 if (TARGET_MMX)
7167 cum->mmx_nregs = MMX_REGPARM_MAX;
7168 cum->warn_avx512f = true;
7169 cum->warn_avx = true;
7170 cum->warn_sse = true;
7171 cum->warn_mmx = true;
7173   /* Because the type might mismatch between caller and callee, we need to
7174      use the actual type of the function for local calls.
7175      FIXME: cgraph_analyze can be told to actually record if function uses
7176      va_start so for local functions maybe_vaarg can be made aggressive
7177      helping K&R code.
7178      FIXME: once the type system is fixed, we won't need this code anymore.  */
7179 if (i && i->local && i->can_change_signature)
7180 fntype = TREE_TYPE (target->decl);
7181 cum->stdarg = stdarg_p (fntype);
7182 cum->maybe_vaarg = (fntype
7183 ? (!prototype_p (fntype) || stdarg_p (fntype))
7184 : !libname);
7186 cum->bnd_regno = FIRST_BND_REG;
7187 cum->bnds_in_bt = 0;
7188 cum->force_bnd_pass = 0;
7189 cum->decl = fndecl;
7191 if (!TARGET_64BIT)
7193 /* If there are variable arguments, then we won't pass anything
7194 in registers in 32-bit mode. */
7195 if (stdarg_p (fntype))
7197 cum->nregs = 0;
7198	/* Since in 32-bit mode variable arguments are always passed on
7199	   the stack, there is a scratch register available for an indirect
7200	   sibcall.  */
7201 cfun->machine->arg_reg_available = true;
7202 cum->sse_nregs = 0;
7203 cum->mmx_nregs = 0;
7204 cum->warn_avx512f = false;
7205 cum->warn_avx = false;
7206 cum->warn_sse = false;
7207 cum->warn_mmx = false;
7208 return;
7211 /* Use ecx and edx registers if function has fastcall attribute,
7212 else look for regparm information. */
7213 if (fntype)
7215 unsigned int ccvt = ix86_get_callcvt (fntype);
7216 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7218 cum->nregs = 1;
7219 cum->fastcall = 1; /* Same first register as in fastcall. */
7221 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7223 cum->nregs = 2;
7224 cum->fastcall = 1;
7226 else
7227 cum->nregs = ix86_function_regparm (fntype, fndecl);
7230 /* Set up the number of SSE registers used for passing SFmode
7231 and DFmode arguments. Warn for mismatching ABI. */
7232 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7235 cfun->machine->arg_reg_available = (cum->nregs > 0);
7238 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7239 But in the case of vector types, it is some vector mode.
7241 When we have only some of our vector isa extensions enabled, then there
7242 are some modes for which vector_mode_supported_p is false. For these
7243 modes, the generic vector support in gcc will choose some non-vector mode
7244 in order to implement the type. By computing the natural mode, we'll
7245 select the proper ABI location for the operand and not depend on whatever
7246 the middle-end decides to do with these vector types.
7248    The middle-end can't deal with vector types > 16 bytes.  In this
7249    case, we return the original mode and warn about the ABI change if
7250    CUM isn't NULL.
7252    If IN_RETURN is true, warn about the ABI change if the vector mode isn't
7253    available for the function return value.  */
7255 static machine_mode
7256 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7257 bool in_return)
7259 machine_mode mode = TYPE_MODE (type);
7261 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7263 HOST_WIDE_INT size = int_size_in_bytes (type);
7264 if ((size == 8 || size == 16 || size == 32 || size == 64)
7265 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7266 && TYPE_VECTOR_SUBPARTS (type) > 1)
7268 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7270 /* There are no XFmode vector modes. */
7271 if (innermode == XFmode)
7272 return mode;
7274 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7275 mode = MIN_MODE_VECTOR_FLOAT;
7276 else
7277 mode = MIN_MODE_VECTOR_INT;
7279 /* Get the mode which has this inner mode and number of units. */
7280 FOR_EACH_MODE_FROM (mode, mode)
7281 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7282 && GET_MODE_INNER (mode) == innermode)
7284 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7286 static bool warnedavx512f;
7287 static bool warnedavx512f_ret;
7289 if (cum && cum->warn_avx512f && !warnedavx512f)
7291 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7292 "without AVX512F enabled changes the ABI"))
7293 warnedavx512f = true;
7295 else if (in_return && !warnedavx512f_ret)
7297 if (warning (OPT_Wpsabi, "AVX512F vector return "
7298 "without AVX512F enabled changes the ABI"))
7299 warnedavx512f_ret = true;
7302 return TYPE_MODE (type);
7304 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7306 static bool warnedavx;
7307 static bool warnedavx_ret;
7309 if (cum && cum->warn_avx && !warnedavx)
7311 if (warning (OPT_Wpsabi, "AVX vector argument "
7312 "without AVX enabled changes the ABI"))
7313 warnedavx = true;
7315 else if (in_return && !warnedavx_ret)
7317 if (warning (OPT_Wpsabi, "AVX vector return "
7318 "without AVX enabled changes the ABI"))
7319 warnedavx_ret = true;
7322 return TYPE_MODE (type);
7324 else if (((size == 8 && TARGET_64BIT) || size == 16)
7325 && !TARGET_SSE
7326 && !TARGET_IAMCU)
7328 static bool warnedsse;
7329 static bool warnedsse_ret;
7331 if (cum && cum->warn_sse && !warnedsse)
7333 if (warning (OPT_Wpsabi, "SSE vector argument "
7334 "without SSE enabled changes the ABI"))
7335 warnedsse = true;
7337 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7339 if (warning (OPT_Wpsabi, "SSE vector return "
7340 "without SSE enabled changes the ABI"))
7341 warnedsse_ret = true;
7344 else if ((size == 8 && !TARGET_64BIT)
7345 && (!cfun
7346 || cfun->machine->func_type == TYPE_NORMAL)
7347 && !TARGET_MMX
7348 && !TARGET_IAMCU)
7350 static bool warnedmmx;
7351 static bool warnedmmx_ret;
7353 if (cum && cum->warn_mmx && !warnedmmx)
7355 if (warning (OPT_Wpsabi, "MMX vector argument "
7356 "without MMX enabled changes the ABI"))
7357 warnedmmx = true;
7359 else if (in_return && !warnedmmx_ret)
7361 if (warning (OPT_Wpsabi, "MMX vector return "
7362 "without MMX enabled changes the ABI"))
7363 warnedmmx_ret = true;
7366 return mode;
7369 gcc_unreachable ();
7373 return mode;
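/* For example (an illustrative sketch, not part of the ABI text): given

     typedef int v8si __attribute__ ((vector_size (32)));

   the natural mode is V8SImode when AVX is enabled.  Without AVX the
   mode chosen by the middle-end is returned unchanged and the -Wpsabi
   warning above notes that the ABI differs.  */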
7376 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7377 this may not agree with the mode that the type system has chosen for the
7378 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7379 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7381 static rtx
7382 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7383 unsigned int regno)
7385 rtx tmp;
7387 if (orig_mode != BLKmode)
7388 tmp = gen_rtx_REG (orig_mode, regno);
7389 else
7391 tmp = gen_rtx_REG (mode, regno);
7392 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7393 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7396 return tmp;
7399 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7400 of this code is to classify each 8 bytes of an incoming argument by the register
7401 class and assign registers accordingly. */
7403 /* Return the union class of CLASS1 and CLASS2.
7404 See the x86-64 PS ABI for details. */
7406 static enum x86_64_reg_class
7407 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7409 /* Rule #1: If both classes are equal, this is the resulting class. */
7410 if (class1 == class2)
7411 return class1;
7413 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7414 the other class. */
7415 if (class1 == X86_64_NO_CLASS)
7416 return class2;
7417 if (class2 == X86_64_NO_CLASS)
7418 return class1;
7420 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7421 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7422 return X86_64_MEMORY_CLASS;
7424 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7425 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7426 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7427 return X86_64_INTEGERSI_CLASS;
7428 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7429 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7430 return X86_64_INTEGER_CLASS;
7432 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7433 MEMORY is used. */
7434 if (class1 == X86_64_X87_CLASS
7435 || class1 == X86_64_X87UP_CLASS
7436 || class1 == X86_64_COMPLEX_X87_CLASS
7437 || class2 == X86_64_X87_CLASS
7438 || class2 == X86_64_X87UP_CLASS
7439 || class2 == X86_64_COMPLEX_X87_CLASS)
7440 return X86_64_MEMORY_CLASS;
7442 /* Rule #6: Otherwise class SSE is used. */
7443 return X86_64_SSE_CLASS;
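/* A few illustrative merges that follow directly from the rules above:

     merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS)
       == X86_64_SSESF_CLASS                               (rule #2)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       == X86_64_INTEGER_CLASS                             (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       == X86_64_MEMORY_CLASS                              (rule #5)  */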
7446 /* Classify the argument of type TYPE and mode MODE.
7447 CLASSES will be filled by the register class used to pass each word
7448 of the operand. The number of words is returned. In case the parameter
7449 should be passed in memory, 0 is returned. As a special case for zero
7450 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7452 BIT_OFFSET is used internally for handling records and specifies the
7453 offset in bits modulo 512 to avoid overflow cases.
7455 See the x86-64 PS ABI for details.
7458 static int
7459 classify_argument (machine_mode mode, const_tree type,
7460 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7462 HOST_WIDE_INT bytes =
7463 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7464 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7466 /* Variable sized entities are always passed/returned in memory. */
7467 if (bytes < 0)
7468 return 0;
7470 if (mode != VOIDmode
7471 && targetm.calls.must_pass_in_stack (mode, type))
7472 return 0;
7474 if (type && AGGREGATE_TYPE_P (type))
7476 int i;
7477 tree field;
7478 enum x86_64_reg_class subclasses[MAX_CLASSES];
7480 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7481 if (bytes > 64)
7482 return 0;
7484 for (i = 0; i < words; i++)
7485 classes[i] = X86_64_NO_CLASS;
7487 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7488 signal the memory class, so handle it as a special case. */
7489 if (!words)
7491 classes[0] = X86_64_NO_CLASS;
7492 return 1;
7495 /* Classify each field of record and merge classes. */
7496 switch (TREE_CODE (type))
7498 case RECORD_TYPE:
7499 /* And now merge the fields of structure. */
7500 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7502 if (TREE_CODE (field) == FIELD_DECL)
7504 int num;
7506 if (TREE_TYPE (field) == error_mark_node)
7507 continue;
7509 /* Bitfields are always classified as integer. Handle them
7510 early, since later code would consider them to be
7511 misaligned integers. */
7512 if (DECL_BIT_FIELD (field))
7514 for (i = (int_bit_position (field)
7515 + (bit_offset % 64)) / 8 / 8;
7516 i < ((int_bit_position (field) + (bit_offset % 64))
7517 + tree_to_shwi (DECL_SIZE (field))
7518 + 63) / 8 / 8; i++)
7519 classes[i] =
7520 merge_classes (X86_64_INTEGER_CLASS,
7521 classes[i]);
7523 else
7525 int pos;
7527 type = TREE_TYPE (field);
7529 /* Flexible array member is ignored. */
7530 if (TYPE_MODE (type) == BLKmode
7531 && TREE_CODE (type) == ARRAY_TYPE
7532 && TYPE_SIZE (type) == NULL_TREE
7533 && TYPE_DOMAIN (type) != NULL_TREE
7534 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7535 == NULL_TREE))
7537 static bool warned;
7539 if (!warned && warn_psabi)
7541 warned = true;
7542 inform (input_location,
7543 "the ABI of passing struct with"
7544 " a flexible array member has"
7545 " changed in GCC 4.4");
7547 continue;
7549 num = classify_argument (TYPE_MODE (type), type,
7550 subclasses,
7551 (int_bit_position (field)
7552 + bit_offset) % 512);
7553 if (!num)
7554 return 0;
7555 pos = (int_bit_position (field)
7556 + (bit_offset % 64)) / 8 / 8;
7557 for (i = 0; i < num && (i + pos) < words; i++)
7558 classes[i + pos] =
7559 merge_classes (subclasses[i], classes[i + pos]);
7563 break;
7565 case ARRAY_TYPE:
7566 /* Arrays are handled as small records. */
7568 int num;
7569 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7570 TREE_TYPE (type), subclasses, bit_offset);
7571 if (!num)
7572 return 0;
7574 /* The partial classes are now full classes. */
7575 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7576 subclasses[0] = X86_64_SSE_CLASS;
7577 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7578 && !((bit_offset % 64) == 0 && bytes == 4))
7579 subclasses[0] = X86_64_INTEGER_CLASS;
7581 for (i = 0; i < words; i++)
7582 classes[i] = subclasses[i % num];
7584 break;
7586 case UNION_TYPE:
7587 case QUAL_UNION_TYPE:
7588 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
7590 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7592 if (TREE_CODE (field) == FIELD_DECL)
7594 int num;
7596 if (TREE_TYPE (field) == error_mark_node)
7597 continue;
7599 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7600 TREE_TYPE (field), subclasses,
7601 bit_offset);
7602 if (!num)
7603 return 0;
7604 for (i = 0; i < num && i < words; i++)
7605 classes[i] = merge_classes (subclasses[i], classes[i]);
7608 break;
7610 default:
7611 gcc_unreachable ();
7614 if (words > 2)
7616 /* When size > 16 bytes, if the first eightbyte isn't
7617 X86_64_SSE_CLASS or any of the others isn't
7618 X86_64_SSEUP_CLASS, everything should be passed in
7619 memory. */
7620 if (classes[0] != X86_64_SSE_CLASS)
7621 return 0;
7623 for (i = 1; i < words; i++)
7624 if (classes[i] != X86_64_SSEUP_CLASS)
7625 return 0;
7628 /* Final merger cleanup. */
7629 for (i = 0; i < words; i++)
7631 /* If one class is MEMORY, everything should be passed in
7632 memory. */
7633 if (classes[i] == X86_64_MEMORY_CLASS)
7634 return 0;
7636 /* The X86_64_SSEUP_CLASS should always be preceded by
7637 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7638 if (classes[i] == X86_64_SSEUP_CLASS
7639 && classes[i - 1] != X86_64_SSE_CLASS
7640 && classes[i - 1] != X86_64_SSEUP_CLASS)
7642 /* The first one should never be X86_64_SSEUP_CLASS. */
7643 gcc_assert (i != 0);
7644 classes[i] = X86_64_SSE_CLASS;
7647 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7648 everything should be passed in memory. */
7649 if (classes[i] == X86_64_X87UP_CLASS
7650 && (classes[i - 1] != X86_64_X87_CLASS))
7652 static bool warned;
7654 /* The first one should never be X86_64_X87UP_CLASS. */
7655 gcc_assert (i != 0);
7656 if (!warned && warn_psabi)
7658 warned = true;
7659 inform (input_location,
7660 "the ABI of passing union with long double"
7661 " has changed in GCC 4.4");
7663 return 0;
7666 return words;
7669 /* Compute the alignment needed. We align all types to their natural boundaries
7670 with the exception of XFmode, which is aligned to 64 bits. */
7671 if (mode != VOIDmode && mode != BLKmode)
7673 int mode_alignment = GET_MODE_BITSIZE (mode);
7675 if (mode == XFmode)
7676 mode_alignment = 128;
7677 else if (mode == XCmode)
7678 mode_alignment = 256;
7679 if (COMPLEX_MODE_P (mode))
7680 mode_alignment /= 2;
7681 /* Misaligned fields are always returned in memory. */
7682 if (bit_offset % mode_alignment)
7683 return 0;
7686 /* for V1xx modes, just use the base mode */
7687 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7688 && GET_MODE_UNIT_SIZE (mode) == bytes)
7689 mode = GET_MODE_INNER (mode);
7691 /* Classification of atomic types. */
7692 switch (mode)
7694 case E_SDmode:
7695 case E_DDmode:
7696 classes[0] = X86_64_SSE_CLASS;
7697 return 1;
7698 case E_TDmode:
7699 classes[0] = X86_64_SSE_CLASS;
7700 classes[1] = X86_64_SSEUP_CLASS;
7701 return 2;
7702 case E_DImode:
7703 case E_SImode:
7704 case E_HImode:
7705 case E_QImode:
7706 case E_CSImode:
7707 case E_CHImode:
7708 case E_CQImode:
7710 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7712 /* Analyze last 128 bits only. */
7713 size = (size - 1) & 0x7f;
7715 if (size < 32)
7717 classes[0] = X86_64_INTEGERSI_CLASS;
7718 return 1;
7720 else if (size < 64)
7722 classes[0] = X86_64_INTEGER_CLASS;
7723 return 1;
7725 else if (size < 64+32)
7727 classes[0] = X86_64_INTEGER_CLASS;
7728 classes[1] = X86_64_INTEGERSI_CLASS;
7729 return 2;
7731 else if (size < 64+64)
7733 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7734 return 2;
7736 else
7737 gcc_unreachable ();
7739 case E_CDImode:
7740 case E_TImode:
7741 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7742 return 2;
7743 case E_COImode:
7744 case E_OImode:
7745 /* OImode shouldn't be used directly. */
7746 gcc_unreachable ();
7747 case E_CTImode:
7748 return 0;
7749 case E_SFmode:
7750 if (!(bit_offset % 64))
7751 classes[0] = X86_64_SSESF_CLASS;
7752 else
7753 classes[0] = X86_64_SSE_CLASS;
7754 return 1;
7755 case E_DFmode:
7756 classes[0] = X86_64_SSEDF_CLASS;
7757 return 1;
7758 case E_XFmode:
7759 classes[0] = X86_64_X87_CLASS;
7760 classes[1] = X86_64_X87UP_CLASS;
7761 return 2;
7762 case E_TFmode:
7763 classes[0] = X86_64_SSE_CLASS;
7764 classes[1] = X86_64_SSEUP_CLASS;
7765 return 2;
7766 case E_SCmode:
7767 classes[0] = X86_64_SSE_CLASS;
7768 if (!(bit_offset % 64))
7769 return 1;
7770 else
7772 static bool warned;
7774 if (!warned && warn_psabi)
7776 warned = true;
7777 inform (input_location,
7778 "the ABI of passing structure with complex float"
7779 " member has changed in GCC 4.4");
7781 classes[1] = X86_64_SSESF_CLASS;
7782 return 2;
7784 case E_DCmode:
7785 classes[0] = X86_64_SSEDF_CLASS;
7786 classes[1] = X86_64_SSEDF_CLASS;
7787 return 2;
7788 case E_XCmode:
7789 classes[0] = X86_64_COMPLEX_X87_CLASS;
7790 return 1;
7791 case E_TCmode:
7792 /* This mode is larger than 16 bytes. */
7793 return 0;
7794 case E_V8SFmode:
7795 case E_V8SImode:
7796 case E_V32QImode:
7797 case E_V16HImode:
7798 case E_V4DFmode:
7799 case E_V4DImode:
7800 classes[0] = X86_64_SSE_CLASS;
7801 classes[1] = X86_64_SSEUP_CLASS;
7802 classes[2] = X86_64_SSEUP_CLASS;
7803 classes[3] = X86_64_SSEUP_CLASS;
7804 return 4;
7805 case E_V8DFmode:
7806 case E_V16SFmode:
7807 case E_V8DImode:
7808 case E_V16SImode:
7809 case E_V32HImode:
7810 case E_V64QImode:
7811 classes[0] = X86_64_SSE_CLASS;
7812 classes[1] = X86_64_SSEUP_CLASS;
7813 classes[2] = X86_64_SSEUP_CLASS;
7814 classes[3] = X86_64_SSEUP_CLASS;
7815 classes[4] = X86_64_SSEUP_CLASS;
7816 classes[5] = X86_64_SSEUP_CLASS;
7817 classes[6] = X86_64_SSEUP_CLASS;
7818 classes[7] = X86_64_SSEUP_CLASS;
7819 return 8;
7820 case E_V4SFmode:
7821 case E_V4SImode:
7822 case E_V16QImode:
7823 case E_V8HImode:
7824 case E_V2DFmode:
7825 case E_V2DImode:
7826 classes[0] = X86_64_SSE_CLASS;
7827 classes[1] = X86_64_SSEUP_CLASS;
7828 return 2;
7829 case E_V1TImode:
7830 case E_V1DImode:
7831 case E_V2SFmode:
7832 case E_V2SImode:
7833 case E_V4HImode:
7834 case E_V8QImode:
7835 classes[0] = X86_64_SSE_CLASS;
7836 return 1;
7837 case E_BLKmode:
7838 case E_VOIDmode:
7839 return 0;
7840 default:
7841 gcc_assert (VECTOR_MODE_P (mode));
7843 if (bytes > 16)
7844 return 0;
7846 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7848 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7849 classes[0] = X86_64_INTEGERSI_CLASS;
7850 else
7851 classes[0] = X86_64_INTEGER_CLASS;
7852 classes[1] = X86_64_INTEGER_CLASS;
7853 return 1 + (bytes > 8);
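/* Two illustrative classifications (sketches of the rules implemented
   above, not additional special cases):

     struct { double d; long l; }
       -> 2 words: classes[0] = X86_64_SSEDF_CLASS,
                   classes[1] = X86_64_INTEGER_CLASS,
          i.e. the first eightbyte goes to an SSE register and the
          second to a general-purpose register.

     struct { char c[17]; }
       -> more than 16 bytes and classes[0] is not X86_64_SSE_CLASS,
          so the words > 2 check above returns 0 and the argument is
          passed in memory.  */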
7857 /* Examine the argument and set the number of registers required in each
7858 class. Return true iff the parameter should be passed in memory. */
7860 static bool
7861 examine_argument (machine_mode mode, const_tree type, int in_return,
7862 int *int_nregs, int *sse_nregs)
7864 enum x86_64_reg_class regclass[MAX_CLASSES];
7865 int n = classify_argument (mode, type, regclass, 0);
7867 *int_nregs = 0;
7868 *sse_nregs = 0;
7870 if (!n)
7871 return true;
7872 for (n--; n >= 0; n--)
7873 switch (regclass[n])
7875 case X86_64_INTEGER_CLASS:
7876 case X86_64_INTEGERSI_CLASS:
7877 (*int_nregs)++;
7878 break;
7879 case X86_64_SSE_CLASS:
7880 case X86_64_SSESF_CLASS:
7881 case X86_64_SSEDF_CLASS:
7882 (*sse_nregs)++;
7883 break;
7884 case X86_64_NO_CLASS:
7885 case X86_64_SSEUP_CLASS:
7886 break;
7887 case X86_64_X87_CLASS:
7888 case X86_64_X87UP_CLASS:
7889 case X86_64_COMPLEX_X87_CLASS:
7890 if (!in_return)
7891 return true;
7892 break;
7893 case X86_64_MEMORY_CLASS:
7894 gcc_unreachable ();
7897 return false;
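/* For the struct { double d; long l; } example above this sets
   *sse_nregs to 1 and *int_nregs to 1 and returns false, i.e. the
   argument fits in one SSE register plus one integer register.  */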
7900 /* Construct container for the argument used by GCC interface. See
7901 FUNCTION_ARG for the detailed description. */
7903 static rtx
7904 construct_container (machine_mode mode, machine_mode orig_mode,
7905 const_tree type, int in_return, int nintregs, int nsseregs,
7906 const int *intreg, int sse_regno)
7908 /* The following variables hold the static issued_error state. */
7909 static bool issued_sse_arg_error;
7910 static bool issued_sse_ret_error;
7911 static bool issued_x87_ret_error;
7913 machine_mode tmpmode;
7914 int bytes =
7915 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7916 enum x86_64_reg_class regclass[MAX_CLASSES];
7917 int n;
7918 int i;
7919 int nexps = 0;
7920 int needed_sseregs, needed_intregs;
7921 rtx exp[MAX_CLASSES];
7922 rtx ret;
7924 n = classify_argument (mode, type, regclass, 0);
7925 if (!n)
7926 return NULL;
7927 if (examine_argument (mode, type, in_return, &needed_intregs,
7928 &needed_sseregs))
7929 return NULL;
7930 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7931 return NULL;
7933 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7934 some less clueful developer tries to use floating-point anyway. */
7935 if (needed_sseregs && !TARGET_SSE)
7937 if (in_return)
7939 if (!issued_sse_ret_error)
7941 error ("SSE register return with SSE disabled");
7942 issued_sse_ret_error = true;
7945 else if (!issued_sse_arg_error)
7947 error ("SSE register argument with SSE disabled");
7948 issued_sse_arg_error = true;
7950 return NULL;
7953 /* Likewise, error if the ABI requires us to return values in the
7954 x87 registers and the user specified -mno-80387. */
7955 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7956 for (i = 0; i < n; i++)
7957 if (regclass[i] == X86_64_X87_CLASS
7958 || regclass[i] == X86_64_X87UP_CLASS
7959 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7961 if (!issued_x87_ret_error)
7963 error ("x87 register return with x87 disabled");
7964 issued_x87_ret_error = true;
7966 return NULL;
7969 /* First construct simple cases. Avoid SCmode, since we want to use
7970 a single register to pass this type. */
7971 if (n == 1 && mode != SCmode)
7972 switch (regclass[0])
7974 case X86_64_INTEGER_CLASS:
7975 case X86_64_INTEGERSI_CLASS:
7976 return gen_rtx_REG (mode, intreg[0]);
7977 case X86_64_SSE_CLASS:
7978 case X86_64_SSESF_CLASS:
7979 case X86_64_SSEDF_CLASS:
7980 if (mode != BLKmode)
7981 return gen_reg_or_parallel (mode, orig_mode,
7982 SSE_REGNO (sse_regno));
7983 break;
7984 case X86_64_X87_CLASS:
7985 case X86_64_COMPLEX_X87_CLASS:
7986 return gen_rtx_REG (mode, FIRST_STACK_REG);
7987 case X86_64_NO_CLASS:
7988 /* Zero sized array, struct or class. */
7989 return NULL;
7990 default:
7991 gcc_unreachable ();
7993 if (n == 2
7994 && regclass[0] == X86_64_SSE_CLASS
7995 && regclass[1] == X86_64_SSEUP_CLASS
7996 && mode != BLKmode)
7997 return gen_reg_or_parallel (mode, orig_mode,
7998 SSE_REGNO (sse_regno));
7999 if (n == 4
8000 && regclass[0] == X86_64_SSE_CLASS
8001 && regclass[1] == X86_64_SSEUP_CLASS
8002 && regclass[2] == X86_64_SSEUP_CLASS
8003 && regclass[3] == X86_64_SSEUP_CLASS
8004 && mode != BLKmode)
8005 return gen_reg_or_parallel (mode, orig_mode,
8006 SSE_REGNO (sse_regno));
8007 if (n == 8
8008 && regclass[0] == X86_64_SSE_CLASS
8009 && regclass[1] == X86_64_SSEUP_CLASS
8010 && regclass[2] == X86_64_SSEUP_CLASS
8011 && regclass[3] == X86_64_SSEUP_CLASS
8012 && regclass[4] == X86_64_SSEUP_CLASS
8013 && regclass[5] == X86_64_SSEUP_CLASS
8014 && regclass[6] == X86_64_SSEUP_CLASS
8015 && regclass[7] == X86_64_SSEUP_CLASS
8016 && mode != BLKmode)
8017 return gen_reg_or_parallel (mode, orig_mode,
8018 SSE_REGNO (sse_regno));
8019 if (n == 2
8020 && regclass[0] == X86_64_X87_CLASS
8021 && regclass[1] == X86_64_X87UP_CLASS)
8022 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8024 if (n == 2
8025 && regclass[0] == X86_64_INTEGER_CLASS
8026 && regclass[1] == X86_64_INTEGER_CLASS
8027 && (mode == CDImode || mode == TImode)
8028 && intreg[0] + 1 == intreg[1])
8029 return gen_rtx_REG (mode, intreg[0]);
8031 /* Otherwise figure out the entries of the PARALLEL. */
8032 for (i = 0; i < n; i++)
8034 int pos;
8036 switch (regclass[i])
8038 case X86_64_NO_CLASS:
8039 break;
8040 case X86_64_INTEGER_CLASS:
8041 case X86_64_INTEGERSI_CLASS:
8042 /* Merge TImodes on aligned occasions here too. */
8043 if (i * 8 + 8 > bytes)
8045 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8046 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8047 /* We've requested 24 bytes we
8048 don't have a mode for. Use DImode. */
8049 tmpmode = DImode;
8051 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8052 tmpmode = SImode;
8053 else
8054 tmpmode = DImode;
8055 exp [nexps++]
8056 = gen_rtx_EXPR_LIST (VOIDmode,
8057 gen_rtx_REG (tmpmode, *intreg),
8058 GEN_INT (i*8));
8059 intreg++;
8060 break;
8061 case X86_64_SSESF_CLASS:
8062 exp [nexps++]
8063 = gen_rtx_EXPR_LIST (VOIDmode,
8064 gen_rtx_REG (SFmode,
8065 SSE_REGNO (sse_regno)),
8066 GEN_INT (i*8));
8067 sse_regno++;
8068 break;
8069 case X86_64_SSEDF_CLASS:
8070 exp [nexps++]
8071 = gen_rtx_EXPR_LIST (VOIDmode,
8072 gen_rtx_REG (DFmode,
8073 SSE_REGNO (sse_regno)),
8074 GEN_INT (i*8));
8075 sse_regno++;
8076 break;
8077 case X86_64_SSE_CLASS:
8078 pos = i;
8079 switch (n)
8081 case 1:
8082 tmpmode = DImode;
8083 break;
8084 case 2:
8085 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8087 tmpmode = TImode;
8088 i++;
8090 else
8091 tmpmode = DImode;
8092 break;
8093 case 4:
8094 gcc_assert (i == 0
8095 && regclass[1] == X86_64_SSEUP_CLASS
8096 && regclass[2] == X86_64_SSEUP_CLASS
8097 && regclass[3] == X86_64_SSEUP_CLASS);
8098 tmpmode = OImode;
8099 i += 3;
8100 break;
8101 case 8:
8102 gcc_assert (i == 0
8103 && regclass[1] == X86_64_SSEUP_CLASS
8104 && regclass[2] == X86_64_SSEUP_CLASS
8105 && regclass[3] == X86_64_SSEUP_CLASS
8106 && regclass[4] == X86_64_SSEUP_CLASS
8107 && regclass[5] == X86_64_SSEUP_CLASS
8108 && regclass[6] == X86_64_SSEUP_CLASS
8109 && regclass[7] == X86_64_SSEUP_CLASS);
8110 tmpmode = XImode;
8111 i += 7;
8112 break;
8113 default:
8114 gcc_unreachable ();
8116 exp [nexps++]
8117 = gen_rtx_EXPR_LIST (VOIDmode,
8118 gen_rtx_REG (tmpmode,
8119 SSE_REGNO (sse_regno)),
8120 GEN_INT (pos*8));
8121 sse_regno++;
8122 break;
8123 default:
8124 gcc_unreachable ();
8128 /* Empty aligned struct, union or class. */
8129 if (nexps == 0)
8130 return NULL;
8132 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8133 for (i = 0; i < nexps; i++)
8134 XVECEXP (ret, 0, i) = exp [i];
8135 return ret;
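/* Continuing the struct { double d; long l; } example, the container
   built for it as the first argument of a SysV function is roughly
   (simplified, illustrative RTL):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte lives in %xmm0 and the second in %rdi.  */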
8138 /* Update the data in CUM to advance over an argument of mode MODE
8139 and data type TYPE. (TYPE is null for libcalls where that information
8140 may not be available.)
8142 Return the number of integer registers advanced over. */
8144 static int
8145 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8146 const_tree type, HOST_WIDE_INT bytes,
8147 HOST_WIDE_INT words)
8149 int res = 0;
8150 bool error_p = false;
8152 if (TARGET_IAMCU)
8154 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8155 bytes in registers. */
8156 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8157 goto pass_in_reg;
8158 return res;
8161 switch (mode)
8163 default:
8164 break;
8166 case E_BLKmode:
8167 if (bytes < 0)
8168 break;
8169 /* FALLTHRU */
8171 case E_DImode:
8172 case E_SImode:
8173 case E_HImode:
8174 case E_QImode:
8175 pass_in_reg:
8176 cum->words += words;
8177 cum->nregs -= words;
8178 cum->regno += words;
8179 if (cum->nregs >= 0)
8180 res = words;
8181 if (cum->nregs <= 0)
8183 cum->nregs = 0;
8184 cfun->machine->arg_reg_available = false;
8185 cum->regno = 0;
8187 break;
8189 case E_OImode:
8190 /* OImode shouldn't be used directly. */
8191 gcc_unreachable ();
8193 case E_DFmode:
8194 if (cum->float_in_sse == -1)
8195 error_p = true;
8196 if (cum->float_in_sse < 2)
8197 break;
8198 /* FALLTHRU */
8199 case E_SFmode:
8200 if (cum->float_in_sse == -1)
8201 error_p = true;
8202 if (cum->float_in_sse < 1)
8203 break;
8204 /* FALLTHRU */
8206 case E_V8SFmode:
8207 case E_V8SImode:
8208 case E_V64QImode:
8209 case E_V32HImode:
8210 case E_V16SImode:
8211 case E_V8DImode:
8212 case E_V16SFmode:
8213 case E_V8DFmode:
8214 case E_V32QImode:
8215 case E_V16HImode:
8216 case E_V4DFmode:
8217 case E_V4DImode:
8218 case E_TImode:
8219 case E_V16QImode:
8220 case E_V8HImode:
8221 case E_V4SImode:
8222 case E_V2DImode:
8223 case E_V4SFmode:
8224 case E_V2DFmode:
8225 if (!type || !AGGREGATE_TYPE_P (type))
8227 cum->sse_words += words;
8228 cum->sse_nregs -= 1;
8229 cum->sse_regno += 1;
8230 if (cum->sse_nregs <= 0)
8232 cum->sse_nregs = 0;
8233 cum->sse_regno = 0;
8236 break;
8238 case E_V8QImode:
8239 case E_V4HImode:
8240 case E_V2SImode:
8241 case E_V2SFmode:
8242 case E_V1TImode:
8243 case E_V1DImode:
8244 if (!type || !AGGREGATE_TYPE_P (type))
8246 cum->mmx_words += words;
8247 cum->mmx_nregs -= 1;
8248 cum->mmx_regno += 1;
8249 if (cum->mmx_nregs <= 0)
8251 cum->mmx_nregs = 0;
8252 cum->mmx_regno = 0;
8255 break;
8257 if (error_p)
8259 cum->float_in_sse = 0;
8260 error ("calling %qD with SSE calling convention without "
8261 "SSE/SSE2 enabled", cum->decl);
8262 sorry ("this is a GCC bug that can be worked around by adding "
8263 "attribute used to function called");
8266 return res;
8269 static int
8270 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8271 const_tree type, HOST_WIDE_INT words, bool named)
8273 int int_nregs, sse_nregs;
8275 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
8276 if (!named && (VALID_AVX512F_REG_MODE (mode)
8277 || VALID_AVX256_REG_MODE (mode)))
8278 return 0;
8280 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8281 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8283 cum->nregs -= int_nregs;
8284 cum->sse_nregs -= sse_nregs;
8285 cum->regno += int_nregs;
8286 cum->sse_regno += sse_nregs;
8287 return int_nregs;
8289 else
8291 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8292 cum->words = ROUND_UP (cum->words, align);
8293 cum->words += words;
8294 return 0;
8298 static int
8299 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8300 HOST_WIDE_INT words)
8302 /* Otherwise, this should be passed indirectly. */
8303 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8305 cum->words += words;
8306 if (cum->nregs > 0)
8308 cum->nregs -= 1;
8309 cum->regno += 1;
8310 return 1;
8312 return 0;
8315 /* Update the data in CUM to advance over an argument of mode MODE and
8316 data type TYPE. (TYPE is null for libcalls where that information
8317 may not be available.) */
8319 static void
8320 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8321 const_tree type, bool named)
8323 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8324 HOST_WIDE_INT bytes, words;
8325 int nregs;
8327 /* The argument of interrupt handler is a special case and is
8328 handled in ix86_function_arg. */
8329 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8330 return;
8332 if (mode == BLKmode)
8333 bytes = int_size_in_bytes (type);
8334 else
8335 bytes = GET_MODE_SIZE (mode);
8336 words = CEIL (bytes, UNITS_PER_WORD);
8338 if (type)
8339 mode = type_natural_mode (type, NULL, false);
8341 if ((type && POINTER_BOUNDS_TYPE_P (type))
8342 || POINTER_BOUNDS_MODE_P (mode))
8344 /* If we pass bounds in the BT then just update the remaining bounds count. */
8345 if (cum->bnds_in_bt)
8347 cum->bnds_in_bt--;
8348 return;
8351 /* Update the remaining number of bounds to force. */
8352 if (cum->force_bnd_pass)
8353 cum->force_bnd_pass--;
8355 cum->bnd_regno++;
8357 return;
8360 /* The first arg not going to Bounds Tables resets this counter. */
8361 cum->bnds_in_bt = 0;
8362 /* For unnamed args we always pass bounds to avoid confusion when the
8363 passed and received types do not match. If bounds do not follow an
8364 unnamed arg, still pretend the required number of bounds were passed. */
8365 if (cum->force_bnd_pass)
8367 cum->bnd_regno += cum->force_bnd_pass;
8368 cum->force_bnd_pass = 0;
8371 if (TARGET_64BIT)
8373 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8375 if (call_abi == MS_ABI)
8376 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8377 else
8378 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8380 else
8381 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8383 /* For stdarg we expect bounds to be passed for each value passed
8384 in register. */
8385 if (cum->stdarg)
8386 cum->force_bnd_pass = nregs;
8387 /* For pointers passed in memory we expect bounds passed in Bounds
8388 Table. */
8389 if (!nregs)
8391 /* Track if there are outgoing arguments on stack. */
8392 if (cum->caller)
8393 cfun->machine->outgoing_args_on_stack = true;
8395 cum->bnds_in_bt = chkp_type_bounds_count (type);
8399 /* Define where to put the arguments to a function.
8400 Value is zero to push the argument on the stack,
8401 or a hard register in which to store the argument.
8403 MODE is the argument's machine mode.
8404 TYPE is the data type of the argument (as a tree).
8405 This is null for libcalls where that information may
8406 not be available.
8407 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8408 the preceding args and about the function being called.
8409 NAMED is nonzero if this argument is a named parameter
8410 (otherwise it is an extra parameter matching an ellipsis). */
8412 static rtx
8413 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8414 machine_mode orig_mode, const_tree type,
8415 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8417 bool error_p = false;
8419 /* Avoid the AL settings for the Unix64 ABI. */
8420 if (mode == VOIDmode)
8421 return constm1_rtx;
8423 if (TARGET_IAMCU)
8425 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8426 bytes in registers. */
8427 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8428 goto pass_in_reg;
8429 return NULL_RTX;
8432 switch (mode)
8434 default:
8435 break;
8437 case E_BLKmode:
8438 if (bytes < 0)
8439 break;
8440 /* FALLTHRU */
8441 case E_DImode:
8442 case E_SImode:
8443 case E_HImode:
8444 case E_QImode:
8445 pass_in_reg:
8446 if (words <= cum->nregs)
8448 int regno = cum->regno;
8450 /* Fastcall allocates the first two DWORD (SImode) or
8451 smaller arguments to ECX and EDX if it isn't an
8452 aggregate type. */
8453 if (cum->fastcall)
8455 if (mode == BLKmode
8456 || mode == DImode
8457 || (type && AGGREGATE_TYPE_P (type)))
8458 break;
8460 /* ECX, not EAX, is the first allocated register. */
8461 if (regno == AX_REG)
8462 regno = CX_REG;
8464 return gen_rtx_REG (mode, regno);
8466 break;
8468 case E_DFmode:
8469 if (cum->float_in_sse == -1)
8470 error_p = true;
8471 if (cum->float_in_sse < 2)
8472 break;
8473 /* FALLTHRU */
8474 case E_SFmode:
8475 if (cum->float_in_sse == -1)
8476 error_p = true;
8477 if (cum->float_in_sse < 1)
8478 break;
8479 /* FALLTHRU */
8480 case E_TImode:
8481 /* In 32bit, we pass TImode in xmm registers. */
8482 case E_V16QImode:
8483 case E_V8HImode:
8484 case E_V4SImode:
8485 case E_V2DImode:
8486 case E_V4SFmode:
8487 case E_V2DFmode:
8488 if (!type || !AGGREGATE_TYPE_P (type))
8490 if (cum->sse_nregs)
8491 return gen_reg_or_parallel (mode, orig_mode,
8492 cum->sse_regno + FIRST_SSE_REG);
8494 break;
8496 case E_OImode:
8497 case E_XImode:
8498 /* OImode and XImode shouldn't be used directly. */
8499 gcc_unreachable ();
8501 case E_V64QImode:
8502 case E_V32HImode:
8503 case E_V16SImode:
8504 case E_V8DImode:
8505 case E_V16SFmode:
8506 case E_V8DFmode:
8507 case E_V8SFmode:
8508 case E_V8SImode:
8509 case E_V32QImode:
8510 case E_V16HImode:
8511 case E_V4DFmode:
8512 case E_V4DImode:
8513 if (!type || !AGGREGATE_TYPE_P (type))
8515 if (cum->sse_nregs)
8516 return gen_reg_or_parallel (mode, orig_mode,
8517 cum->sse_regno + FIRST_SSE_REG);
8519 break;
8521 case E_V8QImode:
8522 case E_V4HImode:
8523 case E_V2SImode:
8524 case E_V2SFmode:
8525 case E_V1TImode:
8526 case E_V1DImode:
8527 if (!type || !AGGREGATE_TYPE_P (type))
8529 if (cum->mmx_nregs)
8530 return gen_reg_or_parallel (mode, orig_mode,
8531 cum->mmx_regno + FIRST_MMX_REG);
8533 break;
8535 if (error_p)
8537 cum->float_in_sse = 0;
8538 error ("calling %qD with SSE calling convention without "
8539 "SSE/SSE2 enabled", cum->decl);
8540 sorry ("this is a GCC bug that can be worked around by adding "
8541 "attribute used to function called");
8544 return NULL_RTX;
8547 static rtx
8548 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8549 machine_mode orig_mode, const_tree type, bool named)
8551 /* Handle a hidden AL argument containing number of registers
8552 for varargs x86-64 functions. */
8553 if (mode == VOIDmode)
8554 return GEN_INT (cum->maybe_vaarg
8555 ? (cum->sse_nregs < 0
8556 ? X86_64_SSE_REGPARM_MAX
8557 : cum->sse_regno)
8558 : -1);
8560 switch (mode)
8562 default:
8563 break;
8565 case E_V8SFmode:
8566 case E_V8SImode:
8567 case E_V32QImode:
8568 case E_V16HImode:
8569 case E_V4DFmode:
8570 case E_V4DImode:
8571 case E_V16SFmode:
8572 case E_V16SImode:
8573 case E_V64QImode:
8574 case E_V32HImode:
8575 case E_V8DFmode:
8576 case E_V8DImode:
8577 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8578 if (!named)
8579 return NULL;
8580 break;
8583 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8584 cum->sse_nregs,
8585 &x86_64_int_parameter_registers [cum->regno],
8586 cum->sse_regno);
8589 static rtx
8590 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8591 machine_mode orig_mode, bool named,
8592 HOST_WIDE_INT bytes)
8594 unsigned int regno;
8596 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
8597 We use a value of -2 to specify that the current function call is MS_ABI. */
8598 if (mode == VOIDmode)
8599 return GEN_INT (-2);
8601 /* If we've run out of registers, it goes on the stack. */
8602 if (cum->nregs == 0)
8603 return NULL_RTX;
8605 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8607 /* Only floating point modes are passed in anything but integer regs. */
8608 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8610 if (named)
8611 regno = cum->regno + FIRST_SSE_REG;
8612 else
8614 rtx t1, t2;
8616 /* Unnamed floating parameters are passed in both the
8617 SSE and integer registers. */
8618 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8619 t2 = gen_rtx_REG (mode, regno);
8620 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8621 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8622 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8625 /* Handle aggregate types passed in registers. */
8626 if (orig_mode == BLKmode)
8628 if (bytes > 0 && bytes <= 8)
8629 mode = (bytes > 4 ? DImode : SImode);
8630 if (mode == BLKmode)
8631 mode = DImode;
8634 return gen_reg_or_parallel (mode, orig_mode, regno);
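/* In other words, a sketch of the Microsoft x64 convention implemented
   here: the first four arguments occupy the fixed slots RCX, RDX, R8
   and R9; a named SFmode/DFmode argument goes to the matching
   XMM0-XMM3 register instead, and an unnamed one to both, as built
   above.  Remaining arguments go on the stack.  */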
8637 /* Return where to put the arguments to a function.
8638 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8640 MODE is the argument's machine mode. TYPE is the data type of the
8641 argument. It is null for libcalls where that information may not be
8642 available. CUM gives information about the preceding args and about
8643 the function being called. NAMED is nonzero if this argument is a
8644 named parameter (otherwise it is an extra parameter matching an
8645 ellipsis). */
8647 static rtx
8648 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8649 const_tree type, bool named)
8651 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8652 machine_mode mode = omode;
8653 HOST_WIDE_INT bytes, words;
8654 rtx arg;
8656 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8658 gcc_assert (type != NULL_TREE);
8659 if (POINTER_TYPE_P (type))
8661 /* This is the pointer argument. */
8662 gcc_assert (TYPE_MODE (type) == Pmode);
8663 /* It is at -WORD(AP) in the current frame in interrupt and
8664 exception handlers. */
8665 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8667 else
8669 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8670 && TREE_CODE (type) == INTEGER_TYPE
8671 && TYPE_MODE (type) == word_mode);
8672 /* The error code is the word-mode integer argument at
8673 -2 * WORD(AP) in the current frame of the exception
8674 handler. */
8675 arg = gen_rtx_MEM (word_mode,
8676 plus_constant (Pmode,
8677 arg_pointer_rtx,
8678 -2 * UNITS_PER_WORD));
8680 return arg;
8683 /* All pointer bounds arguments are handled separately here. */
8684 if ((type && POINTER_BOUNDS_TYPE_P (type))
8685 || POINTER_BOUNDS_MODE_P (mode))
8687 /* Return NULL if bounds are forced to go in Bounds Table. */
8688 if (cum->bnds_in_bt)
8689 arg = NULL;
8690 /* Return the next available bound reg if any. */
8691 else if (cum->bnd_regno <= LAST_BND_REG)
8692 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8693 /* Return the next special slot number otherwise. */
8694 else
8695 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8697 return arg;
8700 if (mode == BLKmode)
8701 bytes = int_size_in_bytes (type);
8702 else
8703 bytes = GET_MODE_SIZE (mode);
8704 words = CEIL (bytes, UNITS_PER_WORD);
8706 /* To simplify the code below, represent vector types with a vector mode
8707 even if MMX/SSE are not active. */
8708 if (type && TREE_CODE (type) == VECTOR_TYPE)
8709 mode = type_natural_mode (type, cum, false);
8711 if (TARGET_64BIT)
8713 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8715 if (call_abi == MS_ABI)
8716 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8717 else
8718 arg = function_arg_64 (cum, mode, omode, type, named);
8720 else
8721 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8723 /* Track if there are outgoing arguments on stack. */
8724 if (arg == NULL_RTX && cum->caller)
8725 cfun->machine->outgoing_args_on_stack = true;
8727 return arg;
8730 /* A C expression that indicates when an argument must be passed by
8731 reference. If nonzero for an argument, a copy of that argument is
8732 made in memory and a pointer to the argument is passed instead of
8733 the argument itself. The pointer is passed in whatever way is
8734 appropriate for passing a pointer to that type. */
8736 static bool
8737 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8738 const_tree type, bool)
8740 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8742 /* Bounds are never passed by reference. */
8743 if ((type && POINTER_BOUNDS_TYPE_P (type))
8744 || POINTER_BOUNDS_MODE_P (mode))
8745 return false;
8747 if (TARGET_64BIT)
8749 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8751 /* See Windows x64 Software Convention. */
8752 if (call_abi == MS_ABI)
8754 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8756 if (type)
8758 /* Arrays are passed by reference. */
8759 if (TREE_CODE (type) == ARRAY_TYPE)
8760 return true;
8762 if (RECORD_OR_UNION_TYPE_P (type))
8764 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8765 are passed by reference. */
8766 msize = int_size_in_bytes (type);
8770 /* __m128 is passed by reference. */
8771 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8773 else if (type && int_size_in_bytes (type) == -1)
8774 return true;
8777 return false;
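/* For example, under the MS_ABI rules above a __m128 argument or an
   array argument is passed by reference, while a struct of exactly
   1, 2, 4 or 8 bytes is passed by value.  */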
8780 /* Return true when TYPE should be 128bit aligned for 32bit argument
8781 passing ABI. XXX: This function is obsolete and is only used for
8782 checking psABI compatibility with previous versions of GCC. */
8784 static bool
8785 ix86_compat_aligned_value_p (const_tree type)
8787 machine_mode mode = TYPE_MODE (type);
8788 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8789 || mode == TDmode
8790 || mode == TFmode
8791 || mode == TCmode)
8792 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8793 return true;
8794 if (TYPE_ALIGN (type) < 128)
8795 return false;
8797 if (AGGREGATE_TYPE_P (type))
8799 /* Walk the aggregates recursively. */
8800 switch (TREE_CODE (type))
8802 case RECORD_TYPE:
8803 case UNION_TYPE:
8804 case QUAL_UNION_TYPE:
8806 tree field;
8808 /* Walk all the structure fields. */
8809 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8811 if (TREE_CODE (field) == FIELD_DECL
8812 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8813 return true;
8815 break;
8818 case ARRAY_TYPE:
8819 /* Just for use if some languages pass arrays by value. */
8820 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8821 return true;
8822 break;
8824 default:
8825 gcc_unreachable ();
8828 return false;
8831 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8832 XXX: This function is obsolete and is only used for checking psABI
8833 compatibility with previous versions of GCC. */
8835 static unsigned int
8836 ix86_compat_function_arg_boundary (machine_mode mode,
8837 const_tree type, unsigned int align)
8839 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8840 natural boundaries. */
8841 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8843 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8844 make an exception for SSE modes since these require 128bit
8845 alignment.
8847 The handling here differs from field_alignment. ICC aligns MMX
8848 arguments to 4 byte boundaries, while structure fields are aligned
8849 to 8 byte boundaries. */
8850 if (!type)
8852 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8853 align = PARM_BOUNDARY;
8855 else
8857 if (!ix86_compat_aligned_value_p (type))
8858 align = PARM_BOUNDARY;
8861 if (align > BIGGEST_ALIGNMENT)
8862 align = BIGGEST_ALIGNMENT;
8863 return align;
8866 /* Return true when TYPE should be 128bit aligned for 32bit argument
8867 passing ABI. */
8869 static bool
8870 ix86_contains_aligned_value_p (const_tree type)
8872 machine_mode mode = TYPE_MODE (type);
8874 if (mode == XFmode || mode == XCmode)
8875 return false;
8877 if (TYPE_ALIGN (type) < 128)
8878 return false;
8880 if (AGGREGATE_TYPE_P (type))
8882 /* Walk the aggregates recursively. */
8883 switch (TREE_CODE (type))
8885 case RECORD_TYPE:
8886 case UNION_TYPE:
8887 case QUAL_UNION_TYPE:
8889 tree field;
8891 /* Walk all the structure fields. */
8892 for (field = TYPE_FIELDS (type);
8893 field;
8894 field = DECL_CHAIN (field))
8896 if (TREE_CODE (field) == FIELD_DECL
8897 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8898 return true;
8900 break;
8903 case ARRAY_TYPE:
8904 /* Just for use if some languages pass arrays by value. */
8905 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8906 return true;
8907 break;
8909 default:
8910 gcc_unreachable ();
8913 else
8914 return TYPE_ALIGN (type) >= 128;
8916 return false;
8919 /* Gives the alignment boundary, in bits, of an argument with the
8920 specified mode and type. */
8922 static unsigned int
8923 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8925 unsigned int align;
8926 if (type)
8928 /* Since the main variant type is used for the call, convert
8929 TYPE to its main variant. */
8930 type = TYPE_MAIN_VARIANT (type);
8931 align = TYPE_ALIGN (type);
8933 else
8934 align = GET_MODE_ALIGNMENT (mode);
8935 if (align < PARM_BOUNDARY)
8936 align = PARM_BOUNDARY;
8937 else
8939 static bool warned;
8940 unsigned int saved_align = align;
8942 if (!TARGET_64BIT)
8944 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8945 if (!type)
8947 if (mode == XFmode || mode == XCmode)
8948 align = PARM_BOUNDARY;
8950 else if (!ix86_contains_aligned_value_p (type))
8951 align = PARM_BOUNDARY;
8953 if (align < 128)
8954 align = PARM_BOUNDARY;
8957 if (warn_psabi
8958 && !warned
8959 && align != ix86_compat_function_arg_boundary (mode, type,
8960 saved_align))
8962 warned = true;
8963 inform (input_location,
8964 "The ABI for passing parameters with %d-byte"
8965 " alignment has changed in GCC 4.6",
8966 align / BITS_PER_UNIT);
8970 return align;
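/* For instance, on ia32 a plain int argument keeps PARM_BOUNDARY
   (32-bit) alignment, while a __m128 argument is aligned to 128 bits;
   the -Wpsabi note above fires when the result differs from what
   releases before GCC 4.6 computed.  */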
8973 /* Return true if N is a possible register number of function value. */
8975 static bool
8976 ix86_function_value_regno_p (const unsigned int regno)
8978 switch (regno)
8980 case AX_REG:
8981 return true;
8982 case DX_REG:
8983 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8984 case DI_REG:
8985 case SI_REG:
8986 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8988 case BND0_REG:
8989 case BND1_REG:
8990 return chkp_function_instrumented_p (current_function_decl);
8992 /* Complex values are returned in %st(0)/%st(1) pair. */
8993 case ST0_REG:
8994 case ST1_REG:
8995 /* TODO: The function should depend on current function ABI but
8996 builtins.c would need updating then. Therefore we use the
8997 default ABI. */
8998 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8999 return false;
9000 return TARGET_FLOAT_RETURNS_IN_80387;
9002 /* Complex values are returned in %xmm0/%xmm1 pair. */
9003 case XMM0_REG:
9004 case XMM1_REG:
9005 return TARGET_SSE;
9007 case MM0_REG:
9008 if (TARGET_MACHO || TARGET_64BIT)
9009 return false;
9010 return TARGET_MMX;
9013 return false;
9016 /* Define how to find the value returned by a function.
9017 VALTYPE is the data type of the value (as a tree).
9018 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9019 otherwise, FUNC is 0. */
9021 static rtx
9022 function_value_32 (machine_mode orig_mode, machine_mode mode,
9023 const_tree fntype, const_tree fn)
9025 unsigned int regno;
9027 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9028 we normally prevent this case when mmx is not available. However
9029 some ABIs may require the result to be returned like DImode. */
9030 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9031 regno = FIRST_MMX_REG;
9033 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9034 we prevent this case when sse is not available. However some ABIs
9035 may require the result to be returned like integer TImode. */
9036 else if (mode == TImode
9037 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9038 regno = FIRST_SSE_REG;
9040 /* 32-byte vector modes in %ymm0. */
9041 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9042 regno = FIRST_SSE_REG;
9044 /* 64-byte vector modes in %zmm0. */
9045 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9046 regno = FIRST_SSE_REG;
9048 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9049 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9050 regno = FIRST_FLOAT_REG;
9051 else
9052 /* Most things go in %eax. */
9053 regno = AX_REG;
9055 /* Override FP return register with %xmm0 for local functions when
9056 SSE math is enabled or for functions with sseregparm attribute. */
9057 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9059 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9060 if (sse_level == -1)
9062 error ("calling %qD with SSE calling convention without "
9063 "SSE/SSE2 enabled", fn);
9064 sorry ("this is a GCC bug that can be worked around by adding "
9065 "attribute used to function called");
9067 else if ((sse_level >= 1 && mode == SFmode)
9068 || (sse_level == 2 && mode == DFmode))
9069 regno = FIRST_SSE_REG;
9072 /* OImode shouldn't be used directly. */
9073 gcc_assert (mode != OImode);
9075 return gen_rtx_REG (orig_mode, regno);
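/* Two illustrative cases: a float returned with the default 387 math
   ends up in %st(0), whereas the same return from a local function
   using SSE math (sse_level >= 1 above) is redirected to %xmm0.  */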
9078 static rtx
9079 function_value_64 (machine_mode orig_mode, machine_mode mode,
9080 const_tree valtype)
9082 rtx ret;
9084 /* Handle libcalls, which don't provide a type node. */
9085 if (valtype == NULL)
9087 unsigned int regno;
9089 switch (mode)
9091 case E_SFmode:
9092 case E_SCmode:
9093 case E_DFmode:
9094 case E_DCmode:
9095 case E_TFmode:
9096 case E_SDmode:
9097 case E_DDmode:
9098 case E_TDmode:
9099 regno = FIRST_SSE_REG;
9100 break;
9101 case E_XFmode:
9102 case E_XCmode:
9103 regno = FIRST_FLOAT_REG;
9104 break;
9105 case E_TCmode:
9106 return NULL;
9107 default:
9108 regno = AX_REG;
9111 return gen_rtx_REG (mode, regno);
9113 else if (POINTER_TYPE_P (valtype))
9115 /* Pointers are always returned in word_mode. */
9116 mode = word_mode;
9119 ret = construct_container (mode, orig_mode, valtype, 1,
9120 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9121 x86_64_int_return_registers, 0);
9123 /* For zero sized structures, construct_container returns NULL, but we
9124 need to keep the rest of the compiler happy by returning a meaningful value. */
9125 if (!ret)
9126 ret = gen_rtx_REG (orig_mode, AX_REG);
9128 return ret;
9131 static rtx
9132 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9133 const_tree valtype)
9135 unsigned int regno = AX_REG;
9137 if (TARGET_SSE)
9139 switch (GET_MODE_SIZE (mode))
9141 case 16:
9142 if (valtype != NULL_TREE
9143 && !VECTOR_INTEGER_TYPE_P (valtype)
9145 && !INTEGRAL_TYPE_P (valtype)
9146 && !VECTOR_FLOAT_TYPE_P (valtype))
9147 break;
9148 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9149 && !COMPLEX_MODE_P (mode))
9150 regno = FIRST_SSE_REG;
9151 break;
9152 case 8:
9153 case 4:
9154 if (mode == SFmode || mode == DFmode)
9155 regno = FIRST_SSE_REG;
9156 break;
9157 default:
9158 break;
9161 return gen_rtx_REG (orig_mode, regno);
9164 static rtx
9165 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9166 machine_mode orig_mode, machine_mode mode)
9168 const_tree fn, fntype;
9170 fn = NULL_TREE;
9171 if (fntype_or_decl && DECL_P (fntype_or_decl))
9172 fn = fntype_or_decl;
9173 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9175 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9176 || POINTER_BOUNDS_MODE_P (mode))
9177 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9178 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9179 return function_value_ms_64 (orig_mode, mode, valtype);
9180 else if (TARGET_64BIT)
9181 return function_value_64 (orig_mode, mode, valtype);
9182 else
9183 return function_value_32 (orig_mode, mode, fntype, fn);
9186 static rtx
9187 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9189 machine_mode mode, orig_mode;
9191 orig_mode = TYPE_MODE (valtype);
9192 mode = type_natural_mode (valtype, NULL, true);
9193 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9196 /* Return an RTX representing a place where a function returns
9197 or receives pointer bounds, or NULL if no bounds are returned.
9199 VALTYPE is a data type of a value returned by the function.
9201 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9202 or FUNCTION_TYPE of the function.
9204 If OUTGOING is false, return a place in which the caller will
9205 see the return value. Otherwise, return a place where a
9206 function returns a value. */
9208 static rtx
9209 ix86_function_value_bounds (const_tree valtype,
9210 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9211 bool outgoing ATTRIBUTE_UNUSED)
9213 rtx res = NULL_RTX;
9215 if (BOUNDED_TYPE_P (valtype))
9216 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9217 else if (chkp_type_has_pointer (valtype))
9219 bitmap slots;
9220 rtx bounds[2];
9221 bitmap_iterator bi;
9222 unsigned i, bnd_no = 0;
9224 bitmap_obstack_initialize (NULL);
9225 slots = BITMAP_ALLOC (NULL);
9226 chkp_find_bound_slots (valtype, slots);
9228 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9230 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9231 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9232 gcc_assert (bnd_no < 2);
9233 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9236 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9238 BITMAP_FREE (slots);
9239 bitmap_obstack_release (NULL);
9241 else
9242 res = NULL_RTX;
9244 return res;
9247 /* Pointer function arguments and return values are promoted to
9248 word_mode for normal functions. */
9250 static machine_mode
9251 ix86_promote_function_mode (const_tree type, machine_mode mode,
9252 int *punsignedp, const_tree fntype,
9253 int for_return)
9255 if (cfun->machine->func_type == TYPE_NORMAL
9256 && type != NULL_TREE
9257 && POINTER_TYPE_P (type))
9259 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9260 return word_mode;
9262 return default_promote_function_mode (type, mode, punsignedp, fntype,
9263 for_return);
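/* Illustration (mainly relevant for -mx32, where Pmode is SImode but
   word_mode is DImode): a pointer argument of a normal function is
   promoted to word_mode and zero-extended, matching
   POINTERS_EXTEND_UNSIGNED above; interrupt and exception handlers
   keep the default promotion.  */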
9266 /* Return true if a structure, union or array with MODE containing FIELD
9267 should be accessed using BLKmode. */
9269 static bool
9270 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9272 /* Union with XFmode must be in BLKmode. */
9273 return (mode == XFmode
9274 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9275 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9279 ix86_libcall_value (machine_mode mode)
9281 return ix86_function_value_1 (NULL, NULL, mode, mode);
9284 /* Return true iff type is returned in memory. */
9286 static bool
9287 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9289 #ifdef SUBTARGET_RETURN_IN_MEMORY
9290 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9291 #else
9292 const machine_mode mode = type_natural_mode (type, NULL, true);
9293 HOST_WIDE_INT size;
9295 if (POINTER_BOUNDS_TYPE_P (type))
9296 return false;
9298 if (TARGET_64BIT)
9300 if (ix86_function_type_abi (fntype) == MS_ABI)
9302 size = int_size_in_bytes (type);
9304 /* __m128 is returned in xmm0. */
9305 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9306 || INTEGRAL_TYPE_P (type)
9307 || VECTOR_FLOAT_TYPE_P (type))
9308 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9309 && !COMPLEX_MODE_P (mode)
9310 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9311 return false;
9313 /* Otherwise, the size must be exactly in [1248]. */
9314 return size != 1 && size != 2 && size != 4 && size != 8;
9316 else
9318 int needed_intregs, needed_sseregs;
9320 return examine_argument (mode, type, 1,
9321 &needed_intregs, &needed_sseregs);
9324 else
9326 size = int_size_in_bytes (type);
9328 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9329 bytes in registers. */
9330 if (TARGET_IAMCU)
9331 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9333 if (mode == BLKmode)
9334 return true;
9336 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9337 return false;
9339 if (VECTOR_MODE_P (mode) || mode == TImode)
9341 /* User-created vectors small enough to fit in EAX. */
9342 if (size < 8)
9343 return false;
9345 /* Unless the ABI prescribes otherwise,
9346 MMX/3dNow values are returned in MM0 if available. */
9348 if (size == 8)
9349 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9351 /* SSE values are returned in XMM0 if available. */
9352 if (size == 16)
9353 return !TARGET_SSE;
9355 /* AVX values are returned in YMM0 if available. */
9356 if (size == 32)
9357 return !TARGET_AVX;
9359 /* AVX512F values are returned in ZMM0 if available. */
9360 if (size == 64)
9361 return !TARGET_AVX512F;
9364 if (mode == XFmode)
9365 return false;
9367 if (size > 12)
9368 return true;
9370 /* OImode shouldn't be used directly. */
9371 gcc_assert (mode != OImode);
9373 return false;
9375 #endif
9379 /* Create the va_list data type. */
9381 static tree
9382 ix86_build_builtin_va_list_64 (void)
9384 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9386 record = lang_hooks.types.make_type (RECORD_TYPE);
9387 type_decl = build_decl (BUILTINS_LOCATION,
9388 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9390 f_gpr = build_decl (BUILTINS_LOCATION,
9391 FIELD_DECL, get_identifier ("gp_offset"),
9392 unsigned_type_node);
9393 f_fpr = build_decl (BUILTINS_LOCATION,
9394 FIELD_DECL, get_identifier ("fp_offset"),
9395 unsigned_type_node);
9396 f_ovf = build_decl (BUILTINS_LOCATION,
9397 FIELD_DECL, get_identifier ("overflow_arg_area"),
9398 ptr_type_node);
9399 f_sav = build_decl (BUILTINS_LOCATION,
9400 FIELD_DECL, get_identifier ("reg_save_area"),
9401 ptr_type_node);
9403 va_list_gpr_counter_field = f_gpr;
9404 va_list_fpr_counter_field = f_fpr;
9406 DECL_FIELD_CONTEXT (f_gpr) = record;
9407 DECL_FIELD_CONTEXT (f_fpr) = record;
9408 DECL_FIELD_CONTEXT (f_ovf) = record;
9409 DECL_FIELD_CONTEXT (f_sav) = record;
9411 TYPE_STUB_DECL (record) = type_decl;
9412 TYPE_NAME (record) = type_decl;
9413 TYPE_FIELDS (record) = f_gpr;
9414 DECL_CHAIN (f_gpr) = f_fpr;
9415 DECL_CHAIN (f_fpr) = f_ovf;
9416 DECL_CHAIN (f_ovf) = f_sav;
9418 layout_type (record);
9420 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9421 NULL_TREE, TYPE_ATTRIBUTES (record));
9423 /* The correct type is an array type of one element. */
9424 return build_array_type (record, build_index_type (size_zero_node));
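/* The record built above corresponds roughly to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   (illustrative only; the real type is the tree constructed above).  */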
9427 /* Set up the builtin va_list data type and, for 64-bit, the additional
9428 calling convention specific va_list data types. */
9430 static tree
9431 ix86_build_builtin_va_list (void)
9433 if (TARGET_64BIT)
9435 /* Initialize ABI specific va_list builtin types.
9437 In lto1, we can encounter two va_list types:
9438 - one as a result of the type-merge across TUs, and
9439 - the one constructed here.
9440 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9441 a type identity check in canonical_va_list_type based on
9442 TYPE_MAIN_VARIANT (which we used to have) will not work.
9443 Instead, we tag each va_list_type_node with its unique attribute, and
9444 look for the attribute in the type identity check in
9445 canonical_va_list_type.
9447 Tagging sysv_va_list_type_node directly with the attribute is
9448    problematic since it's an array of one record, which will degrade into a
9449 pointer to record when used as parameter (see build_va_arg comments for
9450 an example), dropping the attribute in the process. So we tag the
9451 record instead. */
9453 /* For SYSV_ABI we use an array of one record. */
9454 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9456 /* For MS_ABI we use plain pointer to argument area. */
9457 tree char_ptr_type = build_pointer_type (char_type_node);
9458 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9459 TYPE_ATTRIBUTES (char_ptr_type));
9460 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9462 return ((ix86_abi == MS_ABI)
9463 ? ms_va_list_type_node
9464 : sysv_va_list_type_node);
9466 else
9468 /* For i386 we use plain pointer to argument area. */
9469 return build_pointer_type (char_type_node);
9473 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9475 static void
9476 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9478 rtx save_area, mem;
9479 alias_set_type set;
9480 int i, max;
9482 /* GPR size of varargs save area. */
9483 if (cfun->va_list_gpr_size)
9484 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9485 else
9486 ix86_varargs_gpr_size = 0;
9488 /* FPR size of varargs save area. We don't need it if we don't pass
9489 anything in SSE registers. */
9490 if (TARGET_SSE && cfun->va_list_fpr_size)
9491 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9492 else
9493 ix86_varargs_fpr_size = 0;
9495 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9496 return;
9498 save_area = frame_pointer_rtx;
9499 set = get_varargs_alias_set ();
9501 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9502 if (max > X86_64_REGPARM_MAX)
9503 max = X86_64_REGPARM_MAX;
9505 for (i = cum->regno; i < max; i++)
9507 mem = gen_rtx_MEM (word_mode,
9508 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9509 MEM_NOTRAP_P (mem) = 1;
9510 set_mem_alias_set (mem, set);
9511 emit_move_insn (mem,
9512 gen_rtx_REG (word_mode,
9513 x86_64_int_parameter_registers[i]));
9516 if (ix86_varargs_fpr_size)
9518 machine_mode smode;
9519 rtx_code_label *label;
9520 rtx test;
9522 /* Now emit code to save SSE registers. The AX parameter contains number
9523 of SSE parameter registers used to call this function, though all we
9524 actually check here is the zero/non-zero status. */
9526 label = gen_label_rtx ();
9527 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9528 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9529 label));
9531 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9532 we used movdqa (i.e. TImode) instead? Perhaps even better would
9533 be if we could determine the real mode of the data, via a hook
9534 into pass_stdarg. Ignore all that for now. */
9535 smode = V4SFmode;
9536 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9537 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9539 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9540 if (max > X86_64_SSE_REGPARM_MAX)
9541 max = X86_64_SSE_REGPARM_MAX;
9543 for (i = cum->sse_regno; i < max; ++i)
9545 mem = plus_constant (Pmode, save_area,
9546 i * 16 + ix86_varargs_gpr_size);
9547 mem = gen_rtx_MEM (smode, mem);
9548 MEM_NOTRAP_P (mem) = 1;
9549 set_mem_alias_set (mem, set);
9550 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9552 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9555 emit_label (label);
9559 static void
9560 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9562 alias_set_type set = get_varargs_alias_set ();
9563 int i;
9565   /* Reset to zero, as there might be a sysv va_arg used
9566 before. */
9567 ix86_varargs_gpr_size = 0;
9568 ix86_varargs_fpr_size = 0;
9570 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9572 rtx reg, mem;
9574 mem = gen_rtx_MEM (Pmode,
9575 plus_constant (Pmode, virtual_incoming_args_rtx,
9576 i * UNITS_PER_WORD));
9577 MEM_NOTRAP_P (mem) = 1;
9578 set_mem_alias_set (mem, set);
9580 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9581 emit_move_insn (mem, reg);
9585 static void
9586 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9587 tree type, int *, int no_rtl)
9589 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9590 CUMULATIVE_ARGS next_cum;
9591 tree fntype;
9593   /* This argument doesn't appear to be used anymore, which is good,
9594 because the old code here didn't suppress rtl generation. */
9595 gcc_assert (!no_rtl);
9597 if (!TARGET_64BIT)
9598 return;
9600 fntype = TREE_TYPE (current_function_decl);
9602 /* For varargs, we do not want to skip the dummy va_dcl argument.
9603 For stdargs, we do want to skip the last named argument. */
9604 next_cum = *cum;
9605 if (stdarg_p (fntype))
9606 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9607 true);
9609 if (cum->call_abi == MS_ABI)
9610 setup_incoming_varargs_ms_64 (&next_cum);
9611 else
9612 setup_incoming_varargs_64 (&next_cum);
9615 static void
9616 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9617 machine_mode mode,
9618 tree type,
9619 int *pretend_size ATTRIBUTE_UNUSED,
9620 int no_rtl)
9622 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9623 CUMULATIVE_ARGS next_cum;
9624 tree fntype;
9625 rtx save_area;
9626 int bnd_reg, i, max;
9628 gcc_assert (!no_rtl);
9630 /* Do nothing if we use plain pointer to argument area. */
9631 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9632 return;
9634 fntype = TREE_TYPE (current_function_decl);
9636 /* For varargs, we do not want to skip the dummy va_dcl argument.
9637 For stdargs, we do want to skip the last named argument. */
9638 next_cum = *cum;
9639 if (stdarg_p (fntype))
9640 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9641 true);
9642 save_area = frame_pointer_rtx;
9644 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9645 if (max > X86_64_REGPARM_MAX)
9646 max = X86_64_REGPARM_MAX;
9648 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9649 if (chkp_function_instrumented_p (current_function_decl))
9650 for (i = cum->regno; i < max; i++)
9652 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9653 rtx ptr = gen_rtx_REG (Pmode,
9654 x86_64_int_parameter_registers[i]);
9655 rtx bounds;
9657 if (bnd_reg <= LAST_BND_REG)
9658 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9659 else
9661 rtx ldx_addr =
9662 plus_constant (Pmode, arg_pointer_rtx,
9663 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9664 bounds = gen_reg_rtx (BNDmode);
9665 emit_insn (BNDmode == BND64mode
9666 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9667 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9670 emit_insn (BNDmode == BND64mode
9671 ? gen_bnd64_stx (addr, ptr, bounds)
9672 : gen_bnd32_stx (addr, ptr, bounds));
9674 bnd_reg++;
9679 /* Return true if TYPE is a va_list type that is a plain char pointer.  */
9681 static bool
9682 is_va_list_char_pointer (tree type)
9684 tree canonic;
9686 /* For 32-bit it is always true. */
9687 if (!TARGET_64BIT)
9688 return true;
9689 canonic = ix86_canonical_va_list_type (type);
9690 return (canonic == ms_va_list_type_node
9691 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9694 /* Implement va_start. */
9696 static void
9697 ix86_va_start (tree valist, rtx nextarg)
9699 HOST_WIDE_INT words, n_gpr, n_fpr;
9700 tree f_gpr, f_fpr, f_ovf, f_sav;
9701 tree gpr, fpr, ovf, sav, t;
9702 tree type;
9703 rtx ovf_rtx;
9705 if (flag_split_stack
9706 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9708 unsigned int scratch_regno;
9710 /* When we are splitting the stack, we can't refer to the stack
9711 arguments using internal_arg_pointer, because they may be on
9712 the old stack. The split stack prologue will arrange to
9713 leave a pointer to the old stack arguments in a scratch
9714 register, which we here copy to a pseudo-register. The split
9715 stack prologue can't set the pseudo-register directly because
9716 it (the prologue) runs before any registers have been saved. */
9718 scratch_regno = split_stack_prologue_scratch_regno ();
9719 if (scratch_regno != INVALID_REGNUM)
9721 rtx reg;
9722 rtx_insn *seq;
9724 reg = gen_reg_rtx (Pmode);
9725 cfun->machine->split_stack_varargs_pointer = reg;
9727 start_sequence ();
9728 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9729 seq = get_insns ();
9730 end_sequence ();
9732 push_topmost_sequence ();
9733 emit_insn_after (seq, entry_of_function ());
9734 pop_topmost_sequence ();
9738 /* Only 64bit target needs something special. */
9739 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9741 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9742 std_expand_builtin_va_start (valist, nextarg);
9743 else
9745 rtx va_r, next;
9747 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9748 next = expand_binop (ptr_mode, add_optab,
9749 cfun->machine->split_stack_varargs_pointer,
9750 crtl->args.arg_offset_rtx,
9751 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9752 convert_move (va_r, next, 0);
9754 /* Store zero bounds for va_list. */
9755 if (chkp_function_instrumented_p (current_function_decl))
9756 chkp_expand_bounds_reset_for_mem (valist,
9757 make_tree (TREE_TYPE (valist),
9758 next));
9761 return;
9764 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9765 f_fpr = DECL_CHAIN (f_gpr);
9766 f_ovf = DECL_CHAIN (f_fpr);
9767 f_sav = DECL_CHAIN (f_ovf);
9769 valist = build_simple_mem_ref (valist);
9770 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9771 /* The following should be folded into the MEM_REF offset. */
9772 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9773 f_gpr, NULL_TREE);
9774 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9775 f_fpr, NULL_TREE);
9776 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9777 f_ovf, NULL_TREE);
9778 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9779 f_sav, NULL_TREE);
9781 /* Count number of gp and fp argument registers used. */
9782 words = crtl->args.info.words;
9783 n_gpr = crtl->args.info.regno;
9784 n_fpr = crtl->args.info.sse_regno;
9786 if (cfun->va_list_gpr_size)
9788 type = TREE_TYPE (gpr);
9789 t = build2 (MODIFY_EXPR, type,
9790 gpr, build_int_cst (type, n_gpr * 8));
9791 TREE_SIDE_EFFECTS (t) = 1;
9792 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9795 if (TARGET_SSE && cfun->va_list_fpr_size)
9797 type = TREE_TYPE (fpr);
9798 t = build2 (MODIFY_EXPR, type, fpr,
9799 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9800 TREE_SIDE_EFFECTS (t) = 1;
9801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9804 /* Find the overflow area. */
9805 type = TREE_TYPE (ovf);
9806 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9807 ovf_rtx = crtl->args.internal_arg_pointer;
9808 else
9809 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9810 t = make_tree (type, ovf_rtx);
9811 if (words != 0)
9812 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9814 /* Store zero bounds for overflow area pointer. */
9815 if (chkp_function_instrumented_p (current_function_decl))
9816 chkp_expand_bounds_reset_for_mem (ovf, t);
9818 t = build2 (MODIFY_EXPR, type, ovf, t);
9819 TREE_SIDE_EFFECTS (t) = 1;
9820 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9822 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9824 /* Find the register save area.
9825 	 The function prologue saves it right above the stack frame.  */
9826 type = TREE_TYPE (sav);
9827 t = make_tree (type, frame_pointer_rtx);
9828 if (!ix86_varargs_gpr_size)
9829 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9831 /* Store zero bounds for save area pointer. */
9832 if (chkp_function_instrumented_p (current_function_decl))
9833 chkp_expand_bounds_reset_for_mem (sav, t);
9835 t = build2 (MODIFY_EXPR, type, sav, t);
9836 TREE_SIDE_EFFECTS (t) = 1;
9837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
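/* For the 64-bit SysV ABI the expansion above amounts to roughly the
   following pseudo-C (a sketch; n_gpr, n_fpr and words count the named
   arguments already consumed, and the constants come from the six GPR
   and eight SSE argument registers):

	ap->gp_offset = n_gpr * 8;
	ap->fp_offset = 6 * 8 + n_fpr * 16;
	ap->overflow_arg_area = incoming_stack_args + words * 8;
	ap->reg_save_area = <register save area laid out by
			     setup_incoming_varargs_64 above>;
*/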
9841 /* Implement va_arg. */
9843 static tree
9844 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9845 gimple_seq *post_p)
9847 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9848 tree f_gpr, f_fpr, f_ovf, f_sav;
9849 tree gpr, fpr, ovf, sav, t;
9850 int size, rsize;
9851 tree lab_false, lab_over = NULL_TREE;
9852 tree addr, t2;
9853 rtx container;
9854 int indirect_p = 0;
9855 tree ptrtype;
9856 machine_mode nat_mode;
9857 unsigned int arg_boundary;
9859 /* Only 64bit target needs something special. */
9860 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9861 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9863 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9864 f_fpr = DECL_CHAIN (f_gpr);
9865 f_ovf = DECL_CHAIN (f_fpr);
9866 f_sav = DECL_CHAIN (f_ovf);
9868 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9869 valist, f_gpr, NULL_TREE);
9871 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9872 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9873 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9875 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9876 if (indirect_p)
9877 type = build_pointer_type (type);
9878 size = int_size_in_bytes (type);
9879 rsize = CEIL (size, UNITS_PER_WORD);
9881 nat_mode = type_natural_mode (type, NULL, false);
9882 switch (nat_mode)
9884 case E_V8SFmode:
9885 case E_V8SImode:
9886 case E_V32QImode:
9887 case E_V16HImode:
9888 case E_V4DFmode:
9889 case E_V4DImode:
9890 case E_V16SFmode:
9891 case E_V16SImode:
9892 case E_V64QImode:
9893 case E_V32HImode:
9894 case E_V8DFmode:
9895 case E_V8DImode:
9896       /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack.  */
9897 if (!TARGET_64BIT_MS_ABI)
9899 container = NULL;
9900 break;
9902 /* FALLTHRU */
9904 default:
9905 container = construct_container (nat_mode, TYPE_MODE (type),
9906 type, 0, X86_64_REGPARM_MAX,
9907 X86_64_SSE_REGPARM_MAX, intreg,
9909 break;
9912 /* Pull the value out of the saved registers. */
9914 addr = create_tmp_var (ptr_type_node, "addr");
9916 if (container)
9918 int needed_intregs, needed_sseregs;
9919 bool need_temp;
9920 tree int_addr, sse_addr;
9922 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9923 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9925 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9927 need_temp = (!REG_P (container)
9928 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9929 || TYPE_ALIGN (type) > 128));
9931       /* When passing a structure, verify that it occupies a consecutive
9932 	 block in the register save area.  If not, we need to do moves.  */
9933 if (!need_temp && !REG_P (container))
9935 /* Verify that all registers are strictly consecutive */
9936 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9938 int i;
9940 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9942 rtx slot = XVECEXP (container, 0, i);
9943 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9944 || INTVAL (XEXP (slot, 1)) != i * 16)
9945 need_temp = true;
9948 else
9950 int i;
9952 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9954 rtx slot = XVECEXP (container, 0, i);
9955 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9956 || INTVAL (XEXP (slot, 1)) != i * 8)
9957 need_temp = true;
9961 if (!need_temp)
9963 int_addr = addr;
9964 sse_addr = addr;
9966 else
9968 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9969 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9972 /* First ensure that we fit completely in registers. */
9973 if (needed_intregs)
9975 t = build_int_cst (TREE_TYPE (gpr),
9976 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9977 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9978 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9979 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9980 gimplify_and_add (t, pre_p);
9982 if (needed_sseregs)
9984 t = build_int_cst (TREE_TYPE (fpr),
9985 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9986 + X86_64_REGPARM_MAX * 8);
9987 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9988 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9989 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9990 gimplify_and_add (t, pre_p);
9993 /* Compute index to start of area used for integer regs. */
9994 if (needed_intregs)
9996 /* int_addr = gpr + sav; */
9997 t = fold_build_pointer_plus (sav, gpr);
9998 gimplify_assign (int_addr, t, pre_p);
10000 if (needed_sseregs)
10002 /* sse_addr = fpr + sav; */
10003 t = fold_build_pointer_plus (sav, fpr);
10004 gimplify_assign (sse_addr, t, pre_p);
10006 if (need_temp)
10008 int i, prev_size = 0;
10009 tree temp = create_tmp_var (type, "va_arg_tmp");
10011 /* addr = &temp; */
10012 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10013 gimplify_assign (addr, t, pre_p);
10015 for (i = 0; i < XVECLEN (container, 0); i++)
10017 rtx slot = XVECEXP (container, 0, i);
10018 rtx reg = XEXP (slot, 0);
10019 machine_mode mode = GET_MODE (reg);
10020 tree piece_type;
10021 tree addr_type;
10022 tree daddr_type;
10023 tree src_addr, src;
10024 int src_offset;
10025 tree dest_addr, dest;
10026 int cur_size = GET_MODE_SIZE (mode);
10028 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10029 prev_size = INTVAL (XEXP (slot, 1));
10030 if (prev_size + cur_size > size)
10032 cur_size = size - prev_size;
10033 unsigned int nbits = cur_size * BITS_PER_UNIT;
10034 if (!int_mode_for_size (nbits, 1).exists (&mode))
10035 mode = QImode;
10037 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10038 if (mode == GET_MODE (reg))
10039 addr_type = build_pointer_type (piece_type);
10040 else
10041 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10042 true);
10043 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10044 true);
10046 if (SSE_REGNO_P (REGNO (reg)))
10048 src_addr = sse_addr;
10049 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10051 else
10053 src_addr = int_addr;
10054 src_offset = REGNO (reg) * 8;
10056 src_addr = fold_convert (addr_type, src_addr);
10057 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10059 dest_addr = fold_convert (daddr_type, addr);
10060 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10061 if (cur_size == GET_MODE_SIZE (mode))
10063 src = build_va_arg_indirect_ref (src_addr);
10064 dest = build_va_arg_indirect_ref (dest_addr);
10066 gimplify_assign (dest, src, pre_p);
10068 else
10070 tree copy
10071 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10072 3, dest_addr, src_addr,
10073 size_int (cur_size));
10074 gimplify_and_add (copy, pre_p);
10076 prev_size += cur_size;
10080 if (needed_intregs)
10082 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10083 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10084 gimplify_assign (gpr, t, pre_p);
10087 if (needed_sseregs)
10089 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10090 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10091 gimplify_assign (unshare_expr (fpr), t, pre_p);
10094 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10096 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10099 /* ... otherwise out of the overflow area. */
10101   /* When the caller aligns a parameter on the stack, an alignment
10102      beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
10103      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee with the
10104      caller here.  */
10105 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10106 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10107 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10109 /* Care for on-stack alignment if needed. */
10110 if (arg_boundary <= 64 || size == 0)
10111 t = ovf;
10112 else
10114 HOST_WIDE_INT align = arg_boundary / 8;
10115 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10116 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10117 build_int_cst (TREE_TYPE (t), -align));
10120 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10121 gimplify_assign (addr, t, pre_p);
10123 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10124 gimplify_assign (unshare_expr (ovf), t, pre_p);
10126 if (container)
10127 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10129 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10130 addr = fold_convert (ptrtype, addr);
10132 if (indirect_p)
10133 addr = build_va_arg_indirect_ref (addr);
10134 return build_va_arg_indirect_ref (addr);
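/* In the common case where the argument fits in a single integer
   register and no temporary is needed, the gimple built above follows
   the psABI algorithm, roughly this pseudo-C (a sketch only):

	if (ap->gp_offset >= 6 * 8)
	  goto overflow;
	addr = ap->reg_save_area + ap->gp_offset;
	ap->gp_offset += 8;
	goto done;
      overflow:
	addr = align (ap->overflow_arg_area, arg_boundary);
	ap->overflow_arg_area = addr + rsize * 8;
      done:
	result = *(TYPE *) addr;

   SSE arguments use fp_offset and 16-byte slots instead, and arguments
   needing both classes must fit entirely in registers or are taken
   from the overflow area as a whole.  */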
10137 /* Return true if OPNUM's MEM should be matched
10138 in movabs* patterns. */
10140 bool
10141 ix86_check_movabs (rtx insn, int opnum)
10143 rtx set, mem;
10145 set = PATTERN (insn);
10146 if (GET_CODE (set) == PARALLEL)
10147 set = XVECEXP (set, 0, 0);
10148 gcc_assert (GET_CODE (set) == SET);
10149 mem = XEXP (set, opnum);
10150 while (SUBREG_P (mem))
10151 mem = SUBREG_REG (mem);
10152 gcc_assert (MEM_P (mem));
10153 return volatile_ok || !MEM_VOLATILE_P (mem);
10156 /* Return false if INSN contains a MEM with a non-default address space. */
10157 bool
10158 ix86_check_no_addr_space (rtx insn)
10160 subrtx_var_iterator::array_type array;
10161 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10163 rtx x = *iter;
10164 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10165 return false;
10167 return true;
10170 /* Initialize the table of extra 80387 mathematical constants. */
10172 static void
10173 init_ext_80387_constants (void)
10175 static const char * cst[5] =
10177 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10178 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10179 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10180 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10181 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10183 int i;
10185 for (i = 0; i < 5; i++)
10187 real_from_string (&ext_80387_constants_table[i], cst[i]);
10188 /* Ensure each constant is rounded to XFmode precision. */
10189 real_convert (&ext_80387_constants_table[i],
10190 XFmode, &ext_80387_constants_table[i]);
10193 ext_80387_constants_init = 1;
10196 /* Return non-zero if the constant is something that
10197 can be loaded with a special instruction. */
10200 standard_80387_constant_p (rtx x)
10202 machine_mode mode = GET_MODE (x);
10204 const REAL_VALUE_TYPE *r;
10206 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10207 return -1;
10209 if (x == CONST0_RTX (mode))
10210 return 1;
10211 if (x == CONST1_RTX (mode))
10212 return 2;
10214 r = CONST_DOUBLE_REAL_VALUE (x);
10216 /* For XFmode constants, try to find a special 80387 instruction when
10217 optimizing for size or on those CPUs that benefit from them. */
10218 if (mode == XFmode
10219 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10221 int i;
10223 if (! ext_80387_constants_init)
10224 init_ext_80387_constants ();
10226 for (i = 0; i < 5; i++)
10227 if (real_identical (r, &ext_80387_constants_table[i]))
10228 return i + 3;
10231 /* Load of the constant -0.0 or -1.0 will be split as
10232 fldz;fchs or fld1;fchs sequence. */
10233 if (real_isnegzero (r))
10234 return 8;
10235 if (real_identical (r, &dconstm1))
10236 return 9;
10238 return 0;
10241 /* Return the opcode of the special instruction to be used to load
10242 the constant X. */
10244 const char *
10245 standard_80387_constant_opcode (rtx x)
10247 switch (standard_80387_constant_p (x))
10249 case 1:
10250 return "fldz";
10251 case 2:
10252 return "fld1";
10253 case 3:
10254 return "fldlg2";
10255 case 4:
10256 return "fldln2";
10257 case 5:
10258 return "fldl2e";
10259 case 6:
10260 return "fldl2t";
10261 case 7:
10262 return "fldpi";
10263 case 8:
10264 case 9:
10265 return "#";
10266 default:
10267 gcc_unreachable ();
10271 /* Return the CONST_DOUBLE representing the 80387 constant that is
10272 loaded by the specified special instruction. The argument IDX
10273 matches the return value from standard_80387_constant_p. */
10276 standard_80387_constant_rtx (int idx)
10278 int i;
10280 if (! ext_80387_constants_init)
10281 init_ext_80387_constants ();
10283 switch (idx)
10285 case 3:
10286 case 4:
10287 case 5:
10288 case 6:
10289 case 7:
10290 i = idx - 3;
10291 break;
10293 default:
10294 gcc_unreachable ();
10297 return const_double_from_real_value (ext_80387_constants_table[i],
10298 XFmode);
10301 /* Return 1 if X is all bits 0, and 2 if X is all bits 1, in a
10302    supported SSE/AVX vector mode.  */
10305 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10307 machine_mode mode;
10309 if (!TARGET_SSE)
10310 return 0;
10312 mode = GET_MODE (x);
10314 if (x == const0_rtx || const0_operand (x, mode))
10315 return 1;
10317 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10319 /* VOIDmode integer constant, get mode from the predicate. */
10320 if (mode == VOIDmode)
10321 mode = pred_mode;
10323 switch (GET_MODE_SIZE (mode))
10325 case 64:
10326 if (TARGET_AVX512F)
10327 return 2;
10328 break;
10329 case 32:
10330 if (TARGET_AVX2)
10331 return 2;
10332 break;
10333 case 16:
10334 if (TARGET_SSE2)
10335 return 2;
10336 break;
10337 case 0:
10338 /* VOIDmode */
10339 gcc_unreachable ();
10340 default:
10341 break;
10345 return 0;
10348 /* Return the opcode of the special instruction to be used to load
10349 the constant X. */
10351 const char *
10352 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10354 machine_mode mode;
10356 gcc_assert (TARGET_SSE);
10358 mode = GET_MODE (x);
10360 if (x == const0_rtx || const0_operand (x, mode))
10362 switch (get_attr_mode (insn))
10364 case MODE_XI:
10365 return "vpxord\t%g0, %g0, %g0";
10366 case MODE_OI:
10367 return (TARGET_AVX512VL
10368 ? "vpxord\t%x0, %x0, %x0"
10369 : "vpxor\t%x0, %x0, %x0");
10370 case MODE_TI:
10371 return (TARGET_AVX512VL
10372 ? "vpxord\t%t0, %t0, %t0"
10373 : "%vpxor\t%0, %d0");
10375 case MODE_V8DF:
10376 return (TARGET_AVX512DQ
10377 ? "vxorpd\t%g0, %g0, %g0"
10378 : "vpxorq\t%g0, %g0, %g0");
10379 case MODE_V4DF:
10380 return "vxorpd\t%x0, %x0, %x0";
10381 case MODE_V2DF:
10382 return "%vxorpd\t%0, %d0";
10384 case MODE_V16SF:
10385 return (TARGET_AVX512DQ
10386 ? "vxorps\t%g0, %g0, %g0"
10387 : "vpxord\t%g0, %g0, %g0");
10388 case MODE_V8SF:
10389 return "vxorps\t%x0, %x0, %x0";
10390 case MODE_V4SF:
10391 return "%vxorps\t%0, %d0";
10393 default:
10394 gcc_unreachable ();
10397 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10399 enum attr_mode insn_mode = get_attr_mode (insn);
10401 switch (insn_mode)
10403 case MODE_XI:
10404 case MODE_V8DF:
10405 case MODE_V16SF:
10406 gcc_assert (TARGET_AVX512F);
10407 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10409 case MODE_OI:
10410 case MODE_V4DF:
10411 case MODE_V8SF:
10412 gcc_assert (TARGET_AVX2);
10413 /* FALLTHRU */
10414 case MODE_TI:
10415 case MODE_V2DF:
10416 case MODE_V4SF:
10417 gcc_assert (TARGET_SSE2);
10418 return (TARGET_AVX
10419 ? "vpcmpeqd\t%0, %0, %0"
10420 : "pcmpeqd\t%0, %0");
10422 default:
10423 gcc_unreachable ();
10427 gcc_unreachable ();
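/* As a concrete example of the templates above (assuming operand 0 is
   %xmm0): an all-zero V4SF constant is loaded with "xorps %xmm0, %xmm0"
   on plain SSE and "vxorps %xmm0, %xmm0, %xmm0" under AVX, while an
   all-ones TImode constant becomes "pcmpeqd %xmm0, %xmm0" or
   "vpcmpeqd %xmm0, %xmm0, %xmm0".  The 512-bit all-ones cases use
   vpternlogd with an 0xFF immediate, since the AVX-512 compare forms
   write to a mask register rather than a vector register.  */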
10430 /* Returns true if INSN can be transformed from a memory load
10431 to a supported FP constant load. */
10433 bool
10434 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10436 rtx src = find_constant_src (insn);
10438 gcc_assert (REG_P (dst));
10440 if (src == NULL
10441 || (SSE_REGNO_P (REGNO (dst))
10442 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10443 || (STACK_REGNO_P (REGNO (dst))
10444 && standard_80387_constant_p (src) < 1))
10445 return false;
10447 return true;
10450 /* Returns true if OP contains a symbol reference */
10452 bool
10453 symbolic_reference_mentioned_p (rtx op)
10455 const char *fmt;
10456 int i;
10458 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10459 return true;
10461 fmt = GET_RTX_FORMAT (GET_CODE (op));
10462 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10464 if (fmt[i] == 'E')
10466 int j;
10468 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10469 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10470 return true;
10473 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10474 return true;
10477 return false;
10480 /* Return true if it is appropriate to emit `ret' instructions in the
10481 body of a function. Do this only if the epilogue is simple, needing a
10482 couple of insns. Prior to reloading, we can't tell how many registers
10483 must be saved, so return false then. Return false if there is no frame
10484 marker to de-allocate. */
10486 bool
10487 ix86_can_use_return_insn_p (void)
10489 if (ix86_function_naked (current_function_decl))
10490 return false;
10492 /* Don't use `ret' instruction in interrupt handler. */
10493 if (! reload_completed
10494 || frame_pointer_needed
10495 || cfun->machine->func_type != TYPE_NORMAL)
10496 return 0;
10498 /* Don't allow more than 32k pop, since that's all we can do
10499 with one instruction. */
10500 if (crtl->args.pops_args && crtl->args.size >= 32768)
10501 return 0;
10503 struct ix86_frame &frame = cfun->machine->frame;
10504 return (frame.stack_pointer_offset == UNITS_PER_WORD
10505 && (frame.nregs + frame.nsseregs) == 0);
10508 /* Value should be nonzero if functions must have frame pointers.
10509 Zero means the frame pointer need not be set up (and parms may
10510 be accessed via the stack pointer) in functions that seem suitable. */
10512 static bool
10513 ix86_frame_pointer_required (void)
10515 /* If we accessed previous frames, then the generated code expects
10516 to be able to access the saved ebp value in our frame. */
10517 if (cfun->machine->accesses_prev_frame)
10518 return true;
10520   /* Several x86 OSes need a frame pointer for other reasons,
10521 usually pertaining to setjmp. */
10522 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10523 return true;
10525 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10526 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10527 return true;
10529   /* Under Win64 SEH, very large frames need a frame pointer, as the
10530      maximum stack allocation is 4GB.  */
10531 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10532 return true;
10534 /* SSE saves require frame-pointer when stack is misaligned. */
10535 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10536 return true;
10538 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10539 turns off the frame pointer by default. Turn it back on now if
10540 we've not got a leaf function. */
10541 if (TARGET_OMIT_LEAF_FRAME_POINTER
10542 && (!crtl->is_leaf
10543 || ix86_current_function_calls_tls_descriptor))
10544 return true;
10546 if (crtl->profile && !flag_fentry)
10547 return true;
10549 return false;
10552 /* Record that the current function accesses previous call frames. */
10554 void
10555 ix86_setup_frame_addresses (void)
10557 cfun->machine->accesses_prev_frame = 1;
10560 #ifndef USE_HIDDEN_LINKONCE
10561 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10562 # define USE_HIDDEN_LINKONCE 1
10563 # else
10564 # define USE_HIDDEN_LINKONCE 0
10565 # endif
10566 #endif
10568 static int pic_labels_used;
10570 /* Fills in the label name that should be used for a pc thunk for
10571 the given register. */
10573 static void
10574 get_pc_thunk_name (char name[32], unsigned int regno)
10576 gcc_assert (!TARGET_64BIT);
10578 if (USE_HIDDEN_LINKONCE)
10579 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10580 else
10581 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10585 /* This function generates the thunks used for -fpic: each loads its
10586    PIC register with the return address of the caller and then returns.  */
10588 static void
10589 ix86_code_end (void)
10591 rtx xops[2];
10592 int regno;
10594 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
10596 char name[32];
10597 tree decl;
10599 if (!(pic_labels_used & (1 << regno)))
10600 continue;
10602 get_pc_thunk_name (name, regno);
10604 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10605 get_identifier (name),
10606 build_function_type_list (void_type_node, NULL_TREE));
10607 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10608 NULL_TREE, void_type_node);
10609 TREE_PUBLIC (decl) = 1;
10610 TREE_STATIC (decl) = 1;
10611 DECL_IGNORED_P (decl) = 1;
10613 #if TARGET_MACHO
10614 if (TARGET_MACHO)
10616 switch_to_section (darwin_sections[picbase_thunk_section]);
10617 fputs ("\t.weak_definition\t", asm_out_file);
10618 assemble_name (asm_out_file, name);
10619 fputs ("\n\t.private_extern\t", asm_out_file);
10620 assemble_name (asm_out_file, name);
10621 putc ('\n', asm_out_file);
10622 ASM_OUTPUT_LABEL (asm_out_file, name);
10623 DECL_WEAK (decl) = 1;
10625 else
10626 #endif
10627 if (USE_HIDDEN_LINKONCE)
10629 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10631 targetm.asm_out.unique_section (decl, 0);
10632 switch_to_section (get_named_section (decl, NULL, 0));
10634 targetm.asm_out.globalize_label (asm_out_file, name);
10635 fputs ("\t.hidden\t", asm_out_file);
10636 assemble_name (asm_out_file, name);
10637 putc ('\n', asm_out_file);
10638 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10640 else
10642 switch_to_section (text_section);
10643 ASM_OUTPUT_LABEL (asm_out_file, name);
10646 DECL_INITIAL (decl) = make_node (BLOCK);
10647 current_function_decl = decl;
10648 allocate_struct_function (decl, false);
10649 init_function_start (decl);
10650 /* We're about to hide the function body from callees of final_* by
10651 emitting it directly; tell them we're a thunk, if they care. */
10652 cfun->is_thunk = true;
10653 first_function_block_is_cold = false;
10654 /* Make sure unwind info is emitted for the thunk if needed. */
10655 final_start_function (emit_barrier (), asm_out_file, 1);
10657 /* Pad stack IP move with 4 instructions (two NOPs count
10658 as one instruction). */
10659 if (TARGET_PAD_SHORT_FUNCTION)
10661 int i = 8;
10663 while (i--)
10664 fputs ("\tnop\n", asm_out_file);
10667 xops[0] = gen_rtx_REG (Pmode, regno);
10668 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10669 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
10670 output_asm_insn ("%!ret", NULL);
10671 final_end_function ();
10672 init_insn_lengths ();
10673 free_after_compilation (cfun);
10674 set_cfun (NULL);
10675 current_function_decl = NULL;
10678 if (flag_split_stack)
10679 file_end_indicate_split_stack ();
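/* The loop above emits, for each PIC register that was actually used,
   a thunk whose body simply loads the caller's return address and
   returns.  A sketch of the 32-bit output for %ebx, as named by
   get_pc_thunk_name (section and comdat details elided):

	.hidden	__x86.get_pc_thunk.bx
   __x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
*/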
10682 /* Emit code for the SET_GOT patterns. */
10684 const char *
10685 output_set_got (rtx dest, rtx label)
10687 rtx xops[3];
10689 xops[0] = dest;
10691 if (TARGET_VXWORKS_RTP && flag_pic)
10693 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
10694 xops[2] = gen_rtx_MEM (Pmode,
10695 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
10696 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
10698 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
10699 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
10700 an unadorned address. */
10701 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
10702 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
10703 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
10704 return "";
10707 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
10709 if (flag_pic)
10711 char name[32];
10712 get_pc_thunk_name (name, REGNO (dest));
10713 pic_labels_used |= 1 << REGNO (dest);
10715 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
10716 xops[2] = gen_rtx_MEM (QImode, xops[2]);
10717 output_asm_insn ("%!call\t%X2", xops);
10719 #if TARGET_MACHO
10720 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10721 This is what will be referenced by the Mach-O PIC subsystem. */
10722 if (machopic_should_output_picbase_label () || !label)
10723 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10725 /* When we are restoring the pic base at the site of a nonlocal label,
10726 and we decided to emit the pic base above, we will still output a
10727 local label used for calculating the correction offset (even though
10728 the offset will be 0 in that case). */
10729 if (label)
10730 targetm.asm_out.internal_label (asm_out_file, "L",
10731 CODE_LABEL_NUMBER (label));
10732 #endif
10734 else
10736 if (TARGET_MACHO)
10737 /* We don't need a pic base, we're not producing pic. */
10738 gcc_unreachable ();
10740 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
10741 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
10742 targetm.asm_out.internal_label (asm_out_file, "L",
10743 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
10746 if (!TARGET_MACHO)
10747 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10749 return "";
10752 /* Generate a "push" pattern for input ARG.  */
10754 static rtx
10755 gen_push (rtx arg)
10757 struct machine_function *m = cfun->machine;
10759 if (m->fs.cfa_reg == stack_pointer_rtx)
10760 m->fs.cfa_offset += UNITS_PER_WORD;
10761 m->fs.sp_offset += UNITS_PER_WORD;
10763 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10764 arg = gen_rtx_REG (word_mode, REGNO (arg));
10766 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10767 gen_rtx_PRE_DEC (Pmode,
10768 stack_pointer_rtx)),
10769 arg);
10772 /* Generate a "pop" pattern for input ARG.  */
10774 static rtx
10775 gen_pop (rtx arg)
10777 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10778 arg = gen_rtx_REG (word_mode, REGNO (arg));
10780 return gen_rtx_SET (arg,
10781 gen_rtx_MEM (word_mode,
10782 gen_rtx_POST_INC (Pmode,
10783 stack_pointer_rtx)));
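/* For example, on x86-64 gen_push for a DImode register yields the RTL

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))

   and gen_pop the matching post_inc form (a sketch; the memory mode
   follows word_mode).  Neither routine emits the insn itself; the
   prologue and epilogue expanders decide where it goes.  */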
10786 /* Return the number of an unused call-clobbered register available for
10787    the entire function, or INVALID_REGNUM if there is none.  */
10789 static unsigned int
10790 ix86_select_alt_pic_regnum (void)
10792 if (ix86_use_pseudo_pic_reg ())
10793 return INVALID_REGNUM;
10795 if (crtl->is_leaf
10796 && !crtl->profile
10797 && !ix86_current_function_calls_tls_descriptor)
10799 int i, drap;
10800 /* Can't use the same register for both PIC and DRAP. */
10801 if (crtl->drap_reg)
10802 drap = REGNO (crtl->drap_reg);
10803 else
10804 drap = -1;
10805 for (i = 2; i >= 0; --i)
10806 if (i != drap && !df_regs_ever_live_p (i))
10807 return i;
10810 return INVALID_REGNUM;
10813 /* Return true if REGNO is used by the epilogue. */
10815 bool
10816 ix86_epilogue_uses (int regno)
10818 /* If there are no caller-saved registers, we preserve all registers,
10819 except for MMX and x87 registers which aren't supported when saving
10820 and restoring registers. Don't explicitly save SP register since
10821 it is always preserved. */
10822 return (epilogue_completed
10823 && cfun->machine->no_caller_saved_registers
10824 && !fixed_regs[regno]
10825 && !STACK_REGNO_P (regno)
10826 && !MMX_REGNO_P (regno));
10829 /* Return nonzero if register REGNO can be used as a scratch register
10830 in peephole2. */
10832 static bool
10833 ix86_hard_regno_scratch_ok (unsigned int regno)
10835 /* If there are no caller-saved registers, we can't use any register
10836 as a scratch register after epilogue and use REGNO as scratch
10837 register only if it has been used before to avoid saving and
10838 restoring it. */
10839 return (!cfun->machine->no_caller_saved_registers
10840 || (!epilogue_completed
10841 && df_regs_ever_live_p (regno)));
10844 /* Return true if register class CL should be an additional allocno
10845 class. */
10847 static bool
10848 ix86_additional_allocno_class_p (reg_class_t cl)
10850 return cl == MOD4_SSE_REGS;
10853 /* Return TRUE if we need to save REGNO. */
10855 static bool
10856 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
10858 /* If there are no caller-saved registers, we preserve all registers,
10859 except for MMX and x87 registers which aren't supported when saving
10860 and restoring registers. Don't explicitly save SP register since
10861 it is always preserved. */
10862 if (cfun->machine->no_caller_saved_registers)
10864 /* Don't preserve registers used for function return value. */
10865 rtx reg = crtl->return_rtx;
10866 if (reg)
10868 unsigned int i = REGNO (reg);
10869 unsigned int nregs = REG_NREGS (reg);
10870 while (nregs-- > 0)
10871 if ((i + nregs) == regno)
10872 return false;
10874 reg = crtl->return_bnd;
10875 if (reg)
10877 i = REGNO (reg);
10878 nregs = REG_NREGS (reg);
10879 while (nregs-- > 0)
10880 if ((i + nregs) == regno)
10881 return false;
10885 return (df_regs_ever_live_p (regno)
10886 && !fixed_regs[regno]
10887 && !STACK_REGNO_P (regno)
10888 && !MMX_REGNO_P (regno)
10889 && (regno != HARD_FRAME_POINTER_REGNUM
10890 || !frame_pointer_needed));
10893 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10894 && pic_offset_table_rtx)
10896 if (ix86_use_pseudo_pic_reg ())
10898 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10899 _mcount in prologue. */
10900 if (!TARGET_64BIT && flag_pic && crtl->profile)
10901 return true;
10903 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10904 || crtl->profile
10905 || crtl->calls_eh_return
10906 || crtl->uses_const_pool
10907 || cfun->has_nonlocal_label)
10908 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10911 if (crtl->calls_eh_return && maybe_eh_return)
10913 unsigned i;
10914 for (i = 0; ; i++)
10916 unsigned test = EH_RETURN_DATA_REGNO (i);
10917 if (test == INVALID_REGNUM)
10918 break;
10919 if (test == regno)
10920 return true;
10924 if (ignore_outlined && cfun->machine->call_ms2sysv)
10926 unsigned count = cfun->machine->call_ms2sysv_extra_regs
10927 + xlogue_layout::MIN_REGS;
10928 if (xlogue_layout::is_stub_managed_reg (regno, count))
10929 return false;
10932 if (crtl->drap_reg
10933 && regno == REGNO (crtl->drap_reg)
10934 && !cfun->machine->no_drap_save_restore)
10935 return true;
10937 return (df_regs_ever_live_p (regno)
10938 && !call_used_regs[regno]
10939 && !fixed_regs[regno]
10940 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10943 /* Return number of saved general purpose registers.  */
10945 static int
10946 ix86_nsaved_regs (void)
10948 int nregs = 0;
10949 int regno;
10951 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10952 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
10953 nregs ++;
10954 return nregs;
10957 /* Return number of saved SSE registers. */
10959 static int
10960 ix86_nsaved_sseregs (void)
10962 int nregs = 0;
10963 int regno;
10965 if (!TARGET_64BIT_MS_ABI)
10966 return 0;
10967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10968 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
10969 nregs ++;
10970 return nregs;
10973 /* Given FROM and TO register numbers, say whether this elimination is
10974 allowed. If stack alignment is needed, we can only replace argument
10975 pointer with hard frame pointer, or replace frame pointer with stack
10976 pointer. Otherwise, frame pointer elimination is automatically
10977 handled and all other eliminations are valid. */
10979 static bool
10980 ix86_can_eliminate (const int from, const int to)
10982 if (stack_realign_fp)
10983 return ((from == ARG_POINTER_REGNUM
10984 && to == HARD_FRAME_POINTER_REGNUM)
10985 || (from == FRAME_POINTER_REGNUM
10986 && to == STACK_POINTER_REGNUM));
10987 else
10988 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10991 /* Return the offset between two registers, one to be eliminated, and the other
10992 its replacement, at the start of a routine. */
10994 HOST_WIDE_INT
10995 ix86_initial_elimination_offset (int from, int to)
10997 struct ix86_frame &frame = cfun->machine->frame;
10999 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11000 return frame.hard_frame_pointer_offset;
11001 else if (from == FRAME_POINTER_REGNUM
11002 && to == HARD_FRAME_POINTER_REGNUM)
11003 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11004 else
11006 gcc_assert (to == STACK_POINTER_REGNUM);
11008 if (from == ARG_POINTER_REGNUM)
11009 return frame.stack_pointer_offset;
11011 gcc_assert (from == FRAME_POINTER_REGNUM);
11012 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11016 /* In a dynamically-aligned function, we can't know the offset from
11017 stack pointer to frame pointer, so we must ensure that setjmp
11018 eliminates fp against the hard fp (%ebp) rather than trying to
11019 index from %esp up to the top of the frame across a gap that is
11020 of unknown (at compile-time) size. */
11021 static rtx
11022 ix86_builtin_setjmp_frame_value (void)
11024 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11027 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
11028 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11030 static bool warned_once = false;
11031 if (!warned_once)
11033 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11034 feature);
11035 warned_once = true;
11039 /* When using -fsplit-stack, the allocation routines set a field in
11040 the TCB to the bottom of the stack plus this much space, measured
11041 in bytes. */
11043 #define SPLIT_STACK_AVAILABLE 256
11045 /* Fill structure ix86_frame about frame of currently computed function. */
11047 static void
11048 ix86_compute_frame_layout (void)
11050 struct ix86_frame *frame = &cfun->machine->frame;
11051 struct machine_function *m = cfun->machine;
11052 unsigned HOST_WIDE_INT stack_alignment_needed;
11053 HOST_WIDE_INT offset;
11054 unsigned HOST_WIDE_INT preferred_alignment;
11055 HOST_WIDE_INT size = get_frame_size ();
11056 HOST_WIDE_INT to_allocate;
11058 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11059 * ms_abi functions that call a sysv function. We now need to prune away
11060 * cases where it should be disabled. */
11061 if (TARGET_64BIT && m->call_ms2sysv)
11063 gcc_assert (TARGET_64BIT_MS_ABI);
11064 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11065 gcc_assert (!TARGET_SEH);
11066 gcc_assert (TARGET_SSE);
11067 gcc_assert (!ix86_using_red_zone ());
11069 if (crtl->calls_eh_return)
11071 gcc_assert (!reload_completed);
11072 m->call_ms2sysv = false;
11073 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11076 else if (ix86_static_chain_on_stack)
11078 gcc_assert (!reload_completed);
11079 m->call_ms2sysv = false;
11080 warn_once_call_ms2sysv_xlogues ("static call chains");
11083 /* Finally, compute which registers the stub will manage. */
11084 else
11086 unsigned count = xlogue_layout::count_stub_managed_regs ();
11087 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11088 m->call_ms2sysv_pad_in = 0;
11092 frame->nregs = ix86_nsaved_regs ();
11093 frame->nsseregs = ix86_nsaved_sseregs ();
11095   /* The 64-bit MS ABI seems to require the stack alignment to always be
11096      16, except for function prologues, leaf functions, and when the
11097      default incoming stack boundary is overridden at the command line or
11098      via the force_align_arg_pointer attribute.  */
11099 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11100 && (!crtl->is_leaf || cfun->calls_alloca != 0
11101 || ix86_current_function_calls_tls_descriptor
11102 || ix86_incoming_stack_boundary < 128))
11104 crtl->preferred_stack_boundary = 128;
11105 crtl->stack_alignment_needed = 128;
11108 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11109 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11111 gcc_assert (!size || stack_alignment_needed);
11112 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11113 gcc_assert (preferred_alignment <= stack_alignment_needed);
11115 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
11116 gcc_assert (TARGET_64BIT || !frame->nsseregs);
11117 if (TARGET_64BIT && m->call_ms2sysv)
11119 gcc_assert (stack_alignment_needed >= 16);
11120 gcc_assert (!frame->nsseregs);
11123 /* For SEH we have to limit the amount of code movement into the prologue.
11124 At present we do this via a BLOCKAGE, at which point there's very little
11125 scheduling that can be done, which means that there's very little point
11126 in doing anything except PUSHs. */
11127 if (TARGET_SEH)
11128 m->use_fast_prologue_epilogue = false;
11129 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11131 int count = frame->nregs;
11132 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11134       /* The fast prologue uses moves instead of pushes to save registers.
11135 	 This is significantly longer, but it also executes faster, as modern
11136 	 hardware can execute the moves in parallel but cannot do so for
11138 	 push/pop.  Be careful about choosing which prologue to emit: when the
11139 	 function takes many instructions to execute, we may as well use the
11140 	 slow version, and likewise when the function is known to be outside a
11141 	 hot spot (this is known only with profile feedback).  Weight the size
11142 	 of the function by the number of registers to save, as it is cheap to
11143 	 use one or two push instructions but very slow to use many of them.  */
11144 if (count)
11145 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11146 if (node->frequency < NODE_FREQUENCY_NORMAL
11147 || (flag_branch_probabilities
11148 && node->frequency < NODE_FREQUENCY_HOT))
11149 m->use_fast_prologue_epilogue = false;
11150 else
11151 m->use_fast_prologue_epilogue
11152 = !expensive_function_p (count);
11155 frame->save_regs_using_mov
11156 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11157 /* If static stack checking is enabled and done with probes,
11158 the registers need to be saved before allocating the frame. */
11159 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11161 /* Skip return address and error code in exception handler. */
11162 offset = INCOMING_FRAME_SP_OFFSET;
11164 /* Skip pushed static chain. */
11165 if (ix86_static_chain_on_stack)
11166 offset += UNITS_PER_WORD;
11168 /* Skip saved base pointer. */
11169 if (frame_pointer_needed)
11170 offset += UNITS_PER_WORD;
11171 frame->hfp_save_offset = offset;
11173 /* The traditional frame pointer location is at the top of the frame. */
11174 frame->hard_frame_pointer_offset = offset;
11176 /* Register save area */
11177 offset += frame->nregs * UNITS_PER_WORD;
11178 frame->reg_save_offset = offset;
11180 /* On SEH target, registers are pushed just before the frame pointer
11181 location. */
11182 if (TARGET_SEH)
11183 frame->hard_frame_pointer_offset = offset;
11185 /* Calculate the size of the va-arg area (not including padding, if any). */
11186 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11188 if (stack_realign_fp)
11190 /* We may need a 16-byte aligned stack for the remainder of the
11191 register save area, but the stack frame for the local function
11192 	 may require a greater alignment when using AVX, AVX2 or AVX-512.  In order
11193 to avoid wasting space, we first calculate the space needed for
11194 the rest of the register saves, add that to the stack pointer,
11195 and then realign the stack to the boundary of the start of the
11196 frame for the local function. */
11197 HOST_WIDE_INT space_needed = 0;
11198 HOST_WIDE_INT sse_reg_space_needed = 0;
11200 if (TARGET_64BIT)
11202 if (m->call_ms2sysv)
11204 m->call_ms2sysv_pad_in = 0;
11205 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11208 else if (frame->nsseregs)
11209 /* The only ABI that has saved SSE registers (Win64) also has a
11210 16-byte aligned default stack. However, many programs violate
11211 the ABI, and Wine64 forces stack realignment to compensate. */
11212 space_needed = frame->nsseregs * 16;
11214 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11216 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11217 	     round anyway to be pedantic.  */
11218 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11220 else
11221 space_needed = frame->va_arg_size;
11223 /* Record the allocation size required prior to the realignment AND. */
11224 frame->stack_realign_allocate = space_needed;
11226 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11227 before this point are not directly comparable with values below
11228 this point. Use sp_valid_at to determine if the stack pointer is
11229 valid for a given offset, fp_valid_at for the frame pointer, or
11230 choose_baseaddr to have a base register chosen for you.
11232 Note that the result of (frame->stack_realign_offset
11233 & (stack_alignment_needed - 1)) may not equal zero. */
11234 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11235 frame->stack_realign_offset = offset - space_needed;
11236 frame->sse_reg_save_offset = frame->stack_realign_offset
11237 + sse_reg_space_needed;
11239 else
11241 frame->stack_realign_offset = offset;
11243 if (TARGET_64BIT && m->call_ms2sysv)
11245 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11246 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11249 /* Align and set SSE register save area. */
11250 else if (frame->nsseregs)
11252 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11253 required and the DRAP re-alignment boundary is at least 16 bytes,
11254 then we want the SSE register save area properly aligned. */
11255 if (ix86_incoming_stack_boundary >= 128
11256 || (stack_realign_drap && stack_alignment_needed >= 16))
11257 offset = ROUND_UP (offset, 16);
11258 offset += frame->nsseregs * 16;
11260 frame->sse_reg_save_offset = offset;
11261 offset += frame->va_arg_size;
11264 /* Align start of frame for local function. */
11265 if (m->call_ms2sysv
11266 || frame->va_arg_size != 0
11267 || size != 0
11268 || !crtl->is_leaf
11269 || cfun->calls_alloca
11270 || ix86_current_function_calls_tls_descriptor)
11271 offset = ROUND_UP (offset, stack_alignment_needed);
11273 /* Frame pointer points here. */
11274 frame->frame_pointer_offset = offset;
11276 offset += size;
11278   /* Add outgoing arguments area.  Can be skipped if we eliminated
11279      all the function calls as dead code.
11280      Skipping is however impossible when the function calls alloca.  The
11281      alloca expander assumes that the last crtl->outgoing_args_size bytes
11282      of the stack frame are unused.  */
11283 if (ACCUMULATE_OUTGOING_ARGS
11284 && (!crtl->is_leaf || cfun->calls_alloca
11285 || ix86_current_function_calls_tls_descriptor))
11287 offset += crtl->outgoing_args_size;
11288 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11290 else
11291 frame->outgoing_arguments_size = 0;
11293 /* Align stack boundary. Only needed if we're calling another function
11294 or using alloca. */
11295 if (!crtl->is_leaf || cfun->calls_alloca
11296 || ix86_current_function_calls_tls_descriptor)
11297 offset = ROUND_UP (offset, preferred_alignment);
11299 /* We've reached end of stack frame. */
11300 frame->stack_pointer_offset = offset;
11302 /* Size prologue needs to allocate. */
11303 to_allocate = offset - frame->sse_reg_save_offset;
11305 if ((!to_allocate && frame->nregs <= 1)
11306 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
11307 frame->save_regs_using_mov = false;
11309 if (ix86_using_red_zone ()
11310 && crtl->sp_is_unchanging
11311 && crtl->is_leaf
11312 && !ix86_pc_thunk_call_expanded
11313 && !ix86_current_function_calls_tls_descriptor)
11315 frame->red_zone_size = to_allocate;
11316 if (frame->save_regs_using_mov)
11317 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11318 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11319 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11321 else
11322 frame->red_zone_size = 0;
11323 frame->stack_pointer_offset -= frame->red_zone_size;
11325 /* The SEH frame pointer location is near the bottom of the frame.
11326 This is enforced by the fact that the difference between the
11327 stack pointer and the frame pointer is limited to 240 bytes in
11328 the unwind data structure. */
11329 if (TARGET_SEH)
11331 HOST_WIDE_INT diff;
11333 /* If we can leave the frame pointer where it is, do so. Also, returns
11334 the establisher frame for __builtin_frame_address (0). */
11335 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11336 if (diff <= SEH_MAX_FRAME_SIZE
11337 && (diff > 240 || (diff & 15) != 0)
11338 && !crtl->accesses_prior_frames)
11340 /* Ideally we'd determine what portion of the local stack frame
11341 (within the constraint of the lowest 240) is most heavily used.
11342 But without that complication, simply bias the frame pointer
11343 by 128 bytes so as to maximize the amount of the local stack
11344 frame that is addressable with 8-bit offsets. */
11345 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11350 /* This is semi-inlined memory_address_length, but simplified
11351 since we know that we're always dealing with reg+offset, and
11352 to avoid having to create and discard all that rtl. */
11354 static inline int
11355 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11357 int len = 4;
11359 if (offset == 0)
11361 /* EBP and R13 cannot be encoded without an offset. */
11362 len = (regno == BP_REG || regno == R13_REG);
11364 else if (IN_RANGE (offset, -128, 127))
11365 len = 1;
11367 /* ESP and R12 must be encoded with a SIB byte. */
11368 if (regno == SP_REG || regno == R12_REG)
11369 len++;
11371 return len;
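/* Illustrative values (editor's sketch, not compiled): under the rules above
   choose_baseaddr_len (BX_REG, 0) is 0, choose_baseaddr_len (BP_REG, 0) and
   choose_baseaddr_len (SP_REG, 0) are 1 (a disp8 is forced for EBP/R13, a SIB
   byte for ESP/R12), choose_baseaddr_len (BX_REG, -64) is 1 (disp8), and
   choose_baseaddr_len (SP_REG, 200) is 5 (disp32 plus the SIB byte).  The
   result is the number of extra address-encoding bytes beyond the ModRM
   byte.  */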
11374 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11375 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11377 static bool
11378 sp_valid_at (HOST_WIDE_INT cfa_offset)
11380 const struct machine_frame_state &fs = cfun->machine->fs;
11381 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11383 /* Validate that the cfa_offset isn't in a "no-man's land". */
11384 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11385 return false;
11387 return fs.sp_valid;
11390 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
11391 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11393 static inline bool
11394 fp_valid_at (HOST_WIDE_INT cfa_offset)
11396 const struct machine_frame_state &fs = cfun->machine->fs;
11397 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
11399 /* Validate that the cfa_offset isn't in a "no-man's land". */
11400 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
11401 return false;
11403 return fs.fp_valid;
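/* Editor's example, assuming fs.sp_realigned with sp_realigned_fp_last == 16
   and sp_realigned_offset == 32: fp_valid_at (8) reduces to fs.fp_valid
   (the offset is at or below sp_realigned_fp_last), sp_valid_at (40) reduces
   to fs.sp_valid (the offset is above sp_realigned_offset), and an offset of
   24 trips the asserts in either predicate because (16, 32] is the
   no-man's land between the two regions.  */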
11406 /* Choose a base register based upon alignment requested, speed and/or
11407 size. */
11409 static void
11410 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
11411 HOST_WIDE_INT &base_offset,
11412 unsigned int align_requested, unsigned int *align)
11414 const struct machine_function *m = cfun->machine;
11415 unsigned int hfp_align;
11416 unsigned int drap_align;
11417 unsigned int sp_align;
11418 bool hfp_ok = fp_valid_at (cfa_offset);
11419 bool drap_ok = m->fs.drap_valid;
11420 bool sp_ok = sp_valid_at (cfa_offset);
11422 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
11424 /* Filter out any registers that don't meet the requested alignment
11425 criteria. */
11426 if (align_requested)
11428 if (m->fs.realigned)
11429 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
11430 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
11431 notes (which we would need in order to use a realigned stack pointer),
11432 so disable this on SEH targets. */
11433 else if (m->fs.sp_realigned)
11434 sp_align = crtl->stack_alignment_needed;
11436 hfp_ok = hfp_ok && hfp_align >= align_requested;
11437 drap_ok = drap_ok && drap_align >= align_requested;
11438 sp_ok = sp_ok && sp_align >= align_requested;
11441 if (m->use_fast_prologue_epilogue)
11443 /* Choose the base register most likely to allow the most scheduling
11444 opportunities. Generally FP is valid throughout the function,
11445 while DRAP must be reloaded within the epilogue. But choose either
11446 over the SP due to increased encoding size. */
11448 if (hfp_ok)
11450 base_reg = hard_frame_pointer_rtx;
11451 base_offset = m->fs.fp_offset - cfa_offset;
11453 else if (drap_ok)
11455 base_reg = crtl->drap_reg;
11456 base_offset = 0 - cfa_offset;
11458 else if (sp_ok)
11460 base_reg = stack_pointer_rtx;
11461 base_offset = m->fs.sp_offset - cfa_offset;
11464 else
11466 HOST_WIDE_INT toffset;
11467 int len = 16, tlen;
11469 /* Choose the base register with the smallest address encoding.
11470 With a tie, choose FP > DRAP > SP. */
11471 if (sp_ok)
11473 base_reg = stack_pointer_rtx;
11474 base_offset = m->fs.sp_offset - cfa_offset;
11475 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11477 if (drap_ok)
11479 toffset = 0 - cfa_offset;
11480 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11481 if (tlen <= len)
11483 base_reg = crtl->drap_reg;
11484 base_offset = toffset;
11485 len = tlen;
11488 if (hfp_ok)
11490 toffset = m->fs.fp_offset - cfa_offset;
11491 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11492 if (tlen <= len)
11494 base_reg = hard_frame_pointer_rtx;
11495 base_offset = toffset;
11496 len = tlen;
11501 /* Set the align return value. */
11502 if (align)
11504 if (base_reg == stack_pointer_rtx)
11505 *align = sp_align;
11506 else if (base_reg == crtl->drap_reg)
11507 *align = drap_align;
11508 else if (base_reg == hard_frame_pointer_rtx)
11509 *align = hfp_align;
11513 /* Return an RTX that points to CFA_OFFSET within the stack frame, and
11514 report the alignment of that address. If ALIGN is non-null, it should
11515 point to an alignment value (in bits) that is preferred or zero and will
11516 receive the alignment of the base register that was selected,
11517 irrespective of whether or not CFA_OFFSET is a multiple of that
11518 alignment value. If it is possible for the base register offset to be
11519 non-immediate then SCRATCH_REGNO should specify a scratch register to
11520 use.
11522 The valid base registers are taken from CFUN->MACHINE->FS. */
11524 static rtx
11525 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
11526 unsigned int scratch_regno = INVALID_REGNUM)
11528 rtx base_reg = NULL;
11529 HOST_WIDE_INT base_offset = 0;
11531 /* If a specific alignment is requested, try to get a base register
11532 with that alignment first. */
11533 if (align && *align)
11534 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
11536 if (!base_reg)
11537 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
11539 gcc_assert (base_reg != NULL);
11541 rtx base_offset_rtx = GEN_INT (base_offset);
11543 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
11545 gcc_assert (scratch_regno != INVALID_REGNUM);
11547 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11548 emit_move_insn (scratch_reg, base_offset_rtx);
11550 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
11553 return plus_constant (Pmode, base_reg, base_offset);
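/* Typical use, mirroring ix86_emit_save_reg_using_mov below (editor's
   sketch):

     unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
     rtx addr = choose_baseaddr (cfa_offset, &align);
     rtx mem = gen_frame_mem (V4SFmode, addr);
     set_mem_align (mem, MIN (GET_MODE_ALIGNMENT (V4SFmode), align));

   When the offset from every valid base register might not fit in an
   immediate, the caller must supply a scratch register, as the ms2sysv
   save code below does with AX_REG.  */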
11556 /* Emit code to save registers in the prologue. */
11558 static void
11559 ix86_emit_save_regs (void)
11561 unsigned int regno;
11562 rtx_insn *insn;
11564 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11565 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11567 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11568 RTX_FRAME_RELATED_P (insn) = 1;
11572 /* Emit a single register save at CFA - CFA_OFFSET. */
11574 static void
11575 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11576 HOST_WIDE_INT cfa_offset)
11578 struct machine_function *m = cfun->machine;
11579 rtx reg = gen_rtx_REG (mode, regno);
11580 rtx mem, addr, base, insn;
11581 unsigned int align = GET_MODE_ALIGNMENT (mode);
11583 addr = choose_baseaddr (cfa_offset, &align);
11584 mem = gen_frame_mem (mode, addr);
11586 /* The location alignment depends upon the base register. */
11587 align = MIN (GET_MODE_ALIGNMENT (mode), align);
11588 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
11589 set_mem_align (mem, align);
11591 insn = emit_insn (gen_rtx_SET (mem, reg));
11592 RTX_FRAME_RELATED_P (insn) = 1;
11594 base = addr;
11595 if (GET_CODE (base) == PLUS)
11596 base = XEXP (base, 0);
11597 gcc_checking_assert (REG_P (base));
11599 /* When saving registers into a re-aligned local stack frame, avoid
11600 any tricky guessing by dwarf2out. */
11601 if (m->fs.realigned)
11603 gcc_checking_assert (stack_realign_drap);
11605 if (regno == REGNO (crtl->drap_reg))
11607 /* A bit of a hack. We force the DRAP register to be saved in
11608 the re-aligned stack frame, which provides us with a copy
11609 of the CFA that will last past the prologue. Install it. */
11610 gcc_checking_assert (cfun->machine->fs.fp_valid);
11611 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11612 cfun->machine->fs.fp_offset - cfa_offset);
11613 mem = gen_rtx_MEM (mode, addr);
11614 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11616 else
11618 /* The frame pointer is a stable reference within the
11619 aligned frame. Use it. */
11620 gcc_checking_assert (cfun->machine->fs.fp_valid);
11621 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11622 cfun->machine->fs.fp_offset - cfa_offset);
11623 mem = gen_rtx_MEM (mode, addr);
11624 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11628 else if (base == stack_pointer_rtx && m->fs.sp_realigned
11629 && cfa_offset >= m->fs.sp_realigned_offset)
11631 gcc_checking_assert (stack_realign_fp);
11632 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11635 /* The memory may not be relative to the current CFA register,
11636 which means that we may need to generate a new pattern for
11637 use by the unwind info. */
11638 else if (base != m->fs.cfa_reg)
11640 addr = plus_constant (Pmode, m->fs.cfa_reg,
11641 m->fs.cfa_offset - cfa_offset);
11642 mem = gen_rtx_MEM (mode, addr);
11643 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11647 /* Emit code to save registers using MOV insns.
11648 First register is stored at CFA - CFA_OFFSET. */
11649 static void
11650 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11652 unsigned int regno;
11654 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11655 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11657 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11658 cfa_offset -= UNITS_PER_WORD;
11662 /* Emit code to save SSE registers using MOV insns.
11663 First register is stored at CFA - CFA_OFFSET. */
11664 static void
11665 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11667 unsigned int regno;
11669 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11670 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11672 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11673 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11677 static GTY(()) rtx queued_cfa_restores;
11679 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
11680 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11681 Don't add the note if the previously saved value will be left untouched
11682 within the stack red zone until return, as unwinders can find the same value
11683 in the register and on the stack. */
11685 static void
11686 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11688 if (!crtl->shrink_wrapped
11689 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11690 return;
11692 if (insn)
11694 add_reg_note (insn, REG_CFA_RESTORE, reg);
11695 RTX_FRAME_RELATED_P (insn) = 1;
11697 else
11698 queued_cfa_restores
11699 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11702 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11704 static void
11705 ix86_add_queued_cfa_restore_notes (rtx insn)
11707 rtx last;
11708 if (!queued_cfa_restores)
11709 return;
11710 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11712 XEXP (last, 1) = REG_NOTES (insn);
11713 REG_NOTES (insn) = queued_cfa_restores;
11714 queued_cfa_restores = NULL_RTX;
11715 RTX_FRAME_RELATED_P (insn) = 1;
11718 /* Expand prologue or epilogue stack adjustment.
11719 The pattern exists to put a dependency on all ebp-based memory accesses.
11720 STYLE should be negative if instructions should be marked as frame related,
11721 zero if %r11 register is live and cannot be freely used and positive
11722 otherwise. */
11724 static rtx
11725 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11726 int style, bool set_cfa)
11728 struct machine_function *m = cfun->machine;
11729 rtx insn;
11730 bool add_frame_related_expr = false;
11732 if (Pmode == SImode)
11733 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11734 else if (x86_64_immediate_operand (offset, DImode))
11735 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11736 else
11738 rtx tmp;
11739 /* r11 is used by indirect sibcall return as well, set before the
11740 epilogue and used after the epilogue. */
11741 if (style)
11742 tmp = gen_rtx_REG (DImode, R11_REG);
11743 else
11745 gcc_assert (src != hard_frame_pointer_rtx
11746 && dest != hard_frame_pointer_rtx);
11747 tmp = hard_frame_pointer_rtx;
11749 insn = emit_insn (gen_rtx_SET (tmp, offset));
11750 if (style < 0)
11751 add_frame_related_expr = true;
11753 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11756 insn = emit_insn (insn);
11757 if (style >= 0)
11758 ix86_add_queued_cfa_restore_notes (insn);
11760 if (set_cfa)
11762 rtx r;
11764 gcc_assert (m->fs.cfa_reg == src);
11765 m->fs.cfa_offset += INTVAL (offset);
11766 m->fs.cfa_reg = dest;
11768 r = gen_rtx_PLUS (Pmode, src, offset);
11769 r = gen_rtx_SET (dest, r);
11770 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11771 RTX_FRAME_RELATED_P (insn) = 1;
11773 else if (style < 0)
11775 RTX_FRAME_RELATED_P (insn) = 1;
11776 if (add_frame_related_expr)
11778 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11779 r = gen_rtx_SET (dest, r);
11780 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11784 if (dest == stack_pointer_rtx)
11786 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11787 bool valid = m->fs.sp_valid;
11788 bool realigned = m->fs.sp_realigned;
11790 if (src == hard_frame_pointer_rtx)
11792 valid = m->fs.fp_valid;
11793 realigned = false;
11794 ooffset = m->fs.fp_offset;
11796 else if (src == crtl->drap_reg)
11798 valid = m->fs.drap_valid;
11799 realigned = false;
11800 ooffset = 0;
11802 else
11804 /* Else there are two possibilities: SP itself, which we set
11805 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11806 taken care of by hand along the eh_return path. */
11807 gcc_checking_assert (src == stack_pointer_rtx
11808 || offset == const0_rtx);
11811 m->fs.sp_offset = ooffset - INTVAL (offset);
11812 m->fs.sp_valid = valid;
11813 m->fs.sp_realigned = realigned;
11815 return insn;
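/* A typical prologue call of the routine above looks like this (editor's
   sketch, copied from the frame allocation code later in this file):

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. SP = SP - ALLOCATE, marked frame related because STYLE is negative,
   with the REG_CFA_ADJUST_CFA note added only while SP is still the CFA
   register.  */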
11818 /* Find an available register to be used as the dynamic realign argument
11819 pointer (DRAP) register. Such a register will be written in the prologue
11820 and used at the beginning of the body, so it must not be
11821 1. parameter passing register.
11822 2. GOT pointer.
11823 We reuse the static-chain register if it is available. Otherwise, we
11824 use DI for i386 and R13 for x86-64. We chose R13 since it has a
11825 shorter encoding.
11827 Return: the regno of chosen register. */
11829 static unsigned int
11830 find_drap_reg (void)
11832 tree decl = cfun->decl;
11834 /* Always use callee-saved register if there are no caller-saved
11835 registers. */
11836 if (TARGET_64BIT)
11838 /* Use R13 for a nested function or a function that needs a static chain.
11839 Since a function with a tail call may use any caller-saved
11840 register in the epilogue, DRAP must not use a caller-saved
11841 register in that case. */
11842 if (DECL_STATIC_CHAIN (decl)
11843 || cfun->machine->no_caller_saved_registers
11844 || crtl->tail_call_emit)
11845 return R13_REG;
11847 return R10_REG;
11849 else
11851 /* Use DI for a nested function or a function that needs a static chain.
11852 Since a function with a tail call may use any caller-saved
11853 register in the epilogue, DRAP must not use a caller-saved
11854 register in that case. */
11855 if (DECL_STATIC_CHAIN (decl)
11856 || cfun->machine->no_caller_saved_registers
11857 || crtl->tail_call_emit)
11858 return DI_REG;
11860 /* Reuse the static chain register if it isn't used for parameter
11861 passing. */
11862 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11864 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11865 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11866 return CX_REG;
11868 return DI_REG;
11872 /* Handle a "force_align_arg_pointer" attribute. */
11874 static tree
11875 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11876 tree, int, bool *no_add_attrs)
11878 if (TREE_CODE (*node) != FUNCTION_TYPE
11879 && TREE_CODE (*node) != METHOD_TYPE
11880 && TREE_CODE (*node) != FIELD_DECL
11881 && TREE_CODE (*node) != TYPE_DECL)
11883 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11884 name);
11885 *no_add_attrs = true;
11888 return NULL_TREE;
11891 /* Return minimum incoming stack alignment. */
11893 static unsigned int
11894 ix86_minimum_incoming_stack_boundary (bool sibcall)
11896 unsigned int incoming_stack_boundary;
11898 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
11899 if (cfun->machine->func_type != TYPE_NORMAL)
11900 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
11901 /* Prefer the one specified at command line. */
11902 else if (ix86_user_incoming_stack_boundary)
11903 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11904 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
11905 if -mstackrealign is used, this isn't the sibcall check, and the
11906 estimated stack alignment is 128 bits. */
11907 else if (!sibcall
11908 && ix86_force_align_arg_pointer
11909 && crtl->stack_alignment_estimated == 128)
11910 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11911 else
11912 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11914 /* Incoming stack alignment can be changed on individual functions
11915 via force_align_arg_pointer attribute. We use the smallest
11916 incoming stack boundary. */
11917 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11918 && lookup_attribute (ix86_force_align_arg_pointer_string,
11919 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11920 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11922 /* The incoming stack frame has to be aligned at least at
11923 parm_stack_boundary. */
11924 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11925 incoming_stack_boundary = crtl->parm_stack_boundary;
11927 /* Stack at entrance of main is aligned by runtime. We use the
11928 smallest incoming stack boundary. */
11929 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
11930 && DECL_NAME (current_function_decl)
11931 && MAIN_NAME_P (DECL_NAME (current_function_decl))
11932 && DECL_FILE_SCOPE_P (current_function_decl))
11933 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
11935 return incoming_stack_boundary;
11938 /* Update incoming stack boundary and estimated stack alignment. */
11940 static void
11941 ix86_update_stack_boundary (void)
11943 ix86_incoming_stack_boundary
11944 = ix86_minimum_incoming_stack_boundary (false);
11946 /* x86_64 varargs needs 16-byte stack alignment for the register save
11947 area. */
11948 if (TARGET_64BIT
11949 && cfun->stdarg
11950 && crtl->stack_alignment_estimated < 128)
11951 crtl->stack_alignment_estimated = 128;
11953 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
11954 if (ix86_tls_descriptor_calls_expanded_in_cfun
11955 && crtl->preferred_stack_boundary < 128)
11956 crtl->preferred_stack_boundary = 128;
11959 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
11960 needed or an rtx for DRAP otherwise. */
11962 static rtx
11963 ix86_get_drap_rtx (void)
11965 /* We must use DRAP if there are outgoing arguments on stack and
11966 ACCUMULATE_OUTGOING_ARGS is false. */
11967 if (ix86_force_drap
11968 || (cfun->machine->outgoing_args_on_stack
11969 && !ACCUMULATE_OUTGOING_ARGS))
11970 crtl->need_drap = true;
11972 if (stack_realign_drap)
11974 /* Assign DRAP to vDRAP and return vDRAP. */
11975 unsigned int regno = find_drap_reg ();
11976 rtx drap_vreg;
11977 rtx arg_ptr;
11978 rtx_insn *seq, *insn;
11980 arg_ptr = gen_rtx_REG (Pmode, regno);
11981 crtl->drap_reg = arg_ptr;
11983 start_sequence ();
11984 drap_vreg = copy_to_reg (arg_ptr);
11985 seq = get_insns ();
11986 end_sequence ();
11988 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
11989 if (!optimize)
11991 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
11992 RTX_FRAME_RELATED_P (insn) = 1;
11994 return drap_vreg;
11996 else
11997 return NULL;
12000 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12002 static rtx
12003 ix86_internal_arg_pointer (void)
12005 return virtual_incoming_args_rtx;
12008 struct scratch_reg {
12009 rtx reg;
12010 bool saved;
12013 /* Return a short-lived scratch register for use on function entry.
12014 In 32-bit mode, it is valid only after the registers are saved
12015 in the prologue. This register must be released by means of
12016 release_scratch_register_on_entry once it is dead. */
12018 static void
12019 get_scratch_register_on_entry (struct scratch_reg *sr)
12021 int regno;
12023 sr->saved = false;
12025 if (TARGET_64BIT)
12027 /* We always use R11 in 64-bit mode. */
12028 regno = R11_REG;
12030 else
12032 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12033 bool fastcall_p
12034 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12035 bool thiscall_p
12036 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12037 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12038 int regparm = ix86_function_regparm (fntype, decl);
12039 int drap_regno
12040 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12042 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12043 for the static chain register. */
12044 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12045 && drap_regno != AX_REG)
12046 regno = AX_REG;
12047 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12048 for the static chain register. */
12049 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12050 regno = AX_REG;
12051 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12052 regno = DX_REG;
12053 /* ecx is the static chain register. */
12054 else if (regparm < 3 && !fastcall_p && !thiscall_p
12055 && !static_chain_p
12056 && drap_regno != CX_REG)
12057 regno = CX_REG;
12058 else if (ix86_save_reg (BX_REG, true, false))
12059 regno = BX_REG;
12060 /* esi is the static chain register. */
12061 else if (!(regparm == 3 && static_chain_p)
12062 && ix86_save_reg (SI_REG, true, false))
12063 regno = SI_REG;
12064 else if (ix86_save_reg (DI_REG, true, false))
12065 regno = DI_REG;
12066 else
12068 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12069 sr->saved = true;
12073 sr->reg = gen_rtx_REG (Pmode, regno);
12074 if (sr->saved)
12076 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12077 RTX_FRAME_RELATED_P (insn) = 1;
12081 /* Release a scratch register obtained from the preceding function. */
12083 static void
12084 release_scratch_register_on_entry (struct scratch_reg *sr)
12086 if (sr->saved)
12088 struct machine_function *m = cfun->machine;
12089 rtx x, insn = emit_insn (gen_pop (sr->reg));
12091 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12092 RTX_FRAME_RELATED_P (insn) = 1;
12093 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12094 x = gen_rtx_SET (stack_pointer_rtx, x);
12095 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12096 m->fs.sp_offset -= UNITS_PER_WORD;
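/* Usage pattern for the pair above (editor's sketch, as used by the stack
   probing code below):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   If no suitable register was free, sr.saved is true and the pair brackets
   the use with a push and pop of the chosen register.  */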
12100 /* Return the probing interval for -fstack-clash-protection. */
12102 static HOST_WIDE_INT
12103 get_probe_interval (void)
12105 if (flag_stack_clash_protection)
12106 return (HOST_WIDE_INT_1U
12107 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
12108 else
12109 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
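/* For instance, with the usual default of 12 for both the param and
   STACK_CHECK_PROBE_INTERVAL_EXP (an assumption of this note; both are
   defined elsewhere), the interval is 1 << 12 == 4096 bytes, i.e. one probe
   per page of allocated stack.  */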
12112 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12114 This differs from the next routine in that it tries hard to prevent
12115 attacks that jump the stack guard. Thus it is never allowed to allocate
12116 more than PROBE_INTERVAL bytes of stack space without a suitable
12117 probe. */
12119 static void
12120 ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
12122 struct machine_function *m = cfun->machine;
12124 /* If this function does not statically allocate stack space, then
12125 no probes are needed. */
12126 if (!size)
12128 /* However, the allocation of space via pushes for register
12129 saves could be viewed as allocating space, but without the
12130 need to probe. */
12131 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12132 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12133 else
12134 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12135 return;
12138 /* If we are a noreturn function, then we have to consider the
12139 possibility that we're called via a jump rather than a call.
12141 Thus we don't have the implicit probe generated by saving the
12142 return address into the stack at the call. Thus, the stack
12143 pointer could be anywhere in the guard page. The safe thing
12144 to do is emit a probe now.
12146 ?!? This should be revamped to work like aarch64 and s390 where
12147 we track the offset from the most recent probe. Normally that
12148 offset would be zero. For a noreturn function we would reset
12149 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
12150 we just probe when we cross PROBE_INTERVAL. */
12151 if (TREE_THIS_VOLATILE (cfun->decl))
12153 /* We can safely use any register here since we're just going to push
12154 its value and immediately pop it back. But we do try and avoid
12155 argument passing registers so as not to introduce dependencies in
12156 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
12157 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
12158 rtx_insn *insn = emit_insn (gen_push (dummy_reg));
12159 RTX_FRAME_RELATED_P (insn) = 1;
12160 ix86_emit_restore_reg_using_pop (dummy_reg);
12161 emit_insn (gen_blockage ());
12164 /* If we allocate less than the size of the guard statically,
12165 then no probing is necessary, but we do need to allocate
12166 the stack. */
12167 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12169 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12170 GEN_INT (-size), -1,
12171 m->fs.cfa_reg == stack_pointer_rtx);
12172 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12173 return;
12176 /* We're allocating a large enough stack frame that we need to
12177 emit probes. Either emit them inline or in a loop depending
12178 on the size. */
12179 HOST_WIDE_INT probe_interval = get_probe_interval ();
12180 if (size <= 4 * probe_interval)
12182 HOST_WIDE_INT i;
12183 for (i = probe_interval; i <= size; i += probe_interval)
12185 /* Allocate PROBE_INTERVAL bytes. */
12186 rtx insn
12187 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12188 GEN_INT (-probe_interval), -1,
12189 m->fs.cfa_reg == stack_pointer_rtx);
12190 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12192 /* And probe at *sp. */
12193 emit_stack_probe (stack_pointer_rtx);
12194 emit_insn (gen_blockage ());
12197 /* We need to allocate space for the residual, but we do not need
12198 to probe the residual. */
12199 HOST_WIDE_INT residual = (i - probe_interval - size);
12200 if (residual)
12201 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12202 GEN_INT (residual), -1,
12203 m->fs.cfa_reg == stack_pointer_rtx);
12204 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12206 else
12208 struct scratch_reg sr;
12209 get_scratch_register_on_entry (&sr);
12211 /* Step 1: round SIZE down to a multiple of the interval. */
12212 HOST_WIDE_INT rounded_size = size & -probe_interval;
12214 /* Step 2: compute final value of the loop counter. Use lea if
12215 possible. */
12216 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12217 rtx insn;
12218 if (address_no_seg_operand (addr, Pmode))
12219 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12220 else
12222 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12223 insn = emit_insn (gen_rtx_SET (sr.reg,
12224 gen_rtx_PLUS (Pmode, sr.reg,
12225 stack_pointer_rtx)));
12227 if (m->fs.cfa_reg == stack_pointer_rtx)
12229 add_reg_note (insn, REG_CFA_DEF_CFA,
12230 plus_constant (Pmode, sr.reg,
12231 m->fs.cfa_offset + rounded_size));
12232 RTX_FRAME_RELATED_P (insn) = 1;
12235 /* Step 3: the loop. */
12236 rtx size_rtx = GEN_INT (rounded_size);
12237 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12238 size_rtx));
12239 if (m->fs.cfa_reg == stack_pointer_rtx)
12241 m->fs.cfa_offset += rounded_size;
12242 add_reg_note (insn, REG_CFA_DEF_CFA,
12243 plus_constant (Pmode, stack_pointer_rtx,
12244 m->fs.cfa_offset));
12245 RTX_FRAME_RELATED_P (insn) = 1;
12247 m->fs.sp_offset += rounded_size;
12248 emit_insn (gen_blockage ());
12250 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12251 is equal to ROUNDED_SIZE. */
12253 if (size != rounded_size)
12254 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12255 GEN_INT (rounded_size - size), -1,
12256 m->fs.cfa_reg == stack_pointer_rtx);
12257 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12259 release_scratch_register_on_entry (&sr);
12262 /* Make sure nothing is scheduled before we are done. */
12263 emit_insn (gen_blockage ());
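/* Editor's illustration of the dispatch above, assuming both the guard size
   and the probe interval are 4096 bytes: a 12288-byte frame is handled
   inline as three "allocate 4096, then probe *sp" pairs with no residual,
   while anything larger than 4 * 4096 bytes goes through the
   ix86_gen_adjust_stack_and_probe loop using a scratch register.  */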
12266 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12268 static void
12269 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12271 /* We skip the probe for the first interval + a small dope of 4 words and
12272 probe that many bytes past the specified size to maintain a protection
12273 area at the bottom of the stack. */
12274 const int dope = 4 * UNITS_PER_WORD;
12275 rtx size_rtx = GEN_INT (size), last;
12277 /* See if we have a constant small number of probes to generate. If so,
12278 that's the easy case. The run-time loop is made up of 9 insns in the
12279 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12280 for n # of intervals. */
12281 if (size <= 4 * get_probe_interval ())
12283 HOST_WIDE_INT i, adjust;
12284 bool first_probe = true;
12286 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12287 values of N from 1 until it exceeds SIZE. If only one probe is
12288 needed, this will not generate any code. Then adjust and probe
12289 to PROBE_INTERVAL + SIZE. */
12290 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12292 if (first_probe)
12294 adjust = 2 * get_probe_interval () + dope;
12295 first_probe = false;
12297 else
12298 adjust = get_probe_interval ();
12300 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12301 plus_constant (Pmode, stack_pointer_rtx,
12302 -adjust)));
12303 emit_stack_probe (stack_pointer_rtx);
12306 if (first_probe)
12307 adjust = size + get_probe_interval () + dope;
12308 else
12309 adjust = size + get_probe_interval () - i;
12311 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12312 plus_constant (Pmode, stack_pointer_rtx,
12313 -adjust)));
12314 emit_stack_probe (stack_pointer_rtx);
12316 /* Adjust back to account for the additional first interval. */
12317 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12318 plus_constant (Pmode, stack_pointer_rtx,
12319 (get_probe_interval ()
12320 + dope))));
12323 /* Otherwise, do the same as above, but in a loop. Note that we must be
12324 extra careful with variables wrapping around because we might be at
12325 the very top (or the very bottom) of the address space and we have
12326 to be able to handle this case properly; in particular, we use an
12327 equality test for the loop condition. */
12328 else
12330 HOST_WIDE_INT rounded_size;
12331 struct scratch_reg sr;
12333 get_scratch_register_on_entry (&sr);
12336 /* Step 1: round SIZE to the previous multiple of the interval. */
12338 rounded_size = ROUND_DOWN (size, get_probe_interval ());
12341 /* Step 2: compute initial and final value of the loop counter. */
12343 /* SP = SP_0 + PROBE_INTERVAL. */
12344 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12345 plus_constant (Pmode, stack_pointer_rtx,
12346 - (get_probe_interval () + dope))));
12348 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12349 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12350 emit_insn (gen_rtx_SET (sr.reg,
12351 plus_constant (Pmode, stack_pointer_rtx,
12352 -rounded_size)));
12353 else
12355 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12356 emit_insn (gen_rtx_SET (sr.reg,
12357 gen_rtx_PLUS (Pmode, sr.reg,
12358 stack_pointer_rtx)));
12362 /* Step 3: the loop
12366 SP = SP + PROBE_INTERVAL
12367 probe at SP
12369 while (SP != LAST_ADDR)
12371 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12372 values of N from 1 until it is equal to ROUNDED_SIZE. */
12374 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12377 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12378 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12380 if (size != rounded_size)
12382 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12383 plus_constant (Pmode, stack_pointer_rtx,
12384 rounded_size - size)));
12385 emit_stack_probe (stack_pointer_rtx);
12388 /* Adjust back to account for the additional first interval. */
12389 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12390 plus_constant (Pmode, stack_pointer_rtx,
12391 (get_probe_interval ()
12392 + dope))));
12394 release_scratch_register_on_entry (&sr);
12397 /* Even if the stack pointer isn't the CFA register, we need to correctly
12398 describe the adjustments made to it, in particular differentiate the
12399 frame-related ones from the frame-unrelated ones. */
12400 if (size > 0)
12402 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12403 XVECEXP (expr, 0, 0)
12404 = gen_rtx_SET (stack_pointer_rtx,
12405 plus_constant (Pmode, stack_pointer_rtx, -size));
12406 XVECEXP (expr, 0, 1)
12407 = gen_rtx_SET (stack_pointer_rtx,
12408 plus_constant (Pmode, stack_pointer_rtx,
12409 get_probe_interval () + dope + size));
12410 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12411 RTX_FRAME_RELATED_P (last) = 1;
12413 cfun->machine->fs.sp_offset += size;
12416 /* Make sure nothing is scheduled before we are done. */
12417 emit_insn (gen_blockage ());
12420 /* Adjust the stack pointer up to REG while probing it. */
12422 const char *
12423 output_adjust_stack_and_probe (rtx reg)
12425 static int labelno = 0;
12426 char loop_lab[32];
12427 rtx xops[2];
12429 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12431 /* Loop. */
12432 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12434 /* SP = SP + PROBE_INTERVAL. */
12435 xops[0] = stack_pointer_rtx;
12436 xops[1] = GEN_INT (get_probe_interval ());
12437 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12439 /* Probe at SP. */
12440 xops[1] = const0_rtx;
12441 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12443 /* Test if SP == LAST_ADDR. */
12444 xops[0] = stack_pointer_rtx;
12445 xops[1] = reg;
12446 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12448 /* Branch. */
12449 fputs ("\tjne\t", asm_out_file);
12450 assemble_name_raw (asm_out_file, loop_lab);
12451 fputc ('\n', asm_out_file);
12453 return "";
12456 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12457 inclusive. These are offsets from the current stack pointer. */
12459 static void
12460 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12462 /* See if we have a constant small number of probes to generate. If so,
12463 that's the easy case. The run-time loop is made up of 6 insns in the
12464 generic case while the compile-time loop is made up of n insns for n #
12465 of intervals. */
12466 if (size <= 6 * get_probe_interval ())
12468 HOST_WIDE_INT i;
12470 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12471 it exceeds SIZE. If only one probe is needed, this will not
12472 generate any code. Then probe at FIRST + SIZE. */
12473 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12474 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12475 -(first + i)));
12477 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12478 -(first + size)));
12481 /* Otherwise, do the same as above, but in a loop. Note that we must be
12482 extra careful with variables wrapping around because we might be at
12483 the very top (or the very bottom) of the address space and we have
12484 to be able to handle this case properly; in particular, we use an
12485 equality test for the loop condition. */
12486 else
12488 HOST_WIDE_INT rounded_size, last;
12489 struct scratch_reg sr;
12491 get_scratch_register_on_entry (&sr);
12494 /* Step 1: round SIZE to the previous multiple of the interval. */
12496 rounded_size = ROUND_DOWN (size, get_probe_interval ());
12499 /* Step 2: compute initial and final value of the loop counter. */
12501 /* TEST_OFFSET = FIRST. */
12502 emit_move_insn (sr.reg, GEN_INT (-first));
12504 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12505 last = first + rounded_size;
12508 /* Step 3: the loop
12512 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12513 probe at TEST_ADDR
12515 while (TEST_ADDR != LAST_ADDR)
12517 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12518 until it is equal to ROUNDED_SIZE. */
12520 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12523 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12524 that SIZE is equal to ROUNDED_SIZE. */
12526 if (size != rounded_size)
12527 emit_stack_probe (plus_constant (Pmode,
12528 gen_rtx_PLUS (Pmode,
12529 stack_pointer_rtx,
12530 sr.reg),
12531 rounded_size - size));
12533 release_scratch_register_on_entry (&sr);
12536 /* Make sure nothing is scheduled before we are done. */
12537 emit_insn (gen_blockage ());
12540 /* Probe a range of stack addresses from REG to END, inclusive. These are
12541 offsets from the current stack pointer. */
12543 const char *
12544 output_probe_stack_range (rtx reg, rtx end)
12546 static int labelno = 0;
12547 char loop_lab[32];
12548 rtx xops[3];
12550 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12552 /* Loop. */
12553 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12555 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12556 xops[0] = reg;
12557 xops[1] = GEN_INT (get_probe_interval ());
12558 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12560 /* Probe at TEST_ADDR. */
12561 xops[0] = stack_pointer_rtx;
12562 xops[1] = reg;
12563 xops[2] = const0_rtx;
12564 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12566 /* Test if TEST_ADDR == LAST_ADDR. */
12567 xops[0] = reg;
12568 xops[1] = end;
12569 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12571 /* Branch. */
12572 fputs ("\tjne\t", asm_out_file);
12573 assemble_name_raw (asm_out_file, loop_lab);
12574 fputc ('\n', asm_out_file);
12576 return "";
12579 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
12580 will guide prologue/epilogue to be generated in correct form. */
12582 static void
12583 ix86_finalize_stack_frame_flags (void)
12585 /* Check if stack realignment is really needed after reload, and
12586 store the result in cfun. */
12587 unsigned int incoming_stack_boundary
12588 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12589 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12590 unsigned int stack_alignment
12591 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12592 ? crtl->max_used_stack_slot_alignment
12593 : crtl->stack_alignment_needed);
12594 unsigned int stack_realign
12595 = (incoming_stack_boundary < stack_alignment);
12596 bool recompute_frame_layout_p = false;
12598 if (crtl->stack_realign_finalized)
12600 /* After stack_realign_needed is finalized, we can no longer
12601 change it. */
12602 gcc_assert (crtl->stack_realign_needed == stack_realign);
12603 return;
12606 /* If the only reason for frame_pointer_needed is that we conservatively
12607 assumed stack realignment might be needed or -fno-omit-frame-pointer
12608 is used, but in the end nothing that needed the stack alignment had
12609 been spilled and no stack access was needed, clear frame_pointer_needed
12610 and say we don't need stack realignment. */
12611 if ((stack_realign || !flag_omit_frame_pointer)
12612 && frame_pointer_needed
12613 && crtl->is_leaf
12614 && crtl->sp_is_unchanging
12615 && !ix86_current_function_calls_tls_descriptor
12616 && !crtl->accesses_prior_frames
12617 && !cfun->calls_alloca
12618 && !crtl->calls_eh_return
12619 /* See ira_setup_eliminable_regset for the rationale. */
12620 && !(STACK_CHECK_MOVING_SP
12621 && flag_stack_check
12622 && flag_exceptions
12623 && cfun->can_throw_non_call_exceptions)
12624 && !ix86_frame_pointer_required ()
12625 && get_frame_size () == 0
12626 && ix86_nsaved_sseregs () == 0
12627 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12629 HARD_REG_SET set_up_by_prologue, prologue_used;
12630 basic_block bb;
12632 CLEAR_HARD_REG_SET (prologue_used);
12633 CLEAR_HARD_REG_SET (set_up_by_prologue);
12634 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12635 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12636 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12637 HARD_FRAME_POINTER_REGNUM);
12639 /* The preferred stack alignment is the minimum stack alignment. */
12640 if (stack_alignment > crtl->preferred_stack_boundary)
12641 stack_alignment = crtl->preferred_stack_boundary;
12643 bool require_stack_frame = false;
12645 FOR_EACH_BB_FN (bb, cfun)
12647 rtx_insn *insn;
12648 FOR_BB_INSNS (bb, insn)
12649 if (NONDEBUG_INSN_P (insn)
12650 && requires_stack_frame_p (insn, prologue_used,
12651 set_up_by_prologue))
12653 require_stack_frame = true;
12655 if (stack_realign)
12657 /* Find the maximum stack alignment. */
12658 subrtx_iterator::array_type array;
12659 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12660 if (MEM_P (*iter)
12661 && (reg_mentioned_p (stack_pointer_rtx,
12662 *iter)
12663 || reg_mentioned_p (frame_pointer_rtx,
12664 *iter)))
12666 unsigned int alignment = MEM_ALIGN (*iter);
12667 if (alignment > stack_alignment)
12668 stack_alignment = alignment;
12674 if (require_stack_frame)
12676 /* Stack frame is required. If stack alignment needed is less
12677 than incoming stack boundary, don't realign stack. */
12678 stack_realign = incoming_stack_boundary < stack_alignment;
12679 if (!stack_realign)
12681 crtl->max_used_stack_slot_alignment
12682 = incoming_stack_boundary;
12683 crtl->stack_alignment_needed
12684 = incoming_stack_boundary;
12685 /* Also update preferred_stack_boundary for leaf
12686 functions. */
12687 crtl->preferred_stack_boundary
12688 = incoming_stack_boundary;
12691 else
12693 /* If drap has been set, but it actually isn't live at the
12694 start of the function, there is no reason to set it up. */
12695 if (crtl->drap_reg)
12697 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12698 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
12699 REGNO (crtl->drap_reg)))
12701 crtl->drap_reg = NULL_RTX;
12702 crtl->need_drap = false;
12705 else
12706 cfun->machine->no_drap_save_restore = true;
12708 frame_pointer_needed = false;
12709 stack_realign = false;
12710 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12711 crtl->stack_alignment_needed = incoming_stack_boundary;
12712 crtl->stack_alignment_estimated = incoming_stack_boundary;
12713 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12714 crtl->preferred_stack_boundary = incoming_stack_boundary;
12715 df_finish_pass (true);
12716 df_scan_alloc (NULL);
12717 df_scan_blocks ();
12718 df_compute_regs_ever_live (true);
12719 df_analyze ();
12721 if (flag_var_tracking)
12723 /* Since frame pointer is no longer available, replace it with
12724 stack pointer - UNITS_PER_WORD in debug insns. */
12725 df_ref ref, next;
12726 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
12727 ref; ref = next)
12729 next = DF_REF_NEXT_REG (ref);
12730 if (!DF_REF_INSN_INFO (ref))
12731 continue;
12733 /* Make sure the next ref is for a different instruction,
12734 so that we're not affected by the rescan. */
12735 rtx_insn *insn = DF_REF_INSN (ref);
12736 while (next && DF_REF_INSN (next) == insn)
12737 next = DF_REF_NEXT_REG (next);
12739 if (DEBUG_INSN_P (insn))
12741 bool changed = false;
12742 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
12744 rtx *loc = DF_REF_LOC (ref);
12745 if (*loc == hard_frame_pointer_rtx)
12747 *loc = plus_constant (Pmode,
12748 stack_pointer_rtx,
12749 -UNITS_PER_WORD);
12750 changed = true;
12753 if (changed)
12754 df_insn_rescan (insn);
12759 recompute_frame_layout_p = true;
12763 if (crtl->stack_realign_needed != stack_realign)
12764 recompute_frame_layout_p = true;
12765 crtl->stack_realign_needed = stack_realign;
12766 crtl->stack_realign_finalized = true;
12767 if (recompute_frame_layout_p)
12768 ix86_compute_frame_layout ();
12771 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12773 static void
12774 ix86_elim_entry_set_got (rtx reg)
12776 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12777 rtx_insn *c_insn = BB_HEAD (bb);
12778 if (!NONDEBUG_INSN_P (c_insn))
12779 c_insn = next_nonnote_nondebug_insn (c_insn);
12780 if (c_insn && NONJUMP_INSN_P (c_insn))
12782 rtx pat = PATTERN (c_insn);
12783 if (GET_CODE (pat) == PARALLEL)
12785 rtx vec = XVECEXP (pat, 0, 0);
12786 if (GET_CODE (vec) == SET
12787 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12788 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12789 delete_insn (c_insn);
12794 static rtx
12795 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
12797 rtx addr, mem;
12799 if (offset)
12800 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
12801 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
12802 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
12805 static inline rtx
12806 gen_frame_load (rtx reg, rtx frame_reg, int offset)
12808 return gen_frame_set (reg, frame_reg, offset, false);
12811 static inline rtx
12812 gen_frame_store (rtx reg, rtx frame_reg, int offset)
12814 return gen_frame_set (reg, frame_reg, offset, true);
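/* For example, gen_frame_store (xmm, rax, -16) builds a SET of the form
   (set (mem:V4SF (plus:DI rax (const_int -16))) xmm), while a zero OFFSET
   uses FRAME_REG itself as the address.  This is the element shape
   assembled into the PARALLELs for the ms2sysv stubs below.  (Editor's
   example; the particular registers are illustrative.)  */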
12817 static void
12818 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
12820 struct machine_function *m = cfun->machine;
12821 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
12822 + m->call_ms2sysv_extra_regs;
12823 rtvec v = rtvec_alloc (ncregs + 1);
12824 unsigned int align, i, vi = 0;
12825 rtx_insn *insn;
12826 rtx sym, addr;
12827 rtx rax = gen_rtx_REG (word_mode, AX_REG);
12828 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
12830 /* AL should only be live with sysv_abi. */
12831 gcc_assert (!ix86_eax_live_at_start_p ());
12832 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
12834 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
12835 regardless of whether we've actually realigned the stack or not. */
12836 align = GET_MODE_ALIGNMENT (V4SFmode);
12837 addr = choose_baseaddr (frame.stack_realign_offset
12838 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
12839 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
12841 emit_insn (gen_rtx_SET (rax, addr));
12843 /* Get the stub symbol. */
12844 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
12845 : XLOGUE_STUB_SAVE);
12846 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
12848 for (i = 0; i < ncregs; ++i)
12850 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
12851 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
12852 r.regno);
12853 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
12856 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
12858 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
12859 RTX_FRAME_RELATED_P (insn) = true;
12862 /* Expand the prologue into a bunch of separate insns. */
12864 void
12865 ix86_expand_prologue (void)
12867 struct machine_function *m = cfun->machine;
12868 rtx insn, t;
12869 struct ix86_frame frame;
12870 HOST_WIDE_INT allocate;
12871 bool int_registers_saved;
12872 bool sse_registers_saved;
12873 bool save_stub_call_needed;
12874 rtx static_chain = NULL_RTX;
12876 if (ix86_function_naked (current_function_decl))
12877 return;
12879 ix86_finalize_stack_frame_flags ();
12881 /* DRAP should not coexist with stack_realign_fp */
12882 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12884 memset (&m->fs, 0, sizeof (m->fs));
12886 /* Initialize CFA state for before the prologue. */
12887 m->fs.cfa_reg = stack_pointer_rtx;
12888 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12890 /* Track SP offset to the CFA. We continue tracking this after we've
12891 swapped the CFA register away from SP. In the case of re-alignment
12892 this is fudged; we're interested to offsets within the local frame. */
12893 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12894 m->fs.sp_valid = true;
12895 m->fs.sp_realigned = false;
12897 frame = m->frame;
12899 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12901 /* We should have already generated an error for any use of
12902 ms_hook on a nested function. */
12903 gcc_checking_assert (!ix86_static_chain_on_stack);
12905 /* Check if profiling is active and we shall use profiling before
12906 prologue variant. If so sorry. */
12907 if (crtl->profile && flag_fentry != 0)
12908 sorry ("ms_hook_prologue attribute isn%'t compatible "
12909 "with -mfentry for 32-bit");
12911 /* In ix86_asm_output_function_label we emitted:
12912 8b ff movl.s %edi,%edi
12913 55 push %ebp
12914 8b ec movl.s %esp,%ebp
12916 This matches the hookable function prologue in Win32 API
12917 functions in Microsoft Windows XP Service Pack 2 and newer.
12918 Wine uses this to enable Windows apps to hook the Win32 API
12919 functions provided by Wine.
12921 What that means is that we've already set up the frame pointer. */
12923 if (frame_pointer_needed
12924 && !(crtl->drap_reg && crtl->stack_realign_needed))
12926 rtx push, mov;
12928 /* We've decided to use the frame pointer already set up.
12929 Describe this to the unwinder by pretending that both
12930 push and mov insns happen right here.
12932 Putting the unwind info here at the end of the ms_hook
12933 is done so that we can make absolutely certain we get
12934 the required byte sequence at the start of the function,
12935 rather than relying on an assembler that can produce
12936 the exact encoding required.
12938 However it does mean (in the unpatched case) that we have
12939 a 1 insn window where the asynchronous unwind info is
12940 incorrect. However, if we placed the unwind info at
12941 its correct location we would have incorrect unwind info
12942 in the patched case. Which is probably all moot since
12943 I don't expect Wine generates dwarf2 unwind info for the
12944 system libraries that use this feature. */
12946 insn = emit_insn (gen_blockage ());
12948 push = gen_push (hard_frame_pointer_rtx);
12949 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12950 stack_pointer_rtx);
12951 RTX_FRAME_RELATED_P (push) = 1;
12952 RTX_FRAME_RELATED_P (mov) = 1;
12954 RTX_FRAME_RELATED_P (insn) = 1;
12955 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12956 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12958 /* Note that gen_push incremented m->fs.cfa_offset, even
12959 though we didn't emit the push insn here. */
12960 m->fs.cfa_reg = hard_frame_pointer_rtx;
12961 m->fs.fp_offset = m->fs.cfa_offset;
12962 m->fs.fp_valid = true;
12964 else
12966 /* The frame pointer is not needed so pop %ebp again.
12967 This leaves us with a pristine state. */
12968 emit_insn (gen_pop (hard_frame_pointer_rtx));
12972 /* The first insn of a function that accepts its static chain on the
12973 stack is to push the register that would be filled in by a direct
12974 call. This insn will be skipped by the trampoline. */
12975 else if (ix86_static_chain_on_stack)
12977 static_chain = ix86_static_chain (cfun->decl, false);
12978 insn = emit_insn (gen_push (static_chain));
12979 emit_insn (gen_blockage ());
12981 /* We don't want to interpret this push insn as a register save,
12982 only as a stack adjustment. The real copy of the register as
12983 a save will be done later, if needed. */
12984 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12985 t = gen_rtx_SET (stack_pointer_rtx, t);
12986 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12987 RTX_FRAME_RELATED_P (insn) = 1;
12990 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12991 of DRAP is needed and stack realignment is really needed after reload */
12992 if (stack_realign_drap)
12994 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12996 /* Can't use DRAP in interrupt function. */
12997 if (cfun->machine->func_type != TYPE_NORMAL)
12998 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
12999 "in interrupt service routine. This may be worked "
13000 "around by avoiding functions with aggregate return.");
13002 /* Only need to push parameter pointer reg if it is caller saved. */
13003 if (!call_used_regs[REGNO (crtl->drap_reg)])
13005 /* Push arg pointer reg */
13006 insn = emit_insn (gen_push (crtl->drap_reg));
13007 RTX_FRAME_RELATED_P (insn) = 1;
13010 /* Grab the argument pointer. */
13011 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13012 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13013 RTX_FRAME_RELATED_P (insn) = 1;
13014 m->fs.cfa_reg = crtl->drap_reg;
13015 m->fs.cfa_offset = 0;
13017 /* Align the stack. */
13018 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13019 stack_pointer_rtx,
13020 GEN_INT (-align_bytes)));
13021 RTX_FRAME_RELATED_P (insn) = 1;
13023 /* Replicate the return address on the stack so that return
13024 address can be reached via (argp - 1) slot. This is needed
13025 to implement macro RETURN_ADDR_RTX and intrinsic function
13026 expand_builtin_return_addr etc. */
13027 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13028 t = gen_frame_mem (word_mode, t);
13029 insn = emit_insn (gen_push (t));
13030 RTX_FRAME_RELATED_P (insn) = 1;
13032 /* For the purposes of frame and register save area addressing,
13033 we've started over with a new frame. */
13034 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13035 m->fs.realigned = true;
13037 if (static_chain)
13039 /* Replicate static chain on the stack so that static chain
13040 can be reached via (argp - 2) slot. This is needed for
13041 nested function with stack realignment. */
13042 insn = emit_insn (gen_push (static_chain));
13043 RTX_FRAME_RELATED_P (insn) = 1;
13047 int_registers_saved = (frame.nregs == 0);
13048 sse_registers_saved = (frame.nsseregs == 0);
13049 save_stub_call_needed = (m->call_ms2sysv);
13050 gcc_assert (sse_registers_saved || !save_stub_call_needed);
13052 if (frame_pointer_needed && !m->fs.fp_valid)
13054 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13055 slower on all targets. Also sdb didn't like it. */
13056 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13057 RTX_FRAME_RELATED_P (insn) = 1;
13059 /* Push registers now, before setting the frame pointer
13060 on SEH target. */
13061 if (!int_registers_saved
13062 && TARGET_SEH
13063 && !frame.save_regs_using_mov)
13065 ix86_emit_save_regs ();
13066 int_registers_saved = true;
13067 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13070 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13072 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13073 RTX_FRAME_RELATED_P (insn) = 1;
13075 if (m->fs.cfa_reg == stack_pointer_rtx)
13076 m->fs.cfa_reg = hard_frame_pointer_rtx;
13077 m->fs.fp_offset = m->fs.sp_offset;
13078 m->fs.fp_valid = true;
13082 if (!int_registers_saved)
13084 /* If saving registers via PUSH, do so now. */
13085 if (!frame.save_regs_using_mov)
13087 ix86_emit_save_regs ();
13088 int_registers_saved = true;
13089 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13092 /* When using red zone we may start register saving before allocating
13093 the stack frame saving one cycle of the prologue. However, avoid
13094 doing this if we have to probe the stack; at least on x86_64 the
13095 stack probe can turn into a call that clobbers a red zone location. */
13096 else if (ix86_using_red_zone ()
13097 && (! TARGET_STACK_PROBE
13098 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13100 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13101 int_registers_saved = true;
13105 if (stack_realign_fp)
13107 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13108 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13110 /* Record last valid frame pointer offset. */
13111 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13113 /* The computation of the size of the re-aligned stack frame means
13114 that we must allocate the size of the register save area before
13115 performing the actual alignment. Otherwise we cannot guarantee
13116 that there's enough storage above the realignment point. */
13117 allocate = frame.reg_save_offset - m->fs.sp_offset
13118 + frame.stack_realign_allocate;
13119 if (allocate)
13120 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13121 GEN_INT (-allocate), -1, false);
13123 /* Align the stack. */
13124 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13125 stack_pointer_rtx,
13126 GEN_INT (-align_bytes)));
13127 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13128 m->fs.sp_realigned_offset = m->fs.sp_offset
13129 - frame.stack_realign_allocate;
13130 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13131 Beyond this point, stack access should be done via choose_baseaddr or
13132 by using sp_valid_at and fp_valid_at to determine the correct base
13133 register. Henceforth, any CFA offset should be thought of as logical
13134 and not physical. */
13135 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13136 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13137 m->fs.sp_realigned = true;
13139 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13140 is needed to describe where a register is saved using a realigned
13141 stack pointer, so we need to invalidate the stack pointer for that
13142 target. */
13143 if (TARGET_SEH)
13144 m->fs.sp_valid = false;
13146 /* If SP offset is non-immediate after allocation of the stack frame,
13147 then emit SSE saves or stub call prior to allocating the rest of the
13148 stack frame. This is less efficient for the out-of-line stub because
13149 we can't combine allocations across the call barrier, but it's better
13150 than using a scratch register. */
13151 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
13152 - m->fs.sp_realigned_offset),
13153 Pmode))
13155 if (!sse_registers_saved)
13157 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13158 sse_registers_saved = true;
13160 else if (save_stub_call_needed)
13162 ix86_emit_outlined_ms2sysv_save (frame);
13163 save_stub_call_needed = false;
13168 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13170 if (flag_stack_usage_info)
13172 /* We start to count from ARG_POINTER. */
13173 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13175 /* If it was realigned, take into account the fake frame. */
13176 if (stack_realign_drap)
13178 if (ix86_static_chain_on_stack)
13179 stack_size += UNITS_PER_WORD;
13181 if (!call_used_regs[REGNO (crtl->drap_reg)])
13182 stack_size += UNITS_PER_WORD;
13184 /* This over-estimates by 1 minimal-stack-alignment-unit but
13185 mitigates that by counting in the new return address slot. */
13186 current_function_dynamic_stack_size
13187 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13190 current_function_static_stack_size = stack_size;
13193 /* On SEH target with very large frame size, allocate an area to save
13194 SSE registers (as the very large allocation won't be described). */
13195 if (TARGET_SEH
13196 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13197 && !sse_registers_saved)
13199 HOST_WIDE_INT sse_size =
13200 frame.sse_reg_save_offset - frame.reg_save_offset;
13202 gcc_assert (int_registers_saved);
13204 /* No need to do stack checking as the area will be immediately
13205 written. */
13206 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13207 GEN_INT (-sse_size), -1,
13208 m->fs.cfa_reg == stack_pointer_rtx);
13209 allocate -= sse_size;
13210 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13211 sse_registers_saved = true;
13214 /* The stack has already been decremented by the instruction calling us
13215 so probe if the size is non-negative to preserve the protection area. */
13216 if (allocate >= 0
13217 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13218 || flag_stack_clash_protection))
13220 /* This assert wants to verify that integer registers were saved
13221 prior to probing. This is necessary when probing may be implemented
13222 as a function call (Windows). It is not necessary for stack clash
13223 protection probing. */
13224 if (!flag_stack_clash_protection)
13225 gcc_assert (int_registers_saved);
13227 if (flag_stack_clash_protection)
13229 ix86_adjust_stack_and_probe_stack_clash (allocate);
13230 allocate = 0;
13232 else if (STACK_CHECK_MOVING_SP)
13234 if (!(crtl->is_leaf && !cfun->calls_alloca
13235 && allocate <= get_probe_interval ()))
13237 ix86_adjust_stack_and_probe (allocate);
13238 allocate = 0;
13241 else
13243 HOST_WIDE_INT size = allocate;
13245 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13246 size = 0x80000000 - get_stack_check_protect () - 1;
13248 if (TARGET_STACK_PROBE)
13250 if (crtl->is_leaf && !cfun->calls_alloca)
13252 if (size > get_probe_interval ())
13253 ix86_emit_probe_stack_range (0, size);
13255 else
13256 ix86_emit_probe_stack_range (0,
13257 size + get_stack_check_protect ());
13259 else
13261 if (crtl->is_leaf && !cfun->calls_alloca)
13263 if (size > get_probe_interval ()
13264 && size > get_stack_check_protect ())
13265 ix86_emit_probe_stack_range (get_stack_check_protect (),
13266 size - get_stack_check_protect ());
13268 else
13269 ix86_emit_probe_stack_range (get_stack_check_protect (), size);
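/* Editorial note: get_probe_interval () is normally the 4KiB page size,
   so in the branches above a leaf function that does not call alloca
   and needs at most one interval is left unprobed.  Larger frames go
   through ix86_emit_probe_stack_range (FIRST, SIZE), which touches SIZE
   bytes of the area about to be allocated while skipping the FIRST
   bytes already covered by the caller's protection area.  */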
13274 if (allocate == 0)
13276 else if (!ix86_target_stack_probe ()
13277 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13279 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13280 GEN_INT (-allocate), -1,
13281 m->fs.cfa_reg == stack_pointer_rtx);
13283 else
13285 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13286 rtx r10 = NULL;
13287 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13288 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13289 bool eax_live = ix86_eax_live_at_start_p ();
13290 bool r10_live = false;
13292 if (TARGET_64BIT)
13293 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13295 if (eax_live)
13297 insn = emit_insn (gen_push (eax));
13298 allocate -= UNITS_PER_WORD;
13299 /* Note that SEH directives need to continue tracking the stack
13300 pointer even after the frame pointer has been set up. */
13301 if (sp_is_cfa_reg || TARGET_SEH)
13303 if (sp_is_cfa_reg)
13304 m->fs.cfa_offset += UNITS_PER_WORD;
13305 RTX_FRAME_RELATED_P (insn) = 1;
13306 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13307 gen_rtx_SET (stack_pointer_rtx,
13308 plus_constant (Pmode, stack_pointer_rtx,
13309 -UNITS_PER_WORD)));
13313 if (r10_live)
13315 r10 = gen_rtx_REG (Pmode, R10_REG);
13316 insn = emit_insn (gen_push (r10));
13317 allocate -= UNITS_PER_WORD;
13318 if (sp_is_cfa_reg || TARGET_SEH)
13320 if (sp_is_cfa_reg)
13321 m->fs.cfa_offset += UNITS_PER_WORD;
13322 RTX_FRAME_RELATED_P (insn) = 1;
13323 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13324 gen_rtx_SET (stack_pointer_rtx,
13325 plus_constant (Pmode, stack_pointer_rtx,
13326 -UNITS_PER_WORD)));
13330 emit_move_insn (eax, GEN_INT (allocate));
13331 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13333 /* Use the fact that AX still contains ALLOCATE. */
13334 adjust_stack_insn = (Pmode == DImode
13335 ? gen_pro_epilogue_adjust_stack_di_sub
13336 : gen_pro_epilogue_adjust_stack_si_sub);
13338 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13339 stack_pointer_rtx, eax));
13341 if (sp_is_cfa_reg || TARGET_SEH)
13343 if (sp_is_cfa_reg)
13344 m->fs.cfa_offset += allocate;
13345 RTX_FRAME_RELATED_P (insn) = 1;
13346 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13347 gen_rtx_SET (stack_pointer_rtx,
13348 plus_constant (Pmode, stack_pointer_rtx,
13349 -allocate)));
13351 m->fs.sp_offset += allocate;
13353 /* Use stack_pointer_rtx for relative addressing so that code
13354 works for realigned stack, too. */
13355 if (r10_live && eax_live)
13357 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13358 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13359 gen_frame_mem (word_mode, t));
13360 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13361 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13362 gen_frame_mem (word_mode, t));
13364 else if (eax_live || r10_live)
13366 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13367 emit_move_insn (gen_rtx_REG (word_mode,
13368 (eax_live ? AX_REG : R10_REG)),
13369 gen_frame_mem (word_mode, t));
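/* Sketch of this path (editorial; the probe worker is typically
   ___chkstk_ms on 64-bit mingw targets, which probes without moving
   %rsp):
	push	%rax			# only if %rax carries an argument
	mov	$ALLOCATE, %rax
	call	___chkstk_ms
	sub	%rax, %rsp
	mov	(%rsp,%rax), %rax	# reload %rax from its save slot
   The reloads index with %rsp + %rax so they also work when the stack
   has been realigned.  */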
13372 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13374 /* If we haven't already set up the frame pointer, do so now. */
13375 if (frame_pointer_needed && !m->fs.fp_valid)
13377 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13378 GEN_INT (frame.stack_pointer_offset
13379 - frame.hard_frame_pointer_offset));
13380 insn = emit_insn (insn);
13381 RTX_FRAME_RELATED_P (insn) = 1;
13382 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13384 if (m->fs.cfa_reg == stack_pointer_rtx)
13385 m->fs.cfa_reg = hard_frame_pointer_rtx;
13386 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13387 m->fs.fp_valid = true;
13390 if (!int_registers_saved)
13391 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13392 if (!sse_registers_saved)
13393 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13394 else if (save_stub_call_needed)
13395 ix86_emit_outlined_ms2sysv_save (frame);
13397 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13398 in PROLOGUE. */
13399 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13401 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13402 insn = emit_insn (gen_set_got (pic));
13403 RTX_FRAME_RELATED_P (insn) = 1;
13404 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13405 emit_insn (gen_prologue_use (pic));
13406 /* Delete an already emitted SET_GOT if it exists and is allocated
13407 to REAL_PIC_OFFSET_TABLE_REGNUM. */
13408 ix86_elim_entry_set_got (pic);
13411 if (crtl->drap_reg && !crtl->stack_realign_needed)
13413 /* The vDRAP is set up, but after reload it turns out that stack
13414 realignment isn't necessary; here we emit prologue code to set up
13415 the DRAP without the stack realignment adjustment. */
13416 t = choose_baseaddr (0, NULL);
13417 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13420 /* Prevent instructions from being scheduled into register save push
13421 sequence when access to the redzone area is done through frame pointer.
13422 The offset between the frame pointer and the stack pointer is calculated
13423 relative to the value of the stack pointer at the end of the function
13424 prologue, and moving instructions that access redzone area via frame
13425 pointer inside push sequence violates this assumption. */
13426 if (frame_pointer_needed && frame.red_zone_size)
13427 emit_insn (gen_memory_blockage ());
13429 /* SEH requires that the prologue end within 256 bytes of the start of
13430 the function. Prevent instruction schedules that would extend that.
13431 Further, prevent alloca modifications to the stack pointer from being
13432 combined with prologue modifications. */
13433 if (TARGET_SEH)
13434 emit_insn (gen_prologue_use (stack_pointer_rtx));
13437 /* Emit code to restore REG using a POP insn. */
13439 static void
13440 ix86_emit_restore_reg_using_pop (rtx reg)
13442 struct machine_function *m = cfun->machine;
13443 rtx_insn *insn = emit_insn (gen_pop (reg));
13445 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13446 m->fs.sp_offset -= UNITS_PER_WORD;
13448 if (m->fs.cfa_reg == crtl->drap_reg
13449 && REGNO (reg) == REGNO (crtl->drap_reg))
13451 /* Previously we'd represented the CFA as an expression
13452 like *(%ebp - 8). We've just popped that value from
13453 the stack, which means we need to reset the CFA to
13454 the drap register. This will remain until we restore
13455 the stack pointer. */
13456 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13457 RTX_FRAME_RELATED_P (insn) = 1;
13459 /* This means that the DRAP register is valid for addressing too. */
13460 m->fs.drap_valid = true;
13461 return;
13464 if (m->fs.cfa_reg == stack_pointer_rtx)
13466 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13467 x = gen_rtx_SET (stack_pointer_rtx, x);
13468 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13469 RTX_FRAME_RELATED_P (insn) = 1;
13471 m->fs.cfa_offset -= UNITS_PER_WORD;
13474 /* When the frame pointer is the CFA, and we pop it, we are
13475 swapping back to the stack pointer as the CFA. This happens
13476 for stack frames that don't allocate other data, so we assume
13477 the stack pointer is now pointing at the return address, i.e.
13478 the function entry state, which makes the offset be 1 word. */
13479 if (reg == hard_frame_pointer_rtx)
13481 m->fs.fp_valid = false;
13482 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13484 m->fs.cfa_reg = stack_pointer_rtx;
13485 m->fs.cfa_offset -= UNITS_PER_WORD;
13487 add_reg_note (insn, REG_CFA_DEF_CFA,
13488 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13489 GEN_INT (m->fs.cfa_offset)));
13490 RTX_FRAME_RELATED_P (insn) = 1;
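/* Editorial illustration: popping a register while the stack pointer is
   still the CFA register moves the CFA by one word, so on x86-64 a
   "pop %rbx" emitted here carries REG_CFA_ADJUST_CFA with
   (set sp (plus sp 8)).  Popping %rbp while it is the CFA instead
   redefines the CFA as sp + UNITS_PER_WORD, the function-entry state
   described in the comment above.  */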
13495 /* Emit code to restore saved registers using POP insns. */
13497 static void
13498 ix86_emit_restore_regs_using_pop (void)
13500 unsigned int regno;
13502 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13503 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
13504 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13507 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
13508 omits the emit and only attaches the notes. */
13510 static void
13511 ix86_emit_leave (rtx_insn *insn)
13513 struct machine_function *m = cfun->machine;
13514 if (!insn)
13515 insn = emit_insn (ix86_gen_leave ());
13517 ix86_add_queued_cfa_restore_notes (insn);
13519 gcc_assert (m->fs.fp_valid);
13520 m->fs.sp_valid = true;
13521 m->fs.sp_realigned = false;
13522 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13523 m->fs.fp_valid = false;
13525 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13527 m->fs.cfa_reg = stack_pointer_rtx;
13528 m->fs.cfa_offset = m->fs.sp_offset;
13530 add_reg_note (insn, REG_CFA_DEF_CFA,
13531 plus_constant (Pmode, stack_pointer_rtx,
13532 m->fs.sp_offset));
13533 RTX_FRAME_RELATED_P (insn) = 1;
13535 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13536 m->fs.fp_offset);
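/* Editorial illustration: LEAVE is equivalent to
	mov	%rbp, %rsp
	pop	%rbp
   so afterwards the stack pointer sits one word above the frame pointer
   save slot, matching the m->fs.sp_offset recorded above, and a
   %rbp-based CFA is redefined in terms of %rsp by the attached
   REG_CFA_DEF_CFA note.  */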
13539 /* Emit code to restore saved registers using MOV insns.
13540 First register is restored from CFA - CFA_OFFSET. */
13541 static void
13542 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13543 bool maybe_eh_return)
13545 struct machine_function *m = cfun->machine;
13546 unsigned int regno;
13548 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13549 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13551 rtx reg = gen_rtx_REG (word_mode, regno);
13552 rtx mem;
13553 rtx_insn *insn;
13555 mem = choose_baseaddr (cfa_offset, NULL);
13556 mem = gen_frame_mem (word_mode, mem);
13557 insn = emit_move_insn (reg, mem);
13559 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13561 /* Previously we'd represented the CFA as an expression
13562 like *(%ebp - 8). We've just popped that value from
13563 the stack, which means we need to reset the CFA to
13564 the drap register. This will remain until we restore
13565 the stack pointer. */
13566 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13567 RTX_FRAME_RELATED_P (insn) = 1;
13569 /* This means that the DRAP register is valid for addressing. */
13570 m->fs.drap_valid = true;
13572 else
13573 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13575 cfa_offset -= UNITS_PER_WORD;
13579 /* Emit code to restore saved registers using MOV insns.
13580 First register is restored from CFA - CFA_OFFSET. */
13581 static void
13582 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13583 bool maybe_eh_return)
13585 unsigned int regno;
13587 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13588 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13590 rtx reg = gen_rtx_REG (V4SFmode, regno);
13591 rtx mem;
13592 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
13594 mem = choose_baseaddr (cfa_offset, &align);
13595 mem = gen_rtx_MEM (V4SFmode, mem);
13597 /* The location alignment depends upon the base register. */
13598 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
13599 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
13600 set_mem_align (mem, align);
13601 emit_insn (gen_rtx_SET (reg, mem));
13603 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13605 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13609 static void
13610 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
13611 bool use_call, int style)
13613 struct machine_function *m = cfun->machine;
13614 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13615 + m->call_ms2sysv_extra_regs;
13616 rtvec v;
13617 unsigned int elems_needed, align, i, vi = 0;
13618 rtx_insn *insn;
13619 rtx sym, tmp;
13620 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
13621 rtx r10 = NULL_RTX;
13622 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13623 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
13624 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
13625 rtx rsi_frame_load = NULL_RTX;
13626 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
13627 enum xlogue_stub stub;
13629 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
13631 /* If using a realigned stack, we should never start with padding. */
13632 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
13634 /* Setup RSI as the stub's base pointer. */
13635 align = GET_MODE_ALIGNMENT (V4SFmode);
13636 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
13637 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13639 emit_insn (gen_rtx_SET (rsi, tmp));
13641 /* Get a symbol for the stub. */
13642 if (frame_pointer_needed)
13643 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
13644 : XLOGUE_STUB_RESTORE_HFP_TAIL;
13645 else
13646 stub = use_call ? XLOGUE_STUB_RESTORE
13647 : XLOGUE_STUB_RESTORE_TAIL;
13648 sym = xlogue.get_stub_rtx (stub);
13650 elems_needed = ncregs;
13651 if (use_call)
13652 elems_needed += 1;
13653 else
13654 elems_needed += frame_pointer_needed ? 5 : 3;
13655 v = rtvec_alloc (elems_needed);
13657 /* We call the epilogue stub when we need to pop incoming args or when
13658 a sibling call will be the tail call. Otherwise, we emit a jmp to the
13659 epilogue stub and that jmp is the tail call. */
13660 if (use_call)
13661 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13662 else
13664 RTVEC_ELT (v, vi++) = ret_rtx;
13665 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13666 if (frame_pointer_needed)
13668 rtx rbp = gen_rtx_REG (DImode, BP_REG);
13669 gcc_assert (m->fs.fp_valid);
13670 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
13672 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
13673 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
13674 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
13675 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
13676 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
13678 else
13680 /* If no hard frame pointer, we set R10 to the SP restore value. */
13681 gcc_assert (!m->fs.fp_valid);
13682 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13683 gcc_assert (m->fs.sp_valid);
13685 r10 = gen_rtx_REG (DImode, R10_REG);
13686 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
13687 emit_insn (gen_rtx_SET (r10, tmp));
13689 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
13693 /* Generate frame load insns and restore notes. */
13694 for (i = 0; i < ncregs; ++i)
13696 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13697 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
13698 rtx reg, frame_load;
13700 reg = gen_rtx_REG (mode, r.regno);
13701 frame_load = gen_frame_load (reg, rsi, r.offset);
13703 /* Save RSI frame load insn & note to add last. */
13704 if (r.regno == SI_REG)
13706 gcc_assert (!rsi_frame_load);
13707 rsi_frame_load = frame_load;
13708 rsi_restore_offset = r.offset;
13710 else
13712 RTVEC_ELT (v, vi++) = frame_load;
13713 ix86_add_cfa_restore_note (NULL, reg, r.offset);
13717 /* Add RSI frame load & restore note at the end. */
13718 gcc_assert (rsi_frame_load);
13719 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
13720 RTVEC_ELT (v, vi++) = rsi_frame_load;
13721 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
13722 rsi_restore_offset);
13724 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
13725 if (!use_call && !frame_pointer_needed)
13727 gcc_assert (m->fs.sp_valid);
13728 gcc_assert (!m->fs.sp_realigned);
13730 /* At this point, R10 should point to frame.stack_realign_offset. */
13731 if (m->fs.cfa_reg == stack_pointer_rtx)
13732 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
13733 m->fs.sp_offset = frame.stack_realign_offset;
13736 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
13737 tmp = gen_rtx_PARALLEL (VOIDmode, v);
13738 if (use_call)
13739 insn = emit_insn (tmp);
13740 else
13742 insn = emit_jump_insn (tmp);
13743 JUMP_LABEL (insn) = ret_rtx;
13745 if (frame_pointer_needed)
13746 ix86_emit_leave (insn);
13747 else
13749 /* Need CFA adjust note. */
13750 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
13751 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
13755 RTX_FRAME_RELATED_P (insn) = true;
13756 ix86_add_queued_cfa_restore_notes (insn);
13758 /* If we're not doing a tail-call, we need to adjust the stack. */
13759 if (use_call && m->fs.sp_valid)
13761 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
13762 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13763 GEN_INT (dealloc), style,
13764 m->fs.cfa_reg == stack_pointer_rtx);
13768 /* Restore function stack, frame, and registers. */
13770 void
13771 ix86_expand_epilogue (int style)
13773 struct machine_function *m = cfun->machine;
13774 struct machine_frame_state frame_state_save = m->fs;
13775 struct ix86_frame frame;
13776 bool restore_regs_via_mov;
13777 bool using_drap;
13778 bool restore_stub_is_tail = false;
13780 if (ix86_function_naked (current_function_decl))
13782 /* The program should not reach this point. */
13783 emit_insn (gen_ud2 ());
13784 return;
13787 ix86_finalize_stack_frame_flags ();
13788 frame = m->frame;
13790 m->fs.sp_realigned = stack_realign_fp;
13791 m->fs.sp_valid = stack_realign_fp
13792 || !frame_pointer_needed
13793 || crtl->sp_is_unchanging;
13794 gcc_assert (!m->fs.sp_valid
13795 || m->fs.sp_offset == frame.stack_pointer_offset);
13797 /* The FP must be valid if the frame pointer is present. */
13798 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13799 gcc_assert (!m->fs.fp_valid
13800 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13802 /* We must have *some* valid pointer to the stack frame. */
13803 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13805 /* The DRAP is never valid at this point. */
13806 gcc_assert (!m->fs.drap_valid);
13808 /* See the comment about red zone and frame
13809 pointer usage in ix86_expand_prologue. */
13810 if (frame_pointer_needed && frame.red_zone_size)
13811 emit_insn (gen_memory_blockage ());
13813 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13814 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13816 /* Determine the CFA offset of the end of the red-zone. */
13817 m->fs.red_zone_offset = 0;
13818 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13820 /* The red-zone begins below return address and error code in
13821 exception handler. */
13822 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
13824 /* When the register save area is in the aligned portion of
13825 the stack, determine the maximum runtime displacement that
13826 matches up with the aligned frame. */
13827 if (stack_realign_drap)
13828 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13829 + UNITS_PER_WORD);
13832 /* Special care must be taken for the normal return case of a function
13833 using eh_return: the eax and edx registers are marked as saved, but
13834 not restored along this path. Adjust the save location to match. */
13835 if (crtl->calls_eh_return && style != 2)
13836 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
13838 /* EH_RETURN requires the use of moves to function properly. */
13839 if (crtl->calls_eh_return)
13840 restore_regs_via_mov = true;
13841 /* SEH requires the use of pops to identify the epilogue. */
13842 else if (TARGET_SEH)
13843 restore_regs_via_mov = false;
13844 /* If we're only restoring one register and sp cannot be used then
13845 use a move instruction to restore the register since it's
13846 less work than reloading sp and popping the register. */
13847 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
13848 restore_regs_via_mov = true;
13849 else if (TARGET_EPILOGUE_USING_MOVE
13850 && cfun->machine->use_fast_prologue_epilogue
13851 && (frame.nregs > 1
13852 || m->fs.sp_offset != frame.reg_save_offset))
13853 restore_regs_via_mov = true;
13854 else if (frame_pointer_needed
13855 && !frame.nregs
13856 && m->fs.sp_offset != frame.reg_save_offset)
13857 restore_regs_via_mov = true;
13858 else if (frame_pointer_needed
13859 && TARGET_USE_LEAVE
13860 && cfun->machine->use_fast_prologue_epilogue
13861 && frame.nregs == 1)
13862 restore_regs_via_mov = true;
13863 else
13864 restore_regs_via_mov = false;
13866 if (restore_regs_via_mov || frame.nsseregs)
13868 /* Ensure that the entire register save area is addressable via
13869 the stack pointer, if we will restore SSE regs via sp. */
13870 if (TARGET_64BIT
13871 && m->fs.sp_offset > 0x7fffffff
13872 && sp_valid_at (frame.stack_realign_offset + 1)
13873 && (frame.nsseregs + frame.nregs) != 0)
13875 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13876 GEN_INT (m->fs.sp_offset
13877 - frame.sse_reg_save_offset),
13878 style,
13879 m->fs.cfa_reg == stack_pointer_rtx);
13883 /* If there are any SSE registers to restore, then we have to do it
13884 via moves, since there's obviously no pop for SSE regs. */
13885 if (frame.nsseregs)
13886 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13887 style == 2);
13889 if (m->call_ms2sysv)
13891 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
13893 /* We cannot use a tail-call for the stub if:
13894 1. We have to pop incoming args,
13895 2. We have additional int regs to restore, or
13896 3. A sibling call will be the tail-call, or
13897 4. We are emitting an eh_return_internal epilogue.
13899 TODO: Item 4 has not yet been tested!
13901 If any of the above are true, we will call the stub rather than
13902 jump to it. */
13903 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
13904 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
13907 /* If using an out-of-line stub that is a tail-call, then... */
13908 if (m->call_ms2sysv && restore_stub_is_tail)
13910 /* TODO: paranoid tests. (remove eventually) */
13911 gcc_assert (m->fs.sp_valid);
13912 gcc_assert (!m->fs.sp_realigned);
13913 gcc_assert (!m->fs.fp_valid);
13914 gcc_assert (!m->fs.realigned);
13915 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
13916 gcc_assert (!crtl->drap_reg);
13917 gcc_assert (!frame.nregs);
13919 else if (restore_regs_via_mov)
13921 rtx t;
13923 if (frame.nregs)
13924 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13926 /* eh_return epilogues need %ecx added to the stack pointer. */
13927 if (style == 2)
13929 rtx sa = EH_RETURN_STACKADJ_RTX;
13930 rtx_insn *insn;
13932 /* %ecx can't be used for both DRAP register and eh_return. */
13933 if (crtl->drap_reg)
13934 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
13936 /* regparm nested functions don't work with eh_return. */
13937 gcc_assert (!ix86_static_chain_on_stack);
13939 if (frame_pointer_needed)
13941 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13942 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13943 emit_insn (gen_rtx_SET (sa, t));
13945 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13946 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13948 /* Note that we use SA as a temporary CFA, as the return
13949 address is at the proper place relative to it. We
13950 pretend this happens at the FP restore insn because
13951 prior to this insn the FP would be stored at the wrong
13952 offset relative to SA, and after this insn we have no
13953 other reasonable register to use for the CFA. We don't
13954 bother resetting the CFA to the SP for the duration of
13955 the return insn. */
13956 add_reg_note (insn, REG_CFA_DEF_CFA,
13957 plus_constant (Pmode, sa, UNITS_PER_WORD));
13958 ix86_add_queued_cfa_restore_notes (insn);
13959 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13960 RTX_FRAME_RELATED_P (insn) = 1;
13962 m->fs.cfa_reg = sa;
13963 m->fs.cfa_offset = UNITS_PER_WORD;
13964 m->fs.fp_valid = false;
13966 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13967 const0_rtx, style, false);
13969 else
13971 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13972 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13973 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13974 ix86_add_queued_cfa_restore_notes (insn);
13976 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13977 if (m->fs.cfa_offset != UNITS_PER_WORD)
13979 m->fs.cfa_offset = UNITS_PER_WORD;
13980 add_reg_note (insn, REG_CFA_DEF_CFA,
13981 plus_constant (Pmode, stack_pointer_rtx,
13982 UNITS_PER_WORD));
13983 RTX_FRAME_RELATED_P (insn) = 1;
13986 m->fs.sp_offset = UNITS_PER_WORD;
13987 m->fs.sp_valid = true;
13988 m->fs.sp_realigned = false;
13991 else
13993 /* SEH requires that the function end with (1) a stack adjustment
13994 if necessary, (2) a sequence of pops, and (3) a return or
13995 jump instruction. Prevent insns from the function body from
13996 being scheduled into this sequence. */
13997 if (TARGET_SEH)
13999 /* Prevent a catch region from being adjacent to the standard
14000 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
14001 several other flags that would be interesting to test are
14002 set up yet. */
14003 if (flag_non_call_exceptions)
14004 emit_insn (gen_nops (const1_rtx));
14005 else
14006 emit_insn (gen_blockage ());
14009 /* First step is to deallocate the stack frame so that we can
14010 pop the registers. If the stack pointer was realigned, it needs
14011 to be restored now. Also do it on SEH target for very large
14012 frame as the emitted instructions aren't allowed by the ABI
14013 in epilogues. */
14014 if (!m->fs.sp_valid || m->fs.sp_realigned
14015 || (TARGET_SEH
14016 && (m->fs.sp_offset - frame.reg_save_offset
14017 >= SEH_MAX_FRAME_SIZE)))
14019 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14020 GEN_INT (m->fs.fp_offset
14021 - frame.reg_save_offset),
14022 style, false);
14024 else if (m->fs.sp_offset != frame.reg_save_offset)
14026 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14027 GEN_INT (m->fs.sp_offset
14028 - frame.reg_save_offset),
14029 style,
14030 m->fs.cfa_reg == stack_pointer_rtx);
14033 ix86_emit_restore_regs_using_pop ();
14036 /* If we used a frame pointer and haven't already got rid of it,
14037 then do so now. */
14038 if (m->fs.fp_valid)
14040 /* If the stack pointer is valid and pointing at the frame
14041 pointer store address, then we only need a pop. */
14042 if (sp_valid_at (frame.hfp_save_offset)
14043 && m->fs.sp_offset == frame.hfp_save_offset)
14044 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14045 /* Leave results in shorter dependency chains on CPUs that are
14046 able to grok it fast. */
14047 else if (TARGET_USE_LEAVE
14048 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14049 || !cfun->machine->use_fast_prologue_epilogue)
14050 ix86_emit_leave (NULL);
14051 else
14053 pro_epilogue_adjust_stack (stack_pointer_rtx,
14054 hard_frame_pointer_rtx,
14055 const0_rtx, style, !using_drap);
14056 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14060 if (using_drap)
14062 int param_ptr_offset = UNITS_PER_WORD;
14063 rtx_insn *insn;
14065 gcc_assert (stack_realign_drap);
14067 if (ix86_static_chain_on_stack)
14068 param_ptr_offset += UNITS_PER_WORD;
14069 if (!call_used_regs[REGNO (crtl->drap_reg)])
14070 param_ptr_offset += UNITS_PER_WORD;
14072 insn = emit_insn (gen_rtx_SET
14073 (stack_pointer_rtx,
14074 gen_rtx_PLUS (Pmode,
14075 crtl->drap_reg,
14076 GEN_INT (-param_ptr_offset))));
14077 m->fs.cfa_reg = stack_pointer_rtx;
14078 m->fs.cfa_offset = param_ptr_offset;
14079 m->fs.sp_offset = param_ptr_offset;
14080 m->fs.realigned = false;
14082 add_reg_note (insn, REG_CFA_DEF_CFA,
14083 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14084 GEN_INT (param_ptr_offset)));
14085 RTX_FRAME_RELATED_P (insn) = 1;
14087 if (!call_used_regs[REGNO (crtl->drap_reg)])
14088 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14091 /* At this point the stack pointer must be valid, and we must have
14092 restored all of the registers. We may not have deallocated the
14093 entire stack frame. We've delayed this until now because it may
14094 be possible to merge the local stack deallocation with the
14095 deallocation forced by ix86_static_chain_on_stack. */
14096 gcc_assert (m->fs.sp_valid);
14097 gcc_assert (!m->fs.sp_realigned);
14098 gcc_assert (!m->fs.fp_valid);
14099 gcc_assert (!m->fs.realigned);
14100 if (m->fs.sp_offset != UNITS_PER_WORD)
14102 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14103 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14104 style, true);
14106 else
14107 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14109 /* Sibcall epilogues don't want a return instruction. */
14110 if (style == 0)
14112 m->fs = frame_state_save;
14113 return;
14116 if (cfun->machine->func_type != TYPE_NORMAL)
14117 emit_jump_insn (gen_interrupt_return ());
14118 else if (crtl->args.pops_args && crtl->args.size)
14120 rtx popc = GEN_INT (crtl->args.pops_args);
14122 /* i386 can only pop 64K bytes. If asked to pop more, pop return
14123 address, do explicit add, and jump indirectly to the caller. */
14125 if (crtl->args.pops_args >= 65536)
14127 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14128 rtx_insn *insn;
14130 /* There is no "pascal" calling convention in any 64bit ABI. */
14131 gcc_assert (!TARGET_64BIT);
14133 insn = emit_insn (gen_pop (ecx));
14134 m->fs.cfa_offset -= UNITS_PER_WORD;
14135 m->fs.sp_offset -= UNITS_PER_WORD;
14137 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14138 x = gen_rtx_SET (stack_pointer_rtx, x);
14139 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14140 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14141 RTX_FRAME_RELATED_P (insn) = 1;
14143 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14144 popc, -1, true);
14145 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14147 else
14148 emit_jump_insn (gen_simple_return_pop_internal (popc));
14150 else if (!m->call_ms2sysv || !restore_stub_is_tail)
14151 emit_jump_insn (gen_simple_return_internal ());
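/* Editorial illustration of the pop paths above: a 32-bit stdcall-style
   function popping 12 bytes of arguments normally returns with
   "ret $12", while the (unusual) pops_args >= 64K case instead emits
	pop	%ecx		# return address
	add	$N, %esp	# drop the arguments
	jmp	*%ecx
   because the immediate of RET is only 16 bits wide.  */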
14153 /* Restore the state back to the state from the prologue,
14154 so that it's correct for the next epilogue. */
14155 m->fs = frame_state_save;
14158 /* Reset from the function's potential modifications. */
14160 static void
14161 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14163 if (pic_offset_table_rtx
14164 && !ix86_use_pseudo_pic_reg ())
14165 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14167 if (TARGET_MACHO)
14169 rtx_insn *insn = get_last_insn ();
14170 rtx_insn *deleted_debug_label = NULL;
14172 /* Mach-O doesn't support labels at the end of objects, so if
14173 it looks like we might want one, take special action.
14174 First, collect any sequence of deleted debug labels. */
14175 while (insn
14176 && NOTE_P (insn)
14177 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14179 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
14180 notes only, instead set their CODE_LABEL_NUMBER to -1,
14181 otherwise there would be code generation differences
14182 in between -g and -g0. */
14183 if (NOTE_P (insn) && NOTE_KIND (insn)
14184 == NOTE_INSN_DELETED_DEBUG_LABEL)
14185 deleted_debug_label = insn;
14186 insn = PREV_INSN (insn);
14189 /* If we have:
14190 label:
14191 barrier
14192 then this needs to be detected, so skip past the barrier. */
14194 if (insn && BARRIER_P (insn))
14195 insn = PREV_INSN (insn);
14197 /* Up to now we've only seen notes or barriers. */
14198 if (insn)
14200 if (LABEL_P (insn)
14201 || (NOTE_P (insn)
14202 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14203 /* Trailing label. */
14204 fputs ("\tnop\n", file);
14205 else if (cfun && ! cfun->is_thunk)
14207 /* See if we have a completely empty function body, skipping
14208 the special case of the picbase thunk emitted as asm. */
14209 while (insn && ! INSN_P (insn))
14210 insn = PREV_INSN (insn);
14211 /* If we don't find any insns, we've got an empty function body;
14212 i.e. completely empty - without a return or branch. This is
14213 taken as the case where a function body has been removed
14214 because it contains an inline __builtin_unreachable(). GCC
14215 declares that reaching __builtin_unreachable() means UB so
14216 we're not obliged to do anything special; however, we want
14217 non-zero-sized function bodies. To meet this, and help the
14218 user out, let's trap the case. */
14219 if (insn == NULL)
14220 fputs ("\tud2\n", file);
14223 else if (deleted_debug_label)
14224 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14225 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14226 CODE_LABEL_NUMBER (insn) = -1;
14230 /* Return a scratch register to use in the split stack prologue. The
14231 split stack prologue is used for -fsplit-stack. It is the first
14232 instructions in the function, even before the regular prologue.
14233 The scratch register can be any caller-saved register which is not
14234 used for parameters or for the static chain. */
14236 static unsigned int
14237 split_stack_prologue_scratch_regno (void)
14239 if (TARGET_64BIT)
14240 return R11_REG;
14241 else
14243 bool is_fastcall, is_thiscall;
14244 int regparm;
14246 is_fastcall = (lookup_attribute ("fastcall",
14247 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14248 != NULL);
14249 is_thiscall = (lookup_attribute ("thiscall",
14250 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14251 != NULL);
14252 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14254 if (is_fastcall)
14256 if (DECL_STATIC_CHAIN (cfun->decl))
14258 sorry ("-fsplit-stack does not support fastcall with "
14259 "nested function");
14260 return INVALID_REGNUM;
14262 return AX_REG;
14264 else if (is_thiscall)
14266 if (!DECL_STATIC_CHAIN (cfun->decl))
14267 return DX_REG;
14268 return AX_REG;
14270 else if (regparm < 3)
14272 if (!DECL_STATIC_CHAIN (cfun->decl))
14273 return CX_REG;
14274 else
14276 if (regparm >= 2)
14278 sorry ("-fsplit-stack does not support 2 register "
14279 "parameters for a nested function");
14280 return INVALID_REGNUM;
14282 return DX_REG;
14285 else
14287 /* FIXME: We could make this work by pushing a register
14288 around the addition and comparison. */
14289 sorry ("-fsplit-stack does not support 3 register parameters");
14290 return INVALID_REGNUM;
14295 /* A SYMBOL_REF for the function which allocates new stackspace for
14296 -fsplit-stack. */
14298 static GTY(()) rtx split_stack_fn;
14300 /* A SYMBOL_REF for the more stack function when using the large
14301 model. */
14303 static GTY(()) rtx split_stack_fn_large;
14305 /* Return location of the stack guard value in the TLS block. */
14308 ix86_split_stack_guard (void)
14310 int offset;
14311 addr_space_t as = DEFAULT_TLS_SEG_REG;
14312 rtx r;
14314 gcc_assert (flag_split_stack);
14316 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14317 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14318 #else
14319 gcc_unreachable ();
14320 #endif
14322 r = GEN_INT (offset);
14323 r = gen_const_mem (Pmode, r);
14324 set_mem_addr_space (r, as);
14326 return r;
14329 /* Handle -fsplit-stack. These are the first instructions in the
14330 function, even before the regular prologue. */
14332 void
14333 ix86_expand_split_stack_prologue (void)
14335 HOST_WIDE_INT allocate;
14336 unsigned HOST_WIDE_INT args_size;
14337 rtx_code_label *label;
14338 rtx limit, current, allocate_rtx, call_insn, call_fusage;
14339 rtx scratch_reg = NULL_RTX;
14340 rtx_code_label *varargs_label = NULL;
14341 rtx fn;
14343 gcc_assert (flag_split_stack && reload_completed);
14345 ix86_finalize_stack_frame_flags ();
14346 struct ix86_frame &frame = cfun->machine->frame;
14347 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
14349 /* This is the label we will branch to if we have enough stack
14350 space. We expect the basic block reordering pass to reverse this
14351 branch if optimizing, so that we branch in the unlikely case. */
14352 label = gen_label_rtx ();
14354 /* We need to compare the stack pointer minus the frame size with
14355 the stack boundary in the TCB. The stack boundary always gives
14356 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
14357 can compare directly. Otherwise we need to do an addition. */
14359 limit = ix86_split_stack_guard ();
14361 if (allocate < SPLIT_STACK_AVAILABLE)
14362 current = stack_pointer_rtx;
14363 else
14365 unsigned int scratch_regno;
14366 rtx offset;
14368 /* We need a scratch register to hold the stack pointer minus
14369 the required frame size. Since this is the very start of the
14370 function, the scratch register can be any caller-saved
14371 register which is not used for parameters. */
14372 offset = GEN_INT (- allocate);
14373 scratch_regno = split_stack_prologue_scratch_regno ();
14374 if (scratch_regno == INVALID_REGNUM)
14375 return;
14376 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14377 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
14379 /* We don't use ix86_gen_add3 in this case because it will
14380 want to split to lea, but when not optimizing the insn
14381 will not be split after this point. */
14382 emit_insn (gen_rtx_SET (scratch_reg,
14383 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14384 offset)));
14386 else
14388 emit_move_insn (scratch_reg, offset);
14389 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
14390 stack_pointer_rtx));
14392 current = scratch_reg;
14395 ix86_expand_branch (GEU, current, limit, label);
14396 rtx_insn *jump_insn = get_last_insn ();
14397 JUMP_LABEL (jump_insn) = label;
14399 /* Mark the jump as very likely to be taken. */
14400 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
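/* Editorial sketch: for a frame below SPLIT_STACK_AVAILABLE the check
   on x86-64 GNU/Linux compares the stack pointer directly,
	cmp	%fs:0x70, %rsp		# TCB split-stack guard slot
	jae	.Lenough
   while bigger frames first compute sp - frame into the %r11 scratch
   register and compare that.  The 0x70 offset is the 64-bit value of
   TARGET_THREAD_SPLIT_STACK_OFFSET and is shown only for illustration.  */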
14402 if (split_stack_fn == NULL_RTX)
14404 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
14405 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
14407 fn = split_stack_fn;
14409 /* Get more stack space. We pass in the desired stack space and the
14410 size of the arguments to copy to the new stack. In 32-bit mode
14411 we push the parameters; __morestack will return on a new stack
14412 anyhow. In 64-bit mode we pass the parameters in r10 and
14413 r11. */
14414 allocate_rtx = GEN_INT (allocate);
14415 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
14416 call_fusage = NULL_RTX;
14417 rtx pop = NULL_RTX;
14418 if (TARGET_64BIT)
14420 rtx reg10, reg11;
14422 reg10 = gen_rtx_REG (Pmode, R10_REG);
14423 reg11 = gen_rtx_REG (Pmode, R11_REG);
14425 /* If this function uses a static chain, it will be in %r10.
14426 Preserve it across the call to __morestack. */
14427 if (DECL_STATIC_CHAIN (cfun->decl))
14429 rtx rax;
14431 rax = gen_rtx_REG (word_mode, AX_REG);
14432 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
14433 use_reg (&call_fusage, rax);
14436 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
14437 && !TARGET_PECOFF)
14439 HOST_WIDE_INT argval;
14441 gcc_assert (Pmode == DImode);
14442 /* When using the large model we need to load the address
14443 into a register, and we've run out of registers. So we
14444 switch to a different calling convention, and we call a
14445 different function: __morestack_large. We pass the
14446 argument size in the upper 32 bits of r10 and pass the
14447 frame size in the lower 32 bits. */
14448 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
14449 gcc_assert ((args_size & 0xffffffff) == args_size);
14451 if (split_stack_fn_large == NULL_RTX)
14453 split_stack_fn_large =
14454 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
14455 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
14457 if (ix86_cmodel == CM_LARGE_PIC)
14459 rtx_code_label *label;
14460 rtx x;
14462 label = gen_label_rtx ();
14463 emit_label (label);
14464 LABEL_PRESERVE_P (label) = 1;
14465 emit_insn (gen_set_rip_rex64 (reg10, label));
14466 emit_insn (gen_set_got_offset_rex64 (reg11, label));
14467 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
14468 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
14469 UNSPEC_GOT);
14470 x = gen_rtx_CONST (Pmode, x);
14471 emit_move_insn (reg11, x);
14472 x = gen_rtx_PLUS (Pmode, reg10, reg11);
14473 x = gen_const_mem (Pmode, x);
14474 emit_move_insn (reg11, x);
14476 else
14477 emit_move_insn (reg11, split_stack_fn_large);
14479 fn = reg11;
14481 argval = ((args_size << 16) << 16) + allocate;
14482 emit_move_insn (reg10, GEN_INT (argval));
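/* Worked example (editorial): with args_size == 0x20 and allocate ==
   0x1000 the packing above gives
	argval = (0x20 << 32) + 0x1000 = 0x0000002000001000,
   so __morestack_large_model sees the argument-copy size in the upper
   32 bits of %r10 and the requested frame size in the lower 32 bits,
   as described in the comment above.  */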
14484 else
14486 emit_move_insn (reg10, allocate_rtx);
14487 emit_move_insn (reg11, GEN_INT (args_size));
14488 use_reg (&call_fusage, reg11);
14491 use_reg (&call_fusage, reg10);
14493 else
14495 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
14496 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
14497 insn = emit_insn (gen_push (allocate_rtx));
14498 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
14499 pop = GEN_INT (2 * UNITS_PER_WORD);
14501 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
14502 GEN_INT (UNITS_PER_WORD), constm1_rtx,
14503 pop, false);
14504 add_function_usage_to (call_insn, call_fusage);
14505 if (!TARGET_64BIT)
14506 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
14507 /* Indicate that this function can't jump to non-local gotos. */
14508 make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
14510 /* In order to make call/return prediction work right, we now need
14511 to execute a return instruction. See
14512 libgcc/config/i386/morestack.S for the details on how this works.
14514 For flow purposes gcc must not see this as a return
14515 instruction--we need control flow to continue at the subsequent
14516 label. Therefore, we use an unspec. */
14517 gcc_assert (crtl->args.pops_args < 65536);
14518 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
14520 /* If we are in 64-bit mode and this function uses a static chain,
14521 we saved %r10 in %rax before calling _morestack. */
14522 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
14523 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14524 gen_rtx_REG (word_mode, AX_REG));
14526 /* If this function calls va_start, we need to store a pointer to
14527 the arguments on the old stack, because they may not have been
14528 all copied to the new stack. At this point the old stack can be
14529 found at the frame pointer value used by __morestack, because
14530 __morestack has set that up before calling back to us. Here we
14531 store that pointer in a scratch register, and in
14532 ix86_expand_prologue we store the scratch register in a stack
14533 slot. */
14534 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14536 unsigned int scratch_regno;
14537 rtx frame_reg;
14538 int words;
14540 scratch_regno = split_stack_prologue_scratch_regno ();
14541 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14542 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14544 /* 64-bit:
14545 fp -> old fp value
14546 return address within this function
14547 return address of caller of this function
14548 stack arguments
14549 So we add three words to get to the stack arguments.
14551 32-bit:
14552 fp -> old fp value
14553 return address within this function
14554 first argument to __morestack
14555 second argument to __morestack
14556 return address of caller of this function
14557 stack arguments
14558 So we add five words to get to the stack arguments.
14560 words = TARGET_64BIT ? 3 : 5;
14561 emit_insn (gen_rtx_SET (scratch_reg,
14562 gen_rtx_PLUS (Pmode, frame_reg,
14563 GEN_INT (words * UNITS_PER_WORD))));
14565 varargs_label = gen_label_rtx ();
14566 emit_jump_insn (gen_jump (varargs_label));
14567 JUMP_LABEL (get_last_insn ()) = varargs_label;
14569 emit_barrier ();
14572 emit_label (label);
14573 LABEL_NUSES (label) = 1;
14575 /* If this function calls va_start, we now have to set the scratch
14576 register for the case where we do not call __morestack. In this
14577 case we need to set it based on the stack pointer. */
14578 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14580 emit_insn (gen_rtx_SET (scratch_reg,
14581 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14582 GEN_INT (UNITS_PER_WORD))));
14584 emit_label (varargs_label);
14585 LABEL_NUSES (varargs_label) = 1;
14589 /* We may have to tell the dataflow pass that the split stack prologue
14590 is initializing a scratch register. */
14592 static void
14593 ix86_live_on_entry (bitmap regs)
14595 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14597 gcc_assert (flag_split_stack);
14598 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14602 /* Extract the parts of an RTL expression that is a valid memory address
14603 for an instruction. Return 0 if the structure of the address is
14604 grossly off. Return -1 if the address contains ASHIFT, so it is not
14605 strictly valid, but still used for computing length of lea instruction. */
14608 ix86_decompose_address (rtx addr, struct ix86_address *out)
14610 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14611 rtx base_reg, index_reg;
14612 HOST_WIDE_INT scale = 1;
14613 rtx scale_rtx = NULL_RTX;
14614 rtx tmp;
14615 int retval = 1;
14616 addr_space_t seg = ADDR_SPACE_GENERIC;
14618 /* Allow zero-extended SImode addresses,
14619 they will be emitted with addr32 prefix. */
14620 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14622 if (GET_CODE (addr) == ZERO_EXTEND
14623 && GET_MODE (XEXP (addr, 0)) == SImode)
14625 addr = XEXP (addr, 0);
14626 if (CONST_INT_P (addr))
14627 return 0;
14629 else if (GET_CODE (addr) == AND
14630 && const_32bit_mask (XEXP (addr, 1), DImode))
14632 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
14633 if (addr == NULL_RTX)
14634 return 0;
14636 if (CONST_INT_P (addr))
14637 return 0;
14641 /* Allow SImode subregs of DImode addresses,
14642 they will be emitted with addr32 prefix. */
14643 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14645 if (SUBREG_P (addr)
14646 && GET_MODE (SUBREG_REG (addr)) == DImode)
14648 addr = SUBREG_REG (addr);
14649 if (CONST_INT_P (addr))
14650 return 0;
14654 if (REG_P (addr))
14655 base = addr;
14656 else if (SUBREG_P (addr))
14658 if (REG_P (SUBREG_REG (addr)))
14659 base = addr;
14660 else
14661 return 0;
14663 else if (GET_CODE (addr) == PLUS)
14665 rtx addends[4], op;
14666 int n = 0, i;
14668 op = addr;
14671 if (n >= 4)
14672 return 0;
14673 addends[n++] = XEXP (op, 1);
14674 op = XEXP (op, 0);
14676 while (GET_CODE (op) == PLUS);
14677 if (n >= 4)
14678 return 0;
14679 addends[n] = op;
14681 for (i = n; i >= 0; --i)
14683 op = addends[i];
14684 switch (GET_CODE (op))
14686 case MULT:
14687 if (index)
14688 return 0;
14689 index = XEXP (op, 0);
14690 scale_rtx = XEXP (op, 1);
14691 break;
14693 case ASHIFT:
14694 if (index)
14695 return 0;
14696 index = XEXP (op, 0);
14697 tmp = XEXP (op, 1);
14698 if (!CONST_INT_P (tmp))
14699 return 0;
14700 scale = INTVAL (tmp);
14701 if ((unsigned HOST_WIDE_INT) scale > 3)
14702 return 0;
14703 scale = 1 << scale;
14704 break;
14706 case ZERO_EXTEND:
14707 op = XEXP (op, 0);
14708 if (GET_CODE (op) != UNSPEC)
14709 return 0;
14710 /* FALLTHRU */
14712 case UNSPEC:
14713 if (XINT (op, 1) == UNSPEC_TP
14714 && TARGET_TLS_DIRECT_SEG_REFS
14715 && seg == ADDR_SPACE_GENERIC)
14716 seg = DEFAULT_TLS_SEG_REG;
14717 else
14718 return 0;
14719 break;
14721 case SUBREG:
14722 if (!REG_P (SUBREG_REG (op)))
14723 return 0;
14724 /* FALLTHRU */
14726 case REG:
14727 if (!base)
14728 base = op;
14729 else if (!index)
14730 index = op;
14731 else
14732 return 0;
14733 break;
14735 case CONST:
14736 case CONST_INT:
14737 case SYMBOL_REF:
14738 case LABEL_REF:
14739 if (disp)
14740 return 0;
14741 disp = op;
14742 break;
14744 default:
14745 return 0;
14749 else if (GET_CODE (addr) == MULT)
14751 index = XEXP (addr, 0); /* index*scale */
14752 scale_rtx = XEXP (addr, 1);
14754 else if (GET_CODE (addr) == ASHIFT)
14756 /* We're called for lea too, which implements ashift on occasion. */
14757 index = XEXP (addr, 0);
14758 tmp = XEXP (addr, 1);
14759 if (!CONST_INT_P (tmp))
14760 return 0;
14761 scale = INTVAL (tmp);
14762 if ((unsigned HOST_WIDE_INT) scale > 3)
14763 return 0;
14764 scale = 1 << scale;
14765 retval = -1;
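/* Editorial note: lea can express the index shift directly, so an
   address like (ashift (reg B) (const_int 3)) is accepted here and
   recorded as index = B, scale = 1 << 3 = 8; the -1 return value flags
   that the form is not a valid memory address by itself but can still
   be used to compute the length of an lea instruction.  */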
14767 else
14768 disp = addr; /* displacement */
14770 if (index)
14772 if (REG_P (index))
14774 else if (SUBREG_P (index)
14775 && REG_P (SUBREG_REG (index)))
14777 else
14778 return 0;
14781 /* Extract the integral value of scale. */
14782 if (scale_rtx)
14784 if (!CONST_INT_P (scale_rtx))
14785 return 0;
14786 scale = INTVAL (scale_rtx);
14789 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14790 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14792 /* Avoid useless 0 displacement. */
14793 if (disp == const0_rtx && (base || index))
14794 disp = NULL_RTX;
14796 /* Allow arg pointer and stack pointer as index if there is no scaling. */
14797 if (base_reg && index_reg && scale == 1
14798 && (REGNO (index_reg) == ARG_POINTER_REGNUM
14799 || REGNO (index_reg) == FRAME_POINTER_REGNUM
14800 || REGNO (index_reg) == SP_REG))
14802 std::swap (base, index);
14803 std::swap (base_reg, index_reg);
14806 /* Special case: %ebp cannot be encoded as a base without a displacement.
14807 Similarly %r13. */
14808 if (!disp && base_reg
14809 && (REGNO (base_reg) == ARG_POINTER_REGNUM
14810 || REGNO (base_reg) == FRAME_POINTER_REGNUM
14811 || REGNO (base_reg) == BP_REG
14812 || REGNO (base_reg) == R13_REG))
14813 disp = const0_rtx;
14815 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14816 Avoid this by transforming to [%esi+0].
14817 Reload calls address legitimization without cfun defined, so we need
14818 to test cfun for being non-NULL. */
14819 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14820 && base_reg && !index_reg && !disp
14821 && REGNO (base_reg) == SI_REG)
14822 disp = const0_rtx;
14824 /* Special case: encode reg+reg instead of reg*2. */
14825 if (!base && index && scale == 2)
14826 base = index, base_reg = index_reg, scale = 1;
14828 /* Special case: scaling cannot be encoded without base or displacement. */
14829 if (!base && !disp && index && scale != 1)
14830 disp = const0_rtx;
14832 out->base = base;
14833 out->index = index;
14834 out->disp = disp;
14835 out->scale = scale;
14836 out->seg = seg;
14838 return retval;
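/* Worked example (editorial): the address
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   decomposes into out->base = A, out->index = B, out->scale = 4 and
   out->disp = 12, i.e. the operand that would be printed as
   12(%rax,%rbx,4) had A and B been assigned %rax and %rbx.  A bare
   (reg A) simply yields base = A with no index or displacement.  */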
14841 /* Return cost of the memory address x.
14842 For i386, it is better to use a complex address than let gcc copy
14843 the address into a reg and make a new pseudo. But not if the address
14844 requires two regs - that would mean more pseudos with longer
14845 lifetimes. */
14846 static int
14847 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14849 struct ix86_address parts;
14850 int cost = 1;
14851 int ok = ix86_decompose_address (x, &parts);
14853 gcc_assert (ok);
14855 if (parts.base && SUBREG_P (parts.base))
14856 parts.base = SUBREG_REG (parts.base);
14857 if (parts.index && SUBREG_P (parts.index))
14858 parts.index = SUBREG_REG (parts.index);
14860 /* Attempt to minimize number of registers in the address by increasing
14861 address cost for each used register. We don't increase address cost
14862 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
14863 is not invariant itself it most likely means that base or index is not
14864 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14865 which is not profitable for x86. */
14866 if (parts.base
14867 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14868 && (current_pass->type == GIMPLE_PASS
14869 || !pic_offset_table_rtx
14870 || !REG_P (parts.base)
14871 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14872 cost++;
14874 if (parts.index
14875 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14876 && (current_pass->type == GIMPLE_PASS
14877 || !pic_offset_table_rtx
14878 || !REG_P (parts.index)
14879 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14880 cost++;
14882 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
14883 since its predecode logic can't detect the length of such instructions
14884 and it degenerates to vector decoding. Increase the cost of such
14885 addresses here. The penalty is at least 2 cycles. It may be worthwhile
14886 to split such addresses or even refuse them altogether.
14888 The following addressing modes are affected:
14889 [base+scale*index]
14890 [scale*index+disp]
14891 [base+index]
14893 The first and last cases may be avoidable by explicitly coding the zero
14894 displacement in the memory address, but I don't have an AMD-K6 machine
14895 handy to check this theory. */
14897 if (TARGET_K6
14898 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14899 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14900 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14901 cost += 10;
14903 return cost;
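/* Editorial sketch (not from the original sources): starting from cost 1,
   each base or index that is a pseudo register adds 1, so an address using
   two pseudos costs 3 while one with a single pseudo base and a constant
   displacement costs 2; the K6 forms listed above get a further +10.  */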
14906 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
14907 this is used to form addresses to local data when -fPIC is in
14908 use. */
14910 static bool
14911 darwin_local_data_pic (rtx disp)
14913 return (GET_CODE (disp) == UNSPEC
14914 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14917 /* True if operand X should be loaded from GOT. */
14919 bool
14920 ix86_force_load_from_GOT_p (rtx x)
14922 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
14923 && !TARGET_PECOFF && !TARGET_MACHO
14924 && !flag_plt && !flag_pic
14925 && ix86_cmodel != CM_LARGE
14926 && GET_CODE (x) == SYMBOL_REF
14927 && SYMBOL_REF_FUNCTION_P (x)
14928 && !SYMBOL_REF_LOCAL_P (x));
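/* Editorial example (not part of the original sources): with -fno-plt and
   without -fpic on x86-64, a call to an external function foo satisfying
   the predicate above is emitted as
       call *foo@GOTPCREL(%rip)
   i.e. the address is loaded from the GOT slot instead of going through a
   PLT entry.  */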
14931 /* Determine if a given RTX is a valid constant. We already know this
14932 satisfies CONSTANT_P. */
14934 static bool
14935 ix86_legitimate_constant_p (machine_mode mode, rtx x)
14937 /* Pointer bounds constants are not valid. */
14938 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14939 return false;
14941 switch (GET_CODE (x))
14943 case CONST:
14944 x = XEXP (x, 0);
14946 if (GET_CODE (x) == PLUS)
14948 if (!CONST_INT_P (XEXP (x, 1)))
14949 return false;
14950 x = XEXP (x, 0);
14953 if (TARGET_MACHO && darwin_local_data_pic (x))
14954 return true;
14956 /* Only some unspecs are valid as "constants". */
14957 if (GET_CODE (x) == UNSPEC)
14958 switch (XINT (x, 1))
14960 case UNSPEC_GOT:
14961 case UNSPEC_GOTOFF:
14962 case UNSPEC_PLTOFF:
14963 return TARGET_64BIT;
14964 case UNSPEC_TPOFF:
14965 case UNSPEC_NTPOFF:
14966 x = XVECEXP (x, 0, 0);
14967 return (GET_CODE (x) == SYMBOL_REF
14968 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14969 case UNSPEC_DTPOFF:
14970 x = XVECEXP (x, 0, 0);
14971 return (GET_CODE (x) == SYMBOL_REF
14972 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14973 default:
14974 return false;
14977 /* We must have drilled down to a symbol. */
14978 if (GET_CODE (x) == LABEL_REF)
14979 return true;
14980 if (GET_CODE (x) != SYMBOL_REF)
14981 return false;
14982 /* FALLTHRU */
14984 case SYMBOL_REF:
14985 /* TLS symbols are never valid. */
14986 if (SYMBOL_REF_TLS_MODEL (x))
14987 return false;
14989 /* DLLIMPORT symbols are never valid. */
14990 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14991 && SYMBOL_REF_DLLIMPORT_P (x))
14992 return false;
14994 #if TARGET_MACHO
14995 /* mdynamic-no-pic */
14996 if (MACHO_DYNAMIC_NO_PIC_P)
14997 return machopic_symbol_defined_p (x);
14998 #endif
15000 /* External function address should be loaded
15001 via the GOT slot to avoid PLT. */
15002 if (ix86_force_load_from_GOT_p (x))
15003 return false;
15005 break;
15007 CASE_CONST_SCALAR_INT:
15008 switch (mode)
15010 case E_TImode:
15011 if (TARGET_64BIT)
15012 return true;
15013 /* FALLTHRU */
15014 case E_OImode:
15015 case E_XImode:
15016 if (!standard_sse_constant_p (x, mode))
15017 return false;
15018 default:
15019 break;
15021 break;
15023 case CONST_VECTOR:
15024 if (!standard_sse_constant_p (x, mode))
15025 return false;
15027 default:
15028 break;
15031 /* Otherwise we handle everything else in the move patterns. */
15032 return true;
15035 /* Determine if it's legal to put X into the constant pool. This
15036 is not possible for the address of thread-local symbols, which
15037 is checked above. */
15039 static bool
15040 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15042 /* We can put any immediate constant in memory. */
15043 switch (GET_CODE (x))
15045 CASE_CONST_ANY:
15046 return false;
15048 default:
15049 break;
15052 return !ix86_legitimate_constant_p (mode, x);
15055 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
15056 otherwise zero. */
15058 static bool
15059 is_imported_p (rtx x)
15061 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15062 || GET_CODE (x) != SYMBOL_REF)
15063 return false;
15065 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15069 /* Nonzero if the constant value X is a legitimate general operand
15070 when generating PIC code. It is given that flag_pic is on and
15071 that X satisfies CONSTANT_P. */
15073 bool
15074 legitimate_pic_operand_p (rtx x)
15076 rtx inner;
15078 switch (GET_CODE (x))
15080 case CONST:
15081 inner = XEXP (x, 0);
15082 if (GET_CODE (inner) == PLUS
15083 && CONST_INT_P (XEXP (inner, 1)))
15084 inner = XEXP (inner, 0);
15086 /* Only some unspecs are valid as "constants". */
15087 if (GET_CODE (inner) == UNSPEC)
15088 switch (XINT (inner, 1))
15090 case UNSPEC_GOT:
15091 case UNSPEC_GOTOFF:
15092 case UNSPEC_PLTOFF:
15093 return TARGET_64BIT;
15094 case UNSPEC_TPOFF:
15095 x = XVECEXP (inner, 0, 0);
15096 return (GET_CODE (x) == SYMBOL_REF
15097 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15098 case UNSPEC_MACHOPIC_OFFSET:
15099 return legitimate_pic_address_disp_p (x);
15100 default:
15101 return false;
15103 /* FALLTHRU */
15105 case SYMBOL_REF:
15106 case LABEL_REF:
15107 return legitimate_pic_address_disp_p (x);
15109 default:
15110 return true;
15114 /* Determine if a given CONST RTX is a valid memory displacement
15115 in PIC mode. */
15117 bool
15118 legitimate_pic_address_disp_p (rtx disp)
15120 bool saw_plus;
15122 /* In 64bit mode we can allow direct addresses of symbols and labels
15123 when they are not dynamic symbols. */
15124 if (TARGET_64BIT)
15126 rtx op0 = disp, op1;
15128 switch (GET_CODE (disp))
15130 case LABEL_REF:
15131 return true;
15133 case CONST:
15134 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15135 break;
15136 op0 = XEXP (XEXP (disp, 0), 0);
15137 op1 = XEXP (XEXP (disp, 0), 1);
15138 if (!CONST_INT_P (op1))
15139 break;
15140 if (GET_CODE (op0) == UNSPEC
15141 && (XINT (op0, 1) == UNSPEC_DTPOFF
15142 || XINT (op0, 1) == UNSPEC_NTPOFF)
15143 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15144 return true;
15145 if (INTVAL (op1) >= 16*1024*1024
15146 || INTVAL (op1) < -16*1024*1024)
15147 break;
15148 if (GET_CODE (op0) == LABEL_REF)
15149 return true;
15150 if (GET_CODE (op0) == CONST
15151 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15152 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15153 return true;
15154 if (GET_CODE (op0) == UNSPEC
15155 && XINT (op0, 1) == UNSPEC_PCREL)
15156 return true;
15157 if (GET_CODE (op0) != SYMBOL_REF)
15158 break;
15159 /* FALLTHRU */
15161 case SYMBOL_REF:
15162 /* TLS references should always be enclosed in UNSPEC.
15163 A dllimported symbol always needs to be resolved. */
15164 if (SYMBOL_REF_TLS_MODEL (op0)
15165 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15166 return false;
15168 if (TARGET_PECOFF)
15170 if (is_imported_p (op0))
15171 return true;
15173 if (SYMBOL_REF_FAR_ADDR_P (op0)
15174 || !SYMBOL_REF_LOCAL_P (op0))
15175 break;
15177 /* Function symbols need to be resolved only for the
15178 large model.
15179 For the small model we don't need to resolve anything
15180 here. */
15181 if ((ix86_cmodel != CM_LARGE_PIC
15182 && SYMBOL_REF_FUNCTION_P (op0))
15183 || ix86_cmodel == CM_SMALL_PIC)
15184 return true;
15185 /* Non-external symbols don't need to be resolved for
15186 the large and medium models. */
15187 if ((ix86_cmodel == CM_LARGE_PIC
15188 || ix86_cmodel == CM_MEDIUM_PIC)
15189 && !SYMBOL_REF_EXTERNAL_P (op0))
15190 return true;
15192 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15193 && (SYMBOL_REF_LOCAL_P (op0)
15194 || (HAVE_LD_PIE_COPYRELOC
15195 && flag_pie
15196 && !SYMBOL_REF_WEAK (op0)
15197 && !SYMBOL_REF_FUNCTION_P (op0)))
15198 && ix86_cmodel != CM_LARGE_PIC)
15199 return true;
15200 break;
15202 default:
15203 break;
15206 if (GET_CODE (disp) != CONST)
15207 return false;
15208 disp = XEXP (disp, 0);
15210 if (TARGET_64BIT)
15212 /* It is unsafe to allow PLUS expressions; this limits the allowed
15213 distance of GOT table references. We should not need these anyway. */
15214 if (GET_CODE (disp) != UNSPEC
15215 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15216 && XINT (disp, 1) != UNSPEC_GOTOFF
15217 && XINT (disp, 1) != UNSPEC_PCREL
15218 && XINT (disp, 1) != UNSPEC_PLTOFF))
15219 return false;
15221 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15222 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15223 return false;
15224 return true;
15227 saw_plus = false;
15228 if (GET_CODE (disp) == PLUS)
15230 if (!CONST_INT_P (XEXP (disp, 1)))
15231 return false;
15232 disp = XEXP (disp, 0);
15233 saw_plus = true;
15236 if (TARGET_MACHO && darwin_local_data_pic (disp))
15237 return true;
15239 if (GET_CODE (disp) != UNSPEC)
15240 return false;
15242 switch (XINT (disp, 1))
15244 case UNSPEC_GOT:
15245 if (saw_plus)
15246 return false;
15247 /* We need to check for both symbols and labels because VxWorks loads
15248 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15249 details. */
15250 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15251 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15252 case UNSPEC_GOTOFF:
15253 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15254 While the ABI also specifies a 32bit relocation, we don't produce it
15255 in the small PIC model at all. */
15256 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15257 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15258 && !TARGET_64BIT)
15259 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15260 return false;
15261 case UNSPEC_GOTTPOFF:
15262 case UNSPEC_GOTNTPOFF:
15263 case UNSPEC_INDNTPOFF:
15264 if (saw_plus)
15265 return false;
15266 disp = XVECEXP (disp, 0, 0);
15267 return (GET_CODE (disp) == SYMBOL_REF
15268 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15269 case UNSPEC_NTPOFF:
15270 disp = XVECEXP (disp, 0, 0);
15271 return (GET_CODE (disp) == SYMBOL_REF
15272 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15273 case UNSPEC_DTPOFF:
15274 disp = XVECEXP (disp, 0, 0);
15275 return (GET_CODE (disp) == SYMBOL_REF
15276 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15279 return false;
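/* Editorial sketch of the displacements accepted above (not from the
   original sources): in 32-bit PIC code (const (unspec [foo] UNSPEC_GOTOFF))
   corresponds to foo@GOTOFF(%ebx) and (const (unspec [foo] UNSPEC_GOT)) to
   foo@GOT(%ebx), while in 64-bit code only GOTPCREL, GOTOFF, PCREL and
   PLTOFF unspecs wrapping a SYMBOL_REF or LABEL_REF are allowed.  */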
15282 /* Determine if OP is a suitable RTX for an address register.
15283 Return the naked register if a register or a register subreg is
15284 found, otherwise return NULL_RTX. */
15286 static rtx
15287 ix86_validate_address_register (rtx op)
15289 machine_mode mode = GET_MODE (op);
15291 /* Only SImode or DImode registers can form the address. */
15292 if (mode != SImode && mode != DImode)
15293 return NULL_RTX;
15295 if (REG_P (op))
15296 return op;
15297 else if (SUBREG_P (op))
15299 rtx reg = SUBREG_REG (op);
15301 if (!REG_P (reg))
15302 return NULL_RTX;
15304 mode = GET_MODE (reg);
15306 /* Don't allow SUBREGs that span more than a word. They can
15307 lead to spill failures when the register is one word out
15308 of a two-word structure. */
15309 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15310 return NULL_RTX;
15312 /* Allow only SUBREGs of non-eliminable hard registers. */
15313 if (register_no_elim_operand (reg, mode))
15314 return reg;
15317 /* Op is not a register. */
15318 return NULL_RTX;
15321 /* Recognizes RTL expressions that are valid memory addresses for an
15322 instruction. The MODE argument is the machine mode for the MEM
15323 expression that wants to use this address.
15325 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
15326 convert common non-canonical forms to canonical form so that they will
15327 be recognized. */
15329 static bool
15330 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
15332 struct ix86_address parts;
15333 rtx base, index, disp;
15334 HOST_WIDE_INT scale;
15335 addr_space_t seg;
15337 if (ix86_decompose_address (addr, &parts) <= 0)
15338 /* Decomposition failed. */
15339 return false;
15341 base = parts.base;
15342 index = parts.index;
15343 disp = parts.disp;
15344 scale = parts.scale;
15345 seg = parts.seg;
15347 /* Validate base register. */
15348 if (base)
15350 rtx reg = ix86_validate_address_register (base);
15352 if (reg == NULL_RTX)
15353 return false;
15355 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
15356 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
15357 /* Base is not valid. */
15358 return false;
15361 /* Validate index register. */
15362 if (index)
15364 rtx reg = ix86_validate_address_register (index);
15366 if (reg == NULL_RTX)
15367 return false;
15369 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
15370 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
15371 /* Index is not valid. */
15372 return false;
15375 /* Index and base should have the same mode. */
15376 if (base && index
15377 && GET_MODE (base) != GET_MODE (index))
15378 return false;
15380 /* Address override works only on the (%reg) part of %fs:(%reg). */
15381 if (seg != ADDR_SPACE_GENERIC
15382 && ((base && GET_MODE (base) != word_mode)
15383 || (index && GET_MODE (index) != word_mode)))
15384 return false;
15386 /* Validate scale factor. */
15387 if (scale != 1)
15389 if (!index)
15390 /* Scale without index. */
15391 return false;
15393 if (scale != 2 && scale != 4 && scale != 8)
15394 /* Scale is not a valid multiplier. */
15395 return false;
15398 /* Validate displacement. */
15399 if (disp)
15401 if (GET_CODE (disp) == CONST
15402 && GET_CODE (XEXP (disp, 0)) == UNSPEC
15403 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
15404 switch (XINT (XEXP (disp, 0), 1))
15406 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
15407 when used. While the ABI also specifies 32bit relocations, we
15408 don't produce them at all and use IP-relative addressing instead.
15409 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
15410 should be loaded via the GOT. */
15411 case UNSPEC_GOT:
15412 if (!TARGET_64BIT
15413 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15414 goto is_legitimate_pic;
15415 /* FALLTHRU */
15416 case UNSPEC_GOTOFF:
15417 gcc_assert (flag_pic);
15418 if (!TARGET_64BIT)
15419 goto is_legitimate_pic;
15421 /* 64bit address unspec. */
15422 return false;
15424 case UNSPEC_GOTPCREL:
15425 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15426 goto is_legitimate_pic;
15427 /* FALLTHRU */
15428 case UNSPEC_PCREL:
15429 gcc_assert (flag_pic);
15430 goto is_legitimate_pic;
15432 case UNSPEC_GOTTPOFF:
15433 case UNSPEC_GOTNTPOFF:
15434 case UNSPEC_INDNTPOFF:
15435 case UNSPEC_NTPOFF:
15436 case UNSPEC_DTPOFF:
15437 break;
15439 default:
15440 /* Invalid address unspec. */
15441 return false;
15444 else if (SYMBOLIC_CONST (disp)
15445 && (flag_pic
15446 || (TARGET_MACHO
15447 #if TARGET_MACHO
15448 && MACHOPIC_INDIRECT
15449 && !machopic_operand_p (disp)
15450 #endif
15454 is_legitimate_pic:
15455 if (TARGET_64BIT && (index || base))
15457 /* foo@dtpoff(%rX) is ok. */
15458 if (GET_CODE (disp) != CONST
15459 || GET_CODE (XEXP (disp, 0)) != PLUS
15460 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
15461 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
15462 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
15463 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
15464 /* Non-constant pic memory reference. */
15465 return false;
15467 else if ((!TARGET_MACHO || flag_pic)
15468 && ! legitimate_pic_address_disp_p (disp))
15469 /* Displacement is an invalid pic construct. */
15470 return false;
15471 #if TARGET_MACHO
15472 else if (MACHO_DYNAMIC_NO_PIC_P
15473 && !ix86_legitimate_constant_p (Pmode, disp))
15474 /* Displacement must be referenced via a non_lazy_pointer. */
15475 return false;
15476 #endif
15478 /* This code used to verify that a symbolic pic displacement
15479 includes the pic_offset_table_rtx register.
15481 While this is a good idea, unfortunately these constructs may
15482 be created by the "adds using lea" optimization for incorrect
15483 code like:
15485 int a;
15486 int foo(int i)
15488 return *(&a+i);
15491 This code is nonsensical, but results in addressing the
15492 GOT table with a pic_offset_table_rtx base. We can't
15493 easily refuse it, since it gets matched by the
15494 "addsi3" pattern, which later gets split to an lea when
15495 the output register differs from the input. While this
15496 could be handled by a separate addsi pattern for this case
15497 that never results in an lea, disabling this test seems to
15498 be the easier and correct fix for the crash. */
15500 else if (GET_CODE (disp) != LABEL_REF
15501 && !CONST_INT_P (disp)
15502 && (GET_CODE (disp) != CONST
15503 || !ix86_legitimate_constant_p (Pmode, disp))
15504 && (GET_CODE (disp) != SYMBOL_REF
15505 || !ix86_legitimate_constant_p (Pmode, disp)))
15506 /* Displacement is not constant. */
15507 return false;
15508 else if (TARGET_64BIT
15509 && !x86_64_immediate_operand (disp, VOIDmode))
15510 /* Displacement is out of range. */
15511 return false;
15512 /* In x32 mode, constant addresses are sign extended to 64bit, so
15513 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
15514 else if (TARGET_X32 && !(index || base)
15515 && CONST_INT_P (disp)
15516 && val_signbit_known_set_p (SImode, INTVAL (disp)))
15517 return false;
15520 /* Everything looks valid. */
15521 return true;
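/* Editorial examples of the checks above (not from the original sources):
   (plus (reg A) (mult (reg B) (const_int 4))), i.e. (%eax,%ebx,4), is
   accepted, whereas a scale of 3, a scale without an index, or base and
   index registers of different modes are all rejected.  */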
15524 /* Determine if a given RTX is a valid constant address. */
15526 bool
15527 constant_address_p (rtx x)
15529 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
15532 /* Return a unique alias set for the GOT. */
15534 static alias_set_type
15535 ix86_GOT_alias_set (void)
15537 static alias_set_type set = -1;
15538 if (set == -1)
15539 set = new_alias_set ();
15540 return set;
15543 /* Return a legitimate reference for ORIG (an address) using the
15544 register REG. If REG is 0, a new pseudo is generated.
15546 There are two types of references that must be handled:
15548 1. Global data references must load the address from the GOT, via
15549 the PIC reg. An insn is emitted to do this load, and the reg is
15550 returned.
15552 2. Static data references, constant pool addresses, and code labels
15553 compute the address as an offset from the GOT, whose base is in
15554 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
15555 differentiate them from global data objects. The returned
15556 address is the PIC reg + an unspec constant.
15558 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
15559 reg also appears in the address. */
15561 static rtx
15562 legitimize_pic_address (rtx orig, rtx reg)
15564 rtx addr = orig;
15565 rtx new_rtx = orig;
15567 #if TARGET_MACHO
15568 if (TARGET_MACHO && !TARGET_64BIT)
15570 if (reg == 0)
15571 reg = gen_reg_rtx (Pmode);
15572 /* Use the generic Mach-O PIC machinery. */
15573 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15575 #endif
15577 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15579 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15580 if (tmp)
15581 return tmp;
15584 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15585 new_rtx = addr;
15586 else if ((!TARGET_64BIT
15587 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
15588 && !TARGET_PECOFF
15589 && gotoff_operand (addr, Pmode))
15591 /* This symbol may be referenced via a displacement
15592 from the PIC base address (@GOTOFF). */
15593 if (GET_CODE (addr) == CONST)
15594 addr = XEXP (addr, 0);
15596 if (GET_CODE (addr) == PLUS)
15598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15599 UNSPEC_GOTOFF);
15600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15602 else
15603 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15605 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15607 if (TARGET_64BIT)
15608 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15610 if (reg != 0)
15612 gcc_assert (REG_P (reg));
15613 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
15614 new_rtx, reg, 1, OPTAB_DIRECT);
15616 else
15617 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15619 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15620 /* We can't use @GOTOFF for text labels
15621 on VxWorks, see gotoff_operand. */
15622 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15624 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15625 if (tmp)
15626 return tmp;
15628 /* For x64 PE-COFF there is no GOT table,
15629 so we use the address directly. */
15630 if (TARGET_64BIT && TARGET_PECOFF)
15632 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15633 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15635 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15637 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
15638 UNSPEC_GOTPCREL);
15639 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15640 new_rtx = gen_const_mem (Pmode, new_rtx);
15641 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15643 else
15645 /* This symbol must be referenced via a load
15646 from the Global Offset Table (@GOT). */
15647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15648 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15649 if (TARGET_64BIT)
15650 new_rtx = force_reg (Pmode, new_rtx);
15651 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15652 new_rtx = gen_const_mem (Pmode, new_rtx);
15653 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15656 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15658 else
15660 if (CONST_INT_P (addr)
15661 && !x86_64_immediate_operand (addr, VOIDmode))
15662 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
15663 else if (GET_CODE (addr) == CONST)
15665 addr = XEXP (addr, 0);
15667 /* We must match the stuff we generated before. Assume the only
15668 unspecs that can get here are ours. Not that we could do
15669 anything with them anyway.... */
15670 if (GET_CODE (addr) == UNSPEC
15671 || (GET_CODE (addr) == PLUS
15672 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15673 return orig;
15674 gcc_assert (GET_CODE (addr) == PLUS);
15677 if (GET_CODE (addr) == PLUS)
15679 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15681 /* Check first to see if this is a constant
15682 offset from a @GOTOFF symbol reference. */
15683 if (!TARGET_PECOFF
15684 && gotoff_operand (op0, Pmode)
15685 && CONST_INT_P (op1))
15687 if (!TARGET_64BIT)
15689 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15690 UNSPEC_GOTOFF);
15691 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15692 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15694 if (reg != 0)
15696 gcc_assert (REG_P (reg));
15697 new_rtx = expand_simple_binop (Pmode, PLUS,
15698 pic_offset_table_rtx,
15699 new_rtx, reg, 1,
15700 OPTAB_DIRECT);
15702 else
15703 new_rtx
15704 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15706 else
15708 if (INTVAL (op1) < -16*1024*1024
15709 || INTVAL (op1) >= 16*1024*1024)
15711 if (!x86_64_immediate_operand (op1, Pmode))
15712 op1 = force_reg (Pmode, op1);
15714 new_rtx
15715 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15719 else
15721 rtx base = legitimize_pic_address (op0, reg);
15722 machine_mode mode = GET_MODE (base);
15723 new_rtx
15724 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15726 if (CONST_INT_P (new_rtx))
15728 if (INTVAL (new_rtx) < -16*1024*1024
15729 || INTVAL (new_rtx) >= 16*1024*1024)
15731 if (!x86_64_immediate_operand (new_rtx, mode))
15732 new_rtx = force_reg (mode, new_rtx);
15734 new_rtx
15735 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15737 else
15738 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15740 else
15742 /* For %rip addressing, we have to use
15743 just disp32, with neither base nor index. */
15744 if (TARGET_64BIT
15745 && (GET_CODE (base) == SYMBOL_REF
15746 || GET_CODE (base) == LABEL_REF))
15747 base = force_reg (mode, base);
15748 if (GET_CODE (new_rtx) == PLUS
15749 && CONSTANT_P (XEXP (new_rtx, 1)))
15751 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15752 new_rtx = XEXP (new_rtx, 1);
15754 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15759 return new_rtx;
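/* Editorial sketch of the transformations above (not from the original
   sources): in 32-bit PIC code a local symbol foo becomes
       (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF)))
   i.e. foo@GOTOFF(%ebx), a global symbol becomes the load
       (mem (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOT))))
   i.e. foo@GOT(%ebx), and on x86-64 small-model PIC the GOT slot is
   addressed directly as foo@GOTPCREL(%rip).  */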
15762 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15764 static rtx
15765 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15767 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15769 if (GET_MODE (tp) != tp_mode)
15771 gcc_assert (GET_MODE (tp) == SImode);
15772 gcc_assert (tp_mode == DImode);
15774 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15777 if (to_reg)
15778 tp = copy_to_mode_reg (tp_mode, tp);
15780 return tp;
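/* Editorial note (an assumption, not from the original sources): UNSPEC_TP
   expands to a read of the thread-pointer base, typically %fs on 64-bit and
   %gs on 32-bit GNU/Linux; the SImode-to-DImode zero extension above covers
   the x32 case where ptr_mode is narrower than Pmode.  */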
15783 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15785 static GTY(()) rtx ix86_tls_symbol;
15787 static rtx
15788 ix86_tls_get_addr (void)
15790 if (!ix86_tls_symbol)
15792 const char *sym
15793 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15794 ? "___tls_get_addr" : "__tls_get_addr");
15796 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15799 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15801 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15802 UNSPEC_PLTOFF);
15803 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15804 gen_rtx_CONST (Pmode, unspec));
15807 return ix86_tls_symbol;
15810 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15812 static GTY(()) rtx ix86_tls_module_base_symbol;
15815 ix86_tls_module_base (void)
15817 if (!ix86_tls_module_base_symbol)
15819 ix86_tls_module_base_symbol
15820 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15822 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15823 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15826 return ix86_tls_module_base_symbol;
15829 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15830 false if we expect this to be used for a memory address and true if
15831 we expect to load the address into a register. */
15833 static rtx
15834 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15836 rtx dest, base, off;
15837 rtx pic = NULL_RTX, tp = NULL_RTX;
15838 machine_mode tp_mode = Pmode;
15839 int type;
15841 /* Fall back to the global dynamic model if the toolchain cannot support
15842 local dynamic. */
15843 if (TARGET_SUN_TLS && !TARGET_64BIT
15844 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15845 && model == TLS_MODEL_LOCAL_DYNAMIC)
15846 model = TLS_MODEL_GLOBAL_DYNAMIC;
15848 switch (model)
15850 case TLS_MODEL_GLOBAL_DYNAMIC:
15851 dest = gen_reg_rtx (Pmode);
15853 if (!TARGET_64BIT)
15855 if (flag_pic && !TARGET_PECOFF)
15856 pic = pic_offset_table_rtx;
15857 else
15859 pic = gen_reg_rtx (Pmode);
15860 emit_insn (gen_set_got (pic));
15864 if (TARGET_GNU2_TLS)
15866 if (TARGET_64BIT)
15867 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15868 else
15869 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15871 tp = get_thread_pointer (Pmode, true);
15872 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15874 if (GET_MODE (x) != Pmode)
15875 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15877 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15879 else
15881 rtx caddr = ix86_tls_get_addr ();
15883 if (TARGET_64BIT)
15885 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15886 rtx_insn *insns;
15888 start_sequence ();
15889 emit_call_insn
15890 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15891 insns = get_insns ();
15892 end_sequence ();
15894 if (GET_MODE (x) != Pmode)
15895 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15897 RTL_CONST_CALL_P (insns) = 1;
15898 emit_libcall_block (insns, dest, rax, x);
15900 else
15901 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15903 break;
15905 case TLS_MODEL_LOCAL_DYNAMIC:
15906 base = gen_reg_rtx (Pmode);
15908 if (!TARGET_64BIT)
15910 if (flag_pic)
15911 pic = pic_offset_table_rtx;
15912 else
15914 pic = gen_reg_rtx (Pmode);
15915 emit_insn (gen_set_got (pic));
15919 if (TARGET_GNU2_TLS)
15921 rtx tmp = ix86_tls_module_base ();
15923 if (TARGET_64BIT)
15924 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15925 else
15926 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15928 tp = get_thread_pointer (Pmode, true);
15929 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15930 gen_rtx_MINUS (Pmode, tmp, tp));
15932 else
15934 rtx caddr = ix86_tls_get_addr ();
15936 if (TARGET_64BIT)
15938 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15939 rtx_insn *insns;
15940 rtx eqv;
15942 start_sequence ();
15943 emit_call_insn
15944 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15945 insns = get_insns ();
15946 end_sequence ();
15948 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15949 share the LD_BASE result with other LD model accesses. */
15950 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15951 UNSPEC_TLS_LD_BASE);
15953 RTL_CONST_CALL_P (insns) = 1;
15954 emit_libcall_block (insns, base, rax, eqv);
15956 else
15957 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
15960 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15961 off = gen_rtx_CONST (Pmode, off);
15963 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15965 if (TARGET_GNU2_TLS)
15967 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15969 if (GET_MODE (x) != Pmode)
15970 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15972 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15974 break;
15976 case TLS_MODEL_INITIAL_EXEC:
15977 if (TARGET_64BIT)
15979 if (TARGET_SUN_TLS && !TARGET_X32)
15981 /* The Sun linker took the AMD64 TLS spec literally
15982 and can only handle %rax as the destination of the
15983 initial-exec code sequence. */
15985 dest = gen_reg_rtx (DImode);
15986 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15987 return dest;
15990 /* Generate DImode references to avoid %fs:(%reg32)
15991 problems and the linker IE->LE relaxation bug. */
15992 tp_mode = DImode;
15993 pic = NULL;
15994 type = UNSPEC_GOTNTPOFF;
15996 else if (flag_pic)
15998 pic = pic_offset_table_rtx;
15999 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16001 else if (!TARGET_ANY_GNU_TLS)
16003 pic = gen_reg_rtx (Pmode);
16004 emit_insn (gen_set_got (pic));
16005 type = UNSPEC_GOTTPOFF;
16007 else
16009 pic = NULL;
16010 type = UNSPEC_INDNTPOFF;
16013 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16014 off = gen_rtx_CONST (tp_mode, off);
16015 if (pic)
16016 off = gen_rtx_PLUS (tp_mode, pic, off);
16017 off = gen_const_mem (tp_mode, off);
16018 set_mem_alias_set (off, ix86_GOT_alias_set ());
16020 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16022 base = get_thread_pointer (tp_mode,
16023 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16024 off = force_reg (tp_mode, off);
16025 dest = gen_rtx_PLUS (tp_mode, base, off);
16026 if (tp_mode != Pmode)
16027 dest = convert_to_mode (Pmode, dest, 1);
16029 else
16031 base = get_thread_pointer (Pmode, true);
16032 dest = gen_reg_rtx (Pmode);
16033 emit_insn (ix86_gen_sub3 (dest, base, off));
16035 break;
16037 case TLS_MODEL_LOCAL_EXEC:
16038 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16039 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16040 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16041 off = gen_rtx_CONST (Pmode, off);
16043 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16045 base = get_thread_pointer (Pmode,
16046 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16047 return gen_rtx_PLUS (Pmode, base, off);
16049 else
16051 base = get_thread_pointer (Pmode, true);
16052 dest = gen_reg_rtx (Pmode);
16053 emit_insn (ix86_gen_sub3 (dest, base, off));
16055 break;
16057 default:
16058 gcc_unreachable ();
16061 return dest;
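/* Editorial sketch (not from the original sources) of typical x86-64
   GNU TLS sequences corresponding to the models handled above:
     initial exec:  movq x@gottpoff(%rip), %rax;  movq %fs:(%rax), %rcx
     local exec:    movq %fs:0, %rax;             movq x@tpoff(%rax), %rcx
   The global- and local-dynamic models instead call __tls_get_addr (see
   ix86_tls_get_addr above) to compute the address at run time.  */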
16064 /* Return true if OP refers to a TLS address. */
16065 bool
16066 ix86_tls_address_pattern_p (rtx op)
16068 subrtx_var_iterator::array_type array;
16069 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16071 rtx op = *iter;
16072 if (MEM_P (op))
16074 rtx *x = &XEXP (op, 0);
16075 while (GET_CODE (*x) == PLUS)
16077 int i;
16078 for (i = 0; i < 2; i++)
16080 rtx u = XEXP (*x, i);
16081 if (GET_CODE (u) == ZERO_EXTEND)
16082 u = XEXP (u, 0);
16083 if (GET_CODE (u) == UNSPEC
16084 && XINT (u, 1) == UNSPEC_TP)
16085 return true;
16087 x = &XEXP (*x, 0);
16090 iter.skip_subrtxes ();
16094 return false;
16097 /* Rewrite *LOC so that it refers to a default TLS address space. */
16098 void
16099 ix86_rewrite_tls_address_1 (rtx *loc)
16101 subrtx_ptr_iterator::array_type array;
16102 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16104 rtx *loc = *iter;
16105 if (MEM_P (*loc))
16107 rtx addr = XEXP (*loc, 0);
16108 rtx *x = &addr;
16109 while (GET_CODE (*x) == PLUS)
16111 int i;
16112 for (i = 0; i < 2; i++)
16114 rtx u = XEXP (*x, i);
16115 if (GET_CODE (u) == ZERO_EXTEND)
16116 u = XEXP (u, 0);
16117 if (GET_CODE (u) == UNSPEC
16118 && XINT (u, 1) == UNSPEC_TP)
16120 addr_space_t as = DEFAULT_TLS_SEG_REG;
16122 *x = XEXP (*x, 1 - i);
16124 *loc = replace_equiv_address_nv (*loc, addr, true);
16125 set_mem_addr_space (*loc, as);
16126 return;
16129 x = &XEXP (*x, 0);
16132 iter.skip_subrtxes ();
16137 /* Rewrite an instruction pattern involving a TLS address
16138 so that it refers to the default TLS address space. */
16140 ix86_rewrite_tls_address (rtx pattern)
16142 pattern = copy_insn (pattern);
16143 ix86_rewrite_tls_address_1 (&pattern);
16144 return pattern;
16147 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16148 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16149 unique refptr-DECL symbol corresponding to symbol DECL. */
16151 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16153 static inline hashval_t hash (tree_map *m) { return m->hash; }
16154 static inline bool
16155 equal (tree_map *a, tree_map *b)
16157 return a->base.from == b->base.from;
16160 static int
16161 keep_cache_entry (tree_map *&m)
16163 return ggc_marked_p (m->base.from);
16167 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16169 static tree
16170 get_dllimport_decl (tree decl, bool beimport)
16172 struct tree_map *h, in;
16173 const char *name;
16174 const char *prefix;
16175 size_t namelen, prefixlen;
16176 char *imp_name;
16177 tree to;
16178 rtx rtl;
16180 if (!dllimport_map)
16181 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16183 in.hash = htab_hash_pointer (decl);
16184 in.base.from = decl;
16185 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16186 h = *loc;
16187 if (h)
16188 return h->to;
16190 *loc = h = ggc_alloc<tree_map> ();
16191 h->hash = in.hash;
16192 h->base.from = decl;
16193 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16194 VAR_DECL, NULL, ptr_type_node);
16195 DECL_ARTIFICIAL (to) = 1;
16196 DECL_IGNORED_P (to) = 1;
16197 DECL_EXTERNAL (to) = 1;
16198 TREE_READONLY (to) = 1;
16200 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16201 name = targetm.strip_name_encoding (name);
16202 if (beimport)
16203 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16204 ? "*__imp_" : "*__imp__";
16205 else
16206 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16207 namelen = strlen (name);
16208 prefixlen = strlen (prefix);
16209 imp_name = (char *) alloca (namelen + prefixlen + 1);
16210 memcpy (imp_name, prefix, prefixlen);
16211 memcpy (imp_name + prefixlen, name, namelen + 1);
16213 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16214 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16215 SET_SYMBOL_REF_DECL (rtl, to);
16216 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16217 if (!beimport)
16219 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16220 #ifdef SUB_TARGET_RECORD_STUB
16221 SUB_TARGET_RECORD_STUB (name);
16222 #endif
16225 rtl = gen_const_mem (Pmode, rtl);
16226 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16228 SET_DECL_RTL (to, rtl);
16229 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16231 return to;
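/* Editorial example (not from the original sources): for a dllimported
   function foo, the decl built above gets the assembler name "__imp_foo"
   (or "__imp__foo" on targets whose user labels carry a leading
   underscore), and its DECL_RTL is (mem (symbol_ref "__imp_foo")), i.e. a
   load of the real address from the import table.  */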
16234 /* Expand SYMBOL into its corresponding far-address symbol.
16235 WANT_REG is true if we require the result be a register. */
16237 static rtx
16238 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16240 tree imp_decl;
16241 rtx x;
16243 gcc_assert (SYMBOL_REF_DECL (symbol));
16244 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16246 x = DECL_RTL (imp_decl);
16247 if (want_reg)
16248 x = force_reg (Pmode, x);
16249 return x;
16252 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16253 true if we require the result be a register. */
16255 static rtx
16256 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16258 tree imp_decl;
16259 rtx x;
16261 gcc_assert (SYMBOL_REF_DECL (symbol));
16262 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16264 x = DECL_RTL (imp_decl);
16265 if (want_reg)
16266 x = force_reg (Pmode, x);
16267 return x;
16270 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
16271 is true if we require the result be a register. */
16273 static rtx
16274 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16276 if (!TARGET_PECOFF)
16277 return NULL_RTX;
16279 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16281 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
16282 return legitimize_dllimport_symbol (addr, inreg);
16283 if (GET_CODE (addr) == CONST
16284 && GET_CODE (XEXP (addr, 0)) == PLUS
16285 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16286 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
16288 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
16289 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16293 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
16294 return NULL_RTX;
16295 if (GET_CODE (addr) == SYMBOL_REF
16296 && !is_imported_p (addr)
16297 && SYMBOL_REF_EXTERNAL_P (addr)
16298 && SYMBOL_REF_DECL (addr))
16299 return legitimize_pe_coff_extern_decl (addr, inreg);
16301 if (GET_CODE (addr) == CONST
16302 && GET_CODE (XEXP (addr, 0)) == PLUS
16303 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16304 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
16305 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
16306 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
16308 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
16309 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16311 return NULL_RTX;
16314 /* Try machine-dependent ways of modifying an illegitimate address
16315 to be legitimate. If we find one, return the new, valid address.
16316 This macro is used in only one place: `memory_address' in explow.c.
16318 OLDX is the address as it was before break_out_memory_refs was called.
16319 In some cases it is useful to look at this to decide what needs to be done.
16321 It is always safe for this macro to do nothing. It exists to recognize
16322 opportunities to optimize the output.
16324 For the 80386, we handle X+REG by loading X into a register R and
16325 using R+REG. R will go in a general reg and indexing will be used.
16326 However, if REG is a broken-out memory address or multiplication,
16327 nothing needs to be done because REG can certainly go in a general reg.
16329 When -fpic is used, special handling is needed for symbolic references.
16330 See comments by legitimize_pic_address in i386.c for details. */
16332 static rtx
16333 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
16335 bool changed = false;
16336 unsigned log;
16338 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
16339 if (log)
16340 return legitimize_tls_address (x, (enum tls_model) log, false);
16341 if (GET_CODE (x) == CONST
16342 && GET_CODE (XEXP (x, 0)) == PLUS
16343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
16344 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
16346 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
16347 (enum tls_model) log, false);
16348 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
16351 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16353 rtx tmp = legitimize_pe_coff_symbol (x, true);
16354 if (tmp)
16355 return tmp;
16358 if (flag_pic && SYMBOLIC_CONST (x))
16359 return legitimize_pic_address (x, 0);
16361 #if TARGET_MACHO
16362 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
16363 return machopic_indirect_data_reference (x, 0);
16364 #endif
16366 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16367 if (GET_CODE (x) == ASHIFT
16368 && CONST_INT_P (XEXP (x, 1))
16369 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
16371 changed = true;
16372 log = INTVAL (XEXP (x, 1));
16373 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
16374 GEN_INT (1 << log));
16377 if (GET_CODE (x) == PLUS)
16379 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16381 if (GET_CODE (XEXP (x, 0)) == ASHIFT
16382 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16383 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
16385 changed = true;
16386 log = INTVAL (XEXP (XEXP (x, 0), 1));
16387 XEXP (x, 0) = gen_rtx_MULT (Pmode,
16388 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
16389 GEN_INT (1 << log));
16392 if (GET_CODE (XEXP (x, 1)) == ASHIFT
16393 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
16394 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
16396 changed = true;
16397 log = INTVAL (XEXP (XEXP (x, 1), 1));
16398 XEXP (x, 1) = gen_rtx_MULT (Pmode,
16399 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
16400 GEN_INT (1 << log));
16403 /* Put multiply first if it isn't already. */
16404 if (GET_CODE (XEXP (x, 1)) == MULT)
16406 std::swap (XEXP (x, 0), XEXP (x, 1));
16407 changed = true;
16410 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16411 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16412 created by virtual register instantiation, register elimination, and
16413 similar optimizations. */
16414 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
16416 changed = true;
16417 x = gen_rtx_PLUS (Pmode,
16418 gen_rtx_PLUS (Pmode, XEXP (x, 0),
16419 XEXP (XEXP (x, 1), 0)),
16420 XEXP (XEXP (x, 1), 1));
16423 /* Canonicalize
16424 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16425 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16426 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
16427 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16428 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
16429 && CONSTANT_P (XEXP (x, 1)))
16431 rtx constant;
16432 rtx other = NULL_RTX;
16434 if (CONST_INT_P (XEXP (x, 1)))
16436 constant = XEXP (x, 1);
16437 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
16439 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
16441 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
16442 other = XEXP (x, 1);
16444 else
16445 constant = 0;
16447 if (constant)
16449 changed = true;
16450 x = gen_rtx_PLUS (Pmode,
16451 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
16452 XEXP (XEXP (XEXP (x, 0), 1), 0)),
16453 plus_constant (Pmode, other,
16454 INTVAL (constant)));
16458 if (changed && ix86_legitimate_address_p (mode, x, false))
16459 return x;
16461 if (GET_CODE (XEXP (x, 0)) == MULT)
16463 changed = true;
16464 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
16467 if (GET_CODE (XEXP (x, 1)) == MULT)
16469 changed = true;
16470 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
16473 if (changed
16474 && REG_P (XEXP (x, 1))
16475 && REG_P (XEXP (x, 0)))
16476 return x;
16478 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
16480 changed = true;
16481 x = legitimize_pic_address (x, 0);
16484 if (changed && ix86_legitimate_address_p (mode, x, false))
16485 return x;
16487 if (REG_P (XEXP (x, 0)))
16489 rtx temp = gen_reg_rtx (Pmode);
16490 rtx val = force_operand (XEXP (x, 1), temp);
16491 if (val != temp)
16493 val = convert_to_mode (Pmode, val, 1);
16494 emit_move_insn (temp, val);
16497 XEXP (x, 1) = temp;
16498 return x;
16501 else if (REG_P (XEXP (x, 1)))
16503 rtx temp = gen_reg_rtx (Pmode);
16504 rtx val = force_operand (XEXP (x, 0), temp);
16505 if (val != temp)
16507 val = convert_to_mode (Pmode, val, 1);
16508 emit_move_insn (temp, val);
16511 XEXP (x, 0) = temp;
16512 return x;
16516 return x;
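/* Editorial example of the canonicalization above (not from the original
   sources): the non-canonical address
       (plus (ashift (reg B) (const_int 2)) (reg A))
   is rewritten into (plus (mult (reg B) (const_int 4)) (reg A)), which
   ix86_decompose_address then recognizes as a base + index*scale operand
   such as (%eax,%ebx,4).  */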
16519 /* Print an integer constant expression in assembler syntax. Addition
16520 and subtraction are the only arithmetic that may appear in these
16521 expressions. FILE is the stdio stream to write to, X is the rtx, and
16522 CODE is the operand print code from the output string. */
16524 static void
16525 output_pic_addr_const (FILE *file, rtx x, int code)
16527 char buf[256];
16529 switch (GET_CODE (x))
16531 case PC:
16532 gcc_assert (flag_pic);
16533 putc ('.', file);
16534 break;
16536 case SYMBOL_REF:
16537 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
16538 output_addr_const (file, x);
16539 else
16541 const char *name = XSTR (x, 0);
16543 /* Mark the decl as referenced so that cgraph will
16544 output the function. */
16545 if (SYMBOL_REF_DECL (x))
16546 mark_decl_referenced (SYMBOL_REF_DECL (x));
16548 #if TARGET_MACHO
16549 if (MACHOPIC_INDIRECT
16550 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
16551 name = machopic_indirection_name (x, /*stub_p=*/true);
16552 #endif
16553 assemble_name (file, name);
16555 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
16556 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
16557 fputs ("@PLT", file);
16558 break;
16560 case LABEL_REF:
16561 x = XEXP (x, 0);
16562 /* FALLTHRU */
16563 case CODE_LABEL:
16564 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
16565 assemble_name (asm_out_file, buf);
16566 break;
16568 case CONST_INT:
16569 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16570 break;
16572 case CONST:
16573 /* This used to output parentheses around the expression,
16574 but that does not work on the 386 (either ATT or BSD assembler). */
16575 output_pic_addr_const (file, XEXP (x, 0), code);
16576 break;
16578 case CONST_DOUBLE:
16579 /* We can't handle floating point constants;
16580 TARGET_PRINT_OPERAND must handle them. */
16581 output_operand_lossage ("floating constant misused");
16582 break;
16584 case PLUS:
16585 /* Some assemblers need integer constants to appear first. */
16586 if (CONST_INT_P (XEXP (x, 0)))
16588 output_pic_addr_const (file, XEXP (x, 0), code);
16589 putc ('+', file);
16590 output_pic_addr_const (file, XEXP (x, 1), code);
16592 else
16594 gcc_assert (CONST_INT_P (XEXP (x, 1)));
16595 output_pic_addr_const (file, XEXP (x, 1), code);
16596 putc ('+', file);
16597 output_pic_addr_const (file, XEXP (x, 0), code);
16599 break;
16601 case MINUS:
16602 if (!TARGET_MACHO)
16603 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
16604 output_pic_addr_const (file, XEXP (x, 0), code);
16605 putc ('-', file);
16606 output_pic_addr_const (file, XEXP (x, 1), code);
16607 if (!TARGET_MACHO)
16608 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
16609 break;
16611 case UNSPEC:
16612 gcc_assert (XVECLEN (x, 0) == 1);
16613 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16614 switch (XINT (x, 1))
16616 case UNSPEC_GOT:
16617 fputs ("@GOT", file);
16618 break;
16619 case UNSPEC_GOTOFF:
16620 fputs ("@GOTOFF", file);
16621 break;
16622 case UNSPEC_PLTOFF:
16623 fputs ("@PLTOFF", file);
16624 break;
16625 case UNSPEC_PCREL:
16626 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16627 "(%rip)" : "[rip]", file);
16628 break;
16629 case UNSPEC_GOTPCREL:
16630 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16631 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16632 break;
16633 case UNSPEC_GOTTPOFF:
16634 /* FIXME: This might be @TPOFF in Sun ld too. */
16635 fputs ("@gottpoff", file);
16636 break;
16637 case UNSPEC_TPOFF:
16638 fputs ("@tpoff", file);
16639 break;
16640 case UNSPEC_NTPOFF:
16641 if (TARGET_64BIT)
16642 fputs ("@tpoff", file);
16643 else
16644 fputs ("@ntpoff", file);
16645 break;
16646 case UNSPEC_DTPOFF:
16647 fputs ("@dtpoff", file);
16648 break;
16649 case UNSPEC_GOTNTPOFF:
16650 if (TARGET_64BIT)
16651 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16652 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16653 else
16654 fputs ("@gotntpoff", file);
16655 break;
16656 case UNSPEC_INDNTPOFF:
16657 fputs ("@indntpoff", file);
16658 break;
16659 #if TARGET_MACHO
16660 case UNSPEC_MACHOPIC_OFFSET:
16661 putc ('-', file);
16662 machopic_output_function_base_name (file);
16663 break;
16664 #endif
16665 default:
16666 output_operand_lossage ("invalid UNSPEC as operand");
16667 break;
16669 break;
16671 default:
16672 output_operand_lossage ("invalid expression as operand");
16676 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16677 We need to emit DTP-relative relocations. */
16679 static void ATTRIBUTE_UNUSED
16680 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16682 fputs (ASM_LONG, file);
16683 output_addr_const (file, x);
16684 fputs ("@dtpoff", file);
16685 switch (size)
16687 case 4:
16688 break;
16689 case 8:
16690 fputs (", 0", file);
16691 break;
16692 default:
16693 gcc_unreachable ();
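/* Editorial example (not from the original sources): for a 4-byte request
   this emits ".long foo@dtpoff", and for an 8-byte request
   ".long foo@dtpoff, 0", which is the DTP-relative relocation dwarf2out
   expects for TLS variables.  */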
16697 /* Return true if X is a representation of the PIC register. This copes
16698 with calls from ix86_find_base_term, where the register might have
16699 been replaced by a cselib value. */
16701 static bool
16702 ix86_pic_register_p (rtx x)
16704 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16705 return (pic_offset_table_rtx
16706 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16707 else if (!REG_P (x))
16708 return false;
16709 else if (pic_offset_table_rtx)
16711 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16712 return true;
16713 if (HARD_REGISTER_P (x)
16714 && !HARD_REGISTER_P (pic_offset_table_rtx)
16715 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16716 return true;
16717 return false;
16719 else
16720 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16723 /* Helper function for ix86_delegitimize_address.
16724 Attempt to delegitimize TLS local-exec accesses. */
16726 static rtx
16727 ix86_delegitimize_tls_address (rtx orig_x)
16729 rtx x = orig_x, unspec;
16730 struct ix86_address addr;
16732 if (!TARGET_TLS_DIRECT_SEG_REFS)
16733 return orig_x;
16734 if (MEM_P (x))
16735 x = XEXP (x, 0);
16736 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16737 return orig_x;
16738 if (ix86_decompose_address (x, &addr) == 0
16739 || addr.seg != DEFAULT_TLS_SEG_REG
16740 || addr.disp == NULL_RTX
16741 || GET_CODE (addr.disp) != CONST)
16742 return orig_x;
16743 unspec = XEXP (addr.disp, 0);
16744 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16745 unspec = XEXP (unspec, 0);
16746 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16747 return orig_x;
16748 x = XVECEXP (unspec, 0, 0);
16749 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16750 if (unspec != XEXP (addr.disp, 0))
16751 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16752 if (addr.index)
16754 rtx idx = addr.index;
16755 if (addr.scale != 1)
16756 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16757 x = gen_rtx_PLUS (Pmode, idx, x);
16759 if (addr.base)
16760 x = gen_rtx_PLUS (Pmode, addr.base, x);
16761 if (MEM_P (orig_x))
16762 x = replace_equiv_address_nv (orig_x, x);
16763 return x;
16766 /* In the name of slightly smaller debug output, and to cater to
16767 general assembler lossage, recognize PIC+GOTOFF and turn it back
16768 into a direct symbol reference.
16770 On Darwin, this is necessary to avoid a crash, because Darwin
16771 has a different PIC label for each routine but the DWARF debugging
16772 information is not associated with any particular routine, so it's
16773 necessary to remove references to the PIC label from RTL stored by
16774 the DWARF output code.
16776 This helper is used in the normal ix86_delegitimize_address
16777 entrypoint (e.g. used in the target delegitimization hook) and
16778 in ix86_find_base_term. As a compile-time memory optimization, we
16779 avoid allocating rtxes that will not change the outcome
16780 of the callers (find_base_value and find_base_term). */
16782 static inline rtx
16783 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
16785 rtx orig_x = delegitimize_mem_from_attrs (x);
16786 /* addend is NULL or some rtx if x is something+GOTOFF where
16787 something doesn't include the PIC register. */
16788 rtx addend = NULL_RTX;
16789 /* reg_addend is NULL or a multiple of some register. */
16790 rtx reg_addend = NULL_RTX;
16791 /* const_addend is NULL or a const_int. */
16792 rtx const_addend = NULL_RTX;
16793 /* This is the result, or NULL. */
16794 rtx result = NULL_RTX;
16796 x = orig_x;
16798 if (MEM_P (x))
16799 x = XEXP (x, 0);
16801 if (TARGET_64BIT)
16803 if (GET_CODE (x) == CONST
16804 && GET_CODE (XEXP (x, 0)) == PLUS
16805 && GET_MODE (XEXP (x, 0)) == Pmode
16806 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16807 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16808 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16810 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
16811 base. A CONST can't be arg_pointer_rtx based. */
16812 if (base_term_p && MEM_P (orig_x))
16813 return orig_x;
16814 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16815 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16816 if (MEM_P (orig_x))
16817 x = replace_equiv_address_nv (orig_x, x);
16818 return x;
16821 if (GET_CODE (x) == CONST
16822 && GET_CODE (XEXP (x, 0)) == UNSPEC
16823 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16824 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16825 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16827 x = XVECEXP (XEXP (x, 0), 0, 0);
16828 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16830 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
16831 if (x == NULL_RTX)
16832 return orig_x;
16834 return x;
16837 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16838 return ix86_delegitimize_tls_address (orig_x);
16840 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16841 and -mcmodel=medium -fpic. */
16844 if (GET_CODE (x) != PLUS
16845 || GET_CODE (XEXP (x, 1)) != CONST)
16846 return ix86_delegitimize_tls_address (orig_x);
16848 if (ix86_pic_register_p (XEXP (x, 0)))
16849 /* %ebx + GOT/GOTOFF */
16851 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16853 /* %ebx + %reg * scale + GOT/GOTOFF */
16854 reg_addend = XEXP (x, 0);
16855 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16856 reg_addend = XEXP (reg_addend, 1);
16857 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16858 reg_addend = XEXP (reg_addend, 0);
16859 else
16861 reg_addend = NULL_RTX;
16862 addend = XEXP (x, 0);
16865 else
16866 addend = XEXP (x, 0);
16868 x = XEXP (XEXP (x, 1), 0);
16869 if (GET_CODE (x) == PLUS
16870 && CONST_INT_P (XEXP (x, 1)))
16872 const_addend = XEXP (x, 1);
16873 x = XEXP (x, 0);
16876 if (GET_CODE (x) == UNSPEC
16877 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16878 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16879 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16880 && !MEM_P (orig_x) && !addend)))
16881 result = XVECEXP (x, 0, 0);
16883 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16884 && !MEM_P (orig_x))
16885 result = XVECEXP (x, 0, 0);
16887 if (! result)
16888 return ix86_delegitimize_tls_address (orig_x);
16890 /* For (PLUS something CONST_INT) both find_base_{value,term} just
16891 recurse on the first operand. */
16892 if (const_addend && !base_term_p)
16893 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16894 if (reg_addend)
16895 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16896 if (addend)
16898 /* If the rest of the original X doesn't involve the PIC register, add
16899 addend and subtract pic_offset_table_rtx. This can happen e.g.
16900 for code like:
16901 leal (%ebx, %ecx, 4), %ecx
16903 movl foo@GOTOFF(%ecx), %edx
16904 in which case we return (%ecx - %ebx) + foo
16905 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16906 and reload has completed. Don't do the latter for debug,
16907 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
16908 if (pic_offset_table_rtx
16909 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16910 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16911 pic_offset_table_rtx),
16912 result);
16913 else if (base_term_p
16914 && pic_offset_table_rtx
16915 && !TARGET_MACHO
16916 && !TARGET_VXWORKS_RTP)
16918 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16919 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16920 result = gen_rtx_PLUS (Pmode, tmp, result);
16922 else
16923 return orig_x;
16925 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16927 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
16928 if (result == NULL_RTX)
16929 return orig_x;
16931 return result;
16934 /* The normal instantiation of the above template. */
16936 static rtx
16937 ix86_delegitimize_address (rtx x)
16939 return ix86_delegitimize_address_1 (x, false);
16942 /* If X is a machine specific address (i.e. a symbol or label being
16943 referenced as a displacement from the GOT implemented using an
16944 UNSPEC), then return the base term. Otherwise return X. */
16947 ix86_find_base_term (rtx x)
16949 rtx term;
16951 if (TARGET_64BIT)
16953 if (GET_CODE (x) != CONST)
16954 return x;
16955 term = XEXP (x, 0);
16956 if (GET_CODE (term) == PLUS
16957 && CONST_INT_P (XEXP (term, 1)))
16958 term = XEXP (term, 0);
16959 if (GET_CODE (term) != UNSPEC
16960 || (XINT (term, 1) != UNSPEC_GOTPCREL
16961 && XINT (term, 1) != UNSPEC_PCREL))
16962 return x;
16964 return XVECEXP (term, 0, 0);
16967 return ix86_delegitimize_address_1 (x, true);
16970 /* Return true if X shouldn't be emitted into the debug info.
16971 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
16972    symbol easily into the .debug_info section, so we don't need to
16973    delegitimize, but can instead assemble it as @gotoff.
16974    Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
16975    assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
16977 static bool
16978 ix86_const_not_ok_for_debug_p (rtx x)
16980 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
16981 return true;
16983 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
16984 return true;
16986 return false;
16989 static void
16990 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16991 bool fp, FILE *file)
16993 const char *suffix;
16995 if (mode == CCFPmode)
16997 code = ix86_fp_compare_code_to_integer (code);
16998 mode = CCmode;
17000 if (reverse)
17001 code = reverse_condition (code);
17003 switch (code)
17005 case EQ:
17006 gcc_assert (mode != CCGZmode);
17007 switch (mode)
17009 case E_CCAmode:
17010 suffix = "a";
17011 break;
17012 case E_CCCmode:
17013 suffix = "c";
17014 break;
17015 case E_CCOmode:
17016 suffix = "o";
17017 break;
17018 case E_CCPmode:
17019 suffix = "p";
17020 break;
17021 case E_CCSmode:
17022 suffix = "s";
17023 break;
17024 default:
17025 suffix = "e";
17026 break;
17028 break;
17029 case NE:
17030 gcc_assert (mode != CCGZmode);
17031 switch (mode)
17033 case E_CCAmode:
17034 suffix = "na";
17035 break;
17036 case E_CCCmode:
17037 suffix = "nc";
17038 break;
17039 case E_CCOmode:
17040 suffix = "no";
17041 break;
17042 case E_CCPmode:
17043 suffix = "np";
17044 break;
17045 case E_CCSmode:
17046 suffix = "ns";
17047 break;
17048 default:
17049 suffix = "ne";
17050 break;
17052 break;
17053 case GT:
17054 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17055 suffix = "g";
17056 break;
17057 case GTU:
17058 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17059 Those same assemblers have the same but opposite lossage on cmov. */
17060 if (mode == CCmode)
17061 suffix = fp ? "nbe" : "a";
17062 else
17063 gcc_unreachable ();
17064 break;
17065 case LT:
17066 switch (mode)
17068 case E_CCNOmode:
17069 case E_CCGOCmode:
17070 suffix = "s";
17071 break;
17073 case E_CCmode:
17074 case E_CCGCmode:
17075 case E_CCGZmode:
17076 suffix = "l";
17077 break;
17079 default:
17080 gcc_unreachable ();
17082 break;
17083 case LTU:
17084 if (mode == CCmode || mode == CCGZmode)
17085 suffix = "b";
17086 else if (mode == CCCmode)
17087 suffix = fp ? "b" : "c";
17088 else
17089 gcc_unreachable ();
17090 break;
17091 case GE:
17092 switch (mode)
17094 case E_CCNOmode:
17095 case E_CCGOCmode:
17096 suffix = "ns";
17097 break;
17099 case E_CCmode:
17100 case E_CCGCmode:
17101 case E_CCGZmode:
17102 suffix = "ge";
17103 break;
17105 default:
17106 gcc_unreachable ();
17108 break;
17109 case GEU:
17110 if (mode == CCmode || mode == CCGZmode)
17111 suffix = "nb";
17112 else if (mode == CCCmode)
17113 suffix = fp ? "nb" : "nc";
17114 else
17115 gcc_unreachable ();
17116 break;
17117 case LE:
17118 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17119 suffix = "le";
17120 break;
17121 case LEU:
17122 if (mode == CCmode)
17123 suffix = "be";
17124 else
17125 gcc_unreachable ();
17126 break;
17127 case UNORDERED:
17128 suffix = fp ? "u" : "p";
17129 break;
17130 case ORDERED:
17131 suffix = fp ? "nu" : "np";
17132 break;
17133 default:
17134 gcc_unreachable ();
17136 fputs (suffix, file);
17139 /* Print the name of register X to FILE based on its machine mode and number.
17140 If CODE is 'w', pretend the mode is HImode.
17141 If CODE is 'b', pretend the mode is QImode.
17142 If CODE is 'k', pretend the mode is SImode.
17143 If CODE is 'q', pretend the mode is DImode.
17144 If CODE is 'x', pretend the mode is V4SFmode.
17145 If CODE is 't', pretend the mode is V8SFmode.
17146 If CODE is 'g', pretend the mode is V16SFmode.
17147 If CODE is 'h', pretend the reg is the 'high' byte register.
17148 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
17149 If CODE is 'd', duplicate the operand for AVX instruction.
17152 void
17153 print_reg (rtx x, int code, FILE *file)
17155 const char *reg;
17156 int msize;
17157 unsigned int regno;
17158 bool duplicated;
17160 if (ASSEMBLER_DIALECT == ASM_ATT)
17161 putc ('%', file);
17163 if (x == pc_rtx)
17165 gcc_assert (TARGET_64BIT);
17166 fputs ("rip", file);
17167 return;
17170 if (code == 'y' && STACK_TOP_P (x))
17172 fputs ("st(0)", file);
17173 return;
17176 if (code == 'w')
17177 msize = 2;
17178 else if (code == 'b')
17179 msize = 1;
17180 else if (code == 'k')
17181 msize = 4;
17182 else if (code == 'q')
17183 msize = 8;
17184 else if (code == 'h')
17185 msize = 0;
17186 else if (code == 'x')
17187 msize = 16;
17188 else if (code == 't')
17189 msize = 32;
17190 else if (code == 'g')
17191 msize = 64;
17192 else
17193 msize = GET_MODE_SIZE (GET_MODE (x));
17195 regno = REGNO (x);
17197 if (regno == ARG_POINTER_REGNUM
17198 || regno == FRAME_POINTER_REGNUM
17199 || regno == FPSR_REG
17200 || regno == FPCR_REG)
17202 output_operand_lossage
17203 ("invalid use of register '%s'", reg_names[regno]);
17204 return;
17206 else if (regno == FLAGS_REG)
17208 output_operand_lossage ("invalid use of asm flag output");
17209 return;
17212 duplicated = code == 'd' && TARGET_AVX;
17214 switch (msize)
17216 case 16:
17217 case 12:
17218 case 8:
17219 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17220 warning (0, "unsupported size for integer register");
17221 /* FALLTHRU */
17222 case 4:
17223 if (LEGACY_INT_REGNO_P (regno))
17224 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17225 /* FALLTHRU */
17226 case 2:
17227 normal:
17228 reg = hi_reg_name[regno];
17229 break;
17230 case 1:
17231 if (regno >= ARRAY_SIZE (qi_reg_name))
17232 goto normal;
17233 if (!ANY_QI_REGNO_P (regno))
17234 error ("unsupported size for integer register");
17235 reg = qi_reg_name[regno];
17236 break;
17237 case 0:
17238 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17239 goto normal;
17240 reg = qi_high_reg_name[regno];
17241 break;
17242 case 32:
17243 case 64:
17244 if (SSE_REGNO_P (regno))
17246 gcc_assert (!duplicated);
17247 putc (msize == 32 ? 'y' : 'z', file);
17248 reg = hi_reg_name[regno] + 1;
17249 break;
17251 goto normal;
17252 default:
17253 gcc_unreachable ();
17256 fputs (reg, file);
17258   /* Irritatingly, AMD extended registers use a
17259      different naming convention: "r%d[bwd]".  */
17260 if (REX_INT_REGNO_P (regno))
17262 gcc_assert (TARGET_64BIT);
17263 switch (msize)
17265 case 0:
17266 error ("extended registers have no high halves");
17267 break;
17268 case 1:
17269 putc ('b', file);
17270 break;
17271 case 2:
17272 putc ('w', file);
17273 break;
17274 case 4:
17275 putc ('d', file);
17276 break;
17277 case 8:
17278 /* no suffix */
17279 break;
17280 default:
17281 error ("unsupported operand size for extended register");
17282 break;
17284 return;
17287 if (duplicated)
17289 if (ASSEMBLER_DIALECT == ASM_ATT)
17290 fprintf (file, ", %%%s", reg);
17291 else
17292 fprintf (file, ", %s", reg);
17296 /* Meaning of CODE:
17297 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
17298 C -- print opcode suffix for set/cmov insn.
17299 c -- like C, but print reversed condition
17300 F,f -- likewise, but for floating-point.
17301 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
17302 otherwise nothing
17303    R -- print embedded rounding and sae.
17304 r -- print only sae.
17305 z -- print the opcode suffix for the size of the current operand.
17306 Z -- likewise, with special suffixes for x87 instructions.
17307 * -- print a star (in certain assembler syntax)
17308 A -- print an absolute memory reference.
17309 E -- print address with DImode register names if TARGET_64BIT.
17310 w -- print the operand as if it's a "word" (HImode) even if it isn't.
17311    s -- print a shift double count, followed by the assembler's argument
17312         delimiter.
17313 b -- print the QImode name of the register for the indicated operand.
17314 %b0 would print %al if operands[0] is reg 0.
17315 w -- likewise, print the HImode name of the register.
17316 k -- likewise, print the SImode name of the register.
17317 q -- likewise, print the DImode name of the register.
17318 x -- likewise, print the V4SFmode name of the register.
17319 t -- likewise, print the V8SFmode name of the register.
17320 g -- likewise, print the V16SFmode name of the register.
17321 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
17322 y -- print "st(0)" instead of "st" as a register.
17323 d -- print duplicated register operand for AVX instruction.
17324 D -- print condition for SSE cmp instruction.
17325 P -- if PIC, print an @PLT suffix.
17326 p -- print raw symbol name.
17327 X -- don't print any sort of PIC '@' suffix for a symbol.
17328 & -- print some in-use local-dynamic symbol name.
17329 H -- print a memory address offset by 8; used for sse high-parts
17330 Y -- print condition for XOP pcom* instruction.
17331 + -- print a branch hint as 'cs' or 'ds' prefix
17332 ; -- print a semicolon (after prefixes due to bug in older gas).
17333 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
17334 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
17335 ! -- print MPX prefix for jxx/call/ret instructions if required.
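/* For illustration only (a hypothetical template, not one taken from
   i386.md): given operands[0] = (reg:SI ax) and operands[1] = (const_int 1),
   the template "add%z0\t{%1, %0|%0, %1}" prints "addl\t$1, %eax" in AT&T
   syntax; in Intel syntax the %z suffix is suppressed and the output is
   "add\teax, 1".  */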
17338 void
17339 ix86_print_operand (FILE *file, rtx x, int code)
17341 if (code)
17343 switch (code)
17345 case 'A':
17346 switch (ASSEMBLER_DIALECT)
17348 case ASM_ATT:
17349 putc ('*', file);
17350 break;
17352 case ASM_INTEL:
17353	  /* Intel syntax. For absolute addresses, registers should not
17354	     be surrounded by brackets.  */
17355 if (!REG_P (x))
17357 putc ('[', file);
17358 ix86_print_operand (file, x, 0);
17359 putc (']', file);
17360 return;
17362 break;
17364 default:
17365 gcc_unreachable ();
17368 ix86_print_operand (file, x, 0);
17369 return;
17371 case 'E':
17372 /* Wrap address in an UNSPEC to declare special handling. */
17373 if (TARGET_64BIT)
17374 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
17376 output_address (VOIDmode, x);
17377 return;
17379 case 'L':
17380 if (ASSEMBLER_DIALECT == ASM_ATT)
17381 putc ('l', file);
17382 return;
17384 case 'W':
17385 if (ASSEMBLER_DIALECT == ASM_ATT)
17386 putc ('w', file);
17387 return;
17389 case 'B':
17390 if (ASSEMBLER_DIALECT == ASM_ATT)
17391 putc ('b', file);
17392 return;
17394 case 'Q':
17395 if (ASSEMBLER_DIALECT == ASM_ATT)
17396 putc ('l', file);
17397 return;
17399 case 'S':
17400 if (ASSEMBLER_DIALECT == ASM_ATT)
17401 putc ('s', file);
17402 return;
17404 case 'T':
17405 if (ASSEMBLER_DIALECT == ASM_ATT)
17406 putc ('t', file);
17407 return;
17409 case 'O':
17410 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17411 if (ASSEMBLER_DIALECT != ASM_ATT)
17412 return;
17414 switch (GET_MODE_SIZE (GET_MODE (x)))
17416 case 2:
17417 putc ('w', file);
17418 break;
17420 case 4:
17421 putc ('l', file);
17422 break;
17424 case 8:
17425 putc ('q', file);
17426 break;
17428 default:
17429 output_operand_lossage ("invalid operand size for operand "
17430 "code 'O'");
17431 return;
17434 putc ('.', file);
17435 #endif
17436 return;
17438 case 'z':
17439 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17441 /* Opcodes don't get size suffixes if using Intel opcodes. */
17442 if (ASSEMBLER_DIALECT == ASM_INTEL)
17443 return;
17445 switch (GET_MODE_SIZE (GET_MODE (x)))
17447 case 1:
17448 putc ('b', file);
17449 return;
17451 case 2:
17452 putc ('w', file);
17453 return;
17455 case 4:
17456 putc ('l', file);
17457 return;
17459 case 8:
17460 putc ('q', file);
17461 return;
17463 default:
17464 output_operand_lossage ("invalid operand size for operand "
17465 "code 'z'");
17466 return;
17470 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17471 warning (0, "non-integer operand used with operand code 'z'");
17472 /* FALLTHRU */
17474 case 'Z':
17475 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
17476 if (ASSEMBLER_DIALECT == ASM_INTEL)
17477 return;
17479 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17481 switch (GET_MODE_SIZE (GET_MODE (x)))
17483 case 2:
17484 #ifdef HAVE_AS_IX86_FILDS
17485 putc ('s', file);
17486 #endif
17487 return;
17489 case 4:
17490 putc ('l', file);
17491 return;
17493 case 8:
17494 #ifdef HAVE_AS_IX86_FILDQ
17495 putc ('q', file);
17496 #else
17497 fputs ("ll", file);
17498 #endif
17499 return;
17501 default:
17502 break;
17505 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17507 /* 387 opcodes don't get size suffixes
17508 if the operands are registers. */
17509 if (STACK_REG_P (x))
17510 return;
17512 switch (GET_MODE_SIZE (GET_MODE (x)))
17514 case 4:
17515 putc ('s', file);
17516 return;
17518 case 8:
17519 putc ('l', file);
17520 return;
17522 case 12:
17523 case 16:
17524 putc ('t', file);
17525 return;
17527 default:
17528 break;
17531 else
17533 output_operand_lossage ("invalid operand type used with "
17534 "operand code 'Z'");
17535 return;
17538 output_operand_lossage ("invalid operand size for operand code 'Z'");
17539 return;
17541 case 'd':
17542 case 'b':
17543 case 'w':
17544 case 'k':
17545 case 'q':
17546 case 'h':
17547 case 't':
17548 case 'g':
17549 case 'y':
17550 case 'x':
17551 case 'X':
17552 case 'P':
17553 case 'p':
17554 break;
17556 case 's':
17557 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
17559 ix86_print_operand (file, x, 0);
17560 fputs (", ", file);
17562 return;
17564 case 'Y':
17565 switch (GET_CODE (x))
17567 case NE:
17568 fputs ("neq", file);
17569 break;
17570 case EQ:
17571 fputs ("eq", file);
17572 break;
17573 case GE:
17574 case GEU:
17575 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
17576 break;
17577 case GT:
17578 case GTU:
17579 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
17580 break;
17581 case LE:
17582 case LEU:
17583 fputs ("le", file);
17584 break;
17585 case LT:
17586 case LTU:
17587 fputs ("lt", file);
17588 break;
17589 case UNORDERED:
17590 fputs ("unord", file);
17591 break;
17592 case ORDERED:
17593 fputs ("ord", file);
17594 break;
17595 case UNEQ:
17596 fputs ("ueq", file);
17597 break;
17598 case UNGE:
17599 fputs ("nlt", file);
17600 break;
17601 case UNGT:
17602 fputs ("nle", file);
17603 break;
17604 case UNLE:
17605 fputs ("ule", file);
17606 break;
17607 case UNLT:
17608 fputs ("ult", file);
17609 break;
17610 case LTGT:
17611 fputs ("une", file);
17612 break;
17613 default:
17614 output_operand_lossage ("operand is not a condition code, "
17615 "invalid operand code 'Y'");
17616 return;
17618 return;
17620 case 'D':
17621	  /* A little bit of brain damage here.  The SSE compare instructions
17622	     use completely different names for the comparisons than the
17623	     fp conditional moves do.  */
17624 switch (GET_CODE (x))
17626 case UNEQ:
17627 if (TARGET_AVX)
17629 fputs ("eq_us", file);
17630 break;
17632 /* FALLTHRU */
17633 case EQ:
17634 fputs ("eq", file);
17635 break;
17636 case UNLT:
17637 if (TARGET_AVX)
17639 fputs ("nge", file);
17640 break;
17642 /* FALLTHRU */
17643 case LT:
17644 fputs ("lt", file);
17645 break;
17646 case UNLE:
17647 if (TARGET_AVX)
17649 fputs ("ngt", file);
17650 break;
17652 /* FALLTHRU */
17653 case LE:
17654 fputs ("le", file);
17655 break;
17656 case UNORDERED:
17657 fputs ("unord", file);
17658 break;
17659 case LTGT:
17660 if (TARGET_AVX)
17662 fputs ("neq_oq", file);
17663 break;
17665 /* FALLTHRU */
17666 case NE:
17667 fputs ("neq", file);
17668 break;
17669 case GE:
17670 if (TARGET_AVX)
17672 fputs ("ge", file);
17673 break;
17675 /* FALLTHRU */
17676 case UNGE:
17677 fputs ("nlt", file);
17678 break;
17679 case GT:
17680 if (TARGET_AVX)
17682 fputs ("gt", file);
17683 break;
17685 /* FALLTHRU */
17686 case UNGT:
17687 fputs ("nle", file);
17688 break;
17689 case ORDERED:
17690 fputs ("ord", file);
17691 break;
17692 default:
17693 output_operand_lossage ("operand is not a condition code, "
17694 "invalid operand code 'D'");
17695 return;
17697 return;
17699 case 'F':
17700 case 'f':
17701 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17702 if (ASSEMBLER_DIALECT == ASM_ATT)
17703 putc ('.', file);
17704 gcc_fallthrough ();
17705 #endif
17707 case 'C':
17708 case 'c':
17709 if (!COMPARISON_P (x))
17711 output_operand_lossage ("operand is not a condition code, "
17712 "invalid operand code '%c'", code);
17713 return;
17715 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17716 code == 'c' || code == 'f',
17717 code == 'F' || code == 'f',
17718 file);
17719 return;
17721 case 'H':
17722 if (!offsettable_memref_p (x))
17724 output_operand_lossage ("operand is not an offsettable memory "
17725 "reference, invalid operand code 'H'");
17726 return;
17728 /* It doesn't actually matter what mode we use here, as we're
17729 only going to use this for printing. */
17730 x = adjust_address_nv (x, DImode, 8);
17731 /* Output 'qword ptr' for intel assembler dialect. */
17732 if (ASSEMBLER_DIALECT == ASM_INTEL)
17733 code = 'q';
17734 break;
17736 case 'K':
17737 if (!CONST_INT_P (x))
17739 output_operand_lossage ("operand is not an integer, invalid "
17740 "operand code 'K'");
17741 return;
17744 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17745 #ifdef HAVE_AS_IX86_HLE
17746 fputs ("xacquire ", file);
17747 #else
17748 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17749 #endif
17750 else if (INTVAL (x) & IX86_HLE_RELEASE)
17751 #ifdef HAVE_AS_IX86_HLE
17752 fputs ("xrelease ", file);
17753 #else
17754 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17755 #endif
17756	  /* We do not want to print the value of the operand.  */
17757 return;
17759 case 'N':
17760 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17761 fputs ("{z}", file);
17762 return;
17764 case 'r':
17765 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
17767 output_operand_lossage ("operand is not a specific integer, "
17768 "invalid operand code 'r'");
17769 return;
17772 if (ASSEMBLER_DIALECT == ASM_INTEL)
17773 fputs (", ", file);
17775 fputs ("{sae}", file);
17777 if (ASSEMBLER_DIALECT == ASM_ATT)
17778 fputs (", ", file);
17780 return;
17782 case 'R':
17783 if (!CONST_INT_P (x))
17785 output_operand_lossage ("operand is not an integer, invalid "
17786 "operand code 'R'");
17787 return;
17790 if (ASSEMBLER_DIALECT == ASM_INTEL)
17791 fputs (", ", file);
17793 switch (INTVAL (x))
17795 case ROUND_NEAREST_INT | ROUND_SAE:
17796 fputs ("{rn-sae}", file);
17797 break;
17798 case ROUND_NEG_INF | ROUND_SAE:
17799 fputs ("{rd-sae}", file);
17800 break;
17801 case ROUND_POS_INF | ROUND_SAE:
17802 fputs ("{ru-sae}", file);
17803 break;
17804 case ROUND_ZERO | ROUND_SAE:
17805 fputs ("{rz-sae}", file);
17806 break;
17807 default:
17808 output_operand_lossage ("operand is not a specific integer, "
17809 "invalid operand code 'R'");
17812 if (ASSEMBLER_DIALECT == ASM_ATT)
17813 fputs (", ", file);
17815 return;
17817 case '*':
17818 if (ASSEMBLER_DIALECT == ASM_ATT)
17819 putc ('*', file);
17820 return;
17822 case '&':
17824 const char *name = get_some_local_dynamic_name ();
17825 if (name == NULL)
17826 output_operand_lossage ("'%%&' used without any "
17827 "local dynamic TLS references");
17828 else
17829 assemble_name (file, name);
17830 return;
17833 case '+':
17835 rtx x;
17837 if (!optimize
17838 || optimize_function_for_size_p (cfun)
17839 || !TARGET_BRANCH_PREDICTION_HINTS)
17840 return;
17842 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17843 if (x)
17845 int pred_val = profile_probability::from_reg_br_prob_note
17846 (XINT (x, 0)).to_reg_br_prob_base ();
17848 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17849 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17851 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17852 bool cputaken
17853 = final_forward_branch_p (current_output_insn) == 0;
17855	      /* Emit hints only when the default branch prediction
17856	         heuristics would fail.  */
17857 if (taken != cputaken)
17859 /* We use 3e (DS) prefix for taken branches and
17860 2e (CS) prefix for not taken branches. */
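/* As an illustration (a made-up situation, not a specific insn): a forward
   conditional jump whose REG_BR_PROB note predicts it taken disagrees with
   the CPU's static "forward not taken" heuristic, so the jcc gets a
   "ds ; " prefix, e.g. "ds ; je .L5" (label name invented here).  */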
17861 if (taken)
17862 fputs ("ds ; ", file);
17863 else
17864 fputs ("cs ; ", file);
17868 return;
17871 case ';':
17872 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17873 putc (';', file);
17874 #endif
17875 return;
17877 case '~':
17878 putc (TARGET_AVX2 ? 'i' : 'f', file);
17879 return;
17881 case '^':
17882 if (TARGET_64BIT && Pmode != word_mode)
17883 fputs ("addr32 ", file);
17884 return;
17886 case '!':
17887 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17888 fputs ("bnd ", file);
17889 if (ix86_notrack_prefixed_insn_p (current_output_insn))
17890 fputs ("notrack ", file);
17891 return;
17893 default:
17894 output_operand_lossage ("invalid operand code '%c'", code);
17898 if (REG_P (x))
17899 print_reg (x, code, file);
17901 else if (MEM_P (x))
17903 rtx addr = XEXP (x, 0);
17905 /* No `byte ptr' prefix for call instructions ... */
17906 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17908 machine_mode mode = GET_MODE (x);
17909 const char *size;
17911 /* Check for explicit size override codes. */
17912 if (code == 'b')
17913 size = "BYTE";
17914 else if (code == 'w')
17915 size = "WORD";
17916 else if (code == 'k')
17917 size = "DWORD";
17918 else if (code == 'q')
17919 size = "QWORD";
17920 else if (code == 'x')
17921 size = "XMMWORD";
17922 else if (code == 't')
17923 size = "YMMWORD";
17924 else if (code == 'g')
17925 size = "ZMMWORD";
17926 else if (mode == BLKmode)
17927 /* ... or BLKmode operands, when not overridden. */
17928 size = NULL;
17929 else
17930 switch (GET_MODE_SIZE (mode))
17932 case 1: size = "BYTE"; break;
17933 case 2: size = "WORD"; break;
17934 case 4: size = "DWORD"; break;
17935 case 8: size = "QWORD"; break;
17936 case 12: size = "TBYTE"; break;
17937 case 16:
17938 if (mode == XFmode)
17939 size = "TBYTE";
17940 else
17941 size = "XMMWORD";
17942 break;
17943 case 32: size = "YMMWORD"; break;
17944 case 64: size = "ZMMWORD"; break;
17945 default:
17946 gcc_unreachable ();
17948 if (size)
17950 fputs (size, file);
17951 fputs (" PTR ", file);
17955 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17956 output_operand_lossage ("invalid constraints for operand");
17957 else
17958 ix86_print_operand_address_as
17959 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
17962 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17964 long l;
17966 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17968 if (ASSEMBLER_DIALECT == ASM_ATT)
17969 putc ('$', file);
17970 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17971 if (code == 'q')
17972 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17973 (unsigned long long) (int) l);
17974 else
17975 fprintf (file, "0x%08x", (unsigned int) l);
17978 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17980 long l[2];
17982 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17984 if (ASSEMBLER_DIALECT == ASM_ATT)
17985 putc ('$', file);
17986 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17989 /* These float cases don't actually occur as immediate operands. */
17990 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17992 char dstr[30];
17994 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17995 fputs (dstr, file);
17998 else
18000 /* We have patterns that allow zero sets of memory, for instance.
18001 In 64-bit mode, we should probably support all 8-byte vectors,
18002 since we can in fact encode that into an immediate. */
18003 if (GET_CODE (x) == CONST_VECTOR)
18005 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
18006 x = const0_rtx;
18009 if (code != 'P' && code != 'p')
18011 if (CONST_INT_P (x))
18013 if (ASSEMBLER_DIALECT == ASM_ATT)
18014 putc ('$', file);
18016 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18017 || GET_CODE (x) == LABEL_REF)
18019 if (ASSEMBLER_DIALECT == ASM_ATT)
18020 putc ('$', file);
18021 else
18022 fputs ("OFFSET FLAT:", file);
18025 if (CONST_INT_P (x))
18026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18027 else if (flag_pic || MACHOPIC_INDIRECT)
18028 output_pic_addr_const (file, x, code);
18029 else
18030 output_addr_const (file, x);
18034 static bool
18035 ix86_print_operand_punct_valid_p (unsigned char code)
18037 return (code == '*' || code == '+' || code == '&' || code == ';'
18038 || code == '~' || code == '^' || code == '!');
18041 /* Print a memory operand whose address is ADDR. */
18043 static void
18044 ix86_print_operand_address_as (FILE *file, rtx addr,
18045 addr_space_t as, bool no_rip)
18047 struct ix86_address parts;
18048 rtx base, index, disp;
18049 int scale;
18050 int ok;
18051 bool vsib = false;
18052 int code = 0;
18054 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18056 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18057 gcc_assert (parts.index == NULL_RTX);
18058 parts.index = XVECEXP (addr, 0, 1);
18059 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18060 addr = XVECEXP (addr, 0, 0);
18061 vsib = true;
18063 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18065 gcc_assert (TARGET_64BIT);
18066 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18067 code = 'q';
18069 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18071 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18072 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18073 if (parts.base != NULL_RTX)
18075 parts.index = parts.base;
18076 parts.scale = 1;
18078 parts.base = XVECEXP (addr, 0, 0);
18079 addr = XVECEXP (addr, 0, 0);
18081 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18083 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18084 gcc_assert (parts.index == NULL_RTX);
18085 parts.index = XVECEXP (addr, 0, 1);
18086 addr = XVECEXP (addr, 0, 0);
18088 else
18089 ok = ix86_decompose_address (addr, &parts);
18091 gcc_assert (ok);
18093 base = parts.base;
18094 index = parts.index;
18095 disp = parts.disp;
18096 scale = parts.scale;
18098 if (ADDR_SPACE_GENERIC_P (as))
18099 as = parts.seg;
18100 else
18101 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18103 if (!ADDR_SPACE_GENERIC_P (as))
18105 const char *string;
18107 if (as == ADDR_SPACE_SEG_FS)
18108 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18109 else if (as == ADDR_SPACE_SEG_GS)
18110 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18111 else
18112 gcc_unreachable ();
18113 fputs (string, file);
18116   /* Use the one byte shorter RIP-relative addressing for 64bit mode.  */
18117 if (TARGET_64BIT && !base && !index && !no_rip)
18119 rtx symbol = disp;
18121 if (GET_CODE (disp) == CONST
18122 && GET_CODE (XEXP (disp, 0)) == PLUS
18123 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18124 symbol = XEXP (XEXP (disp, 0), 0);
18126 if (GET_CODE (symbol) == LABEL_REF
18127 || (GET_CODE (symbol) == SYMBOL_REF
18128 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18129 base = pc_rtx;
18132 if (!base && !index)
18134      /* A displacement-only address requires special attention.  */
18135 if (CONST_INT_P (disp))
18137 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18138 fputs ("ds:", file);
18139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18141 /* Load the external function address via the GOT slot to avoid PLT. */
18142 else if (GET_CODE (disp) == CONST
18143 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18144 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18145 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18146 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18147 output_pic_addr_const (file, disp, 0);
18148 else if (flag_pic)
18149 output_pic_addr_const (file, disp, 0);
18150 else
18151 output_addr_const (file, disp);
18153 else
18155 /* Print SImode register names to force addr32 prefix. */
18156 if (SImode_address_operand (addr, VOIDmode))
18158 if (flag_checking)
18160 gcc_assert (TARGET_64BIT);
18161 switch (GET_CODE (addr))
18163 case SUBREG:
18164 gcc_assert (GET_MODE (addr) == SImode);
18165 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18166 break;
18167 case ZERO_EXTEND:
18168 case AND:
18169 gcc_assert (GET_MODE (addr) == DImode);
18170 break;
18171 default:
18172 gcc_unreachable ();
18175 gcc_assert (!code);
18176 code = 'k';
18178 else if (code == 0
18179 && TARGET_X32
18180 && disp
18181 && CONST_INT_P (disp)
18182 && INTVAL (disp) < -16*1024*1024)
18184 /* X32 runs in 64-bit mode, where displacement, DISP, in
18185 address DISP(%r64), is encoded as 32-bit immediate sign-
18186 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18187 address is %r64 + 0xffffffffbffffd00. When %r64 <
18188 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18189 which is invalid for x32. The correct address is %r64
18190 - 0x40000300 == 0xf7ffdd64. To properly encode
18191 -0x40000300(%r64) for x32, we zero-extend negative
18192 displacement by forcing addr32 prefix which truncates
18193 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18194 zero-extend all negative displacements, including -1(%rsp).
18195 However, for small negative displacements, sign-extension
18196 won't cause overflow. We only zero-extend negative
18197	     displacements if they are < -16*1024*1024, which is also the bound
18198	     used to check legitimate address displacements for PIC.  */
18199 code = 'k';
18202 /* Since the upper 32 bits of RSP are always zero for x32,
18203 we can encode %esp as %rsp to avoid 0x67 prefix if
18204 there is no index register. */
18205 if (TARGET_X32 && Pmode == SImode
18206 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18207 code = 'q';
18209 if (ASSEMBLER_DIALECT == ASM_ATT)
18211 if (disp)
18213 if (flag_pic)
18214 output_pic_addr_const (file, disp, 0);
18215 else if (GET_CODE (disp) == LABEL_REF)
18216 output_asm_label (disp);
18217 else
18218 output_addr_const (file, disp);
18221 putc ('(', file);
18222 if (base)
18223 print_reg (base, code, file);
18224 if (index)
18226 putc (',', file);
18227 print_reg (index, vsib ? 0 : code, file);
18228 if (scale != 1 || vsib)
18229 fprintf (file, ",%d", scale);
18231 putc (')', file);
18233 else
18235 rtx offset = NULL_RTX;
18237 if (disp)
18239 /* Pull out the offset of a symbol; print any symbol itself. */
18240 if (GET_CODE (disp) == CONST
18241 && GET_CODE (XEXP (disp, 0)) == PLUS
18242 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18244 offset = XEXP (XEXP (disp, 0), 1);
18245 disp = gen_rtx_CONST (VOIDmode,
18246 XEXP (XEXP (disp, 0), 0));
18249 if (flag_pic)
18250 output_pic_addr_const (file, disp, 0);
18251 else if (GET_CODE (disp) == LABEL_REF)
18252 output_asm_label (disp);
18253 else if (CONST_INT_P (disp))
18254 offset = disp;
18255 else
18256 output_addr_const (file, disp);
18259 putc ('[', file);
18260 if (base)
18262 print_reg (base, code, file);
18263 if (offset)
18265 if (INTVAL (offset) >= 0)
18266 putc ('+', file);
18267 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18270 else if (offset)
18271 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18272 else
18273 putc ('0', file);
18275 if (index)
18277 putc ('+', file);
18278 print_reg (index, vsib ? 0 : code, file);
18279 if (scale != 1 || vsib)
18280 fprintf (file, "*%d", scale);
18282 putc (']', file);
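/* As a concrete example, an address with base %rbp, index %rax, scale 4
   and displacement -8 is printed as "-8(%rbp,%rax,4)" in AT&T syntax and
   as "[rbp-8+rax*4]" in Intel syntax.  */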
18287 static void
18288 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
18290 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
18293 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
18295 static bool
18296 i386_asm_output_addr_const_extra (FILE *file, rtx x)
18298 rtx op;
18300 if (GET_CODE (x) != UNSPEC)
18301 return false;
18303 op = XVECEXP (x, 0, 0);
18304 switch (XINT (x, 1))
18306 case UNSPEC_GOTOFF:
18307 output_addr_const (file, op);
18308 fputs ("@gotoff", file);
18309 break;
18310 case UNSPEC_GOTTPOFF:
18311 output_addr_const (file, op);
18312 /* FIXME: This might be @TPOFF in Sun ld. */
18313 fputs ("@gottpoff", file);
18314 break;
18315 case UNSPEC_TPOFF:
18316 output_addr_const (file, op);
18317 fputs ("@tpoff", file);
18318 break;
18319 case UNSPEC_NTPOFF:
18320 output_addr_const (file, op);
18321 if (TARGET_64BIT)
18322 fputs ("@tpoff", file);
18323 else
18324 fputs ("@ntpoff", file);
18325 break;
18326 case UNSPEC_DTPOFF:
18327 output_addr_const (file, op);
18328 fputs ("@dtpoff", file);
18329 break;
18330 case UNSPEC_GOTNTPOFF:
18331 output_addr_const (file, op);
18332 if (TARGET_64BIT)
18333 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
18334 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
18335 else
18336 fputs ("@gotntpoff", file);
18337 break;
18338 case UNSPEC_INDNTPOFF:
18339 output_addr_const (file, op);
18340 fputs ("@indntpoff", file);
18341 break;
18342 #if TARGET_MACHO
18343 case UNSPEC_MACHOPIC_OFFSET:
18344 output_addr_const (file, op);
18345 putc ('-', file);
18346 machopic_output_function_base_name (file);
18347 break;
18348 #endif
18350 default:
18351 return false;
18354 return true;
18357 /* Split one or more double-mode RTL references into pairs of half-mode
18358 references. The RTL can be REG, offsettable MEM, integer constant, or
18359 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
18360 split and "num" is its length. lo_half and hi_half are output arrays
18361 that parallel "operands". */
18363 void
18364 split_double_mode (machine_mode mode, rtx operands[],
18365 int num, rtx lo_half[], rtx hi_half[])
18367 machine_mode half_mode;
18368 unsigned int byte;
18370 switch (mode)
18372 case E_TImode:
18373 half_mode = DImode;
18374 break;
18375 case E_DImode:
18376 half_mode = SImode;
18377 break;
18378 default:
18379 gcc_unreachable ();
18382 byte = GET_MODE_SIZE (half_mode);
18384 while (num--)
18386 rtx op = operands[num];
18388      /* simplify_subreg refuses to split volatile memory addresses,
18389         but we still have to handle them.  */
18390 if (MEM_P (op))
18392 lo_half[num] = adjust_address (op, half_mode, 0);
18393 hi_half[num] = adjust_address (op, half_mode, byte);
18395 else
18397 lo_half[num] = simplify_gen_subreg (half_mode, op,
18398 GET_MODE (op) == VOIDmode
18399 ? mode : GET_MODE (op), 0);
18400 hi_half[num] = simplify_gen_subreg (half_mode, op,
18401 GET_MODE (op) == VOIDmode
18402 ? mode : GET_MODE (op), byte);
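/* A minimal usage sketch (as the DImode post-reload splitters use it):

     rtx lo[2], hi[2];
     split_double_mode (DImode, operands, 2, lo, hi);

   afterwards lo[i] and hi[i] hold the SImode low and high halves of
   operands[i]; MEM operands are split with adjust_address rather than
   simplify_gen_subreg.  */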
18407 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
18408 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
18409 is the expression of the binary operation. The output may either be
18410 emitted here, or returned to the caller, like all output_* functions.
18412 There is no guarantee that the operands are the same mode, as they
18413 might be within FLOAT or FLOAT_EXTEND expressions. */
18415 #ifndef SYSV386_COMPAT
18416 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
18417 wants to fix the assemblers because that causes incompatibility
18418 with gcc. No-one wants to fix gcc because that causes
18419 incompatibility with assemblers... You can use the option of
18420 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
18421 #define SYSV386_COMPAT 1
18422 #endif
18424 const char *
18425 output_387_binary_op (rtx_insn *insn, rtx *operands)
18427 static char buf[40];
18428 const char *p;
18429 bool is_sse
18430 = (SSE_REG_P (operands[0])
18431 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
18433 if (is_sse)
18434 p = "%v";
18435 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18436 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18437 p = "fi";
18438 else
18439 p = "f";
18441 strcpy (buf, p);
18443 switch (GET_CODE (operands[3]))
18445 case PLUS:
18446 p = "add"; break;
18447 case MINUS:
18448 p = "sub"; break;
18449 case MULT:
18450 p = "mul"; break;
18451 case DIV:
18452 p = "div"; break;
18453 default:
18454 gcc_unreachable ();
18457 strcat (buf, p);
18459 if (is_sse)
18461 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
18462 strcat (buf, p);
18464 if (TARGET_AVX)
18465 p = "\t{%2, %1, %0|%0, %1, %2}";
18466 else
18467 p = "\t{%2, %0|%0, %2}";
18469 strcat (buf, p);
18470 return buf;
18473  /* Even if we do not want to check the inputs, this documents the input
18474     constraints, which helps in understanding the following code.  */
18475 if (flag_checking)
18477 if (STACK_REG_P (operands[0])
18478 && ((REG_P (operands[1])
18479 && REGNO (operands[0]) == REGNO (operands[1])
18480 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
18481 || (REG_P (operands[2])
18482 && REGNO (operands[0]) == REGNO (operands[2])
18483 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
18484 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
18485 ; /* ok */
18486 else
18487 gcc_unreachable ();
18490 switch (GET_CODE (operands[3]))
18492 case MULT:
18493 case PLUS:
18494 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
18495 std::swap (operands[1], operands[2]);
18497      /* We now know operands[0] == operands[1].  */
18499 if (MEM_P (operands[2]))
18501 p = "%Z2\t%2";
18502 break;
18505 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18507 if (STACK_TOP_P (operands[0]))
18508 /* How is it that we are storing to a dead operand[2]?
18509 Well, presumably operands[1] is dead too. We can't
18510 store the result to st(0) as st(0) gets popped on this
18511 instruction. Instead store to operands[2] (which I
18512 think has to be st(1)). st(1) will be popped later.
18513 gcc <= 2.8.1 didn't have this check and generated
18514 assembly code that the Unixware assembler rejected. */
18515 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18516 else
18517 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18518 break;
18521 if (STACK_TOP_P (operands[0]))
18522 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18523 else
18524 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18525 break;
18527 case MINUS:
18528 case DIV:
18529 if (MEM_P (operands[1]))
18531 p = "r%Z1\t%1";
18532 break;
18535 if (MEM_P (operands[2]))
18537 p = "%Z2\t%2";
18538 break;
18541 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18543 #if SYSV386_COMPAT
18544 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
18545 derived assemblers, confusingly reverse the direction of
18546 the operation for fsub{r} and fdiv{r} when the
18547 destination register is not st(0). The Intel assembler
18548 doesn't have this brain damage. Read !SYSV386_COMPAT to
18549 figure out what the hardware really does. */
18550 if (STACK_TOP_P (operands[0]))
18551 p = "{p\t%0, %2|rp\t%2, %0}";
18552 else
18553 p = "{rp\t%2, %0|p\t%0, %2}";
18554 #else
18555 if (STACK_TOP_P (operands[0]))
18556 /* As above for fmul/fadd, we can't store to st(0). */
18557 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18558 else
18559 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18560 #endif
18561 break;
18564 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18566 #if SYSV386_COMPAT
18567 if (STACK_TOP_P (operands[0]))
18568 p = "{rp\t%0, %1|p\t%1, %0}";
18569 else
18570 p = "{p\t%1, %0|rp\t%0, %1}";
18571 #else
18572 if (STACK_TOP_P (operands[0]))
18573 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
18574 else
18575 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
18576 #endif
18577 break;
18580 if (STACK_TOP_P (operands[0]))
18582 if (STACK_TOP_P (operands[1]))
18583 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18584 else
18585 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
18586 break;
18588 else if (STACK_TOP_P (operands[1]))
18590 #if SYSV386_COMPAT
18591 p = "{\t%1, %0|r\t%0, %1}";
18592 #else
18593 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
18594 #endif
18596 else
18598 #if SYSV386_COMPAT
18599 p = "{r\t%2, %0|\t%0, %2}";
18600 #else
18601 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18602 #endif
18604 break;
18606 default:
18607 gcc_unreachable ();
18610 strcat (buf, p);
18611 return buf;
18614 /* Return needed mode for entity in optimize_mode_switching pass. */
18616 static int
18617 ix86_dirflag_mode_needed (rtx_insn *insn)
18619 if (CALL_P (insn))
18621 if (cfun->machine->func_type == TYPE_NORMAL)
18622 return X86_DIRFLAG_ANY;
18623 else
18624 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
18625 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
18628 if (recog_memoized (insn) < 0)
18629 return X86_DIRFLAG_ANY;
18631 if (get_attr_type (insn) == TYPE_STR)
18633 /* Emit cld instruction if stringops are used in the function. */
18634 if (cfun->machine->func_type == TYPE_NORMAL)
18635 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
18636 else
18637 return X86_DIRFLAG_RESET;
18640 return X86_DIRFLAG_ANY;
18643 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP.   */
18645 static bool
18646 ix86_check_avx_upper_register (const_rtx exp)
18648 if (SUBREG_P (exp))
18649 exp = SUBREG_REG (exp);
18651 return (REG_P (exp)
18652 && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
18653 || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
18656 /* Return needed mode for entity in optimize_mode_switching pass. */
18658 static int
18659 ix86_avx_u128_mode_needed (rtx_insn *insn)
18661 if (CALL_P (insn))
18663 rtx link;
18665 /* Needed mode is set to AVX_U128_CLEAN if there are
18666 no 256bit or 512bit modes used in function arguments. */
18667 for (link = CALL_INSN_FUNCTION_USAGE (insn);
18668 link;
18669 link = XEXP (link, 1))
18671 if (GET_CODE (XEXP (link, 0)) == USE)
18673 rtx arg = XEXP (XEXP (link, 0), 0);
18675 if (ix86_check_avx_upper_register (arg))
18676 return AVX_U128_DIRTY;
18680 return AVX_U128_CLEAN;
18683   /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
18684      The hardware changes state only when a 256bit register is written to,
18685      but we need to prevent the compiler from moving the optimal insertion
18686      point above an eventual read from a 256bit or 512bit register.  */
18687 subrtx_iterator::array_type array;
18688 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
18689 if (ix86_check_avx_upper_register (*iter))
18690 return AVX_U128_DIRTY;
18692 return AVX_U128_ANY;
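/* Illustrative examples (not tied to a particular insn): a call whose
   CALL_INSN_FUNCTION_USAGE mentions a V8SFmode argument register (e.g. an
   __m256 argument) needs AVX_U128_DIRTY, so no vzeroupper is forced before
   it; a call using only scalar or 128bit arguments needs AVX_U128_CLEAN,
   so the mode-switching pass emits vzeroupper before the call whenever the
   state is currently DIRTY.  */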
18695 /* Return mode that i387 must be switched into
18696 prior to the execution of insn. */
18698 static int
18699 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18701 enum attr_i387_cw mode;
18703   /* The mode UNINITIALIZED is used to store the control word after a
18704      function call or ASM pattern.  The mode ANY specifies that the function
18705      has no requirements on the control word and makes no changes to the
18706      bits we are interested in.  */
18708 if (CALL_P (insn)
18709 || (NONJUMP_INSN_P (insn)
18710 && (asm_noperands (PATTERN (insn)) >= 0
18711 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18712 return I387_CW_UNINITIALIZED;
18714 if (recog_memoized (insn) < 0)
18715 return I387_CW_ANY;
18717 mode = get_attr_i387_cw (insn);
18719 switch (entity)
18721 case I387_TRUNC:
18722 if (mode == I387_CW_TRUNC)
18723 return mode;
18724 break;
18726 case I387_FLOOR:
18727 if (mode == I387_CW_FLOOR)
18728 return mode;
18729 break;
18731 case I387_CEIL:
18732 if (mode == I387_CW_CEIL)
18733 return mode;
18734 break;
18736 case I387_MASK_PM:
18737 if (mode == I387_CW_MASK_PM)
18738 return mode;
18739 break;
18741 default:
18742 gcc_unreachable ();
18745 return I387_CW_ANY;
18748 /* Return mode that entity must be switched into
18749 prior to the execution of insn. */
18751 static int
18752 ix86_mode_needed (int entity, rtx_insn *insn)
18754 switch (entity)
18756 case X86_DIRFLAG:
18757 return ix86_dirflag_mode_needed (insn);
18758 case AVX_U128:
18759 return ix86_avx_u128_mode_needed (insn);
18760 case I387_TRUNC:
18761 case I387_FLOOR:
18762 case I387_CEIL:
18763 case I387_MASK_PM:
18764 return ix86_i387_mode_needed (entity, insn);
18765 default:
18766 gcc_unreachable ();
18768 return 0;
18771 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
18773 static void
18774 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
18776 if (ix86_check_avx_upper_register (dest))
18778 bool *used = (bool *) data;
18779 *used = true;
18783 /* Calculate mode of upper 128bit AVX registers after the insn. */
18785 static int
18786 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18788 rtx pat = PATTERN (insn);
18790 if (vzeroupper_operation (pat, VOIDmode)
18791 || vzeroall_operation (pat, VOIDmode))
18792 return AVX_U128_CLEAN;
18794   /* We know that the state is clean after a CALL insn if no 256bit or
18795      512bit register is used for the function return value.  */
18796 if (CALL_P (insn))
18798 bool avx_upper_reg_found = false;
18799 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
18801 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18804   /* Otherwise, return the current mode.  Remember that if the insn
18805      references AVX 256bit or 512bit registers, the mode was already
18806      changed to DIRTY by MODE_NEEDED.  */
18807 return mode;
18810 /* Return the mode that an insn results in. */
18812 static int
18813 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18815 switch (entity)
18817 case X86_DIRFLAG:
18818 return mode;
18819 case AVX_U128:
18820 return ix86_avx_u128_mode_after (mode, insn);
18821 case I387_TRUNC:
18822 case I387_FLOOR:
18823 case I387_CEIL:
18824 case I387_MASK_PM:
18825 return mode;
18826 default:
18827 gcc_unreachable ();
18831 static int
18832 ix86_dirflag_mode_entry (void)
18834 /* For TARGET_CLD or in the interrupt handler we can't assume
18835 direction flag state at function entry. */
18836 if (TARGET_CLD
18837 || cfun->machine->func_type != TYPE_NORMAL)
18838 return X86_DIRFLAG_ANY;
18840 return X86_DIRFLAG_RESET;
18843 static int
18844 ix86_avx_u128_mode_entry (void)
18846 tree arg;
18848 /* Entry mode is set to AVX_U128_DIRTY if there are
18849 256bit or 512bit modes used in function arguments. */
18850 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18851 arg = TREE_CHAIN (arg))
18853 rtx incoming = DECL_INCOMING_RTL (arg);
18855 if (incoming && ix86_check_avx_upper_register (incoming))
18856 return AVX_U128_DIRTY;
18859 return AVX_U128_CLEAN;
18862 /* Return a mode that ENTITY is assumed to be
18863 switched to at function entry. */
18865 static int
18866 ix86_mode_entry (int entity)
18868 switch (entity)
18870 case X86_DIRFLAG:
18871 return ix86_dirflag_mode_entry ();
18872 case AVX_U128:
18873 return ix86_avx_u128_mode_entry ();
18874 case I387_TRUNC:
18875 case I387_FLOOR:
18876 case I387_CEIL:
18877 case I387_MASK_PM:
18878 return I387_CW_ANY;
18879 default:
18880 gcc_unreachable ();
18884 static int
18885 ix86_avx_u128_mode_exit (void)
18887 rtx reg = crtl->return_rtx;
18889   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
18890      or 512bit modes used in the function return register.  */
18891 if (reg && ix86_check_avx_upper_register (reg))
18892 return AVX_U128_DIRTY;
18894 return AVX_U128_CLEAN;
18897 /* Return a mode that ENTITY is assumed to be
18898 switched to at function exit. */
18900 static int
18901 ix86_mode_exit (int entity)
18903 switch (entity)
18905 case X86_DIRFLAG:
18906 return X86_DIRFLAG_ANY;
18907 case AVX_U128:
18908 return ix86_avx_u128_mode_exit ();
18909 case I387_TRUNC:
18910 case I387_FLOOR:
18911 case I387_CEIL:
18912 case I387_MASK_PM:
18913 return I387_CW_ANY;
18914 default:
18915 gcc_unreachable ();
18919 static int
18920 ix86_mode_priority (int, int n)
18922 return n;
18925 /* Output code to initialize control word copies used by trunc?f?i and
18926    rounding patterns.  The current control word is saved in SLOT_CW_STORED,
18927    and a copy modified for MODE is stored in the slot selected by MODE.  */
18929 static void
18930 emit_i387_cw_initialization (int mode)
18932 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18933 rtx new_mode;
18935 enum ix86_stack_slot slot;
18937 rtx reg = gen_reg_rtx (HImode);
18939 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18940 emit_move_insn (reg, copy_rtx (stored_mode));
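  /* For reference, the bits modified below live in the x87 control word:
     the rounding-control field RC is bits 11:10 (0x0c00 = toward zero,
     0x0400 = toward -inf, 0x0800 = toward +inf, 0x0000 = to nearest) and
     the precision-exception mask PM is bit 5 (0x0020).  */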
18942 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18943 || optimize_insn_for_size_p ())
18945 switch (mode)
18947 case I387_CW_TRUNC:
18948 /* round toward zero (truncate) */
18949 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18950 slot = SLOT_CW_TRUNC;
18951 break;
18953 case I387_CW_FLOOR:
18954 /* round down toward -oo */
18955 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18956 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18957 slot = SLOT_CW_FLOOR;
18958 break;
18960 case I387_CW_CEIL:
18961 /* round up toward +oo */
18962 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18963 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18964 slot = SLOT_CW_CEIL;
18965 break;
18967 case I387_CW_MASK_PM:
18968 /* mask precision exception for nearbyint() */
18969 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18970 slot = SLOT_CW_MASK_PM;
18971 break;
18973 default:
18974 gcc_unreachable ();
18977 else
18979 switch (mode)
18981 case I387_CW_TRUNC:
18982 /* round toward zero (truncate) */
18983 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18984 slot = SLOT_CW_TRUNC;
18985 break;
18987 case I387_CW_FLOOR:
18988 /* round down toward -oo */
18989 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18990 slot = SLOT_CW_FLOOR;
18991 break;
18993 case I387_CW_CEIL:
18994 /* round up toward +oo */
18995 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18996 slot = SLOT_CW_CEIL;
18997 break;
18999 case I387_CW_MASK_PM:
19000 /* mask precision exception for nearbyint() */
19001 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19002 slot = SLOT_CW_MASK_PM;
19003 break;
19005 default:
19006 gcc_unreachable ();
19010 gcc_assert (slot < MAX_386_STACK_LOCALS);
19012 new_mode = assign_386_stack_local (HImode, slot);
19013 emit_move_insn (new_mode, reg);
19016 /* Emit vzeroupper. */
19018 void
19019 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
19021 int i;
19023 /* Cancel automatic vzeroupper insertion if there are
19024 live call-saved SSE registers at the insertion point. */
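  /* (vzeroupper discards the upper halves of the AVX registers, so inserting
     it while a call-saved SSE register still holds a live 256bit or 512bit
     value would clobber that value.)  */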
19026 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19027 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19028 return;
19030 if (TARGET_64BIT)
19031 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19032 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19033 return;
19035 emit_insn (gen_avx_vzeroupper ());
19040 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
19041 is the set of hard registers live at the point where the insn(s)
19042 are to be inserted. */
19044 static void
19045 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19046 HARD_REG_SET regs_live)
19048 switch (entity)
19050 case X86_DIRFLAG:
19051 if (mode == X86_DIRFLAG_RESET)
19052 emit_insn (gen_cld ());
19053 break;
19054 case AVX_U128:
19055 if (mode == AVX_U128_CLEAN)
19056 ix86_avx_emit_vzeroupper (regs_live);
19057 break;
19058 case I387_TRUNC:
19059 case I387_FLOOR:
19060 case I387_CEIL:
19061 case I387_MASK_PM:
19062 if (mode != I387_CW_ANY
19063 && mode != I387_CW_UNINITIALIZED)
19064 emit_i387_cw_initialization (mode);
19065 break;
19066 default:
19067 gcc_unreachable ();
19071 /* Output code for INSN to convert a float to a signed int. OPERANDS
19072 are the insn operands. The output may be [HSD]Imode and the input
19073 operand may be [SDX]Fmode. */
19075 const char *
19076 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19078 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19079 bool dimode_p = GET_MODE (operands[0]) == DImode;
19080 int round_mode = get_attr_i387_cw (insn);
19082 static char buf[40];
19083 const char *p;
19085 /* Jump through a hoop or two for DImode, since the hardware has no
19086 non-popping instruction. We used to do this a different way, but
19087 that was somewhat fragile and broke with post-reload splitters. */
19088 if ((dimode_p || fisttp) && !stack_top_dies)
19089 output_asm_insn ("fld\t%y1", operands);
19091 gcc_assert (STACK_TOP_P (operands[1]));
19092 gcc_assert (MEM_P (operands[0]));
19093 gcc_assert (GET_MODE (operands[1]) != TFmode);
19095 if (fisttp)
19096 return "fisttp%Z0\t%0";
19098 strcpy (buf, "fist");
19100 if (round_mode != I387_CW_ANY)
19101 output_asm_insn ("fldcw\t%3", operands);
19103 p = "p%Z0\t%0";
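  /* Skipping the first character of "p%Z0\t%0" drops the 'p' suffix: we emit
     a popping "fistp" when the value dies or when the destination is DImode
     (only the popping form exists there; the "fld" above duplicated the value
     in that case), and a non-popping "fist" otherwise.  */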
19104 strcat (buf, p + !(stack_top_dies || dimode_p));
19106 output_asm_insn (buf, operands);
19108 if (round_mode != I387_CW_ANY)
19109 output_asm_insn ("fldcw\t%2", operands);
19111 return "";
19114 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19115 have the values zero or one, indicates the ffreep insn's operand
19116 from the OPERANDS array. */
19118 static const char *
19119 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19121 if (TARGET_USE_FFREEP)
19122 #ifdef HAVE_AS_IX86_FFREEP
19123 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19124 #else
19126 static char retval[32];
19127 int regno = REGNO (operands[opno]);
19129 gcc_assert (STACK_REGNO_P (regno));
19131 regno -= FIRST_STACK_REG;
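      /* ffreep %st(i) encodes as the two bytes 0xdf 0xc0+i.  Emitting
	 ASM_SHORT "0xc<i>df" produces exactly those bytes on this
	 little-endian target, which is how we express the insn when the
	 assembler does not know the ffreep mnemonic.  */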
19133 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19134 return retval;
19136 #endif
19138 return opno ? "fstp\t%y1" : "fstp\t%y0";
19142 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
19143 should be used. UNORDERED_P is true when fucom should be used. */
19145 const char *
19146 output_fp_compare (rtx_insn *insn, rtx *operands,
19147 bool eflags_p, bool unordered_p)
19149 rtx *xops = eflags_p ? &operands[0] : &operands[1];
19150 bool stack_top_dies;
19152 static char buf[40];
19153 const char *p;
19155 gcc_assert (STACK_TOP_P (xops[0]));
19157 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19159 if (eflags_p)
19161 p = unordered_p ? "fucomi" : "fcomi";
19162 strcpy (buf, p);
19164 p = "p\t{%y1, %0|%0, %y1}";
19165 strcat (buf, p + !stack_top_dies);
19167 return buf;
19170 if (STACK_REG_P (xops[1])
19171 && stack_top_dies
19172 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19174 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19176      /* If both the top of the 387 stack and the other operand (also a
19177	  stack register) die, then this must be a `fcompp' float
19178	  compare.  */
19179 p = unordered_p ? "fucompp" : "fcompp";
19180 strcpy (buf, p);
19182 else if (const0_operand (xops[1], VOIDmode))
19184 gcc_assert (!unordered_p);
19185 strcpy (buf, "ftst");
19187 else
19189 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19191 gcc_assert (!unordered_p);
19192 p = "ficom";
19194 else
19195 p = unordered_p ? "fucom" : "fcom";
19197 strcpy (buf, p);
19199 p = "p%Z2\t%y2";
19200 strcat (buf, p + !stack_top_dies);
19203 output_asm_insn (buf, operands);
19204 return "fnstsw\t%0";
19207 void
19208 ix86_output_addr_vec_elt (FILE *file, int value)
19210 const char *directive = ASM_LONG;
19212 #ifdef ASM_QUAD
19213 if (TARGET_LP64)
19214 directive = ASM_QUAD;
19215 #else
19216 gcc_assert (!TARGET_64BIT);
19217 #endif
19219 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19222 void
19223 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19225 const char *directive = ASM_LONG;
19227 #ifdef ASM_QUAD
19228 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19229 directive = ASM_QUAD;
19230 #else
19231 gcc_assert (!TARGET_64BIT);
19232 #endif
19233 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19234 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19235 fprintf (file, "%s%s%d-%s%d\n",
19236 directive, LPREFIX, value, LPREFIX, rel);
19237 else if (HAVE_AS_GOTOFF_IN_DATA)
19238 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19239 #if TARGET_MACHO
19240 else if (TARGET_MACHO)
19242 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19243 machopic_output_function_base_name (file);
19244 putc ('\n', file);
19246 #endif
19247 else
19248 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19249 GOT_SYMBOL_NAME, LPREFIX, value);
19252 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19253 for the target. */
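/* For instance (illustrative AT&T syntax), clearing a 32-bit register
   yields either
       xorl %eax, %eax
   (the common case, with a clobber of the flags register), or
       movl $0, %eax
   when TARGET_USE_MOV0 holds and we are not optimizing for size.  */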
19255 void
19256 ix86_expand_clear (rtx dest)
19258 rtx tmp;
19260 /* We play register width games, which are only valid after reload. */
19261 gcc_assert (reload_completed);
19263 /* Avoid HImode and its attendant prefix byte. */
19264 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
19265 dest = gen_rtx_REG (SImode, REGNO (dest));
19266 tmp = gen_rtx_SET (dest, const0_rtx);
19268 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
19270 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19271 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
19274 emit_insn (tmp);
19277 void
19278 ix86_expand_move (machine_mode mode, rtx operands[])
19280 rtx op0, op1;
19281 rtx tmp, addend = NULL_RTX;
19282 enum tls_model model;
19284 op0 = operands[0];
19285 op1 = operands[1];
19287 switch (GET_CODE (op1))
19289 case CONST:
19290 tmp = XEXP (op1, 0);
19292 if (GET_CODE (tmp) != PLUS
19293 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19294 break;
19296 op1 = XEXP (tmp, 0);
19297 addend = XEXP (tmp, 1);
19298 /* FALLTHRU */
19300 case SYMBOL_REF:
19301 model = SYMBOL_REF_TLS_MODEL (op1);
19303 if (model)
19304 op1 = legitimize_tls_address (op1, model, true);
19305 else if (ix86_force_load_from_GOT_p (op1))
19307 /* Load the external function address via GOT slot to avoid PLT. */
19308 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
19309 (TARGET_64BIT
19310 ? UNSPEC_GOTPCREL
19311 : UNSPEC_GOT));
19312 op1 = gen_rtx_CONST (Pmode, op1);
19313 op1 = gen_const_mem (Pmode, op1);
19314 set_mem_alias_set (op1, ix86_GOT_alias_set ());
19316 else
19318 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
19319 if (tmp)
19321 op1 = tmp;
19322 if (!addend)
19323 break;
19325 else
19327 op1 = operands[1];
19328 break;
19332 if (addend)
19334 op1 = force_operand (op1, NULL_RTX);
19335 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
19336 op0, 1, OPTAB_DIRECT);
19338 else
19339 op1 = force_operand (op1, op0);
19341 if (op1 == op0)
19342 return;
19344 op1 = convert_to_mode (mode, op1, 1);
19346 default:
19347 break;
19350 if ((flag_pic || MACHOPIC_INDIRECT)
19351 && symbolic_operand (op1, mode))
19353 if (TARGET_MACHO && !TARGET_64BIT)
19355 #if TARGET_MACHO
19356 /* dynamic-no-pic */
19357 if (MACHOPIC_INDIRECT)
19359 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
19360 ? op0 : gen_reg_rtx (Pmode);
19361 op1 = machopic_indirect_data_reference (op1, temp);
19362 if (MACHOPIC_PURE)
19363 op1 = machopic_legitimize_pic_address (op1, mode,
19364 temp == op1 ? 0 : temp);
19366 if (op0 != op1 && GET_CODE (op0) != MEM)
19368 rtx insn = gen_rtx_SET (op0, op1);
19369 emit_insn (insn);
19370 return;
19372 if (GET_CODE (op0) == MEM)
19373 op1 = force_reg (Pmode, op1);
19374 else
19376 rtx temp = op0;
19377 if (GET_CODE (temp) != REG)
19378 temp = gen_reg_rtx (Pmode);
19379 temp = legitimize_pic_address (op1, temp);
19380 if (temp == op0)
19381 return;
19382 op1 = temp;
19384 /* dynamic-no-pic */
19385 #endif
19387 else
19389 if (MEM_P (op0))
19390 op1 = force_reg (mode, op1);
19391 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
19393 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
19394 op1 = legitimize_pic_address (op1, reg);
19395 if (op0 == op1)
19396 return;
19397 op1 = convert_to_mode (mode, op1, 1);
19401 else
19403 if (MEM_P (op0)
19404 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
19405 || !push_operand (op0, mode))
19406 && MEM_P (op1))
19407 op1 = force_reg (mode, op1);
19409 if (push_operand (op0, mode)
19410 && ! general_no_elim_operand (op1, mode))
19411 op1 = copy_to_mode_reg (mode, op1);
19413 /* Force large constants in 64bit compilation into register
19414 to get them CSEed. */
19415 if (can_create_pseudo_p ()
19416 && (mode == DImode) && TARGET_64BIT
19417 && immediate_operand (op1, mode)
19418 && !x86_64_zext_immediate_operand (op1, VOIDmode)
19419 && !register_operand (op0, mode)
19420 && optimize)
19421 op1 = copy_to_mode_reg (mode, op1);
19423 if (can_create_pseudo_p ()
19424 && CONST_DOUBLE_P (op1))
19426 /* If we are loading a floating point constant to a register,
19427 force the value to memory now, since we'll get better code
19428 out the back end. */
19430 op1 = validize_mem (force_const_mem (mode, op1));
19431 if (!register_operand (op0, mode))
19433 rtx temp = gen_reg_rtx (mode);
19434 emit_insn (gen_rtx_SET (temp, op1));
19435 emit_move_insn (op0, temp);
19436 return;
19441 emit_insn (gen_rtx_SET (op0, op1));
19444 void
19445 ix86_expand_vector_move (machine_mode mode, rtx operands[])
19447 rtx op0 = operands[0], op1 = operands[1];
19448 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
19449 psABI, since its biggest alignment is only 4 bytes. */
19450 unsigned int align = (TARGET_IAMCU
19451 ? GET_MODE_BITSIZE (mode)
19452 : GET_MODE_ALIGNMENT (mode));
19454 if (push_operand (op0, VOIDmode))
19455 op0 = emit_move_resolve_push (mode, op0);
19457 /* Force constants other than zero into memory. We do not know how
19458 the instructions used to build constants modify the upper 64 bits
19459 of the register; once we have that information we may be able
19460 to handle some of them more efficiently. */
19461 if (can_create_pseudo_p ()
19462 && (CONSTANT_P (op1)
19463 || (SUBREG_P (op1)
19464 && CONSTANT_P (SUBREG_REG (op1))))
19465 && ((register_operand (op0, mode)
19466 && !standard_sse_constant_p (op1, mode))
19467 /* ix86_expand_vector_move_misalign() does not like constants. */
19468 || (SSE_REG_MODE_P (mode)
19469 && MEM_P (op0)
19470 && MEM_ALIGN (op0) < align)))
19472 if (SUBREG_P (op1))
19474 machine_mode imode = GET_MODE (SUBREG_REG (op1));
19475 rtx r = force_const_mem (imode, SUBREG_REG (op1));
19476 if (r)
19477 r = validize_mem (r);
19478 else
19479 r = force_reg (imode, SUBREG_REG (op1));
19480 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
19482 else
19483 op1 = validize_mem (force_const_mem (mode, op1));
19486 /* We need to check memory alignment for SSE mode since attribute
19487 can make operands unaligned. */
19488 if (can_create_pseudo_p ()
19489 && SSE_REG_MODE_P (mode)
19490 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
19491 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
19493 rtx tmp[2];
19495 /* ix86_expand_vector_move_misalign() does not like both
19496 arguments in memory. */
19497 if (!register_operand (op0, mode)
19498 && !register_operand (op1, mode))
19499 op1 = force_reg (mode, op1);
19501 tmp[0] = op0; tmp[1] = op1;
19502 ix86_expand_vector_move_misalign (mode, tmp);
19503 return;
19506 /* Make operand1 a register if it isn't already. */
19507 if (can_create_pseudo_p ()
19508 && !register_operand (op0, mode)
19509 && !register_operand (op1, mode))
19511 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
19512 return;
19515 emit_insn (gen_rtx_SET (op0, op1));
19518 /* Split 32-byte AVX unaligned load and store if needed. */
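/* E.g. an unaligned 32-byte load is emitted as a 16-byte load of the
   low half followed by a concat with the high half (typically matched
   as vinsertf128), and an unaligned 32-byte store becomes two
   vextractf128 stores, when the corresponding tuning flags request the
   split.  */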
19520 static void
19521 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
19523 rtx m;
19524 rtx (*extract) (rtx, rtx, rtx);
19525 machine_mode mode;
19527 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
19528 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
19530 emit_insn (gen_rtx_SET (op0, op1));
19531 return;
19534 rtx orig_op0 = NULL_RTX;
19535 mode = GET_MODE (op0);
19536 switch (GET_MODE_CLASS (mode))
19538 case MODE_VECTOR_INT:
19539 case MODE_INT:
19540 if (mode != V32QImode)
19542 if (!MEM_P (op0))
19544 orig_op0 = op0;
19545 op0 = gen_reg_rtx (V32QImode);
19547 else
19548 op0 = gen_lowpart (V32QImode, op0);
19549 op1 = gen_lowpart (V32QImode, op1);
19550 mode = V32QImode;
19552 break;
19553 case MODE_VECTOR_FLOAT:
19554 break;
19555 default:
19556 gcc_unreachable ();
19559 switch (mode)
19561 default:
19562 gcc_unreachable ();
19563 case E_V32QImode:
19564 extract = gen_avx_vextractf128v32qi;
19565 mode = V16QImode;
19566 break;
19567 case E_V8SFmode:
19568 extract = gen_avx_vextractf128v8sf;
19569 mode = V4SFmode;
19570 break;
19571 case E_V4DFmode:
19572 extract = gen_avx_vextractf128v4df;
19573 mode = V2DFmode;
19574 break;
19577 if (MEM_P (op1))
19579 rtx r = gen_reg_rtx (mode);
19580 m = adjust_address (op1, mode, 0);
19581 emit_move_insn (r, m);
19582 m = adjust_address (op1, mode, 16);
19583 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
19584 emit_move_insn (op0, r);
19586 else if (MEM_P (op0))
19588 m = adjust_address (op0, mode, 0);
19589 emit_insn (extract (m, op1, const0_rtx));
19590 m = adjust_address (op0, mode, 16);
19591 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
19593 else
19594 gcc_unreachable ();
19596 if (orig_op0)
19597 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19600 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
19601 straight to ix86_expand_vector_move. */
19602 /* Code generation for scalar reg-reg moves of single and double precision data:
19603 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
19604 movaps reg, reg
19605 else
19606 movss reg, reg
19607 if (x86_sse_partial_reg_dependency == true)
19608 movapd reg, reg
19609 else
19610 movsd reg, reg
19612 Code generation for scalar loads of double precision data:
19613 if (x86_sse_split_regs == true)
19614 movlpd mem, reg (gas syntax)
19615 else
19616 movsd mem, reg
19618 Code generation for unaligned packed loads of single precision data
19619 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
19620 if (x86_sse_unaligned_move_optimal)
19621 movups mem, reg
19623 if (x86_sse_partial_reg_dependency == true)
19625 xorps reg, reg
19626 movlps mem, reg
19627 movhps mem+8, reg
19629 else
19631 movlps mem, reg
19632 movhps mem+8, reg
19635 Code generation for unaligned packed loads of double precision data
19636 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
19637 if (x86_sse_unaligned_move_optimal)
19638 movupd mem, reg
19640 if (x86_sse_split_regs == true)
19642 movlpd mem, reg
19643 movhpd mem+8, reg
19645 else
19647 movsd mem, reg
19648 movhpd mem+8, reg
19652 void
19653 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
19655 rtx op0, op1, m;
19657 op0 = operands[0];
19658 op1 = operands[1];
19660 /* Use unaligned load/store for AVX512 or when optimizing for size. */
19661 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
19663 emit_insn (gen_rtx_SET (op0, op1));
19664 return;
19667 if (TARGET_AVX)
19669 if (GET_MODE_SIZE (mode) == 32)
19670 ix86_avx256_split_vector_move_misalign (op0, op1);
19671 else
19672 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
19673 emit_insn (gen_rtx_SET (op0, op1));
19674 return;
19677 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19678 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
19680 emit_insn (gen_rtx_SET (op0, op1));
19681 return;
19684 /* ??? If we have typed data, then it would appear that using
19685 movdqu is the only way to get unaligned data loaded with
19686 integer type. */
19687 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19689 emit_insn (gen_rtx_SET (op0, op1));
19690 return;
19693 if (MEM_P (op1))
19695 if (TARGET_SSE2 && mode == V2DFmode)
19697 rtx zero;
19699 /* When SSE registers are split into halves, we can avoid
19700 writing to the top half twice. */
19701 if (TARGET_SSE_SPLIT_REGS)
19703 emit_clobber (op0);
19704 zero = op0;
19706 else
19708 /* ??? Not sure about the best option for the Intel chips.
19709 The following would seem to satisfy; the register is
19710 entirely cleared, breaking the dependency chain. We
19711 then store to the upper half, with a dependency depth
19712 of one. A rumor has it that Intel recommends two movsd
19713 followed by an unpacklpd, but this is unconfirmed. And
19714 given that the dependency depth of the unpacklpd would
19715 still be one, I'm not sure why this would be better. */
19716 zero = CONST0_RTX (V2DFmode);
19719 m = adjust_address (op1, DFmode, 0);
19720 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19721 m = adjust_address (op1, DFmode, 8);
19722 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19724 else
19726 rtx t;
19728 if (mode != V4SFmode)
19729 t = gen_reg_rtx (V4SFmode);
19730 else
19731 t = op0;
19733 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19734 emit_move_insn (t, CONST0_RTX (V4SFmode));
19735 else
19736 emit_clobber (t);
19738 m = adjust_address (op1, V2SFmode, 0);
19739 emit_insn (gen_sse_loadlps (t, t, m));
19740 m = adjust_address (op1, V2SFmode, 8);
19741 emit_insn (gen_sse_loadhps (t, t, m));
19742 if (mode != V4SFmode)
19743 emit_move_insn (op0, gen_lowpart (mode, t));
19746 else if (MEM_P (op0))
19748 if (TARGET_SSE2 && mode == V2DFmode)
19750 m = adjust_address (op0, DFmode, 0);
19751 emit_insn (gen_sse2_storelpd (m, op1));
19752 m = adjust_address (op0, DFmode, 8);
19753 emit_insn (gen_sse2_storehpd (m, op1));
19755 else
19757 if (mode != V4SFmode)
19758 op1 = gen_lowpart (V4SFmode, op1);
19760 m = adjust_address (op0, V2SFmode, 0);
19761 emit_insn (gen_sse_storelps (m, op1));
19762 m = adjust_address (op0, V2SFmode, 8);
19763 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
19766 else
19767 gcc_unreachable ();
19770 /* Helper function of ix86_fixup_binary_operands to canonicalize
19771 operand order. Returns true if the operands should be swapped. */
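/* For instance, for a commutative PLUS with dst = %eax, src1 a memory
   reference and src2 = %eax, we return true: swapping makes src1 match
   the destination and pushes the memory reference into src2.  */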
19773 static bool
19774 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19775 rtx operands[])
19777 rtx dst = operands[0];
19778 rtx src1 = operands[1];
19779 rtx src2 = operands[2];
19781 /* If the operation is not commutative, we can't do anything. */
19782 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
19783 && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
19784 return false;
19786 /* Highest priority is that src1 should match dst. */
19787 if (rtx_equal_p (dst, src1))
19788 return false;
19789 if (rtx_equal_p (dst, src2))
19790 return true;
19792 /* Next highest priority is that immediate constants come second. */
19793 if (immediate_operand (src2, mode))
19794 return false;
19795 if (immediate_operand (src1, mode))
19796 return true;
19798 /* Lowest priority is that memory references should come second. */
19799 if (MEM_P (src2))
19800 return false;
19801 if (MEM_P (src1))
19802 return true;
19804 return false;
19808 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19809 destination to use for the operation. If different from the true
19810 destination in operands[0], a copy operation will be required. */
19813 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19814 rtx operands[])
19816 rtx dst = operands[0];
19817 rtx src1 = operands[1];
19818 rtx src2 = operands[2];
19820 /* Canonicalize operand order. */
19821 if (ix86_swap_binary_operands_p (code, mode, operands))
19823 /* It is invalid to swap operands of different modes. */
19824 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19826 std::swap (src1, src2);
19829 /* Both source operands cannot be in memory. */
19830 if (MEM_P (src1) && MEM_P (src2))
19832 /* Optimization: Only read from memory once. */
19833 if (rtx_equal_p (src1, src2))
19835 src2 = force_reg (mode, src2);
19836 src1 = src2;
19838 else if (rtx_equal_p (dst, src1))
19839 src2 = force_reg (mode, src2);
19840 else
19841 src1 = force_reg (mode, src1);
19844 /* If the destination is memory, and we do not have matching source
19845 operands, do things in registers. */
19846 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19847 dst = gen_reg_rtx (mode);
19849 /* Source 1 cannot be a constant. */
19850 if (CONSTANT_P (src1))
19851 src1 = force_reg (mode, src1);
19853 /* Source 1 cannot be a non-matching memory. */
19854 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19855 src1 = force_reg (mode, src1);
19857 /* Improve address combine. */
19858 if (code == PLUS
19859 && GET_MODE_CLASS (mode) == MODE_INT
19860 && MEM_P (src2))
19861 src2 = force_reg (mode, src2);
19863 operands[1] = src1;
19864 operands[2] = src2;
19865 return dst;
19868 /* Similarly, but assume that the destination has already been
19869 set up properly. */
19871 void
19872 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19873 machine_mode mode, rtx operands[])
19875 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19876 gcc_assert (dst == operands[0]);
19879 /* Attempt to expand a binary operator. Make the expansion closer to the
19880 actual machine than just general_operand, which will allow 3 separate
19881 memory references (one output, two input) in a single insn. */
19883 void
19884 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19885 rtx operands[])
19887 rtx src1, src2, dst, op, clob;
19889 dst = ix86_fixup_binary_operands (code, mode, operands);
19890 src1 = operands[1];
19891 src2 = operands[2];
19893 /* Emit the instruction. */
19895 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19897 if (reload_completed
19898 && code == PLUS
19899 && !rtx_equal_p (dst, src1))
19901 /* This is going to be an LEA; avoid splitting it later. */
19902 emit_insn (op);
19904 else
19906 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19907 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19910 /* Fix up the destination if needed. */
19911 if (dst != operands[0])
19912 emit_move_insn (operands[0], dst);
19915 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19916 the given OPERANDS. */
19918 void
19919 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19920 rtx operands[])
19922 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19923 if (SUBREG_P (operands[1]))
19925 op1 = operands[1];
19926 op2 = operands[2];
19928 else if (SUBREG_P (operands[2]))
19930 op1 = operands[2];
19931 op2 = operands[1];
19933 /* Optimize (__m128i) d | (__m128i) e and similar code
19934 when d and e are float vectors into float vector logical
19935 insn. In C/C++ without using intrinsics there is no other way
19936 to express vector logical operation on float vectors than
19937 to cast them temporarily to integer vectors. */
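/* Illustrative C source for the pattern being matched:
       __m128 a, b;
       __m128i x = (__m128i) a & (__m128i) b;
   here the AND can be emitted as andps on the original float vectors
   instead of a pand on the casted integer vectors.  */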
19938 if (op1
19939 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19940 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19941 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19942 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19943 && SUBREG_BYTE (op1) == 0
19944 && (GET_CODE (op2) == CONST_VECTOR
19945 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19946 && SUBREG_BYTE (op2) == 0))
19947 && can_create_pseudo_p ())
19949 rtx dst;
19950 switch (GET_MODE (SUBREG_REG (op1)))
19952 case E_V4SFmode:
19953 case E_V8SFmode:
19954 case E_V16SFmode:
19955 case E_V2DFmode:
19956 case E_V4DFmode:
19957 case E_V8DFmode:
19958 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19959 if (GET_CODE (op2) == CONST_VECTOR)
19961 op2 = gen_lowpart (GET_MODE (dst), op2);
19962 op2 = force_reg (GET_MODE (dst), op2);
19964 else
19966 op1 = operands[1];
19967 op2 = SUBREG_REG (operands[2]);
19968 if (!vector_operand (op2, GET_MODE (dst)))
19969 op2 = force_reg (GET_MODE (dst), op2);
19971 op1 = SUBREG_REG (op1);
19972 if (!vector_operand (op1, GET_MODE (dst)))
19973 op1 = force_reg (GET_MODE (dst), op1);
19974 emit_insn (gen_rtx_SET (dst,
19975 gen_rtx_fmt_ee (code, GET_MODE (dst),
19976 op1, op2)));
19977 emit_move_insn (operands[0], gen_lowpart (mode, dst));
19978 return;
19979 default:
19980 break;
19983 if (!vector_operand (operands[1], mode))
19984 operands[1] = force_reg (mode, operands[1]);
19985 if (!vector_operand (operands[2], mode))
19986 operands[2] = force_reg (mode, operands[2]);
19987 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19988 emit_insn (gen_rtx_SET (operands[0],
19989 gen_rtx_fmt_ee (code, mode, operands[1],
19990 operands[2])));
19993 /* Return TRUE or FALSE depending on whether the binary operator meets the
19994 appropriate constraints. */
19996 bool
19997 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19998 rtx operands[3])
20000 rtx dst = operands[0];
20001 rtx src1 = operands[1];
20002 rtx src2 = operands[2];
20004 /* Both source operands cannot be in memory. */
20005 if (MEM_P (src1) && MEM_P (src2))
20006 return false;
20008 /* Canonicalize operand order for commutative operators. */
20009 if (ix86_swap_binary_operands_p (code, mode, operands))
20010 std::swap (src1, src2);
20012 /* If the destination is memory, we must have a matching source operand. */
20013 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20014 return false;
20016 /* Source 1 cannot be a constant. */
20017 if (CONSTANT_P (src1))
20018 return false;
20020 /* Source 1 cannot be a non-matching memory. */
20021 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20022 /* Support "andhi/andsi/anddi" as a zero-extending move. */
20023 return (code == AND
20024 && (mode == HImode
20025 || mode == SImode
20026 || (TARGET_64BIT && mode == DImode))
20027 && satisfies_constraint_L (src2));
20029 return true;
20032 /* Attempt to expand a unary operator. Make the expansion closer to the
20033 actual machine than just general_operand, which will allow 2 separate
20034 memory references (one output, one input) in a single insn. */
20036 void
20037 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20038 rtx operands[])
20040 bool matching_memory = false;
20041 rtx src, dst, op, clob;
20043 dst = operands[0];
20044 src = operands[1];
20046 /* If the destination is memory, and we do not have matching source
20047 operands, do things in registers. */
20048 if (MEM_P (dst))
20050 if (rtx_equal_p (dst, src))
20051 matching_memory = true;
20052 else
20053 dst = gen_reg_rtx (mode);
20056 /* When source operand is memory, destination must match. */
20057 if (MEM_P (src) && !matching_memory)
20058 src = force_reg (mode, src);
20060 /* Emit the instruction. */
20062 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20064 if (code == NOT)
20065 emit_insn (op);
20066 else
20068 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20069 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20072 /* Fix up the destination if needed. */
20073 if (dst != operands[0])
20074 emit_move_insn (operands[0], dst);
20077 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
20078 divisor are within the range [0-255]. */
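/* A rough sketch of the emitted control flow (illustrative register
   names, unsigned SImode case):
       movl   dividend, %tmp
       orl    divisor, %tmp
       testl  $-0x100, %tmp
       je     .Lqimode          ; both values fit in 8 bits
       <full 32-bit divmod>
       jmp    .Ldone
   .Lqimode:
       <8-bit unsigned divide>  ; quotient in AL, remainder in AH
   .Ldone:  */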
20080 void
20081 ix86_split_idivmod (machine_mode mode, rtx operands[],
20082 bool signed_p)
20084 rtx_code_label *end_label, *qimode_label;
20085 rtx div, mod;
20086 rtx_insn *insn;
20087 rtx scratch, tmp0, tmp1, tmp2;
20088 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20089 rtx (*gen_zero_extend) (rtx, rtx);
20090 rtx (*gen_test_ccno_1) (rtx, rtx);
20092 switch (mode)
20094 case E_SImode:
20095 if (GET_MODE (operands[0]) == SImode)
20097 if (GET_MODE (operands[1]) == SImode)
20098 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20099 else
20100 gen_divmod4_1
20101 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20102 gen_zero_extend = gen_zero_extendqisi2;
20104 else
20106 gen_divmod4_1
20107 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20108 gen_zero_extend = gen_zero_extendqidi2;
20110 gen_test_ccno_1 = gen_testsi_ccno_1;
20111 break;
20112 case E_DImode:
20113 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20114 gen_test_ccno_1 = gen_testdi_ccno_1;
20115 gen_zero_extend = gen_zero_extendqidi2;
20116 break;
20117 default:
20118 gcc_unreachable ();
20121 end_label = gen_label_rtx ();
20122 qimode_label = gen_label_rtx ();
20124 scratch = gen_reg_rtx (mode);
20126 /* Use 8bit unsigned divmod if dividend and divisor are within
20127 the range [0-255]. */
20128 emit_move_insn (scratch, operands[2]);
20129 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20130 scratch, 1, OPTAB_DIRECT);
20131 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20132 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20133 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20134 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20135 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20136 pc_rtx);
20137 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20138 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20139 JUMP_LABEL (insn) = qimode_label;
20141 /* Generate original signed/unsigned divmod. */
20142 div = gen_divmod4_1 (operands[0], operands[1],
20143 operands[2], operands[3]);
20144 emit_insn (div);
20146 /* Branch to the end. */
20147 emit_jump_insn (gen_jump (end_label));
20148 emit_barrier ();
20150 /* Generate 8bit unsigned divide. */
20151 emit_label (qimode_label);
20152 /* Don't use operands[0] for result of 8bit divide since not all
20153 registers support QImode ZERO_EXTRACT. */
20154 tmp0 = lowpart_subreg (HImode, scratch, mode);
20155 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20156 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20157 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20159 if (signed_p)
20161 div = gen_rtx_DIV (mode, operands[2], operands[3]);
20162 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20164 else
20166 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20167 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20169 if (mode == SImode)
20171 if (GET_MODE (operands[0]) != SImode)
20172 div = gen_rtx_ZERO_EXTEND (DImode, div);
20173 if (GET_MODE (operands[1]) != SImode)
20174 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20177 /* Extract remainder from AH. */
20178 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20179 tmp0, GEN_INT (8), GEN_INT (8));
20180 if (REG_P (operands[1]))
20181 insn = emit_move_insn (operands[1], tmp1);
20182 else
20184 /* Need a new scratch register since the old one holds the
20185 result of the 8bit divide. */
20186 scratch = gen_reg_rtx (GET_MODE (operands[1]));
20187 emit_move_insn (scratch, tmp1);
20188 insn = emit_move_insn (operands[1], scratch);
20190 set_unique_reg_note (insn, REG_EQUAL, mod);
20192 /* Zero extend quotient from AL. */
20193 tmp1 = gen_lowpart (QImode, tmp0);
20194 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20195 set_unique_reg_note (insn, REG_EQUAL, div);
20197 emit_label (end_label);
20200 #define LEA_MAX_STALL (3)
20201 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20203 /* Increase given DISTANCE in half-cycles according to
20204 dependencies between PREV and NEXT instructions.
20205 Add 1 half-cycle if there is no dependency and
20206 go to the next cycle if there is some dependency. */
20208 static unsigned int
20209 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20211 df_ref def, use;
20213 if (!prev || !next)
20214 return distance + (distance & 1) + 2;
20216 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20217 return distance + 1;
20219 FOR_EACH_INSN_USE (use, next)
20220 FOR_EACH_INSN_DEF (def, prev)
20221 if (!DF_REF_IS_ARTIFICIAL (def)
20222 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20223 return distance + (distance & 1) + 2;
20225 return distance + 1;
20228 /* Function checks if instruction INSN defines register number
20229 REGNO1 or REGNO2. */
20231 static bool
20232 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20233 rtx_insn *insn)
20235 df_ref def;
20237 FOR_EACH_INSN_DEF (def, insn)
20238 if (DF_REF_REG_DEF_P (def)
20239 && !DF_REF_IS_ARTIFICIAL (def)
20240 && (regno1 == DF_REF_REGNO (def)
20241 || regno2 == DF_REF_REGNO (def)))
20242 return true;
20244 return false;
20247 /* Function checks if instruction INSN uses register number
20248 REGNO as part of an address expression. */
20250 static bool
20251 insn_uses_reg_mem (unsigned int regno, rtx insn)
20253 df_ref use;
20255 FOR_EACH_INSN_USE (use, insn)
20256 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20257 return true;
20259 return false;
20262 /* Search backward for non-agu definition of register number REGNO1
20263 or register number REGNO2 in basic block starting from instruction
20264 START up to head of basic block or instruction INSN.
20266 Set *FOUND to true if a definition was found and to false
20267 otherwise.
20269 The distance in half-cycles between START and the found instruction
20270 (or the head of the BB) is added to DISTANCE and returned. */
20272 static int
20273 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20274 rtx_insn *insn, int distance,
20275 rtx_insn *start, bool *found)
20277 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20278 rtx_insn *prev = start;
20279 rtx_insn *next = NULL;
20281 *found = false;
20283 while (prev
20284 && prev != insn
20285 && distance < LEA_SEARCH_THRESHOLD)
20287 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20289 distance = increase_distance (prev, next, distance);
20290 if (insn_defines_reg (regno1, regno2, prev))
20292 if (recog_memoized (prev) < 0
20293 || get_attr_type (prev) != TYPE_LEA)
20295 *found = true;
20296 return distance;
20300 next = prev;
20302 if (prev == BB_HEAD (bb))
20303 break;
20305 prev = PREV_INSN (prev);
20308 return distance;
20311 /* Search backward for non-agu definition of register number REGNO1
20312 or register number REGNO2 in INSN's basic block until
20313 1. Pass LEA_SEARCH_THRESHOLD instructions, or
20314 2. Reach neighbor BBs boundary, or
20315 3. Reach agu definition.
20316 Returns the distance between the non-agu definition point and INSN.
20317 If no definition point, returns -1. */
20319 static int
20320 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
20321 rtx_insn *insn)
20323 basic_block bb = BLOCK_FOR_INSN (insn);
20324 int distance = 0;
20325 bool found = false;
20327 if (insn != BB_HEAD (bb))
20328 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
20329 distance, PREV_INSN (insn),
20330 &found);
20332 if (!found && distance < LEA_SEARCH_THRESHOLD)
20334 edge e;
20335 edge_iterator ei;
20336 bool simple_loop = false;
20338 FOR_EACH_EDGE (e, ei, bb->preds)
20339 if (e->src == bb)
20341 simple_loop = true;
20342 break;
20345 if (simple_loop)
20346 distance = distance_non_agu_define_in_bb (regno1, regno2,
20347 insn, distance,
20348 BB_END (bb), &found);
20349 else
20351 int shortest_dist = -1;
20352 bool found_in_bb = false;
20354 FOR_EACH_EDGE (e, ei, bb->preds)
20356 int bb_dist
20357 = distance_non_agu_define_in_bb (regno1, regno2,
20358 insn, distance,
20359 BB_END (e->src),
20360 &found_in_bb);
20361 if (found_in_bb)
20363 if (shortest_dist < 0)
20364 shortest_dist = bb_dist;
20365 else if (bb_dist > 0)
20366 shortest_dist = MIN (bb_dist, shortest_dist);
20368 found = true;
20372 distance = shortest_dist;
20376 /* get_attr_type may modify recog data. We want to make sure
20377 that recog data is valid for instruction INSN, on which
20378 distance_non_agu_define is called. INSN is unchanged here. */
20379 extract_insn_cached (insn);
20381 if (!found)
20382 return -1;
20384 return distance >> 1;
20387 /* Return the distance in half-cycles between INSN and the next
20388 insn that uses register number REGNO in a memory address, added
20389 to DISTANCE.  Return -1 if REGNO is set.
20391 Set *FOUND to true if a register usage was found and to
20392 false otherwise.
20393 Set *REDEFINED to true if a register redefinition was
20394 found and to false otherwise. */
20396 static int
20397 distance_agu_use_in_bb (unsigned int regno,
20398 rtx_insn *insn, int distance, rtx_insn *start,
20399 bool *found, bool *redefined)
20401 basic_block bb = NULL;
20402 rtx_insn *next = start;
20403 rtx_insn *prev = NULL;
20405 *found = false;
20406 *redefined = false;
20408 if (start != NULL_RTX)
20410 bb = BLOCK_FOR_INSN (start);
20411 if (start != BB_HEAD (bb))
20412 /* If insn and start belong to the same bb, set prev to insn,
20413 so the call to increase_distance will increase the distance
20414 between insns by 1. */
20415 prev = insn;
20418 while (next
20419 && next != insn
20420 && distance < LEA_SEARCH_THRESHOLD)
20422 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
20424 distance = increase_distance(prev, next, distance);
20425 if (insn_uses_reg_mem (regno, next))
20427 /* Return DISTANCE if OP0 is used in memory
20428 address in NEXT. */
20429 *found = true;
20430 return distance;
20433 if (insn_defines_reg (regno, INVALID_REGNUM, next))
20435 /* Return -1 if OP0 is set in NEXT. */
20436 *redefined = true;
20437 return -1;
20440 prev = next;
20443 if (next == BB_END (bb))
20444 break;
20446 next = NEXT_INSN (next);
20449 return distance;
20452 /* Return the distance between INSN and the next insn that uses
20453 register number REGNO0 in a memory address.  Return -1 if no such
20454 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
20456 static int
20457 distance_agu_use (unsigned int regno0, rtx_insn *insn)
20459 basic_block bb = BLOCK_FOR_INSN (insn);
20460 int distance = 0;
20461 bool found = false;
20462 bool redefined = false;
20464 if (insn != BB_END (bb))
20465 distance = distance_agu_use_in_bb (regno0, insn, distance,
20466 NEXT_INSN (insn),
20467 &found, &redefined);
20469 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
20471 edge e;
20472 edge_iterator ei;
20473 bool simple_loop = false;
20475 FOR_EACH_EDGE (e, ei, bb->succs)
20476 if (e->dest == bb)
20478 simple_loop = true;
20479 break;
20482 if (simple_loop)
20483 distance = distance_agu_use_in_bb (regno0, insn,
20484 distance, BB_HEAD (bb),
20485 &found, &redefined);
20486 else
20488 int shortest_dist = -1;
20489 bool found_in_bb = false;
20490 bool redefined_in_bb = false;
20492 FOR_EACH_EDGE (e, ei, bb->succs)
20494 int bb_dist
20495 = distance_agu_use_in_bb (regno0, insn,
20496 distance, BB_HEAD (e->dest),
20497 &found_in_bb, &redefined_in_bb);
20498 if (found_in_bb)
20500 if (shortest_dist < 0)
20501 shortest_dist = bb_dist;
20502 else if (bb_dist > 0)
20503 shortest_dist = MIN (bb_dist, shortest_dist);
20505 found = true;
20509 distance = shortest_dist;
20513 if (!found || redefined)
20514 return -1;
20516 return distance >> 1;
20519 /* Define this macro to tune LEA priority vs ADD; it takes effect when
20520 there is a choice between LEA and ADD:
20521 Negative value: ADD is preferred over LEA
20522 Zero: Neutral
20523 Positive value: LEA is preferred over ADD.  */
20524 #define IX86_LEA_PRIORITY 0
20526 /* Return true if usage of lea INSN has performance advantage
20527 over a sequence of instructions. Instructions sequence has
20528 SPLIT_COST cycles higher latency than lea latency. */
20530 static bool
20531 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
20532 unsigned int regno2, int split_cost, bool has_scale)
20534 int dist_define, dist_use;
20536 /* For Silvermont, if a 2-source or 3-source LEA is used for a
20537 non-destructive destination, or for its ability to use a
20538 SCALE, the use of LEA is justified. */
20539 if (TARGET_SILVERMONT || TARGET_INTEL)
20541 if (has_scale)
20542 return true;
20543 if (split_cost < 1)
20544 return false;
20545 if (regno0 == regno1 || regno0 == regno2)
20546 return false;
20547 return true;
20550 dist_define = distance_non_agu_define (regno1, regno2, insn);
20551 dist_use = distance_agu_use (regno0, insn);
20553 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
20555 /* If there is no non-AGU operand definition, no AGU
20556 operand usage and the split cost is 0, then both the lea
20557 and non-lea variants have the same priority.  Currently
20558 we prefer lea for 64-bit code and non-lea for 32-bit
20559 code. */
20560 if (dist_use < 0 && split_cost == 0)
20561 return TARGET_64BIT || IX86_LEA_PRIORITY;
20562 else
20563 return true;
20566 /* With a longer definition distance, lea is preferable.
20567 Adjust the distance to account for the splitting cost and
20568 lea priority. */
20569 dist_define += split_cost + IX86_LEA_PRIORITY;
20571 /* If there is no use in a memory address, just check
20572 that the split cost exceeds the AGU stall. */
20573 if (dist_use < 0)
20574 return dist_define > LEA_MAX_STALL;
20576 /* If this insn has both backward non-agu dependence and forward
20577 agu dependence, the one with the shorter distance takes effect. */
20578 return dist_define >= dist_use;
20581 /* Return true if it is legal to clobber flags by INSN and
20582 false otherwise. */
20584 static bool
20585 ix86_ok_to_clobber_flags (rtx_insn *insn)
20587 basic_block bb = BLOCK_FOR_INSN (insn);
20588 df_ref use;
20589 bitmap live;
20591 while (insn)
20593 if (NONDEBUG_INSN_P (insn))
20595 FOR_EACH_INSN_USE (use, insn)
20596 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20597 return false;
20599 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20600 return true;
20603 if (insn == BB_END (bb))
20604 break;
20606 insn = NEXT_INSN (insn);
20609 live = df_get_live_out(bb);
20610 return !REGNO_REG_SET_P (live, FLAGS_REG);
20613 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20614 move and add to avoid AGU stalls. */
20616 bool
20617 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20619 unsigned int regno0, regno1, regno2;
20621 /* Check if we need to optimize. */
20622 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20623 return false;
20625 /* Check it is correct to split here. */
20626 if (!ix86_ok_to_clobber_flags(insn))
20627 return false;
20629 regno0 = true_regnum (operands[0]);
20630 regno1 = true_regnum (operands[1]);
20631 regno2 = true_regnum (operands[2]);
20633 /* We need to split only adds with non destructive
20634 destination operand. */
20635 if (regno0 == regno1 || regno0 == regno2)
20636 return false;
20637 else
20638 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20641 /* Return true if we should emit lea instruction instead of mov
20642 instruction. */
20644 bool
20645 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20647 unsigned int regno0, regno1;
20649 /* Check if we need to optimize. */
20650 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20651 return false;
20653 /* Use lea for reg to reg moves only. */
20654 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20655 return false;
20657 regno0 = true_regnum (operands[0]);
20658 regno1 = true_regnum (operands[1]);
20660 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20663 /* Return true if we need to split lea into a sequence of
20664 instructions to avoid AGU stalls. */
20666 bool
20667 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20669 unsigned int regno0, regno1, regno2;
20670 int split_cost;
20671 struct ix86_address parts;
20672 int ok;
20674 /* Check we need to optimize. */
20675 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20676 return false;
20678 /* The "at least two components" test below might not catch simple
20679 move or zero extension insns if parts.base is non-NULL and parts.disp
20680 is const0_rtx as the only components in the address, e.g. if the
20681 register is %rbp or %r13. As this test is much cheaper and moves or
20682 zero extensions are the common case, do this check first. */
20683 if (REG_P (operands[1])
20684 || (SImode_address_operand (operands[1], VOIDmode)
20685 && REG_P (XEXP (operands[1], 0))))
20686 return false;
20688 /* Check if it is OK to split here. */
20689 if (!ix86_ok_to_clobber_flags (insn))
20690 return false;
20692 ok = ix86_decompose_address (operands[1], &parts);
20693 gcc_assert (ok);
20695 /* There should be at least two components in the address. */
20696 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20697 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20698 return false;
20700 /* We should not split into add if a non-legitimate PIC
20701 operand is used as the displacement. */
20702 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20703 return false;
20705 regno0 = true_regnum (operands[0]);
20706 regno1 = INVALID_REGNUM;
20707 regno2 = INVALID_REGNUM;
20709 if (parts.base)
20710 regno1 = true_regnum (parts.base);
20711 if (parts.index)
20712 regno2 = true_regnum (parts.index);
20714 split_cost = 0;
20716 /* Compute how many cycles we will add to the execution time
20717 if we split the lea into a sequence of instructions. */
20718 if (parts.base || parts.index)
20720 /* Have to use a mov instruction if the non-destructive
20721 destination form is used. */
20722 if (regno1 != regno0 && regno2 != regno0)
20723 split_cost += 1;
20725 /* Have to add index to base if both exist. */
20726 if (parts.base && parts.index)
20727 split_cost += 1;
20729 /* Have to use shift and adds if scale is 2 or greater. */
20730 if (parts.scale > 1)
20732 if (regno0 != regno1)
20733 split_cost += 1;
20734 else if (regno2 == regno0)
20735 split_cost += 4;
20736 else
20737 split_cost += parts.scale;
20740 /* Have to use add instruction with immediate if
20741 disp is non zero. */
20742 if (parts.disp && parts.disp != const0_rtx)
20743 split_cost += 1;
20745 /* Subtract the price of lea. */
20746 split_cost -= 1;
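/* Illustrative example: for lea 0x4(%rbx,%rcx,2), %rax the cost is
   1 (mov into the destination) + 1 (add to combine base and index)
   + 1 (shift for the scale) + 1 (add of the displacement)
   - 1 (the lea being replaced) = 3.  */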
20749 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20750 parts.scale > 1);
20753 /* Emit x86 binary operator CODE in mode MODE, where the first operand
20754 matches the destination.  The RTX includes a clobber of FLAGS_REG. */
20756 static void
20757 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20758 rtx dst, rtx src)
20760 rtx op, clob;
20762 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20763 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20765 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20768 /* Return true if regno1 def is nearest to the insn. */
20770 static bool
20771 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20773 rtx_insn *prev = insn;
20774 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20776 if (insn == start)
20777 return false;
20778 while (prev && prev != start)
20780 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20782 prev = PREV_INSN (prev);
20783 continue;
20785 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20786 return true;
20787 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20788 return false;
20789 prev = PREV_INSN (prev);
20792 /* None of the regs is defined in the bb. */
20793 return false;
20796 /* Split lea instructions into a sequence of instructions
20797 which are executed on ALU to avoid AGU stalls.
20798 It is assumed that it is allowed to clobber flags register
20799 at lea position. */
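/* For instance (illustrative AT&T syntax), lea 0x4(%rbx,%rcx,2), %rax
   may be split into roughly
       movq %rcx, %rax
       salq $1, %rax
       addq %rbx, %rax
       addq $4, %rax  */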
20801 void
20802 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20804 unsigned int regno0, regno1, regno2;
20805 struct ix86_address parts;
20806 rtx target, tmp;
20807 int ok, adds;
20809 ok = ix86_decompose_address (operands[1], &parts);
20810 gcc_assert (ok);
20812 target = gen_lowpart (mode, operands[0]);
20814 regno0 = true_regnum (target);
20815 regno1 = INVALID_REGNUM;
20816 regno2 = INVALID_REGNUM;
20818 if (parts.base)
20820 parts.base = gen_lowpart (mode, parts.base);
20821 regno1 = true_regnum (parts.base);
20824 if (parts.index)
20826 parts.index = gen_lowpart (mode, parts.index);
20827 regno2 = true_regnum (parts.index);
20830 if (parts.disp)
20831 parts.disp = gen_lowpart (mode, parts.disp);
20833 if (parts.scale > 1)
20835 /* Case r1 = r1 + ... */
20836 if (regno1 == regno0)
20838 /* In the case r1 = r1 + C * r2 we would have to use
20839 multiplication, which is very expensive.
20840 Assume the cost model is wrong if we end up with
20841 such a case here. */
20842 gcc_assert (regno2 != regno0);
20844 for (adds = parts.scale; adds > 0; adds--)
20845 ix86_emit_binop (PLUS, mode, target, parts.index);
20847 else
20849 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20850 if (regno0 != regno2)
20851 emit_insn (gen_rtx_SET (target, parts.index));
20853 /* Use shift for scaling. */
20854 ix86_emit_binop (ASHIFT, mode, target,
20855 GEN_INT (exact_log2 (parts.scale)));
20857 if (parts.base)
20858 ix86_emit_binop (PLUS, mode, target, parts.base);
20860 if (parts.disp && parts.disp != const0_rtx)
20861 ix86_emit_binop (PLUS, mode, target, parts.disp);
20864 else if (!parts.base && !parts.index)
20866 gcc_assert(parts.disp);
20867 emit_insn (gen_rtx_SET (target, parts.disp));
20869 else
20871 if (!parts.base)
20873 if (regno0 != regno2)
20874 emit_insn (gen_rtx_SET (target, parts.index));
20876 else if (!parts.index)
20878 if (regno0 != regno1)
20879 emit_insn (gen_rtx_SET (target, parts.base));
20881 else
20883 if (regno0 == regno1)
20884 tmp = parts.index;
20885 else if (regno0 == regno2)
20886 tmp = parts.base;
20887 else
20889 rtx tmp1;
20891 /* Find better operand for SET instruction, depending
20892 on which definition is farther from the insn. */
20893 if (find_nearest_reg_def (insn, regno1, regno2))
20894 tmp = parts.index, tmp1 = parts.base;
20895 else
20896 tmp = parts.base, tmp1 = parts.index;
20898 emit_insn (gen_rtx_SET (target, tmp));
20900 if (parts.disp && parts.disp != const0_rtx)
20901 ix86_emit_binop (PLUS, mode, target, parts.disp);
20903 ix86_emit_binop (PLUS, mode, target, tmp1);
20904 return;
20907 ix86_emit_binop (PLUS, mode, target, tmp);
20910 if (parts.disp && parts.disp != const0_rtx)
20911 ix86_emit_binop (PLUS, mode, target, parts.disp);
20915 /* Return true if it is ok to optimize an ADD operation to a LEA
20916 operation to avoid flag register consumption.  For most processors,
20917 ADD is faster than LEA.  For processors like BONNELL, if the
20918 destination register of the LEA holds an actual address which will be
20919 used soon, LEA is better, otherwise ADD is better. */
20921 bool
20922 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20924 unsigned int regno0 = true_regnum (operands[0]);
20925 unsigned int regno1 = true_regnum (operands[1]);
20926 unsigned int regno2 = true_regnum (operands[2]);
20928 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20929 if (regno0 != regno1 && regno0 != regno2)
20930 return true;
20932 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20933 return false;
20935 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20938 /* Return true if destination reg of SET_BODY is shift count of
20939 USE_BODY. */
20941 static bool
20942 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20944 rtx set_dest;
20945 rtx shift_rtx;
20946 int i;
20948 /* Retrieve destination of SET_BODY. */
20949 switch (GET_CODE (set_body))
20951 case SET:
20952 set_dest = SET_DEST (set_body);
20953 if (!set_dest || !REG_P (set_dest))
20954 return false;
20955 break;
20956 case PARALLEL:
20957 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20958 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20959 use_body))
20960 return true;
20961 /* FALLTHROUGH */
20962 default:
20963 return false;
20966 /* Retrieve shift count of USE_BODY. */
20967 switch (GET_CODE (use_body))
20969 case SET:
20970 shift_rtx = XEXP (use_body, 1);
20971 break;
20972 case PARALLEL:
20973 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20974 if (ix86_dep_by_shift_count_body (set_body,
20975 XVECEXP (use_body, 0, i)))
20976 return true;
20977 /* FALLTHROUGH */
20978 default:
20979 return false;
20982 if (shift_rtx
20983 && (GET_CODE (shift_rtx) == ASHIFT
20984 || GET_CODE (shift_rtx) == LSHIFTRT
20985 || GET_CODE (shift_rtx) == ASHIFTRT
20986 || GET_CODE (shift_rtx) == ROTATE
20987 || GET_CODE (shift_rtx) == ROTATERT))
20989 rtx shift_count = XEXP (shift_rtx, 1);
20991 /* Return true if shift count is dest of SET_BODY. */
20992 if (REG_P (shift_count))
20994 /* Add check since it can be invoked before register
20995 allocation in pre-reload schedule. */
20996 if (reload_completed
20997 && true_regnum (set_dest) == true_regnum (shift_count))
20998 return true;
20999 else if (REGNO (set_dest) == REGNO (shift_count))
21000 return true;
21004 return false;
21007 /* Return true if destination reg of SET_INSN is shift count of
21008 USE_INSN. */
21010 bool
21011 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21013 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21014 PATTERN (use_insn));
21017 /* Return TRUE or FALSE depending on whether the unary operator meets the
21018 appropriate constraints. */
21020 bool
21021 ix86_unary_operator_ok (enum rtx_code,
21022 machine_mode,
21023 rtx operands[2])
21025 /* If one of operands is memory, source and destination must match. */
21026 if ((MEM_P (operands[0])
21027 || MEM_P (operands[1]))
21028 && ! rtx_equal_p (operands[0], operands[1]))
21029 return false;
21030 return true;
21033 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21034 are ok, keeping in mind the possible movddup alternative. */
21036 bool
21037 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21039 if (MEM_P (operands[0]))
21040 return rtx_equal_p (operands[0], operands[1 + high]);
21041 if (MEM_P (operands[1]) && MEM_P (operands[2]))
21042 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21043 return true;
21046 /* Post-reload splitter for converting an SF or DFmode value in an
21047 SSE register into an unsigned SImode. */
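/* The idea, shown for a single lane v in [0, 2**32):
       large         = (v >= 0x1p31) ? all-ones : 0    ; compare mask
       zero_or_two31 = large ? 0x1p31 : 0.0
       value         = v - zero_or_two31               ; now below 0x1p31
       result        = (int) value ^ (large ? 0x80000000 : 0)
   e.g. v = 3000000000.0 gives value = 852516352.0 and
   852516352 ^ 0x80000000 = 3000000000.  */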
21049 void
21050 ix86_split_convert_uns_si_sse (rtx operands[])
21052 machine_mode vecmode;
21053 rtx value, large, zero_or_two31, input, two31, x;
21055 large = operands[1];
21056 zero_or_two31 = operands[2];
21057 input = operands[3];
21058 two31 = operands[4];
21059 vecmode = GET_MODE (large);
21060 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21062 /* Load up the value into the low element. We must ensure that the other
21063 elements are valid floats -- zero is the easiest such value. */
21064 if (MEM_P (input))
21066 if (vecmode == V4SFmode)
21067 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21068 else
21069 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21071 else
21073 input = gen_rtx_REG (vecmode, REGNO (input));
21074 emit_move_insn (value, CONST0_RTX (vecmode));
21075 if (vecmode == V4SFmode)
21076 emit_insn (gen_sse_movss (value, value, input));
21077 else
21078 emit_insn (gen_sse2_movsd (value, value, input));
21081 emit_move_insn (large, two31);
21082 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21084 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21085 emit_insn (gen_rtx_SET (large, x));
21087 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21088 emit_insn (gen_rtx_SET (zero_or_two31, x));
21090 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21091 emit_insn (gen_rtx_SET (value, x));
21093 large = gen_rtx_REG (V4SImode, REGNO (large));
21094 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21096 x = gen_rtx_REG (V4SImode, REGNO (value));
21097 if (vecmode == V4SFmode)
21098 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21099 else
21100 emit_insn (gen_sse2_cvttpd2dq (x, value));
21101 value = x;
21103 emit_insn (gen_xorv4si3 (value, value, large));
21106 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21107 Expects the 64-bit DImode to be supplied in a pair of integral
21108 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21109 -mfpmath=sse, !optimize_size only. */
21111 void
21112 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21114 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21115 rtx int_xmm, fp_xmm;
21116 rtx biases, exponents;
21117 rtx x;
21119 int_xmm = gen_reg_rtx (V4SImode);
21120 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21121 emit_insn (gen_movdi_to_sse (int_xmm, input));
21122 else if (TARGET_SSE_SPLIT_REGS)
21124 emit_clobber (int_xmm);
21125 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21127 else
21129 x = gen_reg_rtx (V2DImode);
21130 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21131 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21134 x = gen_rtx_CONST_VECTOR (V4SImode,
21135 gen_rtvec (4, GEN_INT (0x43300000UL),
21136 GEN_INT (0x45300000UL),
21137 const0_rtx, const0_rtx));
21138 exponents = validize_mem (force_const_mem (V4SImode, x));
21140 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21141 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21143 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21144 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21145 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21146 (0x1.0p84 + double(fp_value_hi_xmm)).
21147 Note these exponents differ by 32. */
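/* Worked example: for the input 0x0000000300000005 (hi = 3, lo = 5)
   the two juxtaposed doubles are 0x1.0p52 + 5 and 0x1.0p84 + 3 * 0x1.0p32;
   after the bias subtraction below and the final add of the two halves
   we get 3 * 2**32 + 5, the original value as a DFmode number.  */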
21149 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21151 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21152 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21153 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21154 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21155 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21156 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21157 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21158 biases = validize_mem (force_const_mem (V2DFmode, biases));
21159 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21161 /* Add the upper and lower DFmode values together. */
21162 if (TARGET_SSE3)
21163 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21164 else
21166 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21167 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21168 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21171 ix86_expand_vector_extract (false, target, fp_xmm, 0);
21174 /* Not used, but eases macroization of patterns. */
21175 void
21176 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21178 gcc_unreachable ();
21181 /* Convert an unsigned SImode value into a DFmode. Only currently used
21182 for SSE, but applicable anywhere. */
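/* The wrapping add of -0x80000000 maps an unsigned value u to the
   signed value u - 2**31, which floatsidf converts exactly; adding
   0x1.0p31 back recovers u.  E.g. u = 0x80000005: the add yields 5,
   floatsidf gives 5.0, and 5.0 + 2147483648.0 = 2147483653.0.  */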
21184 void
21185 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21187 REAL_VALUE_TYPE TWO31r;
21188 rtx x, fp;
21190 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21191 NULL, 1, OPTAB_DIRECT);
21193 fp = gen_reg_rtx (DFmode);
21194 emit_insn (gen_floatsidf2 (fp, x));
21196 real_ldexp (&TWO31r, &dconst1, 31);
21197 x = const_double_from_real_value (TWO31r, DFmode);
21199 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21200 if (x != target)
21201 emit_move_insn (target, x);
21204 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21205 32-bit mode; otherwise we have a direct convert instruction. */
21207 void
21208 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21210 REAL_VALUE_TYPE TWO32r;
21211 rtx fp_lo, fp_hi, x;
21213 fp_lo = gen_reg_rtx (DFmode);
21214 fp_hi = gen_reg_rtx (DFmode);
21216 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21218 real_ldexp (&TWO32r, &dconst1, 32);
21219 x = const_double_from_real_value (TWO32r, DFmode);
21220 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21222 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21224 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21225 0, OPTAB_DIRECT);
21226 if (x != target)
21227 emit_move_insn (target, x);
21230 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21231 For x86_32, -mfpmath=sse, !optimize_size only. */
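/* The input is split into 16-bit halves, each of which converts to
   SFmode exactly, and recombined as hi * 0x1.0p16 + lo.  E.g. for
   0x12345678: hi = 0x1234 = 4660, lo = 0x5678 = 22136, and
   4660 * 65536.0 + 22136.0 = 305419896.0 (rounded to SFmode precision
   in the final operations).  */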
21232 void
21233 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21235 REAL_VALUE_TYPE ONE16r;
21236 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21238 real_ldexp (&ONE16r, &dconst1, 16);
21239 x = const_double_from_real_value (ONE16r, SFmode);
21240 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
21241 NULL, 0, OPTAB_DIRECT);
21242 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
21243 NULL, 0, OPTAB_DIRECT);
21244 fp_hi = gen_reg_rtx (SFmode);
21245 fp_lo = gen_reg_rtx (SFmode);
21246 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21247 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21248 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21249 0, OPTAB_DIRECT);
21250 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21251 0, OPTAB_DIRECT);
21252 if (!rtx_equal_p (target, fp_hi))
21253 emit_move_insn (target, fp_hi);
21256 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21257 a vector of unsigned ints VAL to vector of floats TARGET. */
21259 void
21260 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
21262 rtx tmp[8];
21263 REAL_VALUE_TYPE TWO16r;
21264 machine_mode intmode = GET_MODE (val);
21265 machine_mode fltmode = GET_MODE (target);
21266 rtx (*cvt) (rtx, rtx);
21268 if (intmode == V4SImode)
21269 cvt = gen_floatv4siv4sf2;
21270 else
21271 cvt = gen_floatv8siv8sf2;
21272 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21273 tmp[0] = force_reg (intmode, tmp[0]);
21274 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21275 OPTAB_DIRECT);
21276 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21277 NULL_RTX, 1, OPTAB_DIRECT);
21278 tmp[3] = gen_reg_rtx (fltmode);
21279 emit_insn (cvt (tmp[3], tmp[1]));
21280 tmp[4] = gen_reg_rtx (fltmode);
21281 emit_insn (cvt (tmp[4], tmp[2]));
21282 real_ldexp (&TWO16r, &dconst1, 16);
21283 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21284 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21285 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21286 OPTAB_DIRECT);
21287 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21288 OPTAB_DIRECT);
21289 if (tmp[7] != target)
21290 emit_move_insn (target, tmp[7]);
21293 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21294 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21295 This is done by doing just signed conversion if < 0x1p31, and otherwise by
21296 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
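/* A worked example of the adjustment (illustrative only): for a lane value
   of 3000000000.0, which is >= 0x1p31, the code subtracts 0x1p31 to get
   852516352.0; the signed fix_trunc then yields 0x32D05E00, and xoring the
   0x80000000 mask from *XORP back in gives 0xB2D05E00 = 3000000000.
   Lanes below 0x1p31 are left unchanged and get a zero xor mask. */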
21299 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21301 REAL_VALUE_TYPE TWO31r;
21302 rtx two31r, tmp[4];
21303 machine_mode mode = GET_MODE (val);
21304 machine_mode scalarmode = GET_MODE_INNER (mode);
21305 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
21306 rtx (*cmp) (rtx, rtx, rtx, rtx);
21307 int i;
21309 for (i = 0; i < 3; i++)
21310 tmp[i] = gen_reg_rtx (mode);
21311 real_ldexp (&TWO31r, &dconst1, 31);
21312 two31r = const_double_from_real_value (TWO31r, scalarmode);
21313 two31r = ix86_build_const_vector (mode, 1, two31r);
21314 two31r = force_reg (mode, two31r);
21315 switch (mode)
21317 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
21318 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
21319 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
21320 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
21321 default: gcc_unreachable ();
21323 tmp[3] = gen_rtx_LE (mode, two31r, val);
21324 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
21325 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
21326 0, OPTAB_DIRECT);
21327 if (intmode == V4SImode || TARGET_AVX2)
21328 *xorp = expand_simple_binop (intmode, ASHIFT,
21329 gen_lowpart (intmode, tmp[0]),
21330 GEN_INT (31), NULL_RTX, 0,
21331 OPTAB_DIRECT);
21332 else
21334 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
21335 two31 = ix86_build_const_vector (intmode, 1, two31);
21336 *xorp = expand_simple_binop (intmode, AND,
21337 gen_lowpart (intmode, tmp[0]),
21338 two31, NULL_RTX, 0,
21339 OPTAB_DIRECT);
21341 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
21342 0, OPTAB_DIRECT);
21345 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
21346 then replicate the value for all elements of the vector
21347 register. */
21350 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
21352 int i, n_elt;
21353 rtvec v;
21354 machine_mode scalar_mode;
21356 switch (mode)
21358 case E_V64QImode:
21359 case E_V32QImode:
21360 case E_V16QImode:
21361 case E_V32HImode:
21362 case E_V16HImode:
21363 case E_V8HImode:
21364 case E_V16SImode:
21365 case E_V8SImode:
21366 case E_V4SImode:
21367 case E_V8DImode:
21368 case E_V4DImode:
21369 case E_V2DImode:
21370 gcc_assert (vect);
21371 /* FALLTHRU */
21372 case E_V16SFmode:
21373 case E_V8SFmode:
21374 case E_V4SFmode:
21375 case E_V8DFmode:
21376 case E_V4DFmode:
21377 case E_V2DFmode:
21378 n_elt = GET_MODE_NUNITS (mode);
21379 v = rtvec_alloc (n_elt);
21380 scalar_mode = GET_MODE_INNER (mode);
21382 RTVEC_ELT (v, 0) = value;
21384 for (i = 1; i < n_elt; ++i)
21385 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
21387 return gen_rtx_CONST_VECTOR (mode, v);
21389 default:
21390 gcc_unreachable ();
21394 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
21395 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
21396 for an SSE register. If VECT is true, then replicate the mask for
21397 all elements of the vector register. If INVERT is true, then create
21398 a mask excluding the sign bit. */
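/* For instance, for V2DFmode this builds the vector constant
   { 0x8000000000000000, 0x8000000000000000 } reinterpreted as doubles
   (two copies of -0.0), or its complement when INVERT; these are the
   masks that the abs, neg and copysign expansions below AND/XOR with. */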
21401 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
21403 machine_mode vec_mode, imode;
21404 wide_int w;
21405 rtx mask, v;
21407 switch (mode)
21409 case E_V16SImode:
21410 case E_V16SFmode:
21411 case E_V8SImode:
21412 case E_V4SImode:
21413 case E_V8SFmode:
21414 case E_V4SFmode:
21415 vec_mode = mode;
21416 imode = SImode;
21417 break;
21419 case E_V8DImode:
21420 case E_V4DImode:
21421 case E_V2DImode:
21422 case E_V8DFmode:
21423 case E_V4DFmode:
21424 case E_V2DFmode:
21425 vec_mode = mode;
21426 imode = DImode;
21427 break;
21429 case E_TImode:
21430 case E_TFmode:
21431 vec_mode = VOIDmode;
21432 imode = TImode;
21433 break;
21435 default:
21436 gcc_unreachable ();
21439 machine_mode inner_mode = GET_MODE_INNER (mode);
21440 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
21441 GET_MODE_BITSIZE (inner_mode));
21442 if (invert)
21443 w = wi::bit_not (w);
21445 /* Force this value into the low part of a fp vector constant. */
21446 mask = immed_wide_int_const (w, imode);
21447 mask = gen_lowpart (inner_mode, mask);
21449 if (vec_mode == VOIDmode)
21450 return force_reg (inner_mode, mask);
21452 v = ix86_build_const_vector (vec_mode, vect, mask);
21453 return force_reg (vec_mode, v);
21456 /* Generate code for floating point ABS or NEG. */
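/* With SSE this reduces to bit operations on the value (DFmode example):
   NEG is val ^ 0x8000000000000000, flipping the sign bit, and ABS is
   val & 0x7FFFFFFFFFFFFFFF, clearing it; the mask built below supplies
   the appropriate constant. */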
21458 void
21459 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
21460 rtx operands[])
21462 rtx mask, set, dst, src;
21463 bool use_sse = false;
21464 bool vector_mode = VECTOR_MODE_P (mode);
21465 machine_mode vmode = mode;
21467 if (vector_mode)
21468 use_sse = true;
21469 else if (mode == TFmode)
21470 use_sse = true;
21471 else if (TARGET_SSE_MATH)
21473 use_sse = SSE_FLOAT_MODE_P (mode);
21474 if (mode == SFmode)
21475 vmode = V4SFmode;
21476 else if (mode == DFmode)
21477 vmode = V2DFmode;
21480 /* NEG and ABS performed with SSE use bitwise mask operations.
21481 Create the appropriate mask now. */
21482 if (use_sse)
21483 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
21484 else
21485 mask = NULL_RTX;
21487 dst = operands[0];
21488 src = operands[1];
21490 set = gen_rtx_fmt_e (code, mode, src);
21491 set = gen_rtx_SET (dst, set);
21493 if (mask)
21495 rtx use, clob;
21496 rtvec par;
21498 use = gen_rtx_USE (VOIDmode, mask);
21499 if (vector_mode)
21500 par = gen_rtvec (2, set, use);
21501 else
21503 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21504 par = gen_rtvec (3, set, use, clob);
21506 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
21508 else
21509 emit_insn (set);
21512 /* Expand a copysign operation. Special case operand 0 being a constant. */
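/* The underlying identity, for reference: with S the sign-bit mask,
   copysign (x, y) == (x & ~S) | (y & S), i.e. the magnitude of x combined
   with the sign of y. The constant and variable cases below differ only
   in how much of this can be folded at expand time. */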
21514 void
21515 ix86_expand_copysign (rtx operands[])
21517 machine_mode mode, vmode;
21518 rtx dest, op0, op1, mask, nmask;
21520 dest = operands[0];
21521 op0 = operands[1];
21522 op1 = operands[2];
21524 mode = GET_MODE (dest);
21526 if (mode == SFmode)
21527 vmode = V4SFmode;
21528 else if (mode == DFmode)
21529 vmode = V2DFmode;
21530 else
21531 vmode = mode;
21533 if (CONST_DOUBLE_P (op0))
21535 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
21537 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
21538 op0 = simplify_unary_operation (ABS, mode, op0, mode);
21540 if (mode == SFmode || mode == DFmode)
21542 if (op0 == CONST0_RTX (mode))
21543 op0 = CONST0_RTX (vmode);
21544 else
21546 rtx v = ix86_build_const_vector (vmode, false, op0);
21548 op0 = force_reg (vmode, v);
21551 else if (op0 != CONST0_RTX (mode))
21552 op0 = force_reg (mode, op0);
21554 mask = ix86_build_signbit_mask (vmode, 0, 0);
21556 if (mode == SFmode)
21557 copysign_insn = gen_copysignsf3_const;
21558 else if (mode == DFmode)
21559 copysign_insn = gen_copysigndf3_const;
21560 else
21561 copysign_insn = gen_copysigntf3_const;
21563 emit_insn (copysign_insn (dest, op0, op1, mask));
21565 else
21567 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
21569 nmask = ix86_build_signbit_mask (vmode, 0, 1);
21570 mask = ix86_build_signbit_mask (vmode, 0, 0);
21572 if (mode == SFmode)
21573 copysign_insn = gen_copysignsf3_var;
21574 else if (mode == DFmode)
21575 copysign_insn = gen_copysigndf3_var;
21576 else
21577 copysign_insn = gen_copysigntf3_var;
21579 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
21583 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21584 be a constant, and so has already been expanded into a vector constant. */
21586 void
21587 ix86_split_copysign_const (rtx operands[])
21589 machine_mode mode, vmode;
21590 rtx dest, op0, mask, x;
21592 dest = operands[0];
21593 op0 = operands[1];
21594 mask = operands[3];
21596 mode = GET_MODE (dest);
21597 vmode = GET_MODE (mask);
21599 dest = lowpart_subreg (vmode, dest, mode);
21600 x = gen_rtx_AND (vmode, dest, mask);
21601 emit_insn (gen_rtx_SET (dest, x));
21603 if (op0 != CONST0_RTX (vmode))
21605 x = gen_rtx_IOR (vmode, dest, op0);
21606 emit_insn (gen_rtx_SET (dest, x));
21610 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21611 so we have to do two masks. */
21613 void
21614 ix86_split_copysign_var (rtx operands[])
21616 machine_mode mode, vmode;
21617 rtx dest, scratch, op0, op1, mask, nmask, x;
21619 dest = operands[0];
21620 scratch = operands[1];
21621 op0 = operands[2];
21622 op1 = operands[3];
21623 nmask = operands[4];
21624 mask = operands[5];
21626 mode = GET_MODE (dest);
21627 vmode = GET_MODE (mask);
21629 if (rtx_equal_p (op0, op1))
21631 /* Shouldn't happen often (it's useless, obviously), but when it does
21632 we'd generate incorrect code if we continue below. */
21633 emit_move_insn (dest, op0);
21634 return;
21637 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21639 gcc_assert (REGNO (op1) == REGNO (scratch));
21641 x = gen_rtx_AND (vmode, scratch, mask);
21642 emit_insn (gen_rtx_SET (scratch, x));
21644 dest = mask;
21645 op0 = lowpart_subreg (vmode, op0, mode);
21646 x = gen_rtx_NOT (vmode, dest);
21647 x = gen_rtx_AND (vmode, x, op0);
21648 emit_insn (gen_rtx_SET (dest, x));
21650 else
21652 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21654 x = gen_rtx_AND (vmode, scratch, mask);
21656 else /* alternative 2,4 */
21658 gcc_assert (REGNO (mask) == REGNO (scratch));
21659 op1 = lowpart_subreg (vmode, op1, mode);
21660 x = gen_rtx_AND (vmode, scratch, op1);
21662 emit_insn (gen_rtx_SET (scratch, x));
21664 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21666 dest = lowpart_subreg (vmode, op0, mode);
21667 x = gen_rtx_AND (vmode, dest, nmask);
21669 else /* alternative 3,4 */
21671 gcc_assert (REGNO (nmask) == REGNO (dest));
21672 dest = nmask;
21673 op0 = lowpart_subreg (vmode, op0, mode);
21674 x = gen_rtx_AND (vmode, dest, op0);
21676 emit_insn (gen_rtx_SET (dest, x));
21679 x = gen_rtx_IOR (vmode, dest, scratch);
21680 emit_insn (gen_rtx_SET (dest, x));
21683 /* Return TRUE or FALSE depending on whether the first SET in INSN
21684 has source and destination with matching CC modes, and whether the
21685 CC mode is at least as constrained as REQ_MODE. */
21687 bool
21688 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21690 rtx set;
21691 machine_mode set_mode;
21693 set = PATTERN (insn);
21694 if (GET_CODE (set) == PARALLEL)
21695 set = XVECEXP (set, 0, 0);
21696 gcc_assert (GET_CODE (set) == SET);
21697 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21699 set_mode = GET_MODE (SET_DEST (set));
21700 switch (set_mode)
21702 case E_CCNOmode:
21703 if (req_mode != CCNOmode
21704 && (req_mode != CCmode
21705 || XEXP (SET_SRC (set), 1) != const0_rtx))
21706 return false;
21707 break;
21708 case E_CCmode:
21709 if (req_mode == CCGCmode)
21710 return false;
21711 /* FALLTHRU */
21712 case E_CCGCmode:
21713 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21714 return false;
21715 /* FALLTHRU */
21716 case E_CCGOCmode:
21717 if (req_mode == CCZmode)
21718 return false;
21719 /* FALLTHRU */
21720 case E_CCZmode:
21721 break;
21723 case E_CCGZmode:
21725 case E_CCAmode:
21726 case E_CCCmode:
21727 case E_CCOmode:
21728 case E_CCPmode:
21729 case E_CCSmode:
21730 if (set_mode != req_mode)
21731 return false;
21732 break;
21734 default:
21735 gcc_unreachable ();
21738 return GET_MODE (SET_SRC (set)) == set_mode;
21741 /* Generate insn patterns to do an integer compare of OPERANDS. */
21743 static rtx
21744 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21746 machine_mode cmpmode;
21747 rtx tmp, flags;
21749 cmpmode = SELECT_CC_MODE (code, op0, op1);
21750 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21752 /* This is very simple, but making the interface the same as in the
21753 FP case makes the rest of the code easier. */
21754 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21755 emit_insn (gen_rtx_SET (flags, tmp));
21757 /* Return the test that should be put into the flags user, i.e.
21758 the bcc, scc, or cmov instruction. */
21759 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21762 /* Figure out whether to use unordered fp comparisons. */
21764 static bool
21765 ix86_unordered_fp_compare (enum rtx_code code)
21767 if (!TARGET_IEEE_FP)
21768 return false;
21770 switch (code)
21772 case GT:
21773 case GE:
21774 case LT:
21775 case LE:
21776 return false;
21778 case EQ:
21779 case NE:
21781 case LTGT:
21782 case UNORDERED:
21783 case ORDERED:
21784 case UNLT:
21785 case UNLE:
21786 case UNGT:
21787 case UNGE:
21788 case UNEQ:
21789 return true;
21791 default:
21792 gcc_unreachable ();
21796 machine_mode
21797 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21799 machine_mode mode = GET_MODE (op0);
21801 if (SCALAR_FLOAT_MODE_P (mode))
21803 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21804 return CCFPmode;
21807 switch (code)
21809 /* Only zero flag is needed. */
21810 case EQ: /* ZF=0 */
21811 case NE: /* ZF!=0 */
21812 return CCZmode;
21813 /* Codes needing carry flag. */
21814 case GEU: /* CF=0 */
21815 case LTU: /* CF=1 */
21816 /* Detect overflow checks. They need just the carry flag. */
21817 if (GET_CODE (op0) == PLUS
21818 && (rtx_equal_p (op1, XEXP (op0, 0))
21819 || rtx_equal_p (op1, XEXP (op0, 1))))
21820 return CCCmode;
21821 else
21822 return CCmode;
21823 case GTU: /* CF=0 & ZF=0 */
21824 case LEU: /* CF=1 | ZF=1 */
21825 return CCmode;
21826 /* Codes possibly doable only with sign flag when
21827 comparing against zero. */
21828 case GE: /* SF=OF or SF=0 */
21829 case LT: /* SF<>OF or SF=1 */
21830 if (op1 == const0_rtx)
21831 return CCGOCmode;
21832 else
21833 /* For other cases Carry flag is not required. */
21834 return CCGCmode;
21835 /* Codes doable only with sign flag when comparing
21836 against zero, but we lack a jump instruction for it,
21837 so we need to use relational tests against the overflow
21838 flag, which thus needs to be zero. */
21839 case GT: /* ZF=0 & SF=OF */
21840 case LE: /* ZF=1 | SF<>OF */
21841 if (op1 == const0_rtx)
21842 return CCNOmode;
21843 else
21844 return CCGCmode;
21845 /* The strcmp pattern does (use flags) and combine may ask us for the
21846 proper mode. */
21847 case USE:
21848 return CCmode;
21849 default:
21850 gcc_unreachable ();
21854 /* Return the fixed registers used for condition codes. */
21856 static bool
21857 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21859 *p1 = FLAGS_REG;
21860 *p2 = FPSR_REG;
21861 return true;
21864 /* If two condition code modes are compatible, return a condition code
21865 mode which is compatible with both. Otherwise, return
21866 VOIDmode. */
21868 static machine_mode
21869 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21871 if (m1 == m2)
21872 return m1;
21874 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21875 return VOIDmode;
21877 if ((m1 == CCGCmode && m2 == CCGOCmode)
21878 || (m1 == CCGOCmode && m2 == CCGCmode))
21879 return CCGCmode;
21881 if ((m1 == CCNOmode && m2 == CCGOCmode)
21882 || (m1 == CCGOCmode && m2 == CCNOmode))
21883 return CCNOmode;
21885 if (m1 == CCZmode
21886 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
21887 return m2;
21888 else if (m2 == CCZmode
21889 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
21890 return m1;
21892 switch (m1)
21894 default:
21895 gcc_unreachable ();
21897 case E_CCmode:
21898 case E_CCGCmode:
21899 case E_CCGOCmode:
21900 case E_CCNOmode:
21901 case E_CCAmode:
21902 case E_CCCmode:
21903 case E_CCOmode:
21904 case E_CCPmode:
21905 case E_CCSmode:
21906 case E_CCZmode:
21907 switch (m2)
21909 default:
21910 return VOIDmode;
21912 case E_CCmode:
21913 case E_CCGCmode:
21914 case E_CCGOCmode:
21915 case E_CCNOmode:
21916 case E_CCAmode:
21917 case E_CCCmode:
21918 case E_CCOmode:
21919 case E_CCPmode:
21920 case E_CCSmode:
21921 case E_CCZmode:
21922 return CCmode;
21925 case E_CCFPmode:
21926 /* These are only compatible with themselves, which we already
21927 checked above. */
21928 return VOIDmode;
21933 /* Return a comparison we can do that is equivalent to
21934 swap_condition (code), apart possibly from orderedness.
21935 But never change orderedness if TARGET_IEEE_FP, returning
21936 UNKNOWN in that case if necessary. */
21938 static enum rtx_code
21939 ix86_fp_swap_condition (enum rtx_code code)
21941 switch (code)
21943 case GT: /* GTU - CF=0 & ZF=0 */
21944 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21945 case GE: /* GEU - CF=0 */
21946 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21947 case UNLT: /* LTU - CF=1 */
21948 return TARGET_IEEE_FP ? UNKNOWN : GT;
21949 case UNLE: /* LEU - CF=1 | ZF=1 */
21950 return TARGET_IEEE_FP ? UNKNOWN : GE;
21951 default:
21952 return swap_condition (code);
21956 /* Return the cost of comparison CODE using the best strategy for performance.
21957 All the following functions use the number of instructions as a cost metric.
21958 In the future this should be tweaked to compute bytes for optimize_size and
21959 take into account the performance of various instructions on various CPUs. */
21961 static int
21962 ix86_fp_comparison_cost (enum rtx_code code)
21964 int arith_cost;
21966 /* The cost of code using bit-twiddling on %ah. */
21967 switch (code)
21969 case UNLE:
21970 case UNLT:
21971 case LTGT:
21972 case GT:
21973 case GE:
21974 case UNORDERED:
21975 case ORDERED:
21976 case UNEQ:
21977 arith_cost = 4;
21978 break;
21979 case LT:
21980 case NE:
21981 case EQ:
21982 case UNGE:
21983 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21984 break;
21985 case LE:
21986 case UNGT:
21987 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21988 break;
21989 default:
21990 gcc_unreachable ();
21993 switch (ix86_fp_comparison_strategy (code))
21995 case IX86_FPCMP_COMI:
21996 return arith_cost > 4 ? 3 : 2;
21997 case IX86_FPCMP_SAHF:
21998 return arith_cost > 4 ? 4 : 3;
21999 default:
22000 return arith_cost;
22004 /* Return the strategy to use for floating-point comparisons. We assume that
22005 fcomi is always preferable where available, since that is also true when
22006 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
22008 enum ix86_fpcmp_strategy
22009 ix86_fp_comparison_strategy (enum rtx_code)
22011 /* Do fcomi/sahf based test when profitable. */
22013 if (TARGET_CMOVE)
22014 return IX86_FPCMP_COMI;
22016 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22017 return IX86_FPCMP_SAHF;
22019 return IX86_FPCMP_ARITH;
22022 /* Swap, force into registers, or otherwise massage the two operands
22023 to a fp comparison. The operands are updated in place; the new
22024 comparison code is returned. */
22026 static enum rtx_code
22027 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22029 bool unordered_compare = ix86_unordered_fp_compare (code);
22030 rtx op0 = *pop0, op1 = *pop1;
22031 machine_mode op_mode = GET_MODE (op0);
22032 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22034 /* All of the unordered compare instructions only work on registers.
22035 The same is true of the fcomi compare instructions. The XFmode
22036 compare instructions require registers except when comparing
22037 against zero or when converting operand 1 from fixed point to
22038 floating point. */
22040 if (!is_sse
22041 && (unordered_compare
22042 || (op_mode == XFmode
22043 && ! (standard_80387_constant_p (op0) == 1
22044 || standard_80387_constant_p (op1) == 1)
22045 && GET_CODE (op1) != FLOAT)
22046 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22048 op0 = force_reg (op_mode, op0);
22049 op1 = force_reg (op_mode, op1);
22051 else
22053 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
22054 things around if they appear profitable, otherwise force op0
22055 into a register. */
22057 if (standard_80387_constant_p (op0) == 0
22058 || (MEM_P (op0)
22059 && ! (standard_80387_constant_p (op1) == 0
22060 || MEM_P (op1))))
22062 enum rtx_code new_code = ix86_fp_swap_condition (code);
22063 if (new_code != UNKNOWN)
22065 std::swap (op0, op1);
22066 code = new_code;
22070 if (!REG_P (op0))
22071 op0 = force_reg (op_mode, op0);
22073 if (CONSTANT_P (op1))
22075 int tmp = standard_80387_constant_p (op1);
22076 if (tmp == 0)
22077 op1 = validize_mem (force_const_mem (op_mode, op1));
22078 else if (tmp == 1)
22080 if (TARGET_CMOVE)
22081 op1 = force_reg (op_mode, op1);
22083 else
22084 op1 = force_reg (op_mode, op1);
22088 /* Try to rearrange the comparison to make it cheaper. */
22089 if (ix86_fp_comparison_cost (code)
22090 > ix86_fp_comparison_cost (swap_condition (code))
22091 && (REG_P (op1) || can_create_pseudo_p ()))
22093 std::swap (op0, op1);
22094 code = swap_condition (code);
22095 if (!REG_P (op0))
22096 op0 = force_reg (op_mode, op0);
22099 *pop0 = op0;
22100 *pop1 = op1;
22101 return code;
22104 /* Convert the comparison codes we use to represent an FP comparison to the
22105 integer code that will result in a proper branch. Return UNKNOWN if no
22106 such code is available. */
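/* Background for the mapping below: after fcomi/comis[sd] (or sahf of the
   FPU status word) the CF/ZF pair describes the FP result the way an
   unsigned integer compare would set it, so GT maps to GTU, GE to GEU,
   UNLT to LTU, UNLE to LEU, UNEQ to EQ and LTGT to NE. */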
22108 enum rtx_code
22109 ix86_fp_compare_code_to_integer (enum rtx_code code)
22111 switch (code)
22113 case GT:
22114 return GTU;
22115 case GE:
22116 return GEU;
22117 case ORDERED:
22118 case UNORDERED:
22119 return code;
22120 case UNEQ:
22121 return EQ;
22122 case UNLT:
22123 return LTU;
22124 case UNLE:
22125 return LEU;
22126 case LTGT:
22127 return NE;
22128 default:
22129 return UNKNOWN;
22133 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22135 static rtx
22136 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22138 bool unordered_compare = ix86_unordered_fp_compare (code);
22139 machine_mode intcmp_mode;
22140 rtx tmp, tmp2;
22142 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22144 /* Do fcomi/sahf based test when profitable. */
22145 switch (ix86_fp_comparison_strategy (code))
22147 case IX86_FPCMP_COMI:
22148 intcmp_mode = CCFPmode;
22149 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22150 if (unordered_compare)
22151 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22152 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22153 break;
22155 case IX86_FPCMP_SAHF:
22156 intcmp_mode = CCFPmode;
22157 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22158 if (unordered_compare)
22159 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22160 tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
22161 if (!scratch)
22162 scratch = gen_reg_rtx (HImode);
22163 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22164 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22165 break;
22167 case IX86_FPCMP_ARITH:
22168 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
22169 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22170 if (unordered_compare)
22171 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22172 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22173 if (!scratch)
22174 scratch = gen_reg_rtx (HImode);
22175 emit_insn (gen_rtx_SET (scratch, tmp));
22177 /* In the unordered case, we have to check C2 for NaNs, which
22178 doesn't happen to work out to anything nice combination-wise.
22179 So do some bit twiddling on the value we've got in AH to come
22180 up with an appropriate set of condition codes. */
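/* (For reference: after fnstsw the relevant FPU condition bits sit in AH
   as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so a mask like 0x45 below tests
   C0|C2|C3 at once; C2 is the bit that flags an unordered result.) */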
22182 intcmp_mode = CCNOmode;
22183 switch (code)
22185 case GT:
22186 case UNGT:
22187 if (code == GT || !TARGET_IEEE_FP)
22189 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22190 code = EQ;
22192 else
22194 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22195 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22196 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22197 intcmp_mode = CCmode;
22198 code = GEU;
22200 break;
22201 case LT:
22202 case UNLT:
22203 if (code == LT && TARGET_IEEE_FP)
22205 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22206 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22207 intcmp_mode = CCmode;
22208 code = EQ;
22210 else
22212 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22213 code = NE;
22215 break;
22216 case GE:
22217 case UNGE:
22218 if (code == GE || !TARGET_IEEE_FP)
22220 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22221 code = EQ;
22223 else
22225 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22226 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22227 code = NE;
22229 break;
22230 case LE:
22231 case UNLE:
22232 if (code == LE && TARGET_IEEE_FP)
22234 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22235 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22236 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22237 intcmp_mode = CCmode;
22238 code = LTU;
22240 else
22242 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22243 code = NE;
22245 break;
22246 case EQ:
22247 case UNEQ:
22248 if (code == EQ && TARGET_IEEE_FP)
22250 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22251 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22252 intcmp_mode = CCmode;
22253 code = EQ;
22255 else
22257 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22258 code = NE;
22260 break;
22261 case NE:
22262 case LTGT:
22263 if (code == NE && TARGET_IEEE_FP)
22265 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22266 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
22267 GEN_INT (0x40)));
22268 code = NE;
22270 else
22272 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22273 code = EQ;
22275 break;
22277 case UNORDERED:
22278 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22279 code = NE;
22280 break;
22281 case ORDERED:
22282 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22283 code = EQ;
22284 break;
22286 default:
22287 gcc_unreachable ();
22289 break;
22291 default:
22292 gcc_unreachable ();
22295 /* Return the test that should be put into the flags user, i.e.
22296 the bcc, scc, or cmov instruction. */
22297 return gen_rtx_fmt_ee (code, VOIDmode,
22298 gen_rtx_REG (intcmp_mode, FLAGS_REG),
22299 const0_rtx);
22302 static rtx
22303 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
22305 rtx ret;
22307 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
22308 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
22310 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
22312 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
22313 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22315 else
22316 ret = ix86_expand_int_compare (code, op0, op1);
22318 return ret;
22321 void
22322 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
22324 machine_mode mode = GET_MODE (op0);
22325 rtx tmp;
22327 /* Handle the special case of a vector comparison with a boolean result;
22328 transform it using the ptest instruction. */
22329 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22331 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
22332 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
22334 gcc_assert (code == EQ || code == NE);
22335 /* Generate XOR since we can't check that one operand is zero vector. */
22336 tmp = gen_reg_rtx (mode);
22337 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
22338 tmp = gen_lowpart (p_mode, tmp);
22339 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
22340 gen_rtx_UNSPEC (CCmode,
22341 gen_rtvec (2, tmp, tmp),
22342 UNSPEC_PTEST)));
22343 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
22344 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22345 gen_rtx_LABEL_REF (VOIDmode, label),
22346 pc_rtx);
22347 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22348 return;
22351 switch (mode)
22353 case E_SFmode:
22354 case E_DFmode:
22355 case E_XFmode:
22356 case E_QImode:
22357 case E_HImode:
22358 case E_SImode:
22359 simple:
22360 tmp = ix86_expand_compare (code, op0, op1);
22361 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22362 gen_rtx_LABEL_REF (VOIDmode, label),
22363 pc_rtx);
22364 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22365 return;
22367 case E_DImode:
22368 if (TARGET_64BIT)
22369 goto simple;
22370 /* For a 32-bit target, a DImode comparison may be performed on
22371 SSE registers. To allow this we should avoid the split
22372 to SImode, which is achieved by doing the xor in DImode
22373 and then comparing with zero (which is recognized by the
22374 STV pass). We don't compare using xor when optimizing
22375 for size. */
22376 if (!optimize_insn_for_size_p ()
22377 && TARGET_STV
22378 && (code == EQ || code == NE))
22380 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
22381 op1 = const0_rtx;
22383 /* FALLTHRU */
22384 case E_TImode:
22385 /* Expand a double-word branch into multiple compare+branch. */
22387 rtx lo[2], hi[2];
22388 rtx_code_label *label2;
22389 enum rtx_code code1, code2, code3;
22390 machine_mode submode;
22392 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
22394 std::swap (op0, op1);
22395 code = swap_condition (code);
22398 split_double_mode (mode, &op0, 1, lo+0, hi+0);
22399 split_double_mode (mode, &op1, 1, lo+1, hi+1);
22401 submode = mode == DImode ? SImode : DImode;
22403 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
22404 avoid two branches. This costs one extra insn, so disable when
22405 optimizing for size. */
22407 if ((code == EQ || code == NE)
22408 && (!optimize_insn_for_size_p ()
22409 || hi[1] == const0_rtx || lo[1] == const0_rtx))
22411 rtx xor0, xor1;
22413 xor1 = hi[0];
22414 if (hi[1] != const0_rtx)
22415 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
22416 NULL_RTX, 0, OPTAB_WIDEN);
22418 xor0 = lo[0];
22419 if (lo[1] != const0_rtx)
22420 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
22421 NULL_RTX, 0, OPTAB_WIDEN);
22423 tmp = expand_binop (submode, ior_optab, xor1, xor0,
22424 NULL_RTX, 0, OPTAB_WIDEN);
22426 ix86_expand_branch (code, tmp, const0_rtx, label);
22427 return;
22430 /* Otherwise, if we are doing a less-than or greater-than-or-equal
22431 comparison, op1 is a constant and the low word is zero, then we can
22432 just examine the high word. Similarly for a low word of -1 and
22433 less-than-or-equal or greater-than. */
22435 if (CONST_INT_P (hi[1]))
22436 switch (code)
22438 case LT: case LTU: case GE: case GEU:
22439 if (lo[1] == const0_rtx)
22441 ix86_expand_branch (code, hi[0], hi[1], label);
22442 return;
22444 break;
22445 case LE: case LEU: case GT: case GTU:
22446 if (lo[1] == constm1_rtx)
22448 ix86_expand_branch (code, hi[0], hi[1], label);
22449 return;
22451 break;
22452 default:
22453 break;
22456 /* Emulate comparisons that do not depend on the Zero flag with a
22457 double-word subtraction. Note that only the Overflow, Sign
22458 and Carry flags are valid, so swap the arguments and condition
22459 of comparisons that would otherwise test the Zero flag. */
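/* Sketch of the emitted comparison (illustrative): the low words are
   compared first, leaving the borrow in CF, and then an sbb of the high
   words (hi0 - hi1 - CF) is done purely for its flags. Afterwards CF
   (unsigned) or the SF/OF pair (signed) reflects the full double-word
   compare while ZF is meaningless, which is why LE/LEU/GT/GTU are first
   swapped into GE/GEU/LT/LTU just below. */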
22461 switch (code)
22463 case LE: case LEU: case GT: case GTU:
22464 std::swap (lo[0], lo[1]);
22465 std::swap (hi[0], hi[1]);
22466 code = swap_condition (code);
22467 /* FALLTHRU */
22469 case LT: case LTU: case GE: case GEU:
22471 rtx (*cmp_insn) (rtx, rtx);
22472 rtx (*sbb_insn) (rtx, rtx, rtx);
22473 bool uns = (code == LTU || code == GEU);
22475 if (TARGET_64BIT)
22477 cmp_insn = gen_cmpdi_1;
22478 sbb_insn
22479 = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
22481 else
22483 cmp_insn = gen_cmpsi_1;
22484 sbb_insn
22485 = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
22488 if (!nonimmediate_operand (lo[0], submode))
22489 lo[0] = force_reg (submode, lo[0]);
22490 if (!x86_64_general_operand (lo[1], submode))
22491 lo[1] = force_reg (submode, lo[1]);
22493 if (!register_operand (hi[0], submode))
22494 hi[0] = force_reg (submode, hi[0]);
22495 if ((uns && !nonimmediate_operand (hi[1], submode))
22496 || (!uns && !x86_64_general_operand (hi[1], submode)))
22497 hi[1] = force_reg (submode, hi[1]);
22499 emit_insn (cmp_insn (lo[0], lo[1]));
22500 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
22502 tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
22504 ix86_expand_branch (code, tmp, const0_rtx, label);
22505 return;
22508 default:
22509 break;
22512 /* Otherwise, we need two or three jumps. */
22514 label2 = gen_label_rtx ();
22516 code1 = code;
22517 code2 = swap_condition (code);
22518 code3 = unsigned_condition (code);
22520 switch (code)
22522 case LT: case GT: case LTU: case GTU:
22523 break;
22525 case LE: code1 = LT; code2 = GT; break;
22526 case GE: code1 = GT; code2 = LT; break;
22527 case LEU: code1 = LTU; code2 = GTU; break;
22528 case GEU: code1 = GTU; code2 = LTU; break;
22530 case EQ: code1 = UNKNOWN; code2 = NE; break;
22531 case NE: code2 = UNKNOWN; break;
22533 default:
22534 gcc_unreachable ();
22538 * a < b =>
22539 * if (hi(a) < hi(b)) goto true;
22540 * if (hi(a) > hi(b)) goto false;
22541 * if (lo(a) < lo(b)) goto true;
22542 * false:
22545 if (code1 != UNKNOWN)
22546 ix86_expand_branch (code1, hi[0], hi[1], label);
22547 if (code2 != UNKNOWN)
22548 ix86_expand_branch (code2, hi[0], hi[1], label2);
22550 ix86_expand_branch (code3, lo[0], lo[1], label);
22552 if (code2 != UNKNOWN)
22553 emit_label (label2);
22554 return;
22557 default:
22558 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
22559 goto simple;
22563 void
22564 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22566 rtx ret;
22568 gcc_assert (GET_MODE (dest) == QImode);
22570 ret = ix86_expand_compare (code, op0, op1);
22571 PUT_MODE (ret, QImode);
22572 emit_insn (gen_rtx_SET (dest, ret));
22575 /* Expand comparison setting or clearing carry flag. Return true when
22576 successful and set pop for the operation. */
22577 static bool
22578 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
22580 machine_mode mode =
22581 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
22583 /* Do not handle double-mode compares that go through special path. */
22584 if (mode == (TARGET_64BIT ? TImode : DImode))
22585 return false;
22587 if (SCALAR_FLOAT_MODE_P (mode))
22589 rtx compare_op;
22590 rtx_insn *compare_seq;
22592 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22594 /* Shortcut: following common codes never translate
22595 into carry flag compares. */
22596 if (code == EQ || code == NE || code == UNEQ || code == LTGT
22597 || code == ORDERED || code == UNORDERED)
22598 return false;
22600 /* These comparisons require zero flag; swap operands so they won't. */
22601 if ((code == GT || code == UNLE || code == LE || code == UNGT)
22602 && !TARGET_IEEE_FP)
22604 std::swap (op0, op1);
22605 code = swap_condition (code);
22608 /* Try to expand the comparison and verify that we end up with a
22609 carry-flag-based comparison. This fails to be true only when
22610 we decide to expand the comparison using arithmetic, which is
22611 not a common scenario. */
22612 start_sequence ();
22613 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22614 compare_seq = get_insns ();
22615 end_sequence ();
22617 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
22618 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
22619 else
22620 code = GET_CODE (compare_op);
22622 if (code != LTU && code != GEU)
22623 return false;
22625 emit_insn (compare_seq);
22626 *pop = compare_op;
22627 return true;
22630 if (!INTEGRAL_MODE_P (mode))
22631 return false;
22633 switch (code)
22635 case LTU:
22636 case GEU:
22637 break;
22639 /* Convert a==0 into (unsigned)a<1. */
22640 case EQ:
22641 case NE:
22642 if (op1 != const0_rtx)
22643 return false;
22644 op1 = const1_rtx;
22645 code = (code == EQ ? LTU : GEU);
22646 break;
22648 /* Convert a>b into b<a or a>=b+1. */
22649 case GTU:
22650 case LEU:
22651 if (CONST_INT_P (op1))
22653 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22654 /* Bail out on overflow. We could still swap the operands, but that
22655 would force loading of the constant into a register. */
22656 if (op1 == const0_rtx
22657 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22658 return false;
22659 code = (code == GTU ? GEU : LTU);
22661 else
22663 std::swap (op0, op1);
22664 code = (code == GTU ? LTU : GEU);
22666 break;
22668 /* Convert a>=0 into (unsigned)a<0x80000000. */
22669 case LT:
22670 case GE:
22671 if (mode == DImode || op1 != const0_rtx)
22672 return false;
22673 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22674 code = (code == LT ? GEU : LTU);
22675 break;
22676 case LE:
22677 case GT:
22678 if (mode == DImode || op1 != constm1_rtx)
22679 return false;
22680 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22681 code = (code == LE ? GEU : LTU);
22682 break;
22684 default:
22685 return false;
22687 /* Swapping operands may cause constant to appear as first operand. */
22688 if (!nonimmediate_operand (op0, VOIDmode))
22690 if (!can_create_pseudo_p ())
22691 return false;
22692 op0 = force_reg (mode, op0);
22694 *pop = ix86_expand_compare (code, op0, op1);
22695 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
22696 return true;
22699 bool
22700 ix86_expand_int_movcc (rtx operands[])
22702 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22703 rtx_insn *compare_seq;
22704 rtx compare_op;
22705 machine_mode mode = GET_MODE (operands[0]);
22706 bool sign_bit_compare_p = false;
22707 rtx op0 = XEXP (operands[1], 0);
22708 rtx op1 = XEXP (operands[1], 1);
22710 if (GET_MODE (op0) == TImode
22711 || (GET_MODE (op0) == DImode
22712 && !TARGET_64BIT))
22713 return false;
22715 start_sequence ();
22716 compare_op = ix86_expand_compare (code, op0, op1);
22717 compare_seq = get_insns ();
22718 end_sequence ();
22720 compare_code = GET_CODE (compare_op);
22722 if ((op1 == const0_rtx && (code == GE || code == LT))
22723 || (op1 == constm1_rtx && (code == GT || code == LE)))
22724 sign_bit_compare_p = true;
22726 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22727 HImode insns, we'd be swallowed in word prefix ops. */
22729 if ((mode != HImode || TARGET_FAST_PREFIX)
22730 && (mode != (TARGET_64BIT ? TImode : DImode))
22731 && CONST_INT_P (operands[2])
22732 && CONST_INT_P (operands[3]))
22734 rtx out = operands[0];
22735 HOST_WIDE_INT ct = INTVAL (operands[2]);
22736 HOST_WIDE_INT cf = INTVAL (operands[3]);
22737 HOST_WIDE_INT diff;
22739 diff = ct - cf;
22740 /* Sign bit compares are better done using shifts than by using
22741 sbb. */
22742 if (sign_bit_compare_p
22743 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22745 /* Detect overlap between destination and compare sources. */
22746 rtx tmp = out;
22748 if (!sign_bit_compare_p)
22750 rtx flags;
22751 bool fpcmp = false;
22753 compare_code = GET_CODE (compare_op);
22755 flags = XEXP (compare_op, 0);
22757 if (GET_MODE (flags) == CCFPmode)
22759 fpcmp = true;
22760 compare_code
22761 = ix86_fp_compare_code_to_integer (compare_code);
22764 /* To simplify rest of code, restrict to the GEU case. */
22765 if (compare_code == LTU)
22767 std::swap (ct, cf);
22768 compare_code = reverse_condition (compare_code);
22769 code = reverse_condition (code);
22771 else
22773 if (fpcmp)
22774 PUT_CODE (compare_op,
22775 reverse_condition_maybe_unordered
22776 (GET_CODE (compare_op)));
22777 else
22778 PUT_CODE (compare_op,
22779 reverse_condition (GET_CODE (compare_op)));
22781 diff = ct - cf;
22783 if (reg_overlap_mentioned_p (out, op0)
22784 || reg_overlap_mentioned_p (out, op1))
22785 tmp = gen_reg_rtx (mode);
22787 if (mode == DImode)
22788 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22789 else
22790 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22791 flags, compare_op));
22793 else
22795 if (code == GT || code == GE)
22796 code = reverse_condition (code);
22797 else
22799 std::swap (ct, cf);
22800 diff = ct - cf;
22802 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22805 if (diff == 1)
22808 * cmpl op0,op1
22809 * sbbl dest,dest
22810 * [addl dest, ct]
22812 * Size 5 - 8.
22814 if (ct)
22815 tmp = expand_simple_binop (mode, PLUS,
22816 tmp, GEN_INT (ct),
22817 copy_rtx (tmp), 1, OPTAB_DIRECT);
22819 else if (cf == -1)
22822 * cmpl op0,op1
22823 * sbbl dest,dest
22824 * orl $ct, dest
22826 * Size 8.
22828 tmp = expand_simple_binop (mode, IOR,
22829 tmp, GEN_INT (ct),
22830 copy_rtx (tmp), 1, OPTAB_DIRECT);
22832 else if (diff == -1 && ct)
22835 * cmpl op0,op1
22836 * sbbl dest,dest
22837 * notl dest
22838 * [addl dest, cf]
22840 * Size 8 - 11.
22842 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22843 if (cf)
22844 tmp = expand_simple_binop (mode, PLUS,
22845 copy_rtx (tmp), GEN_INT (cf),
22846 copy_rtx (tmp), 1, OPTAB_DIRECT);
22848 else
22851 * cmpl op0,op1
22852 * sbbl dest,dest
22853 * [notl dest]
22854 * andl cf - ct, dest
22855 * [addl dest, ct]
22857 * Size 8 - 11.
22860 if (cf == 0)
22862 cf = ct;
22863 ct = 0;
22864 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22867 tmp = expand_simple_binop (mode, AND,
22868 copy_rtx (tmp),
22869 gen_int_mode (cf - ct, mode),
22870 copy_rtx (tmp), 1, OPTAB_DIRECT);
22871 if (ct)
22872 tmp = expand_simple_binop (mode, PLUS,
22873 copy_rtx (tmp), GEN_INT (ct),
22874 copy_rtx (tmp), 1, OPTAB_DIRECT);
22877 if (!rtx_equal_p (tmp, out))
22878 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22880 return true;
22883 if (diff < 0)
22885 machine_mode cmp_mode = GET_MODE (op0);
22886 enum rtx_code new_code;
22888 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22890 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22892 /* We may be reversing an unordered compare to a normal compare, which
22893 is not valid in general (we may convert a non-trapping condition
22894 to a trapping one); however, on i386 we currently emit all
22895 comparisons unordered. */
22896 new_code = reverse_condition_maybe_unordered (code);
22898 else
22899 new_code = ix86_reverse_condition (code, cmp_mode);
22900 if (new_code != UNKNOWN)
22902 std::swap (ct, cf);
22903 diff = -diff;
22904 code = new_code;
22908 compare_code = UNKNOWN;
22909 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22910 && CONST_INT_P (op1))
22912 if (op1 == const0_rtx
22913 && (code == LT || code == GE))
22914 compare_code = code;
22915 else if (op1 == constm1_rtx)
22917 if (code == LE)
22918 compare_code = LT;
22919 else if (code == GT)
22920 compare_code = GE;
22924 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22925 if (compare_code != UNKNOWN
22926 && GET_MODE (op0) == GET_MODE (out)
22927 && (cf == -1 || ct == -1))
22929 /* If lea code below could be used, only optimize
22930 if it results in a 2 insn sequence. */
22932 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22933 || diff == 3 || diff == 5 || diff == 9)
22934 || (compare_code == LT && ct == -1)
22935 || (compare_code == GE && cf == -1))
22938 * notl op1 (if necessary)
22939 * sarl $31, op1
22940 * orl cf, op1
22942 if (ct != -1)
22944 cf = ct;
22945 ct = -1;
22946 code = reverse_condition (code);
22949 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22951 out = expand_simple_binop (mode, IOR,
22952 out, GEN_INT (cf),
22953 out, 1, OPTAB_DIRECT);
22954 if (out != operands[0])
22955 emit_move_insn (operands[0], out);
22957 return true;
22962 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22963 || diff == 3 || diff == 5 || diff == 9)
22964 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22965 && (mode != DImode
22966 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22969 * xorl dest,dest
22970 * cmpl op1,op2
22971 * setcc dest
22972 * lea cf(dest*(ct-cf)),dest
22974 * Size 14.
22976 * This also catches the degenerate setcc-only case.
22979 rtx tmp;
22980 int nops;
22982 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22984 nops = 0;
22985 /* On x86_64 the lea instruction operates on Pmode, so we need
22986 to get the arithmetic done in the proper mode to match. */
22987 if (diff == 1)
22988 tmp = copy_rtx (out);
22989 else
22991 rtx out1;
22992 out1 = copy_rtx (out);
22993 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22994 nops++;
22995 if (diff & 1)
22997 tmp = gen_rtx_PLUS (mode, tmp, out1);
22998 nops++;
23001 if (cf != 0)
23003 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23004 nops++;
23006 if (!rtx_equal_p (tmp, out))
23008 if (nops == 1)
23009 out = force_operand (tmp, copy_rtx (out));
23010 else
23011 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23013 if (!rtx_equal_p (out, operands[0]))
23014 emit_move_insn (operands[0], copy_rtx (out));
23016 return true;
23020 * General case: Jumpful:
23021 * xorl dest,dest cmpl op1, op2
23022 * cmpl op1, op2 movl ct, dest
23023 * setcc dest jcc 1f
23024 * decl dest movl cf, dest
23025 * andl (cf-ct),dest 1:
23026 * addl ct,dest
23028 * Size 20. Size 14.
23030 * This is reasonably steep, but branch mispredict costs are
23031 * high on modern cpus, so consider failing only if optimizing
23032 * for space.
23035 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23036 && BRANCH_COST (optimize_insn_for_speed_p (),
23037 false) >= 2)
23039 if (cf == 0)
23041 machine_mode cmp_mode = GET_MODE (op0);
23042 enum rtx_code new_code;
23044 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23046 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23048 /* We may be reversing an unordered compare to a normal compare,
23049 which is not valid in general (we may convert a non-trapping
23050 condition to a trapping one); however, on i386 we currently
23051 emit all comparisons unordered. */
23052 new_code = reverse_condition_maybe_unordered (code);
23054 else
23056 new_code = ix86_reverse_condition (code, cmp_mode);
23057 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23058 compare_code = reverse_condition (compare_code);
23061 if (new_code != UNKNOWN)
23063 cf = ct;
23064 ct = 0;
23065 code = new_code;
23069 if (compare_code != UNKNOWN)
23071 /* notl op1 (if needed)
23072 sarl $31, op1
23073 andl (cf-ct), op1
23074 addl ct, op1
23076 For x < 0 (resp. x <= -1) there will be no notl,
23077 so if possible swap the constants to get rid of the
23078 complement.
23079 True/false will be -1/0 while code below (store flag
23080 followed by decrement) is 0/-1, so the constants need
23081 to be exchanged once more. */
23083 if (compare_code == GE || !cf)
23085 code = reverse_condition (code);
23086 compare_code = LT;
23088 else
23089 std::swap (ct, cf);
23091 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23093 else
23095 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23097 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23098 constm1_rtx,
23099 copy_rtx (out), 1, OPTAB_DIRECT);
23102 out = expand_simple_binop (mode, AND, copy_rtx (out),
23103 gen_int_mode (cf - ct, mode),
23104 copy_rtx (out), 1, OPTAB_DIRECT);
23105 if (ct)
23106 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23107 copy_rtx (out), 1, OPTAB_DIRECT);
23108 if (!rtx_equal_p (out, operands[0]))
23109 emit_move_insn (operands[0], copy_rtx (out));
23111 return true;
23115 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23117 /* Try a few more things with specific constants and a variable. */
23119 optab op;
23120 rtx var, orig_out, out, tmp;
23122 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23123 return false;
23125 /* If one of the two operands is an interesting constant, load a
23126 constant with the above and mask it in with a logical operation. */
23128 if (CONST_INT_P (operands[2]))
23130 var = operands[3];
23131 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23132 operands[3] = constm1_rtx, op = and_optab;
23133 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23134 operands[3] = const0_rtx, op = ior_optab;
23135 else
23136 return false;
23138 else if (CONST_INT_P (operands[3]))
23140 var = operands[2];
23141 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23142 operands[2] = constm1_rtx, op = and_optab;
23143 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
23144 operands[2] = const0_rtx, op = ior_optab;
23145 else
23146 return false;
23148 else
23149 return false;
23151 orig_out = operands[0];
23152 tmp = gen_reg_rtx (mode);
23153 operands[0] = tmp;
23155 /* Recurse to get the constant loaded. */
23156 if (!ix86_expand_int_movcc (operands))
23157 return false;
23159 /* Mask in the interesting variable. */
23160 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23161 OPTAB_WIDEN);
23162 if (!rtx_equal_p (out, orig_out))
23163 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23165 return true;
23169 * For comparison with above,
23171 * movl cf,dest
23172 * movl ct,tmp
23173 * cmpl op1,op2
23174 * cmovcc tmp,dest
23176 * Size 15.
23179 if (! nonimmediate_operand (operands[2], mode))
23180 operands[2] = force_reg (mode, operands[2]);
23181 if (! nonimmediate_operand (operands[3], mode))
23182 operands[3] = force_reg (mode, operands[3]);
23184 if (! register_operand (operands[2], VOIDmode)
23185 && (mode == QImode
23186 || ! register_operand (operands[3], VOIDmode)))
23187 operands[2] = force_reg (mode, operands[2]);
23189 if (mode == QImode
23190 && ! register_operand (operands[3], VOIDmode))
23191 operands[3] = force_reg (mode, operands[3]);
23193 emit_insn (compare_seq);
23194 emit_insn (gen_rtx_SET (operands[0],
23195 gen_rtx_IF_THEN_ELSE (mode,
23196 compare_op, operands[2],
23197 operands[3])));
23198 return true;
23201 /* Swap, force into registers, or otherwise massage the two operands
23202 to an sse comparison with a mask result. Thus we differ a bit from
23203 ix86_prepare_fp_compare_args which expects to produce a flags result.
23205 The DEST operand exists to help determine whether to commute commutative
23206 operators. The POP0/POP1 operands are updated in place. The new
23207 comparison code is returned, or UNKNOWN if not implementable. */
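/* For instance (illustrative): before AVX, cmpps/cmppd only provide the
   EQ/LT/LE/UNORD/NEQ/NLT/NLE/ORD predicates, so a GT or UNLE request is
   handled by swapping the operands and using the mirrored LT or UNGE
   form instead. */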
23209 static enum rtx_code
23210 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23211 rtx *pop0, rtx *pop1)
23213 switch (code)
23215 case LTGT:
23216 case UNEQ:
23217 /* AVX supports all the needed comparisons. */
23218 if (TARGET_AVX)
23219 break;
23220 /* We have no LTGT as an operator. We could implement it with
23221 NE & ORDERED, but this requires an extra temporary. It's
23222 not clear that it's worth it. */
23223 return UNKNOWN;
23225 case LT:
23226 case LE:
23227 case UNGT:
23228 case UNGE:
23229 /* These are supported directly. */
23230 break;
23232 case EQ:
23233 case NE:
23234 case UNORDERED:
23235 case ORDERED:
23236 /* AVX has 3 operand comparisons, no need to swap anything. */
23237 if (TARGET_AVX)
23238 break;
23239 /* For commutative operators, try to canonicalize the destination
23240 operand to be first in the comparison - this helps reload to
23241 avoid extra moves. */
23242 if (!dest || !rtx_equal_p (dest, *pop1))
23243 break;
23244 /* FALLTHRU */
23246 case GE:
23247 case GT:
23248 case UNLE:
23249 case UNLT:
23250 /* These are not supported directly before AVX, and furthermore
23251 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23252 comparison operands to transform into something that is
23253 supported. */
23254 std::swap (*pop0, *pop1);
23255 code = swap_condition (code);
23256 break;
23258 default:
23259 gcc_unreachable ();
23262 return code;
23265 /* Detect conditional moves that exactly match min/max operational
23266 semantics. Note that this is IEEE safe, as long as we don't
23267 interchange the operands.
23269 Returns FALSE if this conditional move doesn't match a MIN/MAX,
23270 and TRUE if the operation is successful and instructions are emitted. */
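/* The "IEEE safe" remark relies on minss/minps and maxss/maxps being
   asymmetric: when the operands are unordered (a NaN is present) or are
   zeros of opposite sign, the hardware returns the second operand, which
   matches the "cmp ? if_true : if_false" semantics being recognized here,
   but only for the original operand order. */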
23272 static bool
23273 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
23274 rtx cmp_op1, rtx if_true, rtx if_false)
23276 machine_mode mode;
23277 bool is_min;
23278 rtx tmp;
23280 if (code == LT)
23282 else if (code == UNGE)
23283 std::swap (if_true, if_false);
23284 else
23285 return false;
23287 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
23288 is_min = true;
23289 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
23290 is_min = false;
23291 else
23292 return false;
23294 mode = GET_MODE (dest);
23296 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23297 but MODE may be a vector mode and thus not appropriate. */
23298 if (!flag_finite_math_only || flag_signed_zeros)
23300 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23301 rtvec v;
23303 if_true = force_reg (mode, if_true);
23304 v = gen_rtvec (2, if_true, if_false);
23305 tmp = gen_rtx_UNSPEC (mode, v, u);
23307 else
23309 code = is_min ? SMIN : SMAX;
23310 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
23313 emit_insn (gen_rtx_SET (dest, tmp));
23314 return true;
23317 /* Expand an sse vector comparison. Return the register with the result. */
23319 static rtx
23320 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
23321 rtx op_true, rtx op_false)
23323 machine_mode mode = GET_MODE (dest);
23324 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
23326 /* In the general case the result of the comparison can differ from the operands' type. */
23327 machine_mode cmp_mode;
23329 /* In AVX512F the result of comparison is an integer mask. */
23330 bool maskcmp = false;
23331 rtx x;
23333 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
23335 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
23336 cmp_mode = int_mode_for_size (nbits, 0).require ();
23337 maskcmp = true;
23339 else
23340 cmp_mode = cmp_ops_mode;
23343 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
23344 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
23345 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
23347 if (optimize
23348 || (maskcmp && cmp_mode != mode)
23349 || (op_true && reg_overlap_mentioned_p (dest, op_true))
23350 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
23351 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
23353 /* Compare patterns for int modes are unspec in AVX512F only. */
23354 if (maskcmp && (code == GT || code == EQ))
23356 rtx (*gen)(rtx, rtx, rtx);
23358 switch (cmp_ops_mode)
23360 case E_V64QImode:
23361 gcc_assert (TARGET_AVX512BW);
23362 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
23363 break;
23364 case E_V32HImode:
23365 gcc_assert (TARGET_AVX512BW);
23366 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
23367 break;
23368 case E_V16SImode:
23369 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
23370 break;
23371 case E_V8DImode:
23372 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
23373 break;
23374 default:
23375 gen = NULL;
23378 if (gen)
23380 emit_insn (gen (dest, cmp_op0, cmp_op1));
23381 return dest;
23384 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
23386 if (cmp_mode != mode && !maskcmp)
23388 x = force_reg (cmp_ops_mode, x);
23389 convert_move (dest, x, false);
23391 else
23392 emit_insn (gen_rtx_SET (dest, x));
23394 return dest;
23397 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
23398 operations. This is used for both scalar and vector conditional moves. */
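/* The general fallback below materializes the usual blend identity,
   dest = (cmp & op_true) | (~cmp & op_false); the earlier special cases
   are just the simplifications available when op_true/op_false are
   all-ones or zero, or when a single blend-style instruction (vpcmov,
   blendv*, vblendm*) can do the select in one step. */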
23400 void
23401 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
23403 machine_mode mode = GET_MODE (dest);
23404 machine_mode cmpmode = GET_MODE (cmp);
23406 /* In AVX512F the result of comparison is an integer mask. */
23407 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
23409 rtx t2, t3, x;
23411 /* If we have an integer mask and FP value then we need
23412 to cast mask to FP mode. */
23413 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
23415 cmp = force_reg (cmpmode, cmp);
23416 cmp = gen_rtx_SUBREG (mode, cmp, 0);
23419 if (vector_all_ones_operand (op_true, mode)
23420 && rtx_equal_p (op_false, CONST0_RTX (mode))
23421 && !maskcmp)
23423 emit_insn (gen_rtx_SET (dest, cmp));
23425 else if (op_false == CONST0_RTX (mode)
23426 && !maskcmp)
23428 op_true = force_reg (mode, op_true);
23429 x = gen_rtx_AND (mode, cmp, op_true);
23430 emit_insn (gen_rtx_SET (dest, x));
23432 else if (op_true == CONST0_RTX (mode)
23433 && !maskcmp)
23435 op_false = force_reg (mode, op_false);
23436 x = gen_rtx_NOT (mode, cmp);
23437 x = gen_rtx_AND (mode, x, op_false);
23438 emit_insn (gen_rtx_SET (dest, x));
23440 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
23441 && !maskcmp)
23443 op_false = force_reg (mode, op_false);
23444 x = gen_rtx_IOR (mode, cmp, op_false);
23445 emit_insn (gen_rtx_SET (dest, x));
23447 else if (TARGET_XOP
23448 && !maskcmp)
23450 op_true = force_reg (mode, op_true);
23452 if (!nonimmediate_operand (op_false, mode))
23453 op_false = force_reg (mode, op_false);
23455 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
23456 op_true,
23457 op_false)));
23459 else
23461 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23462 rtx d = dest;
23464 if (!nonimmediate_operand (op_true, mode))
23465 op_true = force_reg (mode, op_true);
23467 op_false = force_reg (mode, op_false);
23469 switch (mode)
23471 case E_V4SFmode:
23472 if (TARGET_SSE4_1)
23473 gen = gen_sse4_1_blendvps;
23474 break;
23475 case E_V2DFmode:
23476 if (TARGET_SSE4_1)
23477 gen = gen_sse4_1_blendvpd;
23478 break;
23479 case E_V16QImode:
23480 case E_V8HImode:
23481 case E_V4SImode:
23482 case E_V2DImode:
23483 if (TARGET_SSE4_1)
23485 gen = gen_sse4_1_pblendvb;
23486 if (mode != V16QImode)
23487 d = gen_reg_rtx (V16QImode);
23488 op_false = gen_lowpart (V16QImode, op_false);
23489 op_true = gen_lowpart (V16QImode, op_true);
23490 cmp = gen_lowpart (V16QImode, cmp);
23492 break;
23493 case E_V8SFmode:
23494 if (TARGET_AVX)
23495 gen = gen_avx_blendvps256;
23496 break;
23497 case E_V4DFmode:
23498 if (TARGET_AVX)
23499 gen = gen_avx_blendvpd256;
23500 break;
23501 case E_V32QImode:
23502 case E_V16HImode:
23503 case E_V8SImode:
23504 case E_V4DImode:
23505 if (TARGET_AVX2)
23507 gen = gen_avx2_pblendvb;
23508 if (mode != V32QImode)
23509 d = gen_reg_rtx (V32QImode);
23510 op_false = gen_lowpart (V32QImode, op_false);
23511 op_true = gen_lowpart (V32QImode, op_true);
23512 cmp = gen_lowpart (V32QImode, cmp);
23514 break;
23516 case E_V64QImode:
23517 gen = gen_avx512bw_blendmv64qi;
23518 break;
23519 case E_V32HImode:
23520 gen = gen_avx512bw_blendmv32hi;
23521 break;
23522 case E_V16SImode:
23523 gen = gen_avx512f_blendmv16si;
23524 break;
23525 case E_V8DImode:
23526 gen = gen_avx512f_blendmv8di;
23527 break;
23528 case E_V8DFmode:
23529 gen = gen_avx512f_blendmv8df;
23530 break;
23531 case E_V16SFmode:
23532 gen = gen_avx512f_blendmv16sf;
23533 break;
23535 default:
23536 break;
23539 if (gen != NULL)
23541 emit_insn (gen (d, op_false, op_true, cmp));
23542 if (d != dest)
23543 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
23545 else
23547 op_true = force_reg (mode, op_true);
23549 t2 = gen_reg_rtx (mode);
23550 if (optimize)
23551 t3 = gen_reg_rtx (mode);
23552 else
23553 t3 = dest;
23555 x = gen_rtx_AND (mode, op_true, cmp);
23556 emit_insn (gen_rtx_SET (t2, x));
23558 x = gen_rtx_NOT (mode, cmp);
23559 x = gen_rtx_AND (mode, x, op_false);
23560 emit_insn (gen_rtx_SET (t3, x));
23562 x = gen_rtx_IOR (mode, t3, t2);
23563 emit_insn (gen_rtx_SET (dest, x));
23568 /* Expand a floating-point conditional move. Return true if successful. */
23570 bool
23571 ix86_expand_fp_movcc (rtx operands[])
23573 machine_mode mode = GET_MODE (operands[0]);
23574 enum rtx_code code = GET_CODE (operands[1]);
23575 rtx tmp, compare_op;
23576 rtx op0 = XEXP (operands[1], 0);
23577 rtx op1 = XEXP (operands[1], 1);
23579 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23581 machine_mode cmode;
23583 /* Since we have no cmove for SSE registers, don't force bad register
23584 allocation just to gain access to it. Deny movcc when the
23585 comparison mode doesn't match the move mode. */
23586 cmode = GET_MODE (op0);
23587 if (cmode == VOIDmode)
23588 cmode = GET_MODE (op1);
23589 if (cmode != mode)
23590 return false;
23592 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
23593 if (code == UNKNOWN)
23594 return false;
23596 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
23597 operands[2], operands[3]))
23598 return true;
23600 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
23601 operands[2], operands[3]);
23602 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
23603 return true;
23606 if (GET_MODE (op0) == TImode
23607 || (GET_MODE (op0) == DImode
23608 && !TARGET_64BIT))
23609 return false;
23611 /* The floating point conditional move instructions don't directly
23612 support conditions resulting from a signed integer comparison. */
23614 compare_op = ix86_expand_compare (code, op0, op1);
23615 if (!fcmov_comparison_operator (compare_op, VOIDmode))
23617 tmp = gen_reg_rtx (QImode);
23618 ix86_expand_setcc (tmp, code, op0, op1);
23620 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
23623 emit_insn (gen_rtx_SET (operands[0],
23624 gen_rtx_IF_THEN_ELSE (mode, compare_op,
23625 operands[2], operands[3])));
23627 return true;
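/* Added sketch (not original code): for a condition FCMOVcc cannot consume
   directly, such as a signed integer "a < b", the fallback above amounts to

       setl    %al          ; tmp = (a < b)
       testb   %al, %al
       fcmovne ...          ; select the IF_TRUE value when tmp != 0

   i.e. the comparison is materialised into a byte register and the
   conditional move then tests that byte against zero.  */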
23630 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
23632 static int
23633 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23635 switch (code)
23637 case EQ:
23638 return 0;
23639 case LT:
23640 case LTU:
23641 return 1;
23642 case LE:
23643 case LEU:
23644 return 2;
23645 case NE:
23646 return 4;
23647 case GE:
23648 case GEU:
23649 return 5;
23650 case GT:
23651 case GTU:
23652 return 6;
23653 default:
23654 gcc_unreachable ();
23658 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
23660 static int
23661 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23663 switch (code)
23665 case EQ:
23666 return 0x00;
23667 case NE:
23668 return 0x04;
23669 case GT:
23670 return 0x0e;
23671 case LE:
23672 return 0x02;
23673 case GE:
23674 return 0x0d;
23675 case LT:
23676 return 0x01;
23677 case UNLE:
23678 return 0x0a;
23679 case UNLT:
23680 return 0x09;
23681 case UNGE:
23682 return 0x05;
23683 case UNGT:
23684 return 0x06;
23685 case UNEQ:
23686 return 0x18;
23687 case LTGT:
23688 return 0x0c;
23689 case ORDERED:
23690 return 0x07;
23691 case UNORDERED:
23692 return 0x03;
23693 default:
23694 gcc_unreachable ();
23698 /* Return immediate value to be used in UNSPEC_PCMP
23699 for comparison CODE in MODE. */
23701 static int
23702 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
23704 if (FLOAT_MODE_P (mode))
23705 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23706 return ix86_int_cmp_code_to_pcmp_immediate (code);
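/* Added worked example: the returned values are the VCMPPS/VCMPPD predicate
   immediates, e.g. GT -> 0x0e (GT_OQ) and UNLT -> 0x09 (NGE_UQ), while the
   integer helper above follows the VPCMP encoding, where 0 = EQ, 1 = LT,
   2 = LE, 4 = NE, 5 = NLT (i.e. GE) and 6 = NLE (i.e. GT).  */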
23709 /* Expand AVX-512 vector comparison. */
23711 bool
23712 ix86_expand_mask_vec_cmp (rtx operands[])
23714 machine_mode mask_mode = GET_MODE (operands[0]);
23715 machine_mode cmp_mode = GET_MODE (operands[2]);
23716 enum rtx_code code = GET_CODE (operands[1]);
23717 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
23718 int unspec_code;
23719 rtx unspec;
23721 switch (code)
23723 case LEU:
23724 case GTU:
23725 case GEU:
23726 case LTU:
23727 unspec_code = UNSPEC_UNSIGNED_PCMP;
23728 break;
23730 default:
23731 unspec_code = UNSPEC_PCMP;
23734 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23735 operands[3], imm),
23736 unspec_code);
23737 emit_insn (gen_rtx_SET (operands[0], unspec));
23739 return true;
23742 /* Expand fp vector comparison. */
23744 bool
23745 ix86_expand_fp_vec_cmp (rtx operands[])
23747 enum rtx_code code = GET_CODE (operands[1]);
23748 rtx cmp;
23750 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23751 &operands[2], &operands[3]);
23752 if (code == UNKNOWN)
23754 rtx temp;
23755 switch (GET_CODE (operands[1]))
23757 case LTGT:
23758 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23759 operands[3], NULL, NULL);
23760 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23761 operands[3], NULL, NULL);
23762 code = AND;
23763 break;
23764 case UNEQ:
23765 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23766 operands[3], NULL, NULL);
23767 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23768 operands[3], NULL, NULL);
23769 code = IOR;
23770 break;
23771 default:
23772 gcc_unreachable ();
23774 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23775 OPTAB_DIRECT);
23777 else
23778 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23779 operands[1], operands[2]);
23781 if (operands[0] != cmp)
23782 emit_move_insn (operands[0], cmp);
23784 return true;
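/* Added note: LTGT and UNEQ take the UNKNOWN path above when no single
   compare instruction covers them; they are then decomposed as
   LTGT = ORDERED & NE and UNEQ = UNORDERED | EQ.  E.g. for the operands
   (1.0, NaN), LTGT is false because ORDERED is false, while UNEQ is true
   because UNORDERED is true.  */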
23787 static rtx
23788 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23789 rtx op_true, rtx op_false, bool *negate)
23791 machine_mode data_mode = GET_MODE (dest);
23792 machine_mode mode = GET_MODE (cop0);
23793 rtx x;
23795 *negate = false;
23797 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23798 if (TARGET_XOP
23799 && (mode == V16QImode || mode == V8HImode
23800 || mode == V4SImode || mode == V2DImode))
23802 else
23804 /* Canonicalize the comparison to EQ, GT, GTU. */
23805 switch (code)
23807 case EQ:
23808 case GT:
23809 case GTU:
23810 break;
23812 case NE:
23813 case LE:
23814 case LEU:
23815 code = reverse_condition (code);
23816 *negate = true;
23817 break;
23819 case GE:
23820 case GEU:
23821 code = reverse_condition (code);
23822 *negate = true;
23823 /* FALLTHRU */
23825 case LT:
23826 case LTU:
23827 std::swap (cop0, cop1);
23828 code = swap_condition (code);
23829 break;
23831 default:
23832 gcc_unreachable ();
23835 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23836 if (mode == V2DImode)
23838 switch (code)
23840 case EQ:
23841 /* SSE4.1 supports EQ. */
23842 if (!TARGET_SSE4_1)
23843 return NULL;
23844 break;
23846 case GT:
23847 case GTU:
23848 /* SSE4.2 supports GT/GTU. */
23849 if (!TARGET_SSE4_2)
23850 return NULL;
23851 break;
23853 default:
23854 gcc_unreachable ();
23858 /* Unsigned parallel compare is not supported by the hardware.
23859 Play some tricks to turn this into a signed comparison
23860 against 0. */
23861 if (code == GTU)
23863 cop0 = force_reg (mode, cop0);
23865 switch (mode)
23867 case E_V16SImode:
23868 case E_V8DImode:
23869 case E_V8SImode:
23870 case E_V4DImode:
23871 case E_V4SImode:
23872 case E_V2DImode:
23874 rtx t1, t2, mask;
23875 rtx (*gen_sub3) (rtx, rtx, rtx);
23877 switch (mode)
23879 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
23880 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
23881 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
23882 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
23883 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
23884 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
23885 default:
23886 gcc_unreachable ();
23888 /* Subtract (-(INT MAX) - 1) from both operands to make
23889 them signed. */
23890 mask = ix86_build_signbit_mask (mode, true, false);
23891 t1 = gen_reg_rtx (mode);
23892 emit_insn (gen_sub3 (t1, cop0, mask));
23894 t2 = gen_reg_rtx (mode);
23895 emit_insn (gen_sub3 (t2, cop1, mask));
23897 cop0 = t1;
23898 cop1 = t2;
23899 code = GT;
23901 break;
23903 case E_V64QImode:
23904 case E_V32HImode:
23905 case E_V32QImode:
23906 case E_V16HImode:
23907 case E_V16QImode:
23908 case E_V8HImode:
23909 /* Perform a parallel unsigned saturating subtraction. */
23910 x = gen_reg_rtx (mode);
23911 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
23912 cop1)));
23914 cop0 = x;
23915 cop1 = CONST0_RTX (mode);
23916 code = EQ;
23917 *negate = !*negate;
23918 break;
23920 default:
23921 gcc_unreachable ();
23926 if (*negate)
23927 std::swap (op_true, op_false);
23929 /* Allow the comparison to be done in one mode, but the movcc to
23930 happen in another mode. */
23931 if (data_mode == mode)
23933 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23934 op_true, op_false);
23936 else
23938 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23939 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23940 op_true, op_false);
23941 if (GET_MODE (x) == mode)
23942 x = gen_lowpart (data_mode, x);
23945 return x;
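/* Added worked example for the GTU rewrites above: SSE/AVX provide only
   signed vector compares, so GTU is handled in one of two ways.  For
   dword/qword elements the sign bit is flipped by subtracting the
   sign-bit mask, which maps unsigned order onto signed order; using 8-bit
   values for brevity, 200 >u 100 becomes (200 - 128) >s (100 - 128),
   i.e. 72 >s -28, which is true.  For byte/word elements a saturating
   subtract is used instead: a >u b  <=>  (a -us b) != 0, so the compare
   becomes EQ against zero with the result negated.  */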
23948 /* Expand integer vector comparison. */
23950 bool
23951 ix86_expand_int_vec_cmp (rtx operands[])
23953 rtx_code code = GET_CODE (operands[1]);
23954 bool negate = false;
23955 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23956 operands[3], NULL, NULL, &negate);
23958 if (!cmp)
23959 return false;
23961 if (negate)
23962 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23963 CONST0_RTX (GET_MODE (cmp)),
23964 NULL, NULL, &negate);
23966 gcc_assert (!negate);
23968 if (operands[0] != cmp)
23969 emit_move_insn (operands[0], cmp);
23971 return true;
23974 /* Expand a floating-point vector conditional move; a vcond operation
23975 rather than a movcc operation. */
23977 bool
23978 ix86_expand_fp_vcond (rtx operands[])
23980 enum rtx_code code = GET_CODE (operands[3]);
23981 rtx cmp;
23983 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23984 &operands[4], &operands[5]);
23985 if (code == UNKNOWN)
23987 rtx temp;
23988 switch (GET_CODE (operands[3]))
23990 case LTGT:
23991 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23992 operands[5], operands[0], operands[0]);
23993 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23994 operands[5], operands[1], operands[2]);
23995 code = AND;
23996 break;
23997 case UNEQ:
23998 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23999 operands[5], operands[0], operands[0]);
24000 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24001 operands[5], operands[1], operands[2]);
24002 code = IOR;
24003 break;
24004 default:
24005 gcc_unreachable ();
24007 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24008 OPTAB_DIRECT);
24009 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24010 return true;
24013 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24014 operands[5], operands[1], operands[2]))
24015 return true;
24017 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24018 operands[1], operands[2]);
24019 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24020 return true;
24023 /* Expand a signed/unsigned integral vector conditional move. */
24025 bool
24026 ix86_expand_int_vcond (rtx operands[])
24028 machine_mode data_mode = GET_MODE (operands[0]);
24029 machine_mode mode = GET_MODE (operands[4]);
24030 enum rtx_code code = GET_CODE (operands[3]);
24031 bool negate = false;
24032 rtx x, cop0, cop1;
24034 cop0 = operands[4];
24035 cop1 = operands[5];
24037 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24038 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
24039 if ((code == LT || code == GE)
24040 && data_mode == mode
24041 && cop1 == CONST0_RTX (mode)
24042 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24043 && GET_MODE_UNIT_SIZE (data_mode) > 1
24044 && GET_MODE_UNIT_SIZE (data_mode) <= 8
24045 && (GET_MODE_SIZE (data_mode) == 16
24046 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24048 rtx negop = operands[2 - (code == LT)];
24049 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24050 if (negop == CONST1_RTX (data_mode))
24052 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24053 operands[0], 1, OPTAB_DIRECT);
24054 if (res != operands[0])
24055 emit_move_insn (operands[0], res);
24056 return true;
24058 else if (GET_MODE_INNER (data_mode) != DImode
24059 && vector_all_ones_operand (negop, data_mode))
24061 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24062 operands[0], 0, OPTAB_DIRECT);
24063 if (res != operands[0])
24064 emit_move_insn (operands[0], res);
24065 return true;
24069 if (!nonimmediate_operand (cop1, mode))
24070 cop1 = force_reg (mode, cop1);
24071 if (!general_operand (operands[1], data_mode))
24072 operands[1] = force_reg (data_mode, operands[1]);
24073 if (!general_operand (operands[2], data_mode))
24074 operands[2] = force_reg (data_mode, operands[2]);
24076 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24077 operands[1], operands[2], &negate);
24079 if (!x)
24080 return false;
24082 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24083 operands[2-negate]);
24084 return true;
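/* Added illustration of the special case above:
       x < 0 ? -1 : 0   becomes   x >> (element_bits - 1)   (arithmetic)
       x < 0 ?  1 : 0   becomes   (unsigned) x >> (element_bits - 1)
   e.g. for V4SImode, x = -5 gives -5 >> 31 = -1 and (unsigned) -5 >> 31 = 1,
   while x = 7 gives 0 either way, so no compare or blend is needed.  */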
24087 /* AVX512F does support 64-byte integer vector operations,
24088 thus the longest vector we are faced with is V64QImode. */
24089 #define MAX_VECT_LEN 64
24091 struct expand_vec_perm_d
24093 rtx target, op0, op1;
24094 unsigned char perm[MAX_VECT_LEN];
24095 machine_mode vmode;
24096 unsigned char nelt;
24097 bool one_operand_p;
24098 bool testing_p;
24101 static bool
24102 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24103 struct expand_vec_perm_d *d)
24105 /* ix86_expand_vec_perm_vpermt2 is called from both the const and non-const
24106 expanders, so the args are either in D, or in OP0, OP1, etc. */
24107 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24108 machine_mode maskmode = mode;
24109 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24111 switch (mode)
24113 case E_V8HImode:
24114 if (TARGET_AVX512VL && TARGET_AVX512BW)
24115 gen = gen_avx512vl_vpermt2varv8hi3;
24116 break;
24117 case E_V16HImode:
24118 if (TARGET_AVX512VL && TARGET_AVX512BW)
24119 gen = gen_avx512vl_vpermt2varv16hi3;
24120 break;
24121 case E_V64QImode:
24122 if (TARGET_AVX512VBMI)
24123 gen = gen_avx512bw_vpermt2varv64qi3;
24124 break;
24125 case E_V32HImode:
24126 if (TARGET_AVX512BW)
24127 gen = gen_avx512bw_vpermt2varv32hi3;
24128 break;
24129 case E_V4SImode:
24130 if (TARGET_AVX512VL)
24131 gen = gen_avx512vl_vpermt2varv4si3;
24132 break;
24133 case E_V8SImode:
24134 if (TARGET_AVX512VL)
24135 gen = gen_avx512vl_vpermt2varv8si3;
24136 break;
24137 case E_V16SImode:
24138 if (TARGET_AVX512F)
24139 gen = gen_avx512f_vpermt2varv16si3;
24140 break;
24141 case E_V4SFmode:
24142 if (TARGET_AVX512VL)
24144 gen = gen_avx512vl_vpermt2varv4sf3;
24145 maskmode = V4SImode;
24147 break;
24148 case E_V8SFmode:
24149 if (TARGET_AVX512VL)
24151 gen = gen_avx512vl_vpermt2varv8sf3;
24152 maskmode = V8SImode;
24154 break;
24155 case E_V16SFmode:
24156 if (TARGET_AVX512F)
24158 gen = gen_avx512f_vpermt2varv16sf3;
24159 maskmode = V16SImode;
24161 break;
24162 case E_V2DImode:
24163 if (TARGET_AVX512VL)
24164 gen = gen_avx512vl_vpermt2varv2di3;
24165 break;
24166 case E_V4DImode:
24167 if (TARGET_AVX512VL)
24168 gen = gen_avx512vl_vpermt2varv4di3;
24169 break;
24170 case E_V8DImode:
24171 if (TARGET_AVX512F)
24172 gen = gen_avx512f_vpermt2varv8di3;
24173 break;
24174 case E_V2DFmode:
24175 if (TARGET_AVX512VL)
24177 gen = gen_avx512vl_vpermt2varv2df3;
24178 maskmode = V2DImode;
24180 break;
24181 case E_V4DFmode:
24182 if (TARGET_AVX512VL)
24184 gen = gen_avx512vl_vpermt2varv4df3;
24185 maskmode = V4DImode;
24187 break;
24188 case E_V8DFmode:
24189 if (TARGET_AVX512F)
24191 gen = gen_avx512f_vpermt2varv8df3;
24192 maskmode = V8DImode;
24194 break;
24195 default:
24196 break;
24199 if (gen == NULL)
24200 return false;
24202 /* ix86_expand_vec_perm_vpermt2 is called from both the const and non-const
24203 expanders, so the args are either in D, or in OP0, OP1, etc. */
24204 if (d)
24206 rtx vec[64];
24207 target = d->target;
24208 op0 = d->op0;
24209 op1 = d->op1;
24210 for (int i = 0; i < d->nelt; ++i)
24211 vec[i] = GEN_INT (d->perm[i]);
24212 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24215 emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24216 return true;
24219 /* Expand a variable vector permutation. */
24221 void
24222 ix86_expand_vec_perm (rtx operands[])
24224 rtx target = operands[0];
24225 rtx op0 = operands[1];
24226 rtx op1 = operands[2];
24227 rtx mask = operands[3];
24228 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24229 machine_mode mode = GET_MODE (op0);
24230 machine_mode maskmode = GET_MODE (mask);
24231 int w, e, i;
24232 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24234 /* Number of elements in the vector. */
24235 w = GET_MODE_NUNITS (mode);
24236 e = GET_MODE_UNIT_SIZE (mode);
24237 gcc_assert (w <= 64);
24239 if (TARGET_AVX512F && one_operand_shuffle)
24241 rtx (*gen) (rtx, rtx, rtx) = NULL;
24242 switch (mode)
24244 case E_V16SImode:
24245 gen = gen_avx512f_permvarv16si;
24246 break;
24247 case E_V16SFmode:
24248 gen = gen_avx512f_permvarv16sf;
24249 break;
24250 case E_V8DImode:
24251 gen = gen_avx512f_permvarv8di;
24252 break;
24253 case E_V8DFmode:
24254 gen = gen_avx512f_permvarv8df;
24255 break;
24256 default:
24257 break;
24259 if (gen != NULL)
24261 emit_insn (gen (target, op0, mask));
24262 return;
24266 if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
24267 return;
24269 if (TARGET_AVX2)
24271 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
24273 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
24274 a constant shuffle operand. With a tiny bit of effort we can
24275 use VPERMD instead. A re-interpretation stall for V4DFmode is
24276 unfortunate but there's no avoiding it.
24277 Similarly for V16HImode we don't have instructions for variable
24278 shuffling, while for V32QImode, after preparing suitable masks,
24279 we can use vpshufb; vpshufb; vpermq; vpor. */
24281 if (mode == V16HImode)
24283 maskmode = mode = V32QImode;
24284 w = 32;
24285 e = 1;
24287 else
24289 maskmode = mode = V8SImode;
24290 w = 8;
24291 e = 4;
24293 t1 = gen_reg_rtx (maskmode);
24295 /* Replicate the low bits of the V4DImode mask into V8SImode:
24296 mask = { A B C D }
24297 t1 = { A A B B C C D D }. */
24298 for (i = 0; i < w / 2; ++i)
24299 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24300 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24301 vt = force_reg (maskmode, vt);
24302 mask = gen_lowpart (maskmode, mask);
24303 if (maskmode == V8SImode)
24304 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
24305 else
24306 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
24308 /* Multiply the shuffle indices by two. */
24309 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
24310 OPTAB_DIRECT);
24312 /* Add one to the odd shuffle indices:
24313 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
24314 for (i = 0; i < w / 2; ++i)
24316 vec[i * 2] = const0_rtx;
24317 vec[i * 2 + 1] = const1_rtx;
24319 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24320 vt = validize_mem (force_const_mem (maskmode, vt));
24321 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
24322 OPTAB_DIRECT);
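/* Added worked example: for a V4DImode mask { 1 3 0 2 } the steps above
   give t1 = { 1 1 3 3 0 0 2 2 } after replication, { 2 2 6 6 0 0 4 4 }
   after doubling, and { 2 3 6 7 0 1 4 5 } after adding { 0 1 0 1 ... },
   which is exactly the V8SImode control selecting the dword pairs that
   make up qwords 1, 3, 0 and 2.  */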
24324 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
24325 operands[3] = mask = t1;
24326 target = gen_reg_rtx (mode);
24327 op0 = gen_lowpart (mode, op0);
24328 op1 = gen_lowpart (mode, op1);
24331 switch (mode)
24333 case E_V8SImode:
24334 /* The VPERMD and VPERMPS instructions already properly ignore
24335 the high bits of the shuffle elements. No need for us to
24336 perform an AND ourselves. */
24337 if (one_operand_shuffle)
24339 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
24340 if (target != operands[0])
24341 emit_move_insn (operands[0],
24342 gen_lowpart (GET_MODE (operands[0]), target));
24344 else
24346 t1 = gen_reg_rtx (V8SImode);
24347 t2 = gen_reg_rtx (V8SImode);
24348 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
24349 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
24350 goto merge_two;
24352 return;
24354 case E_V8SFmode:
24355 mask = gen_lowpart (V8SImode, mask);
24356 if (one_operand_shuffle)
24357 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
24358 else
24360 t1 = gen_reg_rtx (V8SFmode);
24361 t2 = gen_reg_rtx (V8SFmode);
24362 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
24363 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
24364 goto merge_two;
24366 return;
24368 case E_V4SImode:
24369 /* By combining the two 128-bit input vectors into one 256-bit
24370 input vector, we can use VPERMD and VPERMPS for the full
24371 two-operand shuffle. */
24372 t1 = gen_reg_rtx (V8SImode);
24373 t2 = gen_reg_rtx (V8SImode);
24374 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
24375 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24376 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
24377 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
24378 return;
24380 case E_V4SFmode:
24381 t1 = gen_reg_rtx (V8SFmode);
24382 t2 = gen_reg_rtx (V8SImode);
24383 mask = gen_lowpart (V4SImode, mask);
24384 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
24385 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24386 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
24387 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
24388 return;
24390 case E_V32QImode:
24391 t1 = gen_reg_rtx (V32QImode);
24392 t2 = gen_reg_rtx (V32QImode);
24393 t3 = gen_reg_rtx (V32QImode);
24394 vt2 = GEN_INT (-128);
24395 vt = gen_const_vec_duplicate (V32QImode, vt2);
24396 vt = force_reg (V32QImode, vt);
24397 for (i = 0; i < 32; i++)
24398 vec[i] = i < 16 ? vt2 : const0_rtx;
24399 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24400 vt2 = force_reg (V32QImode, vt2);
24401 /* From mask create two adjusted masks, which contain the same
24402 bits as mask in the low 7 bits of each vector element.
24403 The first mask will have the most significant bit clear
24404 if it requests element from the same 128-bit lane
24405 and MSB set if it requests element from the other 128-bit lane.
24406 The second mask will have the opposite values of the MSB,
24407 and additionally will have its 128-bit lanes swapped.
24408 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
24409 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
24410 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
24411 stands for other 12 bytes. */
24412 /* The bit that says whether an element is from the same lane or the
24413 other lane is bit 4, so shift it up by 3 to the MSB position. */
24414 t5 = gen_reg_rtx (V4DImode);
24415 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
24416 GEN_INT (3)));
24417 /* Clear MSB bits from the mask just in case it had them set. */
24418 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
24419 /* After this t1 will have MSB set for elements from other lane. */
24420 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
24421 /* Clear bits other than MSB. */
24422 emit_insn (gen_andv32qi3 (t1, t1, vt));
24423 /* Or in the lower bits from mask into t3. */
24424 emit_insn (gen_iorv32qi3 (t3, t1, t2));
24425 /* And invert MSB bits in t1, so MSB is set for elements from the same
24426 lane. */
24427 emit_insn (gen_xorv32qi3 (t1, t1, vt));
24428 /* Swap 128-bit lanes in t3. */
24429 t6 = gen_reg_rtx (V4DImode);
24430 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
24431 const2_rtx, GEN_INT (3),
24432 const0_rtx, const1_rtx));
24433 /* And or in the lower bits from mask into t1. */
24434 emit_insn (gen_iorv32qi3 (t1, t1, t2));
24435 if (one_operand_shuffle)
24437 /* Each of these shuffles will put 0s in places where an
24438 element from the other 128-bit lane is needed; otherwise it
24439 will shuffle in the requested value. */
24440 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
24441 gen_lowpart (V32QImode, t6)));
24442 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
24443 /* For t3 the 128-bit lanes are swapped again. */
24444 t7 = gen_reg_rtx (V4DImode);
24445 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
24446 const2_rtx, GEN_INT (3),
24447 const0_rtx, const1_rtx));
24448 /* And oring both together leads to the result. */
24449 emit_insn (gen_iorv32qi3 (target, t1,
24450 gen_lowpart (V32QImode, t7)));
24451 if (target != operands[0])
24452 emit_move_insn (operands[0],
24453 gen_lowpart (GET_MODE (operands[0]), target));
24454 return;
24457 t4 = gen_reg_rtx (V32QImode);
24458 /* Similar to the one_operand_shuffle code above, just repeated
24459 twice, once for each operand. The merge_two: code below will
24460 merge the two results together. */
24461 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
24462 gen_lowpart (V32QImode, t6)));
24463 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
24464 gen_lowpart (V32QImode, t6)));
24465 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
24466 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
24467 t7 = gen_reg_rtx (V4DImode);
24468 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
24469 const2_rtx, GEN_INT (3),
24470 const0_rtx, const1_rtx));
24471 t8 = gen_reg_rtx (V4DImode);
24472 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
24473 const2_rtx, GEN_INT (3),
24474 const0_rtx, const1_rtx));
24475 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
24476 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
24477 t1 = t4;
24478 t2 = t3;
24479 goto merge_two;
24481 default:
24482 gcc_assert (GET_MODE_SIZE (mode) <= 16);
24483 break;
24487 if (TARGET_XOP)
24489 /* The XOP VPPERM insn supports three inputs. By ignoring the
24490 one_operand_shuffle special case, we avoid creating another
24491 set of constant vectors in memory. */
24492 one_operand_shuffle = false;
24494 /* mask = mask & {2*w-1, ...} */
24495 vt = GEN_INT (2*w - 1);
24497 else
24499 /* mask = mask & {w-1, ...} */
24500 vt = GEN_INT (w - 1);
24503 vt = gen_const_vec_duplicate (maskmode, vt);
24504 mask = expand_simple_binop (maskmode, AND, mask, vt,
24505 NULL_RTX, 0, OPTAB_DIRECT);
24507 /* For non-QImode operations, convert the word permutation control
24508 into a byte permutation control. */
24509 if (mode != V16QImode)
24511 mask = expand_simple_binop (maskmode, ASHIFT, mask,
24512 GEN_INT (exact_log2 (e)),
24513 NULL_RTX, 0, OPTAB_DIRECT);
24515 /* Convert mask to vector of chars. */
24516 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
24518 /* Replicate each of the input bytes into byte positions:
24519 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
24520 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
24521 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
24522 for (i = 0; i < 16; ++i)
24523 vec[i] = GEN_INT (i/e * e);
24524 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24525 vt = validize_mem (force_const_mem (V16QImode, vt));
24526 if (TARGET_XOP)
24527 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
24528 else
24529 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
24531 /* Convert it into the byte positions by doing
24532 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
24533 for (i = 0; i < 16; ++i)
24534 vec[i] = GEN_INT (i % e);
24535 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24536 vt = validize_mem (force_const_mem (V16QImode, vt));
24537 emit_insn (gen_addv16qi3 (mask, mask, vt));
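/* Added worked example: for V4SImode (e == 4) a word index of 3 first
   becomes 3 << 2 = 12, the pshufb above replicates that low byte across
   its element giving { 12 12 12 12 }, and adding the { 0 1 2 3 } pattern
   yields the byte control { 12 13 14 15 }, i.e. the four bytes of source
   element 3.  */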
24540 /* The actual shuffle operations all operate on V16QImode. */
24541 op0 = gen_lowpart (V16QImode, op0);
24542 op1 = gen_lowpart (V16QImode, op1);
24544 if (TARGET_XOP)
24546 if (GET_MODE (target) != V16QImode)
24547 target = gen_reg_rtx (V16QImode);
24548 emit_insn (gen_xop_pperm (target, op0, op1, mask));
24549 if (target != operands[0])
24550 emit_move_insn (operands[0],
24551 gen_lowpart (GET_MODE (operands[0]), target));
24553 else if (one_operand_shuffle)
24555 if (GET_MODE (target) != V16QImode)
24556 target = gen_reg_rtx (V16QImode);
24557 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
24558 if (target != operands[0])
24559 emit_move_insn (operands[0],
24560 gen_lowpart (GET_MODE (operands[0]), target));
24562 else
24564 rtx xops[6];
24565 bool ok;
24567 /* Shuffle the two input vectors independently. */
24568 t1 = gen_reg_rtx (V16QImode);
24569 t2 = gen_reg_rtx (V16QImode);
24570 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
24571 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
24573 merge_two:
24574 /* Then merge them together. The key is whether any given control
24575 element contained a bit set that indicates the second word. */
24576 mask = operands[3];
24577 vt = GEN_INT (w);
24578 if (maskmode == V2DImode && !TARGET_SSE4_1)
24580 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
24581 more shuffle to convert the V2DI input mask into a V4SI
24582 input mask. At that point the masking done by expand_int_vcond
24583 will work as desired. */
24584 rtx t3 = gen_reg_rtx (V4SImode);
24585 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
24586 const0_rtx, const0_rtx,
24587 const2_rtx, const2_rtx));
24588 mask = t3;
24589 maskmode = V4SImode;
24590 e = w = 4;
24593 vt = gen_const_vec_duplicate (maskmode, vt);
24594 vt = force_reg (maskmode, vt);
24595 mask = expand_simple_binop (maskmode, AND, mask, vt,
24596 NULL_RTX, 0, OPTAB_DIRECT);
24598 if (GET_MODE (target) != mode)
24599 target = gen_reg_rtx (mode);
24600 xops[0] = target;
24601 xops[1] = gen_lowpart (mode, t2);
24602 xops[2] = gen_lowpart (mode, t1);
24603 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
24604 xops[4] = mask;
24605 xops[5] = vt;
24606 ok = ix86_expand_int_vcond (xops);
24607 gcc_assert (ok);
24608 if (target != operands[0])
24609 emit_move_insn (operands[0],
24610 gen_lowpart (GET_MODE (operands[0]), target));
24614 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
24615 true if we should do zero extension, else sign extension. HIGH_P is
24616 true if we want the N/2 high elements, else the low elements. */
24618 void
24619 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
24621 machine_mode imode = GET_MODE (src);
24622 rtx tmp;
24624 if (TARGET_SSE4_1)
24626 rtx (*unpack)(rtx, rtx);
24627 rtx (*extract)(rtx, rtx) = NULL;
24628 machine_mode halfmode = BLKmode;
24630 switch (imode)
24632 case E_V64QImode:
24633 if (unsigned_p)
24634 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
24635 else
24636 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
24637 halfmode = V32QImode;
24638 extract
24639 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
24640 break;
24641 case E_V32QImode:
24642 if (unsigned_p)
24643 unpack = gen_avx2_zero_extendv16qiv16hi2;
24644 else
24645 unpack = gen_avx2_sign_extendv16qiv16hi2;
24646 halfmode = V16QImode;
24647 extract
24648 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
24649 break;
24650 case E_V32HImode:
24651 if (unsigned_p)
24652 unpack = gen_avx512f_zero_extendv16hiv16si2;
24653 else
24654 unpack = gen_avx512f_sign_extendv16hiv16si2;
24655 halfmode = V16HImode;
24656 extract
24657 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
24658 break;
24659 case E_V16HImode:
24660 if (unsigned_p)
24661 unpack = gen_avx2_zero_extendv8hiv8si2;
24662 else
24663 unpack = gen_avx2_sign_extendv8hiv8si2;
24664 halfmode = V8HImode;
24665 extract
24666 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
24667 break;
24668 case E_V16SImode:
24669 if (unsigned_p)
24670 unpack = gen_avx512f_zero_extendv8siv8di2;
24671 else
24672 unpack = gen_avx512f_sign_extendv8siv8di2;
24673 halfmode = V8SImode;
24674 extract
24675 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
24676 break;
24677 case E_V8SImode:
24678 if (unsigned_p)
24679 unpack = gen_avx2_zero_extendv4siv4di2;
24680 else
24681 unpack = gen_avx2_sign_extendv4siv4di2;
24682 halfmode = V4SImode;
24683 extract
24684 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
24685 break;
24686 case E_V16QImode:
24687 if (unsigned_p)
24688 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
24689 else
24690 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24691 break;
24692 case E_V8HImode:
24693 if (unsigned_p)
24694 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24695 else
24696 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24697 break;
24698 case E_V4SImode:
24699 if (unsigned_p)
24700 unpack = gen_sse4_1_zero_extendv2siv2di2;
24701 else
24702 unpack = gen_sse4_1_sign_extendv2siv2di2;
24703 break;
24704 default:
24705 gcc_unreachable ();
24708 if (GET_MODE_SIZE (imode) >= 32)
24710 tmp = gen_reg_rtx (halfmode);
24711 emit_insn (extract (tmp, src));
24713 else if (high_p)
24715 /* Shift higher 8 bytes to lower 8 bytes. */
24716 tmp = gen_reg_rtx (V1TImode);
24717 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24718 GEN_INT (64)));
24719 tmp = gen_lowpart (imode, tmp);
24721 else
24722 tmp = src;
24724 emit_insn (unpack (dest, tmp));
24726 else
24728 rtx (*unpack)(rtx, rtx, rtx);
24730 switch (imode)
24732 case E_V16QImode:
24733 if (high_p)
24734 unpack = gen_vec_interleave_highv16qi;
24735 else
24736 unpack = gen_vec_interleave_lowv16qi;
24737 break;
24738 case E_V8HImode:
24739 if (high_p)
24740 unpack = gen_vec_interleave_highv8hi;
24741 else
24742 unpack = gen_vec_interleave_lowv8hi;
24743 break;
24744 case E_V4SImode:
24745 if (high_p)
24746 unpack = gen_vec_interleave_highv4si;
24747 else
24748 unpack = gen_vec_interleave_lowv4si;
24749 break;
24750 default:
24751 gcc_unreachable ();
24754 if (unsigned_p)
24755 tmp = force_reg (imode, CONST0_RTX (imode));
24756 else
24757 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24758 src, pc_rtx, pc_rtx);
24760 rtx tmp2 = gen_reg_rtx (imode);
24761 emit_insn (unpack (tmp2, src, tmp));
24762 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
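/* Added note on the pre-SSE4.1 path above: sign extension is done by
   interleaving SRC with a mask of its own sign bits.  The compare
   0 > SRC yields an all-ones element wherever SRC is negative, and the
   interleave places those mask elements in the high half of each widened
   element; e.g. widening the V8HImode values { -1, 2, ... } produces the
   V4SImode values { -1, 2, ... }.  For zero extension the mask is simply
   a zero vector.  */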
24766 /* Expand conditional increment or decrement using adc/sbb instructions.
24767 The default case using setcc followed by the conditional move can be
24768 done by generic code. */
24769 bool
24770 ix86_expand_int_addcc (rtx operands[])
24772 enum rtx_code code = GET_CODE (operands[1]);
24773 rtx flags;
24774 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24775 rtx compare_op;
24776 rtx val = const0_rtx;
24777 bool fpcmp = false;
24778 machine_mode mode;
24779 rtx op0 = XEXP (operands[1], 0);
24780 rtx op1 = XEXP (operands[1], 1);
24782 if (operands[3] != const1_rtx
24783 && operands[3] != constm1_rtx)
24784 return false;
24785 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24786 return false;
24787 code = GET_CODE (compare_op);
24789 flags = XEXP (compare_op, 0);
24791 if (GET_MODE (flags) == CCFPmode)
24793 fpcmp = true;
24794 code = ix86_fp_compare_code_to_integer (code);
24797 if (code != LTU)
24799 val = constm1_rtx;
24800 if (fpcmp)
24801 PUT_CODE (compare_op,
24802 reverse_condition_maybe_unordered
24803 (GET_CODE (compare_op)));
24804 else
24805 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24808 mode = GET_MODE (operands[0]);
24810 /* Construct either adc or sbb insn. */
24811 if ((code == LTU) == (operands[3] == constm1_rtx))
24813 switch (mode)
24815 case E_QImode:
24816 insn = gen_subqi3_carry;
24817 break;
24818 case E_HImode:
24819 insn = gen_subhi3_carry;
24820 break;
24821 case E_SImode:
24822 insn = gen_subsi3_carry;
24823 break;
24824 case E_DImode:
24825 insn = gen_subdi3_carry;
24826 break;
24827 default:
24828 gcc_unreachable ();
24831 else
24833 switch (mode)
24835 case E_QImode:
24836 insn = gen_addqi3_carry;
24837 break;
24838 case E_HImode:
24839 insn = gen_addhi3_carry;
24840 break;
24841 case E_SImode:
24842 insn = gen_addsi3_carry;
24843 break;
24844 case E_DImode:
24845 insn = gen_adddi3_carry;
24846 break;
24847 default:
24848 gcc_unreachable ();
24851 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24853 return true;
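/* Added illustrative example: for "x += (a < b)" with unsigned operands
   the expansion above boils down to (Intel syntax)

       cmp  a, b        ; CF is set iff a < b  (LTU)
       adc  x, 0        ; x += CF

   and "x -= (a < b)" likewise becomes "sbb x, 0"; other flag conditions
   are handled by reversing the compare and using the opposite adc/sbb
   form with a -1 immediate instead.  */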
24857 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24858 but works for floating point parameters and non-offsettable memories.
24859 For pushes, it returns just stack offsets; the values will be saved
24860 in the right order. At most four parts are generated. */
24862 static int
24863 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24865 int size;
24867 if (!TARGET_64BIT)
24868 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24869 else
24870 size = (GET_MODE_SIZE (mode) + 4) / 8;
24872 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24873 gcc_assert (size >= 2 && size <= 4);
24875 /* Optimize constant pool references to immediates. This is used by fp
24876 moves that force all constants to memory to allow combining. */
24877 if (MEM_P (operand) && MEM_READONLY_P (operand))
24878 operand = avoid_constant_pool_reference (operand);
24880 if (MEM_P (operand) && !offsettable_memref_p (operand))
24883 /* The only non-offsettable memories we handle are pushes. */
24883 int ok = push_operand (operand, VOIDmode);
24885 gcc_assert (ok);
24887 operand = copy_rtx (operand);
24888 PUT_MODE (operand, word_mode);
24889 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24890 return size;
24893 if (GET_CODE (operand) == CONST_VECTOR)
24895 scalar_int_mode imode = int_mode_for_mode (mode).require ();
24896 /* Caution: if we looked through a constant pool memory above,
24897 the operand may actually have a different mode now. That's
24898 ok, since we want to pun this all the way back to an integer. */
24899 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24900 gcc_assert (operand != NULL);
24901 mode = imode;
24904 if (!TARGET_64BIT)
24906 if (mode == DImode)
24907 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24908 else
24910 int i;
24912 if (REG_P (operand))
24914 gcc_assert (reload_completed);
24915 for (i = 0; i < size; i++)
24916 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24918 else if (offsettable_memref_p (operand))
24920 operand = adjust_address (operand, SImode, 0);
24921 parts[0] = operand;
24922 for (i = 1; i < size; i++)
24923 parts[i] = adjust_address (operand, SImode, 4 * i);
24925 else if (CONST_DOUBLE_P (operand))
24927 const REAL_VALUE_TYPE *r;
24928 long l[4];
24930 r = CONST_DOUBLE_REAL_VALUE (operand);
24931 switch (mode)
24933 case E_TFmode:
24934 real_to_target (l, r, mode);
24935 parts[3] = gen_int_mode (l[3], SImode);
24936 parts[2] = gen_int_mode (l[2], SImode);
24937 break;
24938 case E_XFmode:
24939 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24940 long double may not be 80-bit. */
24941 real_to_target (l, r, mode);
24942 parts[2] = gen_int_mode (l[2], SImode);
24943 break;
24944 case E_DFmode:
24945 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24946 break;
24947 default:
24948 gcc_unreachable ();
24950 parts[1] = gen_int_mode (l[1], SImode);
24951 parts[0] = gen_int_mode (l[0], SImode);
24953 else
24954 gcc_unreachable ();
24957 else
24959 if (mode == TImode)
24960 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24961 if (mode == XFmode || mode == TFmode)
24963 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24964 if (REG_P (operand))
24966 gcc_assert (reload_completed);
24967 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24968 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24970 else if (offsettable_memref_p (operand))
24972 operand = adjust_address (operand, DImode, 0);
24973 parts[0] = operand;
24974 parts[1] = adjust_address (operand, upper_mode, 8);
24976 else if (CONST_DOUBLE_P (operand))
24978 long l[4];
24980 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24982 /* real_to_target puts 32-bit pieces in each long. */
24983 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
24984 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
24985 << 32), DImode);
24987 if (upper_mode == SImode)
24988 parts[1] = gen_int_mode (l[2], SImode);
24989 else
24990 parts[1]
24991 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
24992 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
24993 << 32), DImode);
24995 else
24996 gcc_unreachable ();
25000 return size;
25003 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
25004 Return false when normal moves are needed; true when all required
25005 insns have been emitted. Operands 2-4 contain the input values
25006 in the correct order; operands 5-7 contain the output values. */
25008 void
25009 ix86_split_long_move (rtx operands[])
25011 rtx part[2][4];
25012 int nparts, i, j;
25013 int push = 0;
25014 int collisions = 0;
25015 machine_mode mode = GET_MODE (operands[0]);
25016 bool collisionparts[4];
25018 /* The DFmode expanders may ask us to move a double.
25019 For a 64-bit target this is a single move. By hiding the fact
25020 here we simplify the i386.md splitters. */
25021 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
25023 /* Optimize constant pool references to immediates. This is used by
25024 fp moves that force all constants to memory to allow combining. */
25026 if (MEM_P (operands[1])
25027 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
25028 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
25029 operands[1] = get_pool_constant (XEXP (operands[1], 0));
25030 if (push_operand (operands[0], VOIDmode))
25032 operands[0] = copy_rtx (operands[0]);
25033 PUT_MODE (operands[0], word_mode);
25035 else
25036 operands[0] = gen_lowpart (DImode, operands[0]);
25037 operands[1] = gen_lowpart (DImode, operands[1]);
25038 emit_move_insn (operands[0], operands[1]);
25039 return;
25042 /* The only non-offsettable memory we handle is push. */
25043 if (push_operand (operands[0], VOIDmode))
25044 push = 1;
25045 else
25046 gcc_assert (!MEM_P (operands[0])
25047 || offsettable_memref_p (operands[0]));
25049 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
25050 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
25052 /* When emitting push, take care for source operands on the stack. */
25053 if (push && MEM_P (operands[1])
25054 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
25056 rtx src_base = XEXP (part[1][nparts - 1], 0);
25058 /* Compensate for the stack decrement by 4. */
25059 if (!TARGET_64BIT && nparts == 3
25060 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25061 src_base = plus_constant (Pmode, src_base, 4);
25063 /* src_base refers to the stack pointer and is
25064 automatically decreased by emitted push. */
25065 for (i = 0; i < nparts; i++)
25066 part[1][i] = change_address (part[1][i],
25067 GET_MODE (part[1][i]), src_base);
25070 /* We need to do copy in the right order in case an address register
25071 of the source overlaps the destination. */
25072 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25074 rtx tmp;
25076 for (i = 0; i < nparts; i++)
25078 collisionparts[i]
25079 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25080 if (collisionparts[i])
25081 collisions++;
25084 /* Collision in the middle part can be handled by reordering. */
25085 if (collisions == 1 && nparts == 3 && collisionparts [1])
25087 std::swap (part[0][1], part[0][2]);
25088 std::swap (part[1][1], part[1][2]);
25090 else if (collisions == 1
25091 && nparts == 4
25092 && (collisionparts [1] || collisionparts [2]))
25094 if (collisionparts [1])
25096 std::swap (part[0][1], part[0][2]);
25097 std::swap (part[1][1], part[1][2]);
25099 else
25101 std::swap (part[0][2], part[0][3]);
25102 std::swap (part[1][2], part[1][3]);
25106 /* If there are more collisions, we can't handle it by reordering.
25107 Do an lea to the last part and use only one colliding move. */
25108 else if (collisions > 1)
25110 rtx base, addr;
25112 collisions = 1;
25114 base = part[0][nparts - 1];
25116 /* Handle the case when the last part isn't valid for lea.
25117 Happens in 64-bit mode storing the 12-byte XFmode. */
25118 if (GET_MODE (base) != Pmode)
25119 base = gen_rtx_REG (Pmode, REGNO (base));
25121 addr = XEXP (part[1][0], 0);
25122 if (TARGET_TLS_DIRECT_SEG_REFS)
25124 struct ix86_address parts;
25125 int ok = ix86_decompose_address (addr, &parts);
25126 gcc_assert (ok);
25127 /* It is not valid to use %gs: or %fs: in lea. */
25128 gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
25130 emit_insn (gen_rtx_SET (base, addr));
25131 part[1][0] = replace_equiv_address (part[1][0], base);
25132 for (i = 1; i < nparts; i++)
25134 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25135 part[1][i] = replace_equiv_address (part[1][i], tmp);
25140 if (push)
25142 if (!TARGET_64BIT)
25144 if (nparts == 3)
25146 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25147 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25148 stack_pointer_rtx, GEN_INT (-4)));
25149 emit_move_insn (part[0][2], part[1][2]);
25151 else if (nparts == 4)
25153 emit_move_insn (part[0][3], part[1][3]);
25154 emit_move_insn (part[0][2], part[1][2]);
25157 else
25159 /* In 64-bit mode we don't have a 32-bit push available. In case this is
25160 a register, that is OK - we will just use the larger counterpart. We also
25161 retype memory - these come from an attempt to avoid the REX prefix on
25162 moving the second half of a TFmode value. */
25163 if (GET_MODE (part[1][1]) == SImode)
25165 switch (GET_CODE (part[1][1]))
25167 case MEM:
25168 part[1][1] = adjust_address (part[1][1], DImode, 0);
25169 break;
25171 case REG:
25172 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25173 break;
25175 default:
25176 gcc_unreachable ();
25179 if (GET_MODE (part[1][0]) == SImode)
25180 part[1][0] = part[1][1];
25183 emit_move_insn (part[0][1], part[1][1]);
25184 emit_move_insn (part[0][0], part[1][0]);
25185 return;
25188 /* Choose correct order to not overwrite the source before it is copied. */
25189 if ((REG_P (part[0][0])
25190 && REG_P (part[1][1])
25191 && (REGNO (part[0][0]) == REGNO (part[1][1])
25192 || (nparts == 3
25193 && REGNO (part[0][0]) == REGNO (part[1][2]))
25194 || (nparts == 4
25195 && REGNO (part[0][0]) == REGNO (part[1][3]))))
25196 || (collisions > 0
25197 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25199 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25201 operands[2 + i] = part[0][j];
25202 operands[6 + i] = part[1][j];
25205 else
25207 for (i = 0; i < nparts; i++)
25209 operands[2 + i] = part[0][i];
25210 operands[6 + i] = part[1][i];
25214 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
25215 if (optimize_insn_for_size_p ())
25217 for (j = 0; j < nparts - 1; j++)
25218 if (CONST_INT_P (operands[6 + j])
25219 && operands[6 + j] != const0_rtx
25220 && REG_P (operands[2 + j]))
25221 for (i = j; i < nparts - 1; i++)
25222 if (CONST_INT_P (operands[7 + i])
25223 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25224 operands[7 + i] = operands[2 + j];
25227 for (i = 0; i < nparts; i++)
25228 emit_move_insn (operands[2 + i], operands[6 + i]);
25230 return;
25233 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25234 left shift by a constant, either using a single shift or
25235 a sequence of add instructions. */
25237 static void
25238 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25240 rtx (*insn)(rtx, rtx, rtx);
25242 if (count == 1
25243 || (count * ix86_cost->add <= ix86_cost->shift_const
25244 && !optimize_insn_for_size_p ()))
25246 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25247 while (count-- > 0)
25248 emit_insn (insn (operand, operand, operand));
25250 else
25252 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25253 emit_insn (insn (operand, operand, GEN_INT (count)));
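/* Added note: a shift by 1 is always emitted as "add reg, reg" above, and
   a small constant shift may likewise become a chain of adds when
   COUNT * add_cost <= shift_const_cost and we are not optimizing for
   size; e.g. with a hypothetical add cost of 1 and constant-shift cost
   of 3, a shift by 2 would be emitted as two adds.  */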
25257 void
25258 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
25260 rtx (*gen_ashl3)(rtx, rtx, rtx);
25261 rtx (*gen_shld)(rtx, rtx, rtx);
25262 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25264 rtx low[2], high[2];
25265 int count;
25267 if (CONST_INT_P (operands[2]))
25269 split_double_mode (mode, operands, 2, low, high);
25270 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25272 if (count >= half_width)
25274 emit_move_insn (high[0], low[1]);
25275 emit_move_insn (low[0], const0_rtx);
25277 if (count > half_width)
25278 ix86_expand_ashl_const (high[0], count - half_width, mode);
25280 else
25282 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25284 if (!rtx_equal_p (operands[0], operands[1]))
25285 emit_move_insn (operands[0], operands[1]);
25287 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
25288 ix86_expand_ashl_const (low[0], count, mode);
25290 return;
25293 split_double_mode (mode, operands, 1, low, high);
25295 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25297 if (operands[1] == const1_rtx)
25299 /* Assuming we've chosen QImode-capable registers, 1 << N
25300 can be done with two 32/64-bit shifts, no branches, no cmoves. */
25301 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
25303 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25305 ix86_expand_clear (low[0]);
25306 ix86_expand_clear (high[0]);
25307 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
25309 d = gen_lowpart (QImode, low[0]);
25310 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25311 s = gen_rtx_EQ (QImode, flags, const0_rtx);
25312 emit_insn (gen_rtx_SET (d, s));
25314 d = gen_lowpart (QImode, high[0]);
25315 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25316 s = gen_rtx_NE (QImode, flags, const0_rtx);
25317 emit_insn (gen_rtx_SET (d, s));
25320 /* Otherwise, we can get the same results by manually performing
25321 a bit extract operation on bit 5/6, and then performing the two
25322 shifts. The two methods of getting 0/1 into low/high are exactly
25323 the same size. Avoiding the shift in the bit extract case helps
25324 pentium4 a bit; no one else seems to care much either way. */
25325 else
25327 machine_mode half_mode;
25328 rtx (*gen_lshr3)(rtx, rtx, rtx);
25329 rtx (*gen_and3)(rtx, rtx, rtx);
25330 rtx (*gen_xor3)(rtx, rtx, rtx);
25331 HOST_WIDE_INT bits;
25332 rtx x;
25334 if (mode == DImode)
25336 half_mode = SImode;
25337 gen_lshr3 = gen_lshrsi3;
25338 gen_and3 = gen_andsi3;
25339 gen_xor3 = gen_xorsi3;
25340 bits = 5;
25342 else
25344 half_mode = DImode;
25345 gen_lshr3 = gen_lshrdi3;
25346 gen_and3 = gen_anddi3;
25347 gen_xor3 = gen_xordi3;
25348 bits = 6;
25351 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
25352 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
25353 else
25354 x = gen_lowpart (half_mode, operands[2]);
25355 emit_insn (gen_rtx_SET (high[0], x));
25357 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
25358 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
25359 emit_move_insn (low[0], high[0]);
25360 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
25363 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25364 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
25365 return;
25368 if (operands[1] == constm1_rtx)
25370 /* For -1 << N, we can avoid the shld instruction, because we
25371 know that we're shifting 0...31/63 ones into a -1. */
25372 emit_move_insn (low[0], constm1_rtx);
25373 if (optimize_insn_for_size_p ())
25374 emit_move_insn (high[0], low[0]);
25375 else
25376 emit_move_insn (high[0], constm1_rtx);
25378 else
25380 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25382 if (!rtx_equal_p (operands[0], operands[1]))
25383 emit_move_insn (operands[0], operands[1]);
25385 split_double_mode (mode, operands, 1, low, high);
25386 emit_insn (gen_shld (high[0], low[0], operands[2]));
25389 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25391 if (TARGET_CMOVE && scratch)
25393 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25394 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25396 ix86_expand_clear (scratch);
25397 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
25399 else
25401 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25402 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25404 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
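/* Added summary of the variable-count path above (roughly, for DImode on
   a 32-bit target):

       shld  high, low, cl   ; shift high, filling from the top bits of low
       shl   low, cl
       ; if cl >= 32, move low into high and clear low, using cmov when
       ; a scratch register is available, otherwise a conditional jump.

   The shld/shl pair handles counts below the half width; the adjustment
   fixes up larger counts, since the hardware masks the shift count.  */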
25408 void
25409 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
25411 rtx (*gen_ashr3)(rtx, rtx, rtx)
25412 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
25413 rtx (*gen_shrd)(rtx, rtx, rtx);
25414 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25416 rtx low[2], high[2];
25417 int count;
25419 if (CONST_INT_P (operands[2]))
25421 split_double_mode (mode, operands, 2, low, high);
25422 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25424 if (count == GET_MODE_BITSIZE (mode) - 1)
25426 emit_move_insn (high[0], high[1]);
25427 emit_insn (gen_ashr3 (high[0], high[0],
25428 GEN_INT (half_width - 1)));
25429 emit_move_insn (low[0], high[0]);
25432 else if (count >= half_width)
25434 emit_move_insn (low[0], high[1]);
25435 emit_move_insn (high[0], low[0]);
25436 emit_insn (gen_ashr3 (high[0], high[0],
25437 GEN_INT (half_width - 1)));
25439 if (count > half_width)
25440 emit_insn (gen_ashr3 (low[0], low[0],
25441 GEN_INT (count - half_width)));
25443 else
25445 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25447 if (!rtx_equal_p (operands[0], operands[1]))
25448 emit_move_insn (operands[0], operands[1]);
25450 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25451 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
25454 else
25456 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25458 if (!rtx_equal_p (operands[0], operands[1]))
25459 emit_move_insn (operands[0], operands[1]);
25461 split_double_mode (mode, operands, 1, low, high);
25463 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25464 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
25466 if (TARGET_CMOVE && scratch)
25468 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25469 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25471 emit_move_insn (scratch, high[0]);
25472 emit_insn (gen_ashr3 (scratch, scratch,
25473 GEN_INT (half_width - 1)));
25474 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25475 scratch));
25477 else
25479 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
25480 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
25482 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
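/* (Added note, not in the original source:) The function below splits a
   double-word logical right shift into half-word operations, in the same way
   as ix86_split_ashr but filling the upper half with zeros instead of sign
   bits.  */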
25487 void
25488 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
25490 rtx (*gen_lshr3)(rtx, rtx, rtx)
25491 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
25492 rtx (*gen_shrd)(rtx, rtx, rtx);
25493 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25495 rtx low[2], high[2];
25496 int count;
25498 if (CONST_INT_P (operands[2]))
25500 split_double_mode (mode, operands, 2, low, high);
25501 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25503 if (count >= half_width)
25505 emit_move_insn (low[0], high[1]);
25506 ix86_expand_clear (high[0]);
25508 if (count > half_width)
25509 emit_insn (gen_lshr3 (low[0], low[0],
25510 GEN_INT (count - half_width)));
25512 else
25514 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25516 if (!rtx_equal_p (operands[0], operands[1]))
25517 emit_move_insn (operands[0], operands[1]);
25519 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25520 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
25523 else
25525 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25527 if (!rtx_equal_p (operands[0], operands[1]))
25528 emit_move_insn (operands[0], operands[1]);
25530 split_double_mode (mode, operands, 1, low, high);
25532 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25533 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
25535 if (TARGET_CMOVE && scratch)
25537 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25538 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25540 ix86_expand_clear (scratch);
25541 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25542 scratch));
25544 else
25546 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25547 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25549 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
25554 /* Predict just emitted jump instruction to be taken with probability PROB. */
25555 static void
25556 predict_jump (int prob)
25558 rtx_insn *insn = get_last_insn ();
25559 gcc_assert (JUMP_P (insn));
25560 add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
25563 /* Helper function for the string operations below. Test whether the VALUE
25564 bit of VARIABLE is clear (i.e. VARIABLE is aligned to VALUE bytes); if so, jump to the returned label. */
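/* For example, ix86_expand_aligntest (count, 4, true) emits a test of
   COUNT & 4 and returns a label that is branched to when that bit is clear;
   the caller emits the 4-byte copy and then places the label right after it,
   so the copy runs only when the bit is set.  */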
25565 static rtx_code_label *
25566 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
25568 rtx_code_label *label = gen_label_rtx ();
25569 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
25570 if (GET_MODE (variable) == DImode)
25571 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
25572 else
25573 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
25574 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
25575 1, label);
25576 if (epilogue)
25577 predict_jump (REG_BR_PROB_BASE * 50 / 100);
25578 else
25579 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25580 return label;
25583 /* Decrease COUNTREG by VALUE (emits an add of -VALUE). */
25584 static void
25585 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
25587 rtx (*gen_add)(rtx, rtx, rtx)
25588 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
25590 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
25593 /* Zero extend possibly SImode EXP to Pmode register. */
25595 ix86_zero_extend_to_Pmode (rtx exp)
25597 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
25600 /* Divide COUNTREG by SCALE. */
25601 static rtx
25602 scale_counter (rtx countreg, int scale)
25604 rtx sc;
25606 if (scale == 1)
25607 return countreg;
25608 if (CONST_INT_P (countreg))
25609 return GEN_INT (INTVAL (countreg) / scale);
25610 gcc_assert (REG_P (countreg));
25612 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
25613 GEN_INT (exact_log2 (scale)),
25614 NULL, 1, OPTAB_DIRECT);
25615 return sc;
25618 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
25619 DImode for constant loop counts. */
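/* E.g. a constant count of 100 yields SImode even on 64-bit targets, while a
   constant that does not fit in 32 bits yields DImode; a non-constant count
   simply keeps its own mode.  */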
25621 static machine_mode
25622 counter_mode (rtx count_exp)
25624 if (GET_MODE (count_exp) != VOIDmode)
25625 return GET_MODE (count_exp);
25626 if (!CONST_INT_P (count_exp))
25627 return Pmode;
25628 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
25629 return DImode;
25630 return SImode;
25633 /* Copy the address to a Pmode register. This is used for x32 to
25634 truncate DImode TLS address to a SImode register. */
25636 static rtx
25637 ix86_copy_addr_to_reg (rtx addr)
25639 rtx reg;
25640 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
25642 reg = copy_addr_to_reg (addr);
25643 REG_POINTER (reg) = 1;
25644 return reg;
25646 else
25648 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
25649 reg = copy_to_mode_reg (DImode, addr);
25650 REG_POINTER (reg) = 1;
25651 return gen_rtx_SUBREG (SImode, reg, 0);
25655 /* When ISSETMEM is FALSE, output a simple loop moving the memory pointed to
25656 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
25657 size is COUNT bytes. When ISSETMEM is TRUE, output the equivalent loop that
25658 sets the memory to VALUE (supposed to be in MODE).
25660 The size is rounded down to a whole number of chunks moved at once.
25661 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
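/* Roughly, the emitted sequence for the move case is:
     size = count & -(GET_MODE_SIZE (mode) * unroll);
     iter = 0;                (preceded by a size == 0 check when the
   top:                        chunk size is a single byte)
     move UNROLL chunks of MODE from src + iter to dest + iter;
     iter += GET_MODE_SIZE (mode) * unroll;
     if (iter < size) goto top;
     dest += iter; src += iter;  */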
25664 static void
25665 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25666 rtx destptr, rtx srcptr, rtx value,
25667 rtx count, machine_mode mode, int unroll,
25668 int expected_size, bool issetmem)
25670 rtx_code_label *out_label, *top_label;
25671 rtx iter, tmp;
25672 machine_mode iter_mode = counter_mode (count);
25673 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25674 rtx piece_size = GEN_INT (piece_size_n);
25675 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25676 rtx size;
25677 int i;
25679 top_label = gen_label_rtx ();
25680 out_label = gen_label_rtx ();
25681 iter = gen_reg_rtx (iter_mode);
25683 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25684 NULL, 1, OPTAB_DIRECT);
25685 /* Those two should combine. */
25686 if (piece_size == const1_rtx)
25688 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25689 true, out_label);
25690 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25692 emit_move_insn (iter, const0_rtx);
25694 emit_label (top_label);
25696 tmp = convert_modes (Pmode, iter_mode, iter, true);
25698 /* This assert could be relaxed - in that case we'd need to compute the
25699 smallest power of two containing PIECE_SIZE_N and pass it to
25700 offset_address. */
25701 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25702 destmem = offset_address (destmem, tmp, piece_size_n);
25703 destmem = adjust_address (destmem, mode, 0);
25705 if (!issetmem)
25707 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25708 srcmem = adjust_address (srcmem, mode, 0);
25710 /* When unrolling for chips that reorder memory reads and writes,
25711 we could save registers by using a single temporary (also, using four
25712 temporaries is overkill in 32-bit mode); note the "&& 0" below keeps this path disabled. */
25713 if (!TARGET_64BIT && 0)
25715 for (i = 0; i < unroll; i++)
25717 if (i)
25719 destmem =
25720 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25721 srcmem =
25722 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25724 emit_move_insn (destmem, srcmem);
25727 else
25729 rtx tmpreg[4];
25730 gcc_assert (unroll <= 4);
25731 for (i = 0; i < unroll; i++)
25733 tmpreg[i] = gen_reg_rtx (mode);
25734 if (i)
25736 srcmem =
25737 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25739 emit_move_insn (tmpreg[i], srcmem);
25741 for (i = 0; i < unroll; i++)
25743 if (i)
25745 destmem =
25746 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25748 emit_move_insn (destmem, tmpreg[i]);
25752 else
25753 for (i = 0; i < unroll; i++)
25755 if (i)
25756 destmem =
25757 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25758 emit_move_insn (destmem, value);
25761 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25762 true, OPTAB_LIB_WIDEN);
25763 if (tmp != iter)
25764 emit_move_insn (iter, tmp);
25766 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
25767 true, top_label);
25768 if (expected_size != -1)
25770 expected_size /= GET_MODE_SIZE (mode) * unroll;
25771 if (expected_size == 0)
25772 predict_jump (0);
25773 else if (expected_size > REG_BR_PROB_BASE)
25774 predict_jump (REG_BR_PROB_BASE - 1);
25775 else
25776 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25778 else
25779 predict_jump (REG_BR_PROB_BASE * 80 / 100);
25780 iter = ix86_zero_extend_to_Pmode (iter);
25781 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25782 true, OPTAB_LIB_WIDEN);
25783 if (tmp != destptr)
25784 emit_move_insn (destptr, tmp);
25785 if (!issetmem)
25787 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25788 true, OPTAB_LIB_WIDEN);
25789 if (tmp != srcptr)
25790 emit_move_insn (srcptr, tmp);
25792 emit_label (out_label);
25795 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25796 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25797 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25798 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25799 ORIG_VALUE is the original value passed to memset to fill the memory with.
25800 Other arguments have the same meaning as for the previous function. */
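/* The DESTEXP/SRCEXP values computed below are the final pointer values
   (pointer + count * chunk size); the rep_stos/rep_mov patterns use them to
   describe how the rep instruction advances the destination and source
   pointers.  */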
25802 static void
25803 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25804 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25805 rtx count,
25806 machine_mode mode, bool issetmem)
25808 rtx destexp;
25809 rtx srcexp;
25810 rtx countreg;
25811 HOST_WIDE_INT rounded_count;
25813 /* If possible, it is shorter to use rep movs.
25814 TODO: Maybe it is better to move this logic to decide_alg. */
25815 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25816 && (!issetmem || orig_value == const0_rtx))
25817 mode = SImode;
25819 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25820 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
25822 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25823 GET_MODE_SIZE (mode)));
25824 if (mode != QImode)
25826 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25827 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25828 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25830 else
25831 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25832 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25834 rounded_count
25835 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25836 destmem = shallow_copy_rtx (destmem);
25837 set_mem_size (destmem, rounded_count);
25839 else if (MEM_SIZE_KNOWN_P (destmem))
25840 clear_mem_size (destmem);
25842 if (issetmem)
25844 value = force_reg (mode, gen_lowpart (mode, value));
25845 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25847 else
25849 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25850 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25851 if (mode != QImode)
25853 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25854 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25855 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25857 else
25858 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25859 if (CONST_INT_P (count))
25861 rounded_count
25862 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25863 srcmem = shallow_copy_rtx (srcmem);
25864 set_mem_size (srcmem, rounded_count);
25866 else
25868 if (MEM_SIZE_KNOWN_P (srcmem))
25869 clear_mem_size (srcmem);
25871 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25872 destexp, srcexp));
25876 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25877 DESTMEM.
25878 SRCMEM is passed by pointer so it can be updated on return.
25879 The return value is the updated DESTMEM. */
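/* Callers pass power-of-two sizes, so e.g. SIZE_TO_MOVE == 8 on a 64-bit
   target emits a single DImode load into a fresh temporary followed by a
   DImode store.  */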
25880 static rtx
25881 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25882 HOST_WIDE_INT size_to_move)
25884 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25885 enum insn_code code;
25886 machine_mode move_mode;
25887 int piece_size, i;
25889 /* Find the widest mode in which we could perform moves.
25890 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
25891 it until a move of that size is supported. */
25892 piece_size = 1 << floor_log2 (size_to_move);
25893 while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
25894 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25896 gcc_assert (piece_size > 1);
25897 piece_size >>= 1;
25900 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25901 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25902 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25904 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25905 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
25906 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25908 move_mode = word_mode;
25909 piece_size = GET_MODE_SIZE (move_mode);
25910 code = optab_handler (mov_optab, move_mode);
25913 gcc_assert (code != CODE_FOR_nothing);
25915 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25916 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25918 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE of them. */
25919 gcc_assert (size_to_move % piece_size == 0);
25920 adjust = GEN_INT (piece_size);
25921 for (i = 0; i < size_to_move; i += piece_size)
25923 /* We move from memory to memory, so we'll need to do it via
25924 a temporary register. */
25925 tempreg = gen_reg_rtx (move_mode);
25926 emit_insn (GEN_FCN (code) (tempreg, src));
25927 emit_insn (GEN_FCN (code) (dst, tempreg));
25929 emit_move_insn (destptr,
25930 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25931 emit_move_insn (srcptr,
25932 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25934 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25935 piece_size);
25936 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25937 piece_size);
25940 /* Update DST and SRC rtx. */
25941 *srcmem = src;
25942 return dst;
25945 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
25946 static void
25947 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25948 rtx destptr, rtx srcptr, rtx count, int max_size)
25950 rtx src, dest;
25951 if (CONST_INT_P (count))
25953 HOST_WIDE_INT countval = INTVAL (count);
25954 HOST_WIDE_INT epilogue_size = countval % max_size;
25955 int i;
25957 /* For now MAX_SIZE should be a power of 2. This assert could be
25958 relaxed, but it would require a somewhat more complicated epilogue
25959 expansion. */
25960 gcc_assert ((max_size & (max_size - 1)) == 0);
25961 for (i = max_size; i >= 1; i >>= 1)
25963 if (epilogue_size & i)
25964 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25966 return;
25968 if (max_size > 8)
25970 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25971 count, 1, OPTAB_DIRECT);
25972 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25973 count, QImode, 1, 4, false);
25974 return;
25977 /* When single-insn stringops are available, we can cheaply advance the dest
25978 and src pointers. Otherwise we save code size by maintaining an offset
25979 (zero is readily available from the preceding rep operation) and using x86 addressing modes. */
25981 if (TARGET_SINGLE_STRINGOP)
25983 if (max_size > 4)
25985 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25986 src = change_address (srcmem, SImode, srcptr);
25987 dest = change_address (destmem, SImode, destptr);
25988 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25989 emit_label (label);
25990 LABEL_NUSES (label) = 1;
25992 if (max_size > 2)
25994 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25995 src = change_address (srcmem, HImode, srcptr);
25996 dest = change_address (destmem, HImode, destptr);
25997 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25998 emit_label (label);
25999 LABEL_NUSES (label) = 1;
26001 if (max_size > 1)
26003 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26004 src = change_address (srcmem, QImode, srcptr);
26005 dest = change_address (destmem, QImode, destptr);
26006 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26007 emit_label (label);
26008 LABEL_NUSES (label) = 1;
26011 else
26013 rtx offset = force_reg (Pmode, const0_rtx);
26014 rtx tmp;
26016 if (max_size > 4)
26018 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26019 src = change_address (srcmem, SImode, srcptr);
26020 dest = change_address (destmem, SImode, destptr);
26021 emit_move_insn (dest, src);
26022 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
26023 true, OPTAB_LIB_WIDEN);
26024 if (tmp != offset)
26025 emit_move_insn (offset, tmp);
26026 emit_label (label);
26027 LABEL_NUSES (label) = 1;
26029 if (max_size > 2)
26031 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26032 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26033 src = change_address (srcmem, HImode, tmp);
26034 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26035 dest = change_address (destmem, HImode, tmp);
26036 emit_move_insn (dest, src);
26037 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
26038 true, OPTAB_LIB_WIDEN);
26039 if (tmp != offset)
26040 emit_move_insn (offset, tmp);
26041 emit_label (label);
26042 LABEL_NUSES (label) = 1;
26044 if (max_size > 1)
26046 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26047 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26048 src = change_address (srcmem, QImode, tmp);
26049 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26050 dest = change_address (destmem, QImode, tmp);
26051 emit_move_insn (dest, src);
26052 emit_label (label);
26053 LABEL_NUSES (label) = 1;
26058 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
26059 with value PROMOTED_VAL.
26060 The return value is the updated DESTMEM; unlike emit_memmov there is no
26061 source pointer to update. */
26062 static rtx
26063 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26064 HOST_WIDE_INT size_to_move)
26066 rtx dst = destmem, adjust;
26067 enum insn_code code;
26068 machine_mode move_mode;
26069 int piece_size, i;
26071 /* Find the widest mode in which we could perform moves.
26072 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
26073 it until move of such size is supported. */
26074 move_mode = GET_MODE (promoted_val);
26075 if (move_mode == VOIDmode)
26076 move_mode = QImode;
26077 if (size_to_move < GET_MODE_SIZE (move_mode))
26079 unsigned int move_bits = size_to_move * BITS_PER_UNIT;
26080 move_mode = int_mode_for_size (move_bits, 0).require ();
26081 promoted_val = gen_lowpart (move_mode, promoted_val);
26083 piece_size = GET_MODE_SIZE (move_mode);
26084 code = optab_handler (mov_optab, move_mode);
26085 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26087 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26089 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE of them. */
26090 gcc_assert (size_to_move % piece_size == 0);
26091 adjust = GEN_INT (piece_size);
26092 for (i = 0; i < size_to_move; i += piece_size)
26094 if (piece_size <= GET_MODE_SIZE (word_mode))
26096 emit_insn (gen_strset (destptr, dst, promoted_val));
26097 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26098 piece_size);
26099 continue;
26102 emit_insn (GEN_FCN (code) (dst, promoted_val));
26104 emit_move_insn (destptr,
26105 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26107 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26108 piece_size);
26111 /* Update DST rtx. */
26112 return dst;
26114 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26115 static void
26116 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26117 rtx count, int max_size)
26119 count =
26120 expand_simple_binop (counter_mode (count), AND, count,
26121 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26122 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26123 gen_lowpart (QImode, value), count, QImode,
26124 1, max_size / 2, true);
26127 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26128 static void
26129 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26130 rtx count, int max_size)
26132 rtx dest;
26134 if (CONST_INT_P (count))
26136 HOST_WIDE_INT countval = INTVAL (count);
26137 HOST_WIDE_INT epilogue_size = countval % max_size;
26138 int i;
26140 /* For now MAX_SIZE should be a power of 2. This assert could be
26141 relaxed, but it would require a somewhat more complicated epilogue
26142 expansion. */
26143 gcc_assert ((max_size & (max_size - 1)) == 0);
26144 for (i = max_size; i >= 1; i >>= 1)
26146 if (epilogue_size & i)
26148 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26149 destmem = emit_memset (destmem, destptr, vec_value, i);
26150 else
26151 destmem = emit_memset (destmem, destptr, value, i);
26154 return;
26156 if (max_size > 32)
26158 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26159 return;
26161 if (max_size > 16)
26163 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26164 if (TARGET_64BIT)
26166 dest = change_address (destmem, DImode, destptr);
26167 emit_insn (gen_strset (destptr, dest, value));
26168 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26169 emit_insn (gen_strset (destptr, dest, value));
26171 else
26173 dest = change_address (destmem, SImode, destptr);
26174 emit_insn (gen_strset (destptr, dest, value));
26175 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26176 emit_insn (gen_strset (destptr, dest, value));
26177 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26178 emit_insn (gen_strset (destptr, dest, value));
26179 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26180 emit_insn (gen_strset (destptr, dest, value));
26182 emit_label (label);
26183 LABEL_NUSES (label) = 1;
26185 if (max_size > 8)
26187 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26188 if (TARGET_64BIT)
26190 dest = change_address (destmem, DImode, destptr);
26191 emit_insn (gen_strset (destptr, dest, value));
26193 else
26195 dest = change_address (destmem, SImode, destptr);
26196 emit_insn (gen_strset (destptr, dest, value));
26197 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26198 emit_insn (gen_strset (destptr, dest, value));
26200 emit_label (label);
26201 LABEL_NUSES (label) = 1;
26203 if (max_size > 4)
26205 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26206 dest = change_address (destmem, SImode, destptr);
26207 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26208 emit_label (label);
26209 LABEL_NUSES (label) = 1;
26211 if (max_size > 2)
26213 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26214 dest = change_address (destmem, HImode, destptr);
26215 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26216 emit_label (label);
26217 LABEL_NUSES (label) = 1;
26219 if (max_size > 1)
26221 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26222 dest = change_address (destmem, QImode, destptr);
26223 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26224 emit_label (label);
26225 LABEL_NUSES (label) = 1;
26229 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
26230 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
26231 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
26232 ignored.
26233 The return value is the updated DESTMEM. */
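/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 16 the loop below emits
   tests of bits 0, 1, 2 and 3 of DESTPTR, each guarding a 1-, 2-, 4- or
   8-byte copy (or store) that also advances DESTPTR and decreases COUNT.  */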
26234 static rtx
26235 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26236 rtx destptr, rtx srcptr, rtx value,
26237 rtx vec_value, rtx count, int align,
26238 int desired_alignment, bool issetmem)
26240 int i;
26241 for (i = 1; i < desired_alignment; i <<= 1)
26243 if (align <= i)
26245 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26246 if (issetmem)
26248 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26249 destmem = emit_memset (destmem, destptr, vec_value, i);
26250 else
26251 destmem = emit_memset (destmem, destptr, value, i);
26253 else
26254 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26255 ix86_adjust_counter (count, i);
26256 emit_label (label);
26257 LABEL_NUSES (label) = 1;
26258 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
26261 return destmem;
26264 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
26265 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
26266 and jump to DONE_LABEL. */
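/* The trick used below: emit SIZE bytes of moves starting at DESTPTR and
   another SIZE bytes ending exactly at DESTPTR + COUNT, so the two possibly
   overlapping copies cover any length in the range SIZE..2*SIZE-1.  */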
26267 static void
26268 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
26269 rtx destptr, rtx srcptr,
26270 rtx value, rtx vec_value,
26271 rtx count, int size,
26272 rtx done_label, bool issetmem)
26274 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
26275 machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
26276 rtx modesize;
26277 int n;
26279 /* If we do not have vector value to copy, we must reduce size. */
26280 if (issetmem)
26282 if (!vec_value)
26284 if (GET_MODE (value) == VOIDmode && size > 8)
26285 mode = Pmode;
26286 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
26287 mode = GET_MODE (value);
26289 else
26290 mode = GET_MODE (vec_value), value = vec_value;
26292 else
26294 /* Choose appropriate vector mode. */
26295 if (size >= 32)
26296 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
26297 else if (size >= 16)
26298 mode = TARGET_SSE ? V16QImode : DImode;
26299 srcmem = change_address (srcmem, mode, srcptr);
26301 destmem = change_address (destmem, mode, destptr);
26302 modesize = GEN_INT (GET_MODE_SIZE (mode));
26303 gcc_assert (GET_MODE_SIZE (mode) <= size);
26304 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26306 if (issetmem)
26307 emit_move_insn (destmem, gen_lowpart (mode, value));
26308 else
26310 emit_move_insn (destmem, srcmem);
26311 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26313 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26316 destmem = offset_address (destmem, count, 1);
26317 destmem = offset_address (destmem, GEN_INT (-2 * size),
26318 GET_MODE_SIZE (mode));
26319 if (!issetmem)
26321 srcmem = offset_address (srcmem, count, 1);
26322 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
26323 GET_MODE_SIZE (mode));
26325 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26327 if (issetmem)
26328 emit_move_insn (destmem, gen_lowpart (mode, value));
26329 else
26331 emit_move_insn (destmem, srcmem);
26332 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26334 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26336 emit_jump_insn (gen_jump (done_label));
26337 emit_barrier ();
26339 emit_label (label);
26340 LABEL_NUSES (label) = 1;
26343 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
26344 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
26345 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
26346 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
26347 DONE_LABEL is a label after the whole copying sequence. The label is created
26348 on demand if *DONE_LABEL is NULL.
26349 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for the new
26350 bounds after the initial copies.
26352 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
26353 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
26354 we will dispatch to a library call for large blocks.
26356 In pseudocode we do:
26358 if (COUNT < SIZE)
26360 Assume that SIZE is 4. Bigger sizes are handled analogously
26361 if (COUNT & 4)
26363 copy 4 bytes from SRCPTR to DESTPTR
26364 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
26365 goto done_label
26367 if (!COUNT)
26368 goto done_label;
26369 copy 1 byte from SRCPTR to DESTPTR
26370 if (COUNT & 2)
26372 copy 2 bytes from SRCPTR to DESTPTR
26373 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
26376 else
26378 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
26379 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
26381 OLD_DESTPTR = DESTPTR;
26382 Align DESTPTR up to DESIRED_ALIGN
26383 SRCPTR += DESTPTR - OLD_DESTPTR
26384 COUNT -= DESTPTR - OLD_DESTPTR
26385 if (DYNAMIC_CHECK)
26386 Round COUNT down to multiple of SIZE
26387 << optional caller supplied zero size guard is here >>
26388 << optional caller supplied dynamic check is here >>
26389 << caller supplied main copy loop is here >>
26391 done_label:
26393 static void
26394 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
26395 rtx *destptr, rtx *srcptr,
26396 machine_mode mode,
26397 rtx value, rtx vec_value,
26398 rtx *count,
26399 rtx_code_label **done_label,
26400 int size,
26401 int desired_align,
26402 int align,
26403 unsigned HOST_WIDE_INT *min_size,
26404 bool dynamic_check,
26405 bool issetmem)
26407 rtx_code_label *loop_label = NULL, *label;
26408 int n;
26409 rtx modesize;
26410 int prolog_size = 0;
26411 rtx mode_value;
26413 /* Choose the proper value to copy. */
26414 if (issetmem && VECTOR_MODE_P (mode))
26415 mode_value = vec_value;
26416 else
26417 mode_value = value;
26418 gcc_assert (GET_MODE_SIZE (mode) <= size);
26420 /* See if block is big or small, handle small blocks. */
26421 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
26423 int size2 = size;
26424 loop_label = gen_label_rtx ();
26426 if (!*done_label)
26427 *done_label = gen_label_rtx ();
26429 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
26430 1, loop_label);
26431 size2 >>= 1;
26433 /* Handle sizes > 3. */
26434 for (;size2 > 2; size2 >>= 1)
26435 expand_small_movmem_or_setmem (destmem, srcmem,
26436 *destptr, *srcptr,
26437 value, vec_value,
26438 *count,
26439 size2, *done_label, issetmem);
26440 /* Nothing to copy? Jump to DONE_LABEL if so */
26441 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
26442 1, *done_label);
26444 /* Do a byte copy. */
26445 destmem = change_address (destmem, QImode, *destptr);
26446 if (issetmem)
26447 emit_move_insn (destmem, gen_lowpart (QImode, value));
26448 else
26450 srcmem = change_address (srcmem, QImode, *srcptr);
26451 emit_move_insn (destmem, srcmem);
26454 /* Handle sizes 2 and 3. */
26455 label = ix86_expand_aligntest (*count, 2, false);
26456 destmem = change_address (destmem, HImode, *destptr);
26457 destmem = offset_address (destmem, *count, 1);
26458 destmem = offset_address (destmem, GEN_INT (-2), 2);
26459 if (issetmem)
26460 emit_move_insn (destmem, gen_lowpart (HImode, value));
26461 else
26463 srcmem = change_address (srcmem, HImode, *srcptr);
26464 srcmem = offset_address (srcmem, *count, 1);
26465 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
26466 emit_move_insn (destmem, srcmem);
26469 emit_label (label);
26470 LABEL_NUSES (label) = 1;
26471 emit_jump_insn (gen_jump (*done_label));
26472 emit_barrier ();
26474 else
26475 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
26476 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
26478 /* Start memcpy for COUNT >= SIZE. */
26479 if (loop_label)
26481 emit_label (loop_label);
26482 LABEL_NUSES (loop_label) = 1;
26485 /* Copy first desired_align bytes. */
26486 if (!issetmem)
26487 srcmem = change_address (srcmem, mode, *srcptr);
26488 destmem = change_address (destmem, mode, *destptr);
26489 modesize = GEN_INT (GET_MODE_SIZE (mode));
26490 for (n = 0; prolog_size < desired_align - align; n++)
26492 if (issetmem)
26493 emit_move_insn (destmem, mode_value);
26494 else
26496 emit_move_insn (destmem, srcmem);
26497 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26499 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26500 prolog_size += GET_MODE_SIZE (mode);
26504 /* Copy last SIZE bytes. */
26505 destmem = offset_address (destmem, *count, 1);
26506 destmem = offset_address (destmem,
26507 GEN_INT (-size - prolog_size),
26509 if (issetmem)
26510 emit_move_insn (destmem, mode_value);
26511 else
26513 srcmem = offset_address (srcmem, *count, 1);
26514 srcmem = offset_address (srcmem,
26515 GEN_INT (-size - prolog_size),
26517 emit_move_insn (destmem, srcmem);
26519 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
26521 destmem = offset_address (destmem, modesize, 1);
26522 if (issetmem)
26523 emit_move_insn (destmem, mode_value);
26524 else
26526 srcmem = offset_address (srcmem, modesize, 1);
26527 emit_move_insn (destmem, srcmem);
26531 /* Align destination. */
26532 if (desired_align > 1 && desired_align > align)
26534 rtx saveddest = *destptr;
26536 gcc_assert (desired_align <= size);
26537 /* Align destptr up, place it to new register. */
26538 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
26539 GEN_INT (prolog_size),
26540 NULL_RTX, 1, OPTAB_DIRECT);
26541 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
26542 REG_POINTER (*destptr) = 1;
26543 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
26544 GEN_INT (-desired_align),
26545 *destptr, 1, OPTAB_DIRECT);
26546 /* See how many bytes we skipped. */
26547 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
26548 *destptr,
26549 saveddest, 1, OPTAB_DIRECT);
26550 /* Adjust srcptr and count. */
26551 if (!issetmem)
26552 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
26553 saveddest, *srcptr, 1, OPTAB_DIRECT);
26554 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26555 saveddest, *count, 1, OPTAB_DIRECT);
26556 /* We copied at most size + prolog_size. */
26557 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
26558 *min_size
26559 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
26560 else
26561 *min_size = 0;
26563 /* Our loops always round down the block size, but for dispatch to
26564 library we need precise value. */
26565 if (dynamic_check)
26566 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
26567 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
26569 else
26571 gcc_assert (prolog_size == 0);
26572 /* Decrease count, so we won't end up copying last word twice. */
26573 if (!CONST_INT_P (*count))
26574 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26575 constm1_rtx, *count, 1, OPTAB_DIRECT);
26576 else
26577 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
26578 (unsigned HOST_WIDE_INT)size));
26579 if (*min_size)
26580 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
26585 /* This function is like the previous one, except here we know how many bytes
26586 need to be copied. That allows us to update alignment not only of DST, which
26587 is returned, but also of SRC, which is passed as a pointer for that
26588 reason. */
26589 static rtx
26590 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
26591 rtx srcreg, rtx value, rtx vec_value,
26592 int desired_align, int align_bytes,
26593 bool issetmem)
26595 rtx src = NULL;
26596 rtx orig_dst = dst;
26597 rtx orig_src = NULL;
26598 int piece_size = 1;
26599 int copied_bytes = 0;
26601 if (!issetmem)
26603 gcc_assert (srcp != NULL);
26604 src = *srcp;
26605 orig_src = src;
26608 for (piece_size = 1;
26609 piece_size <= desired_align && copied_bytes < align_bytes;
26610 piece_size <<= 1)
26612 if (align_bytes & piece_size)
26614 if (issetmem)
26616 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
26617 dst = emit_memset (dst, destreg, vec_value, piece_size);
26618 else
26619 dst = emit_memset (dst, destreg, value, piece_size);
26621 else
26622 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
26623 copied_bytes += piece_size;
26626 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
26627 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26628 if (MEM_SIZE_KNOWN_P (orig_dst))
26629 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
26631 if (!issetmem)
26633 int src_align_bytes = get_mem_align_offset (src, desired_align
26634 * BITS_PER_UNIT);
26635 if (src_align_bytes >= 0)
26636 src_align_bytes = desired_align - src_align_bytes;
26637 if (src_align_bytes >= 0)
26639 unsigned int src_align;
26640 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
26642 if ((src_align_bytes & (src_align - 1))
26643 == (align_bytes & (src_align - 1)))
26644 break;
26646 if (src_align > (unsigned int) desired_align)
26647 src_align = desired_align;
26648 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
26649 set_mem_align (src, src_align * BITS_PER_UNIT);
26651 if (MEM_SIZE_KNOWN_P (orig_src))
26652 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26653 *srcp = src;
26656 return dst;
26659 /* Return true if ALG can be used in current context.
26660 Assume we expand memset if MEMSET is true. */
26661 static bool
26662 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
26664 if (alg == no_stringop)
26665 return false;
26666 if (alg == vector_loop)
26667 return TARGET_SSE || TARGET_AVX;
26668 /* Algorithms using the rep prefix want at least edi and ecx;
26669 additionally, memset wants eax and memcpy wants esi. Don't
26670 consider such algorithms if the user has appropriated those
26671 registers for their own purposes, or if we have a non-default
26672 address space, since some string insns cannot override the segment. */
26673 if (alg == rep_prefix_1_byte
26674 || alg == rep_prefix_4_byte
26675 || alg == rep_prefix_8_byte)
26677 if (have_as)
26678 return false;
26679 if (fixed_regs[CX_REG]
26680 || fixed_regs[DI_REG]
26681 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26682 return false;
26684 return true;
26687 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
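/* Roughly, the selection order below is: an algorithm explicitly requested
   by the user (ix86_stringop_alg) wins if usable; when optimizing for size a
   rep-prefixed variant is preferred; very small expected sizes use a byte
   loop; otherwise the per-CPU cost tables are consulted, falling back to
   their unknown_size entry or to a libcall.  */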
26688 static enum stringop_alg
26689 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26690 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26691 bool memset, bool zero_memset, bool have_as,
26692 int *dynamic_check, bool *noalign, bool recur)
26694 const struct stringop_algs *algs;
26695 bool optimize_for_speed;
26696 int max = 0;
26697 const struct processor_costs *cost;
26698 int i;
26699 bool any_alg_usable_p = false;
26701 *noalign = false;
26702 *dynamic_check = -1;
26704 /* Even if the string operation call is cold, we still might spend a lot
26705 of time processing large blocks. */
26706 if (optimize_function_for_size_p (cfun)
26707 || (optimize_insn_for_size_p ()
26708 && (max_size < 256
26709 || (expected_size != -1 && expected_size < 256))))
26710 optimize_for_speed = false;
26711 else
26712 optimize_for_speed = true;
26714 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26715 if (memset)
26716 algs = &cost->memset[TARGET_64BIT != 0];
26717 else
26718 algs = &cost->memcpy[TARGET_64BIT != 0];
26720 /* See maximal size for user defined algorithm. */
26721 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26723 enum stringop_alg candidate = algs->size[i].alg;
26724 bool usable = alg_usable_p (candidate, memset, have_as);
26725 any_alg_usable_p |= usable;
26727 if (candidate != libcall && candidate && usable)
26728 max = algs->size[i].max;
26731 /* If the expected size is not known but the max size is small enough
26732 that the inline version is a win, set the expected size into
26733 the range. */
26734 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26735 && expected_size == -1)
26736 expected_size = min_size / 2 + max_size / 2;
26738 /* If user specified the algorithm, honor it if possible. */
26739 if (ix86_stringop_alg != no_stringop
26740 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26741 return ix86_stringop_alg;
26742 /* rep; movq or rep; movl is the smallest variant. */
26743 else if (!optimize_for_speed)
26745 *noalign = true;
26746 if (!count || (count & 3) || (memset && !zero_memset))
26747 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26748 ? rep_prefix_1_byte : loop_1_byte;
26749 else
26750 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26751 ? rep_prefix_4_byte : loop;
26753 /* Very tiny blocks are best handled via the loop; REP is expensive to
26754 set up. */
26755 else if (expected_size != -1 && expected_size < 4)
26756 return loop_1_byte;
26757 else if (expected_size != -1)
26759 enum stringop_alg alg = libcall;
26760 bool alg_noalign = false;
26761 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26763 /* We get here if the algorithms that were not libcall-based
26764 were rep-prefix based and we are unable to use rep prefixes
26765 based on global register usage. Break out of the loop and
26766 use the heuristic below. */
26767 if (algs->size[i].max == 0)
26768 break;
26769 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26771 enum stringop_alg candidate = algs->size[i].alg;
26773 if (candidate != libcall
26774 && alg_usable_p (candidate, memset, have_as))
26776 alg = candidate;
26777 alg_noalign = algs->size[i].noalign;
26779 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26780 last non-libcall inline algorithm. */
26781 if (TARGET_INLINE_ALL_STRINGOPS)
26783 /* When the current size is best to be copied by a libcall,
26784 but we are still forced to inline, run the heuristic below
26785 that will pick code for medium sized blocks. */
26786 if (alg != libcall)
26788 *noalign = alg_noalign;
26789 return alg;
26791 else if (!any_alg_usable_p)
26792 break;
26794 else if (alg_usable_p (candidate, memset, have_as))
26796 *noalign = algs->size[i].noalign;
26797 return candidate;
26802 /* When asked to inline the call anyway, try to pick a meaningful choice.
26803 We look for the maximal size of block that is faster to copy by hand and
26804 take blocks of at most that size, guessing that the average size will
26805 be roughly half of the block.
26807 If this turns out to be bad, we might simply specify the preferred
26808 choice in ix86_costs. */
26809 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26810 && (algs->unknown_size == libcall
26811 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26813 enum stringop_alg alg;
26814 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26816 /* If there aren't any usable algorithms or if recursing already,
26817 then recursing on smaller sizes or same size isn't going to
26818 find anything. Just return the simple byte-at-a-time copy loop. */
26819 if (!any_alg_usable_p || recur)
26821 /* Pick something reasonable. */
26822 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26823 *dynamic_check = 128;
26824 return loop_1_byte;
26826 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26827 zero_memset, have_as, dynamic_check, noalign, true);
26828 gcc_assert (*dynamic_check == -1);
26829 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26830 *dynamic_check = max;
26831 else
26832 gcc_assert (alg != libcall);
26833 return alg;
26835 return (alg_usable_p (algs->unknown_size, memset, have_as)
26836 ? algs->unknown_size : libcall);
26839 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26840 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26841 static int
26842 decide_alignment (int align,
26843 enum stringop_alg alg,
26844 int expected_size,
26845 machine_mode move_mode)
26847 int desired_align = 0;
26849 gcc_assert (alg != no_stringop);
26851 if (alg == libcall)
26852 return 0;
26853 if (move_mode == VOIDmode)
26854 return 0;
26856 desired_align = GET_MODE_SIZE (move_mode);
26857 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
26858 copying a whole cache line at once. */
26859 if (TARGET_PENTIUMPRO
26860 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26861 desired_align = 8;
26863 if (optimize_size)
26864 desired_align = 1;
26865 if (desired_align < align)
26866 desired_align = align;
26867 if (expected_size != -1 && expected_size < 4)
26868 desired_align = align;
26870 return desired_align;
26874 /* Helper function for memset. For the QImode value 0xXY produce
26875 0xXYXYXYXY of the width specified by MODE. This is essentially
26876 a * 0x01010101, but we can do slightly better than
26877 synth_mult by unwinding the sequence by hand on CPUs with
26878 slow multiply. */
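/* For example, promoting 0xAB to SImode computes
     v = 0xAB;  v |= v << 8;  v |= v << 16;   =>  0xABABABAB,
   i.e. the same result as 0xAB * 0x01010101.  */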
26879 static rtx
26880 promote_duplicated_reg (machine_mode mode, rtx val)
26882 machine_mode valmode = GET_MODE (val);
26883 rtx tmp;
26884 int nops = mode == DImode ? 3 : 2;
26886 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26887 if (val == const0_rtx)
26888 return copy_to_mode_reg (mode, CONST0_RTX (mode));
26889 if (CONST_INT_P (val))
26891 HOST_WIDE_INT v = INTVAL (val) & 255;
26893 v |= v << 8;
26894 v |= v << 16;
26895 if (mode == DImode)
26896 v |= (v << 16) << 16;
26897 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26900 if (valmode == VOIDmode)
26901 valmode = QImode;
26902 if (valmode != QImode)
26903 val = gen_lowpart (QImode, val);
26904 if (mode == QImode)
26905 return val;
26906 if (!TARGET_PARTIAL_REG_STALL)
26907 nops--;
26908 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26909 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26910 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26911 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26913 rtx reg = convert_modes (mode, QImode, val, true);
26914 tmp = promote_duplicated_reg (mode, const1_rtx);
26915 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26916 OPTAB_DIRECT);
26918 else
26920 rtx reg = convert_modes (mode, QImode, val, true);
26922 if (!TARGET_PARTIAL_REG_STALL)
26923 if (mode == SImode)
26924 emit_insn (gen_insvsi_1 (reg, reg));
26925 else
26926 emit_insn (gen_insvdi_1 (reg, reg));
26927 else
26929 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26930 NULL, 1, OPTAB_DIRECT);
26931 reg =
26932 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26934 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26935 NULL, 1, OPTAB_DIRECT);
26936 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26937 if (mode == SImode)
26938 return reg;
26939 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26940 NULL, 1, OPTAB_DIRECT);
26941 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26942 return reg;
26946 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
26947 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
26948 the alignment from ALIGN to DESIRED_ALIGN. */
26949 static rtx
26950 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
26951 int align)
26953 rtx promoted_val;
26955 if (TARGET_64BIT
26956 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26957 promoted_val = promote_duplicated_reg (DImode, val);
26958 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26959 promoted_val = promote_duplicated_reg (SImode, val);
26960 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26961 promoted_val = promote_duplicated_reg (HImode, val);
26962 else
26963 promoted_val = val;
26965 return promoted_val;
26968 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
26969 operations when profitable. The code depends upon architecture, block size
26970 and alignment, but always has one of the following overall structures:
26972 Aligned move sequence:
26974 1) Prologue guard: Conditional that jumps up to epilogues for small
26975 blocks that can be handled by epilogue alone. This is faster
26976 but also needed for correctness, since the prologue assumes the block
26977 is larger than the desired alignment.
26979 Optional dynamic check for size and libcall for large
26980 blocks is emitted here too, with -minline-stringops-dynamically.
26982 2) Prologue: copy first few bytes in order to get destination
26983 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26984 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26985 copied. We emit either a jump tree on power of two sized
26986 blocks, or a byte loop.
26988 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26989 with specified algorithm.
26991 4) Epilogue: code copying tail of the block that is too small to be
26992 handled by main body (or up to size guarded by prologue guard).
26994 Misaligned move sequence
26996 1) misaligned move prologue/epilogue containing:
26997 a) Prologue handling small memory blocks and jumping to done_label
26998 (skipped if blocks are known to be large enough)
26999 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
27000 bytes if alignment is needed
27001 (skipped if alignment is not needed)
27002 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
27004 2) Zero size guard dispatching to done_label, if needed
27006 3) dispatch to library call, if needed
27008 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27009 with specified algorithm. */
27010 bool
27011 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27012 rtx align_exp, rtx expected_align_exp,
27013 rtx expected_size_exp, rtx min_size_exp,
27014 rtx max_size_exp, rtx probable_max_size_exp,
27015 bool issetmem)
27017 rtx destreg;
27018 rtx srcreg = NULL;
27019 rtx_code_label *label = NULL;
27020 rtx tmp;
27021 rtx_code_label *jump_around_label = NULL;
27022 HOST_WIDE_INT align = 1;
27023 unsigned HOST_WIDE_INT count = 0;
27024 HOST_WIDE_INT expected_size = -1;
27025 int size_needed = 0, epilogue_size_needed;
27026 int desired_align = 0, align_bytes = 0;
27027 enum stringop_alg alg;
27028 rtx promoted_val = NULL;
27029 rtx vec_promoted_val = NULL;
27030 bool force_loopy_epilogue = false;
27031 int dynamic_check;
27032 bool need_zero_guard = false;
27033 bool noalign;
27034 machine_mode move_mode = VOIDmode;
27035 machine_mode wider_mode;
27036 int unroll_factor = 1;
27037 /* TODO: Once value ranges are available, fill in proper data. */
27038 unsigned HOST_WIDE_INT min_size = 0;
27039 unsigned HOST_WIDE_INT max_size = -1;
27040 unsigned HOST_WIDE_INT probable_max_size = -1;
27041 bool misaligned_prologue_used = false;
27042 bool have_as;
27044 if (CONST_INT_P (align_exp))
27045 align = INTVAL (align_exp);
27046 /* i386 can do misaligned accesses at a reasonably increased cost. */
27047 if (CONST_INT_P (expected_align_exp)
27048 && INTVAL (expected_align_exp) > align)
27049 align = INTVAL (expected_align_exp);
27050 /* ALIGN is the minimum of destination and source alignment, but we care here
27051 just about destination alignment. */
27052 else if (!issetmem
27053 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27054 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27056 if (CONST_INT_P (count_exp))
27058 min_size = max_size = probable_max_size = count = expected_size
27059 = INTVAL (count_exp);
27060 /* When COUNT is 0, there is nothing to do. */
27061 if (!count)
27062 return true;
27064 else
27066 if (min_size_exp)
27067 min_size = INTVAL (min_size_exp);
27068 if (max_size_exp)
27069 max_size = INTVAL (max_size_exp);
27070 if (probable_max_size_exp)
27071 probable_max_size = INTVAL (probable_max_size_exp);
27072 if (CONST_INT_P (expected_size_exp))
27073 expected_size = INTVAL (expected_size_exp);
27076 /* Make sure we don't need to care about overflow later on. */
27077 if (count > (HOST_WIDE_INT_1U << 30))
27078 return false;
27080 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27081 if (!issetmem)
27082 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27084 /* Step 0: Decide on preferred algorithm, desired alignment and
27085 size of chunks to be copied by main loop. */
27086 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27087 issetmem,
27088 issetmem && val_exp == const0_rtx, have_as,
27089 &dynamic_check, &noalign, false);
27090 if (alg == libcall)
27091 return false;
27092 gcc_assert (alg != no_stringop);
27094 /* For now the vector version of memset is generated only for memory zeroing, as
27095 creating the promoted vector value is very cheap in this case. */
27096 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27097 alg = unrolled_loop;
27099 if (!count)
27100 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27101 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27102 if (!issetmem)
27103 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27105 unroll_factor = 1;
27106 move_mode = word_mode;
27107 switch (alg)
27109 case libcall:
27110 case no_stringop:
27111 case last_alg:
27112 gcc_unreachable ();
27113 case loop_1_byte:
27114 need_zero_guard = true;
27115 move_mode = QImode;
27116 break;
27117 case loop:
27118 need_zero_guard = true;
27119 break;
27120 case unrolled_loop:
27121 need_zero_guard = true;
27122 unroll_factor = (TARGET_64BIT ? 4 : 2);
27123 break;
27124 case vector_loop:
27125 need_zero_guard = true;
27126 unroll_factor = 4;
27127 /* Find the widest supported mode. */
27128 move_mode = word_mode;
27129 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27130 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27131 move_mode = wider_mode;
27133 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27134 move_mode = TImode;
27136 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27137 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27138 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27140 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27141 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27142 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27143 move_mode = word_mode;
27145 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27146 break;
27147 case rep_prefix_8_byte:
27148 move_mode = DImode;
27149 break;
27150 case rep_prefix_4_byte:
27151 move_mode = SImode;
27152 break;
27153 case rep_prefix_1_byte:
27154 move_mode = QImode;
27155 break;
27157 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27158 epilogue_size_needed = size_needed;
27160 /* If we are going to call any library calls conditionally, make sure any
27161 pending stack adjustments happen before the first conditional branch,
27162 otherwise they will be emitted before the library call only and won't
27163 happen from the other branches. */
27164 if (dynamic_check != -1)
27165 do_pending_stack_adjust ();
27167 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27168 if (!TARGET_ALIGN_STRINGOPS || noalign)
27169 align = desired_align;
27171 /* Step 1: Prologue guard. */
27173 /* Alignment code needs count to be in register. */
27174 if (CONST_INT_P (count_exp) && desired_align > align)
27176 if (INTVAL (count_exp) > desired_align
27177 && INTVAL (count_exp) > size_needed)
27179 align_bytes
27180 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27181 if (align_bytes <= 0)
27182 align_bytes = 0;
27183 else
27184 align_bytes = desired_align - align_bytes;
27186 if (align_bytes == 0)
27187 count_exp = force_reg (counter_mode (count_exp), count_exp);
27189 gcc_assert (desired_align >= 1 && align >= 1);
27191 /* Misaligned move sequences handle both the prologue and the epilogue at once.
27192 Default code generation results in smaller code for large alignments
27193 and also avoids redundant work when sizes are known precisely. */
27194 misaligned_prologue_used
27195 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27196 && MAX (desired_align, epilogue_size_needed) <= 32
27197 && desired_align <= epilogue_size_needed
27198 && ((desired_align > align && !align_bytes)
27199 || (!count && epilogue_size_needed > 1)));
27201 /* Do the cheap promotion to allow better CSE across the
27202 main loop and epilogue (i.e. one load of the big constant in
27203 front of all the code).
27204 For now the misaligned move sequences do not have a fast path
27205 without broadcasting. */
27206 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27208 if (alg == vector_loop)
27210 gcc_assert (val_exp == const0_rtx);
27211 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27212 promoted_val = promote_duplicated_reg_to_size (val_exp,
27213 GET_MODE_SIZE (word_mode),
27214 desired_align, align);
27216 else
27218 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27219 desired_align, align);
27222 /* Misaligned move sequences handle both prologues and epilogues at once.
27223 Default code generation results in smaller code for large alignments and
27224 also avoids redundant work when sizes are known precisely. */
27225 if (misaligned_prologue_used)
27227 /* The misaligned move prologue handles small blocks by itself. */
27228 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27229 (dst, src, &destreg, &srcreg,
27230 move_mode, promoted_val, vec_promoted_val,
27231 &count_exp,
27232 &jump_around_label,
27233 desired_align < align
27234 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27235 desired_align, align, &min_size, dynamic_check, issetmem);
27236 if (!issetmem)
27237 src = change_address (src, BLKmode, srcreg);
27238 dst = change_address (dst, BLKmode, destreg);
27239 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27240 epilogue_size_needed = 0;
27241 if (need_zero_guard
27242 && min_size < (unsigned HOST_WIDE_INT) size_needed)
27244 /* It is possible that we copied enough so the main loop will not
27245 execute. */
27246 gcc_assert (size_needed > 1);
27247 if (jump_around_label == NULL_RTX)
27248 jump_around_label = gen_label_rtx ();
27249 emit_cmp_and_jump_insns (count_exp,
27250 GEN_INT (size_needed),
27251 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27252 if (expected_size == -1
27253 || expected_size < (desired_align - align) / 2 + size_needed)
27254 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27255 else
27256 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27259 /* Ensure that alignment prologue won't copy past end of block. */
27260 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
27262 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
27263 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
27264 Make sure it is power of 2. */
27265 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
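/* As a worked example of the rounding above: an operand of 23 yields
   1 << (4 + 1) == 32, and a value that is already a power of two,
   e.g. 16, also rounds up to 32, so the result is always a power of
   two strictly greater than the operand.  */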
27267 /* To improve performance of small blocks, we jump around the VAL
27268 promoting code. This means that if the promoted VAL is not constant,
27269 we might not use it in the epilogue and have to use the byte
27270 loop variant. */
27271 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
27272 force_loopy_epilogue = true;
27273 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27274 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27276 /* If main algorithm works on QImode, no epilogue is needed.
27277 For small sizes just don't align anything. */
27278 if (size_needed == 1)
27279 desired_align = align;
27280 else
27281 goto epilogue;
27283 else if (!count
27284 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27286 label = gen_label_rtx ();
27287 emit_cmp_and_jump_insns (count_exp,
27288 GEN_INT (epilogue_size_needed),
27289 LTU, 0, counter_mode (count_exp), 1, label);
27290 if (expected_size == -1 || expected_size < epilogue_size_needed)
27291 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27292 else
27293 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27297 /* Emit code to decide at runtime whether a library call or inline code
27298 should be used. */
27299 if (dynamic_check != -1)
27301 if (!issetmem && CONST_INT_P (count_exp))
27303 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
27305 emit_block_copy_via_libcall (dst, src, count_exp);
27306 count_exp = const0_rtx;
27307 goto epilogue;
27310 else
27312 rtx_code_label *hot_label = gen_label_rtx ();
27313 if (jump_around_label == NULL_RTX)
27314 jump_around_label = gen_label_rtx ();
27315 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
27316 LEU, 0, counter_mode (count_exp),
27317 1, hot_label);
27318 predict_jump (REG_BR_PROB_BASE * 90 / 100);
27319 if (issetmem)
27320 set_storage_via_libcall (dst, count_exp, val_exp);
27321 else
27322 emit_block_copy_via_libcall (dst, src, count_exp);
27323 emit_jump (jump_around_label);
27324 emit_label (hot_label);
27328 /* Step 2: Alignment prologue. */
27329 /* Do the expensive promotion once we branched off the small blocks. */
27330 if (issetmem && !promoted_val)
27331 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27332 desired_align, align);
27334 if (desired_align > align && !misaligned_prologue_used)
27336 if (align_bytes == 0)
27338 /* Except for the first move in the prologue, we no longer know the
27339 constant offset in the aliasing info. It doesn't seem worth
27340 the pain to maintain it for the first move, so throw away
27341 the info early. */
27342 dst = change_address (dst, BLKmode, destreg);
27343 if (!issetmem)
27344 src = change_address (src, BLKmode, srcreg);
27345 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
27346 promoted_val, vec_promoted_val,
27347 count_exp, align, desired_align,
27348 issetmem);
27349 /* At most desired_align - align bytes are copied. */
27350 if (min_size < (unsigned)(desired_align - align))
27351 min_size = 0;
27352 else
27353 min_size -= desired_align - align;
27355 else
27357 /* If we know how many bytes need to be stored before dst is
27358 sufficiently aligned, maintain aliasing info accurately. */
27359 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
27360 srcreg,
27361 promoted_val,
27362 vec_promoted_val,
27363 desired_align,
27364 align_bytes,
27365 issetmem);
27367 count_exp = plus_constant (counter_mode (count_exp),
27368 count_exp, -align_bytes);
27369 count -= align_bytes;
27370 min_size -= align_bytes;
27371 max_size -= align_bytes;
27373 if (need_zero_guard
27374 && min_size < (unsigned HOST_WIDE_INT) size_needed
27375 && (count < (unsigned HOST_WIDE_INT) size_needed
27376 || (align_bytes == 0
27377 && count < ((unsigned HOST_WIDE_INT) size_needed
27378 + desired_align - align))))
27380 /* It is possible that we copied enough so the main loop will not
27381 execute. */
27382 gcc_assert (size_needed > 1);
27383 if (label == NULL_RTX)
27384 label = gen_label_rtx ();
27385 emit_cmp_and_jump_insns (count_exp,
27386 GEN_INT (size_needed),
27387 LTU, 0, counter_mode (count_exp), 1, label);
27388 if (expected_size == -1
27389 || expected_size < (desired_align - align) / 2 + size_needed)
27390 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27391 else
27392 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27395 if (label && size_needed == 1)
27397 emit_label (label);
27398 LABEL_NUSES (label) = 1;
27399 label = NULL;
27400 epilogue_size_needed = 1;
27401 if (issetmem)
27402 promoted_val = val_exp;
27404 else if (label == NULL_RTX && !misaligned_prologue_used)
27405 epilogue_size_needed = size_needed;
27407 /* Step 3: Main loop. */
27409 switch (alg)
27411 case libcall:
27412 case no_stringop:
27413 case last_alg:
27414 gcc_unreachable ();
27415 case loop_1_byte:
27416 case loop:
27417 case unrolled_loop:
27418 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
27419 count_exp, move_mode, unroll_factor,
27420 expected_size, issetmem);
27421 break;
27422 case vector_loop:
27423 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
27424 vec_promoted_val, count_exp, move_mode,
27425 unroll_factor, expected_size, issetmem);
27426 break;
27427 case rep_prefix_8_byte:
27428 case rep_prefix_4_byte:
27429 case rep_prefix_1_byte:
27430 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
27431 val_exp, count_exp, move_mode, issetmem);
27432 break;
27434 /* Properly adjust the offsets of the src and dest memory for aliasing. */
27435 if (CONST_INT_P (count_exp))
27437 if (!issetmem)
27438 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
27439 (count / size_needed) * size_needed);
27440 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
27441 (count / size_needed) * size_needed);
27443 else
27445 if (!issetmem)
27446 src = change_address (src, BLKmode, srcreg);
27447 dst = change_address (dst, BLKmode, destreg);
27450 /* Step 4: Epilogue to copy the remaining bytes. */
27451 epilogue:
27452 if (label)
27454 /* When the main loop is done, COUNT_EXP might hold the original count,
27455 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
27456 The epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
27457 bytes. Compensate if needed. */
27459 if (size_needed < epilogue_size_needed)
27461 tmp =
27462 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
27463 GEN_INT (size_needed - 1), count_exp, 1,
27464 OPTAB_DIRECT);
27465 if (tmp != count_exp)
27466 emit_move_insn (count_exp, tmp);
27468 emit_label (label);
27469 LABEL_NUSES (label) = 1;
27472 if (count_exp != const0_rtx && epilogue_size_needed > 1)
27474 if (force_loopy_epilogue)
27475 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
27476 epilogue_size_needed);
27477 else
27479 if (issetmem)
27480 expand_setmem_epilogue (dst, destreg, promoted_val,
27481 vec_promoted_val, count_exp,
27482 epilogue_size_needed);
27483 else
27484 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
27485 epilogue_size_needed);
27488 if (jump_around_label)
27489 emit_label (jump_around_label);
27490 return true;
27494 /* Expand the appropriate insns for doing strlen if not just doing
27495 repnz; scasb
27497 out = result, initialized with the start address
27498 align_rtx = alignment of the address.
27499 scratch = scratch register, initialized with the start address when
27500 not aligned, otherwise undefined
27502 This is just the body. It needs the initializations mentioned above and
27503 some address computation at the end. These things are done in i386.md. */
27505 static void
27506 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
27508 int align;
27509 rtx tmp;
27510 rtx_code_label *align_2_label = NULL;
27511 rtx_code_label *align_3_label = NULL;
27512 rtx_code_label *align_4_label = gen_label_rtx ();
27513 rtx_code_label *end_0_label = gen_label_rtx ();
27514 rtx mem;
27515 rtx tmpreg = gen_reg_rtx (SImode);
27516 rtx scratch = gen_reg_rtx (SImode);
27517 rtx cmp;
27519 align = 0;
27520 if (CONST_INT_P (align_rtx))
27521 align = INTVAL (align_rtx);
27523 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
27525 /* Is there a known alignment and is it less than 4? */
27526 if (align < 4)
27528 rtx scratch1 = gen_reg_rtx (Pmode);
27529 emit_move_insn (scratch1, out);
27530 /* Is there a known alignment and is it not 2? */
27531 if (align != 2)
27533 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
27534 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
27536 /* Leave just the 3 lower bits. */
27537 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
27538 NULL_RTX, 0, OPTAB_WIDEN);
27540 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27541 Pmode, 1, align_4_label);
27542 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
27543 Pmode, 1, align_2_label);
27544 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
27545 Pmode, 1, align_3_label);
27547 else
27549 /* Since the alignment is 2, we have to check 2 or 0 bytes;
27550 check whether it is aligned to a 4-byte boundary. */
27552 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
27553 NULL_RTX, 0, OPTAB_WIDEN);
27555 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27556 Pmode, 1, align_4_label);
27559 mem = change_address (src, QImode, out);
27561 /* Now compare the bytes. */
27563 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
27564 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
27565 QImode, 1, end_0_label);
27567 /* Increment the address. */
27568 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27570 /* Not needed with an alignment of 2 */
27571 if (align != 2)
27573 emit_label (align_2_label);
27575 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27576 end_0_label);
27578 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27580 emit_label (align_3_label);
27583 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27584 end_0_label);
27586 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27589 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
27590 align this loop: it only makes the program larger and does not help to
27591 speed it up. */
27592 emit_label (align_4_label);
27594 mem = change_address (src, SImode, out);
27595 emit_move_insn (scratch, mem);
27596 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
27598 /* This formula yields a nonzero result iff one of the bytes is zero.
27599 This saves three branches inside the loop and many cycles. */
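/* As a sketch of the bit trick below: for a word X the value
   (X - 0x01010101) & ~X & 0x80808080
   is nonzero exactly when some byte of X is zero.  For example,
   X = 0x11002233 gives 0x0FFF2132 & 0xEEFFDDCC & 0x80808080
   = 0x00800000, flagging the zero byte, while X = 0x01010101,
   which has no zero byte, yields 0.  */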
27601 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
27602 emit_insn (gen_one_cmplsi2 (scratch, scratch));
27603 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
27604 emit_insn (gen_andsi3 (tmpreg, tmpreg,
27605 gen_int_mode (0x80808080, SImode)));
27606 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
27607 align_4_label);
27609 if (TARGET_CMOVE)
27611 rtx reg = gen_reg_rtx (SImode);
27612 rtx reg2 = gen_reg_rtx (Pmode);
27613 emit_move_insn (reg, tmpreg);
27614 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
27616 /* If zero is not in the first two bytes, move two bytes forward. */
27617 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27618 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27619 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27620 emit_insn (gen_rtx_SET (tmpreg,
27621 gen_rtx_IF_THEN_ELSE (SImode, tmp,
27622 reg,
27623 tmpreg)));
27624 /* Emit lea manually to avoid clobbering of flags. */
27625 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
27627 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27628 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27629 emit_insn (gen_rtx_SET (out,
27630 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
27631 reg2,
27632 out)));
27634 else
27636 rtx_code_label *end_2_label = gen_label_rtx ();
27637 /* Is zero in the first two bytes? */
27639 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27640 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27641 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
27642 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27643 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
27644 pc_rtx);
27645 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
27646 JUMP_LABEL (tmp) = end_2_label;
27648 /* Not in the first two. Move two bytes forward. */
27649 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
27650 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
27652 emit_label (end_2_label);
27656 /* Avoid branch in fixing the byte. */
27657 tmpreg = gen_lowpart (QImode, tmpreg);
27658 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27659 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27660 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27661 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27663 emit_label (end_0_label);
27666 /* Expand strlen. */
27668 bool
27669 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27671 rtx addr, scratch1, scratch2, scratch3, scratch4;
27673 /* The generic case of the strlen expander is long. Avoid
27674 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
27676 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27677 && !TARGET_INLINE_ALL_STRINGOPS
27678 && !optimize_insn_for_size_p ()
27679 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27680 return false;
27682 addr = force_reg (Pmode, XEXP (src, 0));
27683 scratch1 = gen_reg_rtx (Pmode);
27685 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27686 && !optimize_insn_for_size_p ())
27688 /* It seems that some optimizers do not combine a call like
27689 foo(strlen(bar), strlen(bar));
27690 when the move and the subtraction are done here. The length is
27691 calculated just once when these instructions are done inside
27692 output_strlen_unroll(). But since &bar[strlen(bar)] is
27693 often used and one fewer register is live for the lifetime of
27694 output_strlen_unroll(), this is better. */
27696 emit_move_insn (out, addr);
27698 ix86_expand_strlensi_unroll_1 (out, src, align);
27700 /* strlensi_unroll_1 returns the address of the zero at the end of
27701 the string, like memchr(), so compute the length by subtracting
27702 the start address. */
27703 emit_insn (ix86_gen_sub3 (out, out, addr));
27705 else
27707 rtx unspec;
27709 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27710 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27711 return false;
27712 /* Can't use this for non-default address spaces. */
27713 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27714 return false;
27716 scratch2 = gen_reg_rtx (Pmode);
27717 scratch3 = gen_reg_rtx (Pmode);
27718 scratch4 = force_reg (Pmode, constm1_rtx);
27720 emit_move_insn (scratch3, addr);
27721 eoschar = force_reg (QImode, eoschar);
27723 src = replace_equiv_address_nv (src, scratch3);
27725 /* If .md starts supporting :P, this can be done in .md. */
27726 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27727 scratch4), UNSPEC_SCAS);
27728 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
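/* A note on the recovery below, assuming the count register starts at -1
   (SCRATCH4 above): repnz scasb decrements it once per byte scanned,
   including the terminating zero, leaving -(len + 2), so the length can
   then be recovered as ~count - 1.  */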
27729 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27730 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27732 return true;
27735 /* For a given symbol (function), construct code to compute the address of its PLT
27736 entry in the large x86-64 PIC model. */
27737 static rtx
27738 construct_plt_address (rtx symbol)
27740 rtx tmp, unspec;
27742 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27743 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27744 gcc_assert (Pmode == DImode);
27746 tmp = gen_reg_rtx (Pmode);
27747 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27749 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27750 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
27751 return tmp;
27755 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27756 rtx callarg2,
27757 rtx pop, bool sibcall)
27759 rtx vec[3];
27760 rtx use = NULL, call;
27761 unsigned int vec_len = 0;
27762 tree fndecl;
27764 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27766 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
27767 if (fndecl
27768 && (lookup_attribute ("interrupt",
27769 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
27770 error ("interrupt service routine can't be called directly");
27772 else
27773 fndecl = NULL_TREE;
27775 if (pop == const0_rtx)
27776 pop = NULL;
27777 gcc_assert (!TARGET_64BIT || !pop);
27779 if (TARGET_MACHO && !TARGET_64BIT)
27781 #if TARGET_MACHO
27782 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27783 fnaddr = machopic_indirect_call_target (fnaddr);
27784 #endif
27786 else
27788 /* Static functions and indirect calls don't need the PIC register. Also,
27789 check whether the PLT was explicitly avoided via -fno-plt or the "noplt"
27790 attribute, making it an indirect call. */
27791 rtx addr = XEXP (fnaddr, 0);
27792 if (flag_pic
27793 && GET_CODE (addr) == SYMBOL_REF
27794 && !SYMBOL_REF_LOCAL_P (addr))
27796 if (flag_plt
27797 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27798 || !lookup_attribute ("noplt",
27799 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27801 if (!TARGET_64BIT
27802 || (ix86_cmodel == CM_LARGE_PIC
27803 && DEFAULT_ABI != MS_ABI))
27805 use_reg (&use, gen_rtx_REG (Pmode,
27806 REAL_PIC_OFFSET_TABLE_REGNUM));
27807 if (ix86_use_pseudo_pic_reg ())
27808 emit_move_insn (gen_rtx_REG (Pmode,
27809 REAL_PIC_OFFSET_TABLE_REGNUM),
27810 pic_offset_table_rtx);
27813 else if (!TARGET_PECOFF && !TARGET_MACHO)
27815 if (TARGET_64BIT)
27817 fnaddr = gen_rtx_UNSPEC (Pmode,
27818 gen_rtvec (1, addr),
27819 UNSPEC_GOTPCREL);
27820 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27822 else
27824 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27825 UNSPEC_GOT);
27826 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27827 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27828 fnaddr);
27830 fnaddr = gen_const_mem (Pmode, fnaddr);
27831 /* Pmode may not be the same as word_mode for x32, which
27832 doesn't support indirect branch via 32-bit memory slot.
27833 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27834 indirect branch via x32 GOT slot is OK. */
27835 if (GET_MODE (fnaddr) != word_mode)
27836 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27837 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27842 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27843 parameters passed in vector registers. */
27844 if (TARGET_64BIT
27845 && (INTVAL (callarg2) > 0
27846 || (INTVAL (callarg2) == 0
27847 && (TARGET_SSE || !flag_skip_rax_setup))))
27849 rtx al = gen_rtx_REG (QImode, AX_REG);
27850 emit_move_insn (al, callarg2);
27851 use_reg (&use, al);
27854 if (ix86_cmodel == CM_LARGE_PIC
27855 && !TARGET_PECOFF
27856 && MEM_P (fnaddr)
27857 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27858 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27859 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27860 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27861 branch via x32 GOT slot is OK. */
27862 else if (!(TARGET_X32
27863 && MEM_P (fnaddr)
27864 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27865 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27866 && (sibcall
27867 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27868 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
27870 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27871 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27874 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27876 if (retval)
27878 /* We should add the bound registers as destinations in case a
27879 pointer with bounds may be returned. */
27880 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27882 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27883 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27884 if (GET_CODE (retval) == PARALLEL)
27886 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27887 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27888 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27889 retval = chkp_join_splitted_slot (retval, par);
27891 else
27893 retval = gen_rtx_PARALLEL (VOIDmode,
27894 gen_rtvec (3, retval, b0, b1));
27895 chkp_put_regs_to_expr_list (retval);
27899 call = gen_rtx_SET (retval, call);
27901 vec[vec_len++] = call;
27903 if (pop)
27905 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27906 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27907 vec[vec_len++] = pop;
27910 if (cfun->machine->no_caller_saved_registers
27911 && (!fndecl
27912 || (!TREE_THIS_VOLATILE (fndecl)
27913 && !lookup_attribute ("no_caller_saved_registers",
27914 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
27916 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
27917 bool is_64bit_ms_abi = (TARGET_64BIT
27918 && ix86_function_abi (fndecl) == MS_ABI);
27919 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
27921 /* If there are no caller-saved registers, add all registers
27922 that are clobbered by the call which returns. */
27923 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27924 if (!fixed_regs[i]
27925 && (ix86_call_used_regs[i] == 1
27926 || (ix86_call_used_regs[i] & c_mask))
27927 && !STACK_REGNO_P (i)
27928 && !MMX_REGNO_P (i))
27929 clobber_reg (&use,
27930 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
27932 else if (TARGET_64BIT_MS_ABI
27933 && (!callarg2 || INTVAL (callarg2) != -2))
27935 unsigned i;
27937 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
27939 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27940 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27942 clobber_reg (&use, gen_rtx_REG (mode, regno));
27945 /* Set here, but it may get cleared later. */
27946 if (TARGET_CALL_MS2SYSV_XLOGUES)
27948 if (!TARGET_SSE)
27951 /* Don't break hot-patched functions. */
27952 else if (ix86_function_ms_hook_prologue (current_function_decl))
27955 /* TODO: Cases not yet examined. */
27956 else if (flag_split_stack)
27957 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
27959 else
27961 gcc_assert (!reload_completed);
27962 cfun->machine->call_ms2sysv = true;
27967 if (vec_len > 1)
27968 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27969 call = emit_call_insn (call);
27970 if (use)
27971 CALL_INSN_FUNCTION_USAGE (call) = use;
27973 return call;
27976 /* Return true if the function being called was marked with attribute
27977 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
27978 to handle the non-PIC case in the backend because there is no easy
27979 interface for the front-end to force non-PLT calls to use the GOT.
27980 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
27981 to call the function marked "noplt" indirectly. */
27983 static bool
27984 ix86_nopic_noplt_attribute_p (rtx call_op)
27986 if (flag_pic || ix86_cmodel == CM_LARGE
27987 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
27988 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27989 || SYMBOL_REF_LOCAL_P (call_op))
27990 return false;
27992 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27994 if (!flag_plt
27995 || (symbol_decl != NULL_TREE
27996 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27997 return true;
27999 return false;
28002 /* Output the assembly for a call instruction. */
28004 const char *
28005 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
28007 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
28008 bool seh_nop_p = false;
28009 const char *xasm;
28011 if (SIBLING_CALL_P (insn))
28013 if (direct_p)
28015 if (ix86_nopic_noplt_attribute_p (call_op))
28017 if (TARGET_64BIT)
28018 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28019 else
28020 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28022 else
28023 xasm = "%!jmp\t%P0";
28025 /* SEH epilogue detection requires the indirect branch case
28026 to include REX.W. */
28027 else if (TARGET_SEH)
28028 xasm = "%!rex.W jmp\t%A0";
28029 else
28030 xasm = "%!jmp\t%A0";
28032 output_asm_insn (xasm, &call_op);
28033 return "";
28036 /* SEH unwinding can require an extra nop to be emitted in several
28037 circumstances. Determine if we have one of those. */
28038 if (TARGET_SEH)
28040 rtx_insn *i;
28042 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
28044 /* If we get to another real insn, we don't need the nop. */
28045 if (INSN_P (i))
28046 break;
28048 /* If we get to the epilogue note, prevent a catch region from
28049 being adjacent to the standard epilogue sequence. With non-call
28050 exceptions, we'll have done this during epilogue emission. */
28051 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
28052 && !flag_non_call_exceptions
28053 && !can_throw_internal (insn))
28055 seh_nop_p = true;
28056 break;
28060 /* If we didn't find a real insn following the call, prevent the
28061 unwinder from looking into the next function. */
28062 if (i == NULL)
28063 seh_nop_p = true;
28066 if (direct_p)
28068 if (ix86_nopic_noplt_attribute_p (call_op))
28070 if (TARGET_64BIT)
28071 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28072 else
28073 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28075 else
28076 xasm = "%!call\t%P0";
28078 else
28079 xasm = "%!call\t%A0";
28081 output_asm_insn (xasm, &call_op);
28083 if (seh_nop_p)
28084 return "nop";
28086 return "";
28089 /* Clear stack slot assignments remembered from previous functions.
28090 This is called from INIT_EXPANDERS once before RTL is emitted for each
28091 function. */
28093 static struct machine_function *
28094 ix86_init_machine_status (void)
28096 struct machine_function *f;
28098 f = ggc_cleared_alloc<machine_function> ();
28099 f->call_abi = ix86_abi;
28101 return f;
28104 /* Return a MEM corresponding to a stack slot with mode MODE.
28105 Allocate a new slot if necessary.
28107 The RTL for a function can have several slots available: N is
28108 which slot to use. */
28111 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
28113 struct stack_local_entry *s;
28115 gcc_assert (n < MAX_386_STACK_LOCALS);
28117 for (s = ix86_stack_locals; s; s = s->next)
28118 if (s->mode == mode && s->n == n)
28119 return validize_mem (copy_rtx (s->rtl));
28121 s = ggc_alloc<stack_local_entry> ();
28122 s->n = n;
28123 s->mode = mode;
28124 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
28126 s->next = ix86_stack_locals;
28127 ix86_stack_locals = s;
28128 return validize_mem (copy_rtx (s->rtl));
28131 static void
28132 ix86_instantiate_decls (void)
28134 struct stack_local_entry *s;
28136 for (s = ix86_stack_locals; s; s = s->next)
28137 if (s->rtl != NULL_RTX)
28138 instantiate_decl_rtl (s->rtl);
28141 /* Return the number used for encoding REG, in the range 0..7. */
28143 static int
28144 reg_encoded_number (rtx reg)
28146 unsigned regno = REGNO (reg);
28147 switch (regno)
28149 case AX_REG:
28150 return 0;
28151 case CX_REG:
28152 return 1;
28153 case DX_REG:
28154 return 2;
28155 case BX_REG:
28156 return 3;
28157 case SP_REG:
28158 return 4;
28159 case BP_REG:
28160 return 5;
28161 case SI_REG:
28162 return 6;
28163 case DI_REG:
28164 return 7;
28165 default:
28166 break;
28168 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
28169 return regno - FIRST_STACK_REG;
28170 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
28171 return regno - FIRST_SSE_REG;
28172 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
28173 return regno - FIRST_MMX_REG;
28174 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
28175 return regno - FIRST_REX_SSE_REG;
28176 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
28177 return regno - FIRST_REX_INT_REG;
28178 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
28179 return regno - FIRST_MASK_REG;
28180 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
28181 return regno - FIRST_BND_REG;
28182 return -1;
28185 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
28186 in its encoding if it could be relevant for ROP mitigation, otherwise
28187 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
28188 used for calculating it into them. */
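/* For the register-register forms handled here, the byte is built as
   mod=11 (0xc0) | reg<<3 | rm; e.g. with op0 = %eax (0) and op1 = %ecx (1)
   the function below returns 0xc0 + (1 << 3) + 0 = 0xc8.  */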
28190 static int
28191 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
28192 int *popno0 = 0, int *popno1 = 0)
28194 if (asm_noperands (PATTERN (insn)) >= 0)
28195 return -1;
28196 int has_modrm = get_attr_modrm (insn);
28197 if (!has_modrm)
28198 return -1;
28199 enum attr_modrm_class cls = get_attr_modrm_class (insn);
28200 rtx op0, op1;
28201 switch (cls)
28203 case MODRM_CLASS_OP02:
28204 gcc_assert (noperands >= 3);
28205 if (popno0)
28207 *popno0 = 0;
28208 *popno1 = 2;
28210 op0 = operands[0];
28211 op1 = operands[2];
28212 break;
28213 case MODRM_CLASS_OP01:
28214 gcc_assert (noperands >= 2);
28215 if (popno0)
28217 *popno0 = 0;
28218 *popno1 = 1;
28220 op0 = operands[0];
28221 op1 = operands[1];
28222 break;
28223 default:
28224 return -1;
28226 if (REG_P (op0) && REG_P (op1))
28228 int enc0 = reg_encoded_number (op0);
28229 int enc1 = reg_encoded_number (op1);
28230 return 0xc0 + (enc1 << 3) + enc0;
28232 return -1;
28235 /* Check whether x86 address PARTS is a pc-relative address. */
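/* E.g. a bare symbolic displacement with no base or index register, such
   as foo(%rip) or a label reference, is treated as pc-relative here.  */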
28237 bool
28238 ix86_rip_relative_addr_p (struct ix86_address *parts)
28240 rtx base, index, disp;
28242 base = parts->base;
28243 index = parts->index;
28244 disp = parts->disp;
28246 if (disp && !base && !index)
28248 if (TARGET_64BIT)
28250 rtx symbol = disp;
28252 if (GET_CODE (disp) == CONST)
28253 symbol = XEXP (disp, 0);
28254 if (GET_CODE (symbol) == PLUS
28255 && CONST_INT_P (XEXP (symbol, 1)))
28256 symbol = XEXP (symbol, 0);
28258 if (GET_CODE (symbol) == LABEL_REF
28259 || (GET_CODE (symbol) == SYMBOL_REF
28260 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
28261 || (GET_CODE (symbol) == UNSPEC
28262 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
28263 || XINT (symbol, 1) == UNSPEC_PCREL
28264 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
28265 return true;
28268 return false;
28271 /* Calculate the length of the memory address in the instruction encoding.
28272 This includes the addr32 prefix but not the one-byte modrm, opcode,
28273 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
28276 memory_address_length (rtx addr, bool lea)
28278 struct ix86_address parts;
28279 rtx base, index, disp;
28280 int len;
28281 int ok;
28283 if (GET_CODE (addr) == PRE_DEC
28284 || GET_CODE (addr) == POST_INC
28285 || GET_CODE (addr) == PRE_MODIFY
28286 || GET_CODE (addr) == POST_MODIFY)
28287 return 0;
28289 ok = ix86_decompose_address (addr, &parts);
28290 gcc_assert (ok);
28292 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
28294 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
28295 if (TARGET_64BIT && !lea
28296 && (SImode_address_operand (addr, VOIDmode)
28297 || (parts.base && GET_MODE (parts.base) == SImode)
28298 || (parts.index && GET_MODE (parts.index) == SImode)))
28299 len++;
28301 base = parts.base;
28302 index = parts.index;
28303 disp = parts.disp;
28305 if (base && SUBREG_P (base))
28306 base = SUBREG_REG (base);
28307 if (index && SUBREG_P (index))
28308 index = SUBREG_REG (index);
28310 gcc_assert (base == NULL_RTX || REG_P (base));
28311 gcc_assert (index == NULL_RTX || REG_P (index));
28313 /* Rule of thumb:
28314 - esp as the base always wants an index,
28315 - ebp as the base always wants a displacement,
28316 - r12 as the base always wants an index,
28317 - r13 as the base always wants a displacement. */
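/* As a concrete sketch (assuming standard encodings): movl (%esp), %eax
   needs a SIB byte and encodes as 8b 04 24, while movl (%ebp), %eax needs
   a zero displacement and encodes as 8b 45 00; hence the extra byte
   accounted for below.  */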
28319 /* Register Indirect. */
28320 if (base && !index && !disp)
28322 /* esp (for its index) and ebp (for its displacement) need
28323 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
28324 code. */
28325 if (base == arg_pointer_rtx
28326 || base == frame_pointer_rtx
28327 || REGNO (base) == SP_REG
28328 || REGNO (base) == BP_REG
28329 || REGNO (base) == R12_REG
28330 || REGNO (base) == R13_REG)
28331 len++;
28334 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
28335 is not disp32, but disp32(%rip), so for disp32 a
28336 SIB byte is needed, unless print_operand_address
28337 optimizes it into disp32(%rip) or (%rip) is implied
28338 by the UNSPEC. */
28339 else if (disp && !base && !index)
28341 len += 4;
28342 if (!ix86_rip_relative_addr_p (&parts))
28343 len++;
28345 else
28347 /* Find the length of the displacement constant. */
28348 if (disp)
28350 if (base && satisfies_constraint_K (disp))
28351 len += 1;
28352 else
28353 len += 4;
28355 /* ebp always wants a displacement. Similarly r13. */
28356 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
28357 len++;
28359 /* An index requires the two-byte modrm form.... */
28360 if (index
28361 /* ...like esp (or r12), which always wants an index. */
28362 || base == arg_pointer_rtx
28363 || base == frame_pointer_rtx
28364 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
28365 len++;
28368 return len;
28371 /* Compute the default value for the "length_immediate" attribute. When
28372 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
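/* For example, addl $1000, %eax needs a 4-byte immediate, while
   addl $4, %eax can use the sign-extended 8-bit immediate form, so the
   short-form case below reports a length of 1.  */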
28374 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
28376 int len = 0;
28377 int i;
28378 extract_insn_cached (insn);
28379 for (i = recog_data.n_operands - 1; i >= 0; --i)
28380 if (CONSTANT_P (recog_data.operand[i]))
28382 enum attr_mode mode = get_attr_mode (insn);
28384 gcc_assert (!len);
28385 if (shortform && CONST_INT_P (recog_data.operand[i]))
28387 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
28388 switch (mode)
28390 case MODE_QI:
28391 len = 1;
28392 continue;
28393 case MODE_HI:
28394 ival = trunc_int_for_mode (ival, HImode);
28395 break;
28396 case MODE_SI:
28397 ival = trunc_int_for_mode (ival, SImode);
28398 break;
28399 default:
28400 break;
28402 if (IN_RANGE (ival, -128, 127))
28404 len = 1;
28405 continue;
28408 switch (mode)
28410 case MODE_QI:
28411 len = 1;
28412 break;
28413 case MODE_HI:
28414 len = 2;
28415 break;
28416 case MODE_SI:
28417 len = 4;
28418 break;
28419 /* Immediates for DImode instructions are encoded
28420 as 32-bit sign-extended values. */
28421 case MODE_DI:
28422 len = 4;
28423 break;
28424 default:
28425 fatal_insn ("unknown insn mode", insn);
28428 return len;
28431 /* Compute default value for "length_address" attribute. */
28433 ix86_attr_length_address_default (rtx_insn *insn)
28435 int i;
28437 if (get_attr_type (insn) == TYPE_LEA)
28439 rtx set = PATTERN (insn), addr;
28441 if (GET_CODE (set) == PARALLEL)
28442 set = XVECEXP (set, 0, 0);
28444 gcc_assert (GET_CODE (set) == SET);
28446 addr = SET_SRC (set);
28448 return memory_address_length (addr, true);
28451 extract_insn_cached (insn);
28452 for (i = recog_data.n_operands - 1; i >= 0; --i)
28454 rtx op = recog_data.operand[i];
28455 if (MEM_P (op))
28457 constrain_operands_cached (insn, reload_completed);
28458 if (which_alternative != -1)
28460 const char *constraints = recog_data.constraints[i];
28461 int alt = which_alternative;
28463 while (*constraints == '=' || *constraints == '+')
28464 constraints++;
28465 while (alt-- > 0)
28466 while (*constraints++ != ',')
28468 /* Skip ignored operands. */
28469 if (*constraints == 'X')
28470 continue;
28473 int len = memory_address_length (XEXP (op, 0), false);
28475 /* Account for segment prefix for non-default addr spaces. */
28476 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
28477 len++;
28479 return len;
28482 return 0;
28485 /* Compute the default value for the "length_vex" attribute. It includes
28486 the 2- or 3-byte VEX prefix and 1 opcode byte. */
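/* For instance, vaddps %xmm1, %xmm2, %xmm3 can use the 2-byte prefix
   (leading byte c5), whereas any insn needing VEX.W, or the REX.X/REX.B
   bits for extended registers or memory operands, must use the 3-byte
   prefix (leading byte c4), as checked below.  */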
28489 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
28490 bool has_vex_w)
28492 int i;
28494 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
28495 requires the 3-byte VEX prefix. */
28496 if (!has_0f_opcode || has_vex_w)
28497 return 3 + 1;
28499 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
28500 if (!TARGET_64BIT)
28501 return 2 + 1;
28503 extract_insn_cached (insn);
28505 for (i = recog_data.n_operands - 1; i >= 0; --i)
28506 if (REG_P (recog_data.operand[i]))
28508 /* REX.W bit uses 3 byte VEX prefix. */
28509 if (GET_MODE (recog_data.operand[i]) == DImode
28510 && GENERAL_REG_P (recog_data.operand[i]))
28511 return 3 + 1;
28513 else
28515 /* REX.X or REX.B bits use 3 byte VEX prefix. */
28516 if (MEM_P (recog_data.operand[i])
28517 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
28518 return 3 + 1;
28521 return 2 + 1;
28525 static bool
28526 ix86_class_likely_spilled_p (reg_class_t);
28528 /* Return true if the lhs of INSN is a HW function argument register and
28529 set IS_SPILLED to true if it is a likely-spilled HW register. */
28530 static bool
28531 insn_is_function_arg (rtx insn, bool* is_spilled)
28533 rtx dst;
28535 if (!NONDEBUG_INSN_P (insn))
28536 return false;
28537 /* Call instructions are not movable; ignore them. */
28538 if (CALL_P (insn))
28539 return false;
28540 insn = PATTERN (insn);
28541 if (GET_CODE (insn) == PARALLEL)
28542 insn = XVECEXP (insn, 0, 0);
28543 if (GET_CODE (insn) != SET)
28544 return false;
28545 dst = SET_DEST (insn);
28546 if (REG_P (dst) && HARD_REGISTER_P (dst)
28547 && ix86_function_arg_regno_p (REGNO (dst)))
28549 /* Is it likely spilled HW register? */
28550 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28551 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28552 *is_spilled = true;
28553 return true;
28555 return false;
28558 /* Add output dependencies for a chain of adjacent function arguments, but
28559 only if there is a move to a likely-spilled HW register. Return the first
28560 argument if at least one dependence was added, or NULL otherwise. */
28561 static rtx_insn *
28562 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28564 rtx_insn *insn;
28565 rtx_insn *last = call;
28566 rtx_insn *first_arg = NULL;
28567 bool is_spilled = false;
28569 head = PREV_INSN (head);
28571 /* Find the argument-passing instruction nearest to the call. */
28572 while (true)
28574 last = PREV_INSN (last);
28575 if (last == head)
28576 return NULL;
28577 if (!NONDEBUG_INSN_P (last))
28578 continue;
28579 if (insn_is_function_arg (last, &is_spilled))
28580 break;
28581 return NULL;
28584 first_arg = last;
28585 while (true)
28587 insn = PREV_INSN (last);
28588 if (!INSN_P (insn))
28589 break;
28590 if (insn == head)
28591 break;
28592 if (!NONDEBUG_INSN_P (insn))
28594 last = insn;
28595 continue;
28597 if (insn_is_function_arg (insn, &is_spilled))
28599 /* Add an output dependence between two function arguments if the chain
28600 of output arguments contains likely-spilled HW registers. */
28601 if (is_spilled)
28602 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28603 first_arg = last = insn;
28605 else
28606 break;
28608 if (!is_spilled)
28609 return NULL;
28610 return first_arg;
28613 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
28614 code motion. */
28615 static void
28616 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28618 rtx set;
28619 rtx tmp;
28621 /* Add anti dependencies for bounds stores. */
28622 if (INSN_P (insn)
28623 && GET_CODE (PATTERN (insn)) == PARALLEL
28624 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28625 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28627 add_dependence (first_arg, insn, REG_DEP_ANTI);
28628 return;
28631 set = single_set (insn);
28632 if (!set)
28633 return;
28634 tmp = SET_DEST (set);
28635 if (REG_P (tmp))
28637 /* Add output dependency to the first function argument. */
28638 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28639 return;
28641 /* Add anti dependency. */
28642 add_dependence (first_arg, insn, REG_DEP_ANTI);
28645 /* Avoid cross-block motion of a function argument by adding a dependency
28646 from the first non-jump instruction in BB. */
28647 static void
28648 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28650 rtx_insn *insn = BB_END (bb);
28652 while (insn)
28654 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28656 rtx set = single_set (insn);
28657 if (set)
28659 avoid_func_arg_motion (arg, insn);
28660 return;
28663 if (insn == BB_HEAD (bb))
28664 return;
28665 insn = PREV_INSN (insn);
28669 /* Hook for the pre-reload scheduler - avoid motion of function arguments
28670 passed in likely-spilled HW registers. */
28671 static void
28672 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28674 rtx_insn *insn;
28675 rtx_insn *first_arg = NULL;
28676 if (reload_completed)
28677 return;
28678 while (head != tail && DEBUG_INSN_P (head))
28679 head = NEXT_INSN (head);
28680 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28681 if (INSN_P (insn) && CALL_P (insn))
28683 first_arg = add_parameter_dependencies (insn, head);
28684 if (first_arg)
28686 /* Add a dependee for the first argument to predecessors, but only if
28687 the region contains more than one block. */
28688 basic_block bb = BLOCK_FOR_INSN (insn);
28689 int rgn = CONTAINING_RGN (bb->index);
28690 int nr_blks = RGN_NR_BLOCKS (rgn);
28691 /* Skip trivial regions and region head blocks that can have
28692 predecessors outside of region. */
28693 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28695 edge e;
28696 edge_iterator ei;
28698 /* Regions are SCCs with the exception of selective
28699 scheduling with pipelining of outer blocks enabled.
28700 So also check that immediate predecessors of a non-head
28701 block are in the same region. */
28702 FOR_EACH_EDGE (e, ei, bb->preds)
28704 /* Avoid creating loop-carried dependencies by using the
28705 topological ordering of the region. */
28706 if (rgn == CONTAINING_RGN (e->src->index)
28707 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28708 add_dependee_for_func_arg (first_arg, e->src);
28711 insn = first_arg;
28712 if (insn == head)
28713 break;
28716 else if (first_arg)
28717 avoid_func_arg_motion (first_arg, insn);
28720 /* Hook for the pre-reload scheduler - set the priority of moves from
28721 likely-spilled HW registers to the maximum, to schedule them as soon as
28722 possible. These are moves from function argument registers at the top of
28723 the function entry and moves from function return value registers after a call. */
28724 static int
28725 ix86_adjust_priority (rtx_insn *insn, int priority)
28727 rtx set;
28729 if (reload_completed)
28730 return priority;
28732 if (!NONDEBUG_INSN_P (insn))
28733 return priority;
28735 set = single_set (insn);
28736 if (set)
28738 rtx tmp = SET_SRC (set);
28739 if (REG_P (tmp)
28740 && HARD_REGISTER_P (tmp)
28741 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28742 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28743 return current_sched_info->sched_max_insns_priority;
28746 return priority;
28749 /* Prepare for scheduling pass. */
28750 static void
28751 ix86_sched_init_global (FILE *, int, int)
28753 /* Install scheduling hooks for current CPU. Some of these hooks are used
28754 in time-critical parts of the scheduler, so we only set them up when
28755 they are actually used. */
28756 switch (ix86_tune)
28758 case PROCESSOR_CORE2:
28759 case PROCESSOR_NEHALEM:
28760 case PROCESSOR_SANDYBRIDGE:
28761 case PROCESSOR_HASWELL:
28762 /* Do not perform multipass scheduling for pre-reload schedule
28763 to save compile time. */
28764 if (reload_completed)
28766 ix86_core2i7_init_hooks ();
28767 break;
28769 /* Fall through. */
28770 default:
28771 targetm.sched.dfa_post_advance_cycle = NULL;
28772 targetm.sched.first_cycle_multipass_init = NULL;
28773 targetm.sched.first_cycle_multipass_begin = NULL;
28774 targetm.sched.first_cycle_multipass_issue = NULL;
28775 targetm.sched.first_cycle_multipass_backtrack = NULL;
28776 targetm.sched.first_cycle_multipass_end = NULL;
28777 targetm.sched.first_cycle_multipass_fini = NULL;
28778 break;
28783 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
28785 static HOST_WIDE_INT
28786 ix86_static_rtx_alignment (machine_mode mode)
28788 if (mode == DFmode)
28789 return 64;
28790 if (ALIGN_MODE_128 (mode))
28791 return MAX (128, GET_MODE_ALIGNMENT (mode));
28792 return GET_MODE_ALIGNMENT (mode);
28795 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28797 static HOST_WIDE_INT
28798 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28800 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28801 || TREE_CODE (exp) == INTEGER_CST)
28803 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
28804 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
28805 return MAX (mode_align, align);
28807 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28808 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28809 return BITS_PER_WORD;
28811 return align;
28814 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28815 the data type, and ALIGN is the alignment that the object would
28816 ordinarily have. */
28818 static int
28819 iamcu_alignment (tree type, int align)
28821 machine_mode mode;
28823 if (align < 32 || TYPE_USER_ALIGN (type))
28824 return align;
28826 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
28827 bytes. */
28828 mode = TYPE_MODE (strip_array_types (type));
28829 switch (GET_MODE_CLASS (mode))
28831 case MODE_INT:
28832 case MODE_COMPLEX_INT:
28833 case MODE_COMPLEX_FLOAT:
28834 case MODE_FLOAT:
28835 case MODE_DECIMAL_FLOAT:
28836 return 32;
28837 default:
28838 return align;
28842 /* Compute the alignment for a static variable.
28843 TYPE is the data type, and ALIGN is the alignment that
28844 the object would ordinarily have. The value of this function is used
28845 instead of that alignment to align the object. */
28848 ix86_data_alignment (tree type, int align, bool opt)
28850 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
28851 for symbols from other compilation units or symbols that don't need
28852 to bind locally. In order to preserve some ABI compatibility with
28853 those compilers, ensure we don't decrease alignment from what we
28854 used to assume. */
28856 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
28858 /* A data structure equal to or greater than the size of a cache line
28859 (64 bytes in the Pentium 4 and other recent Intel processors, including
28860 processors based on the Intel Core microarchitecture) should be aligned
28861 so that its base address is a multiple of the cache line size. */
28863 int max_align
28864 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
28866 if (max_align < BITS_PER_WORD)
28867 max_align = BITS_PER_WORD;
28869 switch (ix86_align_data_type)
28871 case ix86_align_data_type_abi: opt = false; break;
28872 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
28873 case ix86_align_data_type_cacheline: break;
28876 if (TARGET_IAMCU)
28877 align = iamcu_alignment (type, align);
28879 if (opt
28880 && AGGREGATE_TYPE_P (type)
28881 && TYPE_SIZE (type)
28882 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
28884 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
28885 && align < max_align_compat)
28886 align = max_align_compat;
28887 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
28888 && align < max_align)
28889 align = max_align;
28892 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
28893 to a 16-byte boundary. */
28894 if (TARGET_64BIT)
28896 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
28897 && TYPE_SIZE (type)
28898 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28899 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
28900 && align < 128)
28901 return 128;
28904 if (!opt)
28905 return align;
28907 if (TREE_CODE (type) == ARRAY_TYPE)
28909 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28910 return 64;
28911 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28912 return 128;
28914 else if (TREE_CODE (type) == COMPLEX_TYPE)
28917 if (TYPE_MODE (type) == DCmode && align < 64)
28918 return 64;
28919 if ((TYPE_MODE (type) == XCmode
28920 || TYPE_MODE (type) == TCmode) && align < 128)
28921 return 128;
28923 else if ((TREE_CODE (type) == RECORD_TYPE
28924 || TREE_CODE (type) == UNION_TYPE
28925 || TREE_CODE (type) == QUAL_UNION_TYPE)
28926 && TYPE_FIELDS (type))
28928 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28929 return 64;
28930 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28931 return 128;
28933 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28934 || TREE_CODE (type) == INTEGER_TYPE)
28936 if (TYPE_MODE (type) == DFmode && align < 64)
28937 return 64;
28938 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28939 return 128;
28942 return align;
28945 /* Compute the alignment for a local variable or a stack slot. EXP is
28946 the data type or decl itself, MODE is the widest mode available and
28947 ALIGN is the alignment that the object would ordinarily have. The
28948 value of this macro is used instead of that alignment to align the
28949 object. */
28951 unsigned int
28952 ix86_local_alignment (tree exp, machine_mode mode,
28953 unsigned int align)
28955 tree type, decl;
28957 if (exp && DECL_P (exp))
28959 type = TREE_TYPE (exp);
28960 decl = exp;
28962 else
28964 type = exp;
28965 decl = NULL;
28968 /* Don't do dynamic stack realignment for long long objects with
28969 -mpreferred-stack-boundary=2. */
28970 if (!TARGET_64BIT
28971 && align == 64
28972 && ix86_preferred_stack_boundary < 64
28973 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
28974 && (!type || !TYPE_USER_ALIGN (type))
28975 && (!decl || !DECL_USER_ALIGN (decl)))
28976 align = 32;
28978 /* If TYPE is NULL, we are allocating a stack slot for caller-save
28979 register in MODE. We will return the largest alignment of XF
28980 and DF. */
28981 if (!type)
28983 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
28984 align = GET_MODE_ALIGNMENT (DFmode);
28985 return align;
28988 /* Don't increase alignment for Intel MCU psABI. */
28989 if (TARGET_IAMCU)
28990 return align;
28992 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
28993 to a 16-byte boundary. The exact wording is:
28995 An array uses the same alignment as its elements, except that a local or
28996 global array variable of length at least 16 bytes or
28997 a C99 variable-length array variable always has alignment of at least 16 bytes.
28999 This was added to allow the use of aligned SSE instructions on arrays. This
29000 rule is meant for static storage (where the compiler cannot do the analysis
29001 by itself). We follow it for automatic variables only when convenient.
29002 We fully control everything in the function being compiled, and functions
29003 from other units cannot rely on the alignment.
29005 Exclude the va_list type. It is the common case of a local array where
29006 we cannot benefit from the alignment.
29008 TODO: Probably one should optimize for size only when the variable does not escape. */
29009 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29010 && TARGET_SSE)
29012 if (AGGREGATE_TYPE_P (type)
29013 && (va_list_type_node == NULL_TREE
29014 || (TYPE_MAIN_VARIANT (type)
29015 != TYPE_MAIN_VARIANT (va_list_type_node)))
29016 && TYPE_SIZE (type)
29017 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29018 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
29019 && align < 128)
29020 return 128;
29022 if (TREE_CODE (type) == ARRAY_TYPE)
29024 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29025 return 64;
29026 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29027 return 128;
29029 else if (TREE_CODE (type) == COMPLEX_TYPE)
29031 if (TYPE_MODE (type) == DCmode && align < 64)
29032 return 64;
29033 if ((TYPE_MODE (type) == XCmode
29034 || TYPE_MODE (type) == TCmode) && align < 128)
29035 return 128;
29037 else if ((TREE_CODE (type) == RECORD_TYPE
29038 || TREE_CODE (type) == UNION_TYPE
29039 || TREE_CODE (type) == QUAL_UNION_TYPE)
29040 && TYPE_FIELDS (type))
29042 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29043 return 64;
29044 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29045 return 128;
29047 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29048 || TREE_CODE (type) == INTEGER_TYPE)
29051 if (TYPE_MODE (type) == DFmode && align < 64)
29052 return 64;
29053 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29054 return 128;
29056 return align;
29059 /* Compute the minimum required alignment for dynamic stack realignment
29060 purposes for a local variable, parameter or a stack slot. EXP is
29061 the data type or decl itself, MODE is its mode and ALIGN is the
29062 alignment that the object would ordinarily have. */
29064 unsigned int
29065 ix86_minimum_alignment (tree exp, machine_mode mode,
29066 unsigned int align)
29068 tree type, decl;
29070 if (exp && DECL_P (exp))
29072 type = TREE_TYPE (exp);
29073 decl = exp;
29075 else
29077 type = exp;
29078 decl = NULL;
29081 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29082 return align;
29084 /* Don't do dynamic stack realignment for long long objects with
29085 -mpreferred-stack-boundary=2. */
29086 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29087 && (!type || !TYPE_USER_ALIGN (type))
29088 && (!decl || !DECL_USER_ALIGN (decl)))
29090 gcc_checking_assert (!TARGET_STV);
29091 return 32;
29094 return align;
29097 /* Find a location for the static chain incoming to a nested function.
29098 This is a register, unless all free registers are used by arguments. */
29100 static rtx
29101 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29103 unsigned regno;
29105 /* While this function won't be called by the middle-end when a static
29106 chain isn't needed, it's also used throughout the backend so it's
29107 easiest to keep this check centralized. */
29108 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29109 return NULL;
29111 if (TARGET_64BIT)
29113 /* We always use R10 in 64-bit mode. */
29114 regno = R10_REG;
29116 else
29118 const_tree fntype, fndecl;
29119 unsigned int ccvt;
29121 /* By default in 32-bit mode we use ECX to pass the static chain. */
29122 regno = CX_REG;
29124 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29126 fntype = TREE_TYPE (fndecl_or_type);
29127 fndecl = fndecl_or_type;
29129 else
29131 fntype = fndecl_or_type;
29132 fndecl = NULL;
29135 ccvt = ix86_get_callcvt (fntype);
29136 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29138 /* Fastcall functions use ecx/edx for arguments, which leaves
29139 us with EAX for the static chain.
29140 Thiscall functions use ecx for arguments, which also
29141 leaves us with EAX for the static chain. */
29142 regno = AX_REG;
29144 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29146 /* Thiscall functions use ecx for arguments, which leaves
29147 us with EAX and EDX for the static chain.
29148 For ABI compatibility we use EAX. */
29149 regno = AX_REG;
29151 else if (ix86_function_regparm (fntype, fndecl) == 3)
29153 /* For regparm 3, we have no free call-clobbered registers in
29154 which to store the static chain. In order to implement this,
29155 we have the trampoline push the static chain to the stack.
29156 However, we can't push a value below the return address when
29157 we call the nested function directly, so we have to use an
29158 alternate entry point. For this we use ESI, and have the
29159 alternate entry point push ESI, so that things appear the
29160 same once we're executing the nested function. */
29161 if (incoming_p)
29163 if (fndecl == current_function_decl
29164 && !ix86_static_chain_on_stack)
29166 gcc_assert (!reload_completed);
29167 ix86_static_chain_on_stack = true;
29169 return gen_frame_mem (SImode,
29170 plus_constant (Pmode,
29171 arg_pointer_rtx, -8));
29173 regno = SI_REG;
29177 return gen_rtx_REG (Pmode, regno);
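/* A worked summary of the choices above, for illustration: in 64-bit
   code the static chain of a nested function always travels in R10; in
   32-bit code it is ECX by default and EAX for fastcall/thiscall, while
   for regparm(3) functions the trampoline pushes the chain and the
   alternate entry point pushes ESI, so the incoming value is read from
   the frame slot at arg_pointer - 8.  */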
29180 /* Emit RTL insns to initialize the variable parts of a trampoline.
29181 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29182 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29183 to be passed to the target function. */
29185 static void
29186 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29188 rtx mem, fnaddr;
29189 int opcode;
29190 int offset = 0;
29192 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29194 if (TARGET_64BIT)
29196 int size;
29198 /* Load the function address to r11. Try to load address using
29199 the shorter movl instead of movabs. We may want to support
29200 movq for kernel mode, but the kernel does not use trampolines at
29201 the moment. FNADDR is a 32-bit address and may not be in
29202 DImode when ptr_mode == SImode. Always use movl in this
29203 case. */
29204 if (ptr_mode == SImode
29205 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29207 fnaddr = copy_addr_to_reg (fnaddr);
29209 mem = adjust_address (m_tramp, HImode, offset);
29210 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29212 mem = adjust_address (m_tramp, SImode, offset + 2);
29213 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29214 offset += 6;
29216 else
29218 mem = adjust_address (m_tramp, HImode, offset);
29219 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29221 mem = adjust_address (m_tramp, DImode, offset + 2);
29222 emit_move_insn (mem, fnaddr);
29223 offset += 10;
29226 /* Load static chain using movabs to r10. Use the shorter movl
29227 instead of movabs when ptr_mode == SImode. */
29228 if (ptr_mode == SImode)
29230 opcode = 0xba41;
29231 size = 6;
29233 else
29235 opcode = 0xba49;
29236 size = 10;
29239 mem = adjust_address (m_tramp, HImode, offset);
29240 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29242 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29243 emit_move_insn (mem, chain_value);
29244 offset += size;
29246 /* Jump to r11; the last (unused) byte is a nop, only there to
29247 pad the write out to a single 32-bit store. */
29248 mem = adjust_address (m_tramp, SImode, offset);
29249 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
29250 offset += 4;
29252 else
29254 rtx disp, chain;
29256 /* Depending on the static chain location, either load a register
29257 with a constant, or push the constant to the stack. All of the
29258 instructions are the same size. */
29259 chain = ix86_static_chain (fndecl, true);
29260 if (REG_P (chain))
29262 switch (REGNO (chain))
29264 case AX_REG:
29265 opcode = 0xb8; break;
29266 case CX_REG:
29267 opcode = 0xb9; break;
29268 default:
29269 gcc_unreachable ();
29272 else
29273 opcode = 0x68;
29275 mem = adjust_address (m_tramp, QImode, offset);
29276 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29278 mem = adjust_address (m_tramp, SImode, offset + 1);
29279 emit_move_insn (mem, chain_value);
29280 offset += 5;
29282 mem = adjust_address (m_tramp, QImode, offset);
29283 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29285 mem = adjust_address (m_tramp, SImode, offset + 1);
29287 /* Compute offset from the end of the jmp to the target function.
29288 In the case in which the trampoline stores the static chain on
29289 the stack, we need to skip the first insn which pushes the
29290 (call-saved) register static chain; this push is 1 byte. */
29291 offset += 5;
29292 disp = expand_binop (SImode, sub_optab, fnaddr,
29293 plus_constant (Pmode, XEXP (m_tramp, 0),
29294 offset - (MEM_P (chain) ? 1 : 0)),
29295 NULL_RTX, 1, OPTAB_DIRECT);
29296 emit_move_insn (mem, disp);
29299 gcc_assert (offset <= TRAMPOLINE_SIZE);
29301 #ifdef HAVE_ENABLE_EXECUTE_STACK
29302 #ifdef CHECK_EXECUTE_STACK_ENABLED
29303 if (CHECK_EXECUTE_STACK_ENABLED)
29304 #endif
29305 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29306 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
29307 #endif
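/* For reference, a sketch of the byte sequences emitted above.  A
   64-bit trampoline with a full-width target address is

       49 bb <imm64>    movabs $fnaddr, %r11
       49 ba <imm64>    movabs $chain,  %r10
       49 ff e3 90      rex.WB jmp *%r11 ; nop pads to a 32-bit store

   (41 bb / 41 ba with 32-bit immediates when the shorter movl form is
   usable).  A 32-bit trampoline is a mov into EAX/ECX, or a push when
   the chain lives on the stack, followed by an e9 rel32 jump.  */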
29310 static bool
29311 ix86_allocate_stack_slots_for_args (void)
29313 /* Naked functions should not allocate stack slots for arguments. */
29314 return !ix86_function_naked (current_function_decl);
29317 static bool
29318 ix86_warn_func_return (tree decl)
29320 /* Naked functions are implemented entirely in assembly, including the
29321 return sequence, so suppress warnings about this. */
29322 return !ix86_function_naked (decl);
29325 /* The following file contains several enumerations and data structures
29326 built from the definitions in i386-builtin-types.def. */
29328 #include "i386-builtin-types.inc"
29330 /* Table for the ix86 builtin non-function types. */
29331 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29333 /* Retrieve an element from the above table, building some of
29334 the types lazily. */
29336 static tree
29337 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29339 unsigned int index;
29340 tree type, itype;
29342 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29344 type = ix86_builtin_type_tab[(int) tcode];
29345 if (type != NULL)
29346 return type;
29348 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29349 if (tcode <= IX86_BT_LAST_VECT)
29351 machine_mode mode;
29353 index = tcode - IX86_BT_LAST_PRIM - 1;
29354 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29355 mode = ix86_builtin_type_vect_mode[index];
29357 type = build_vector_type_for_mode (itype, mode);
29359 else
29361 int quals;
29363 index = tcode - IX86_BT_LAST_VECT - 1;
29364 if (tcode <= IX86_BT_LAST_PTR)
29365 quals = TYPE_UNQUALIFIED;
29366 else
29367 quals = TYPE_QUAL_CONST;
29369 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29370 if (quals != TYPE_UNQUALIFIED)
29371 itype = build_qualified_type (itype, quals);
29373 type = build_pointer_type (itype);
29376 ix86_builtin_type_tab[(int) tcode] = type;
29377 return type;
29380 /* Table for the ix86 builtin function types. */
29381 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29383 /* Retrieve an element from the above table, building some of
29384 the types lazily. */
29386 static tree
29387 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29389 tree type;
29391 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29393 type = ix86_builtin_func_type_tab[(int) tcode];
29394 if (type != NULL)
29395 return type;
29397 if (tcode <= IX86_BT_LAST_FUNC)
29399 unsigned start = ix86_builtin_func_start[(int) tcode];
29400 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29401 tree rtype, atype, args = void_list_node;
29402 unsigned i;
29404 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29405 for (i = after - 1; i > start; --i)
29407 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29408 args = tree_cons (NULL, atype, args);
29411 type = build_function_type (rtype, args);
29413 else
29415 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29416 enum ix86_builtin_func_type icode;
29418 icode = ix86_builtin_func_alias_base[index];
29419 type = ix86_get_builtin_func_type (icode);
29422 ix86_builtin_func_type_tab[(int) tcode] = type;
29423 return type;
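/* A small example of the lazy construction above: the first builtin
   registered with V2DI_FTYPE_V2DI_V2DI (e.g. the AES builtins further
   down) makes this routine look up the recorded return/argument codes
   for that signature, build a function type taking two V2DI vectors and
   returning one, and cache it in ix86_builtin_func_type_tab; later
   builtins with the same signature reuse the cached tree.  */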
29427 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
29428 bdesc_* arrays below should come first, then builtins for each bdesc_*
29429 array in ascending order, so that we can use direct array accesses. */
29430 enum ix86_builtins
29432 IX86_BUILTIN_MASKMOVQ,
29433 IX86_BUILTIN_LDMXCSR,
29434 IX86_BUILTIN_STMXCSR,
29435 IX86_BUILTIN_MASKMOVDQU,
29436 IX86_BUILTIN_PSLLDQ128,
29437 IX86_BUILTIN_CLFLUSH,
29438 IX86_BUILTIN_MONITOR,
29439 IX86_BUILTIN_MWAIT,
29440 IX86_BUILTIN_CLZERO,
29441 IX86_BUILTIN_VEC_INIT_V2SI,
29442 IX86_BUILTIN_VEC_INIT_V4HI,
29443 IX86_BUILTIN_VEC_INIT_V8QI,
29444 IX86_BUILTIN_VEC_EXT_V2DF,
29445 IX86_BUILTIN_VEC_EXT_V2DI,
29446 IX86_BUILTIN_VEC_EXT_V4SF,
29447 IX86_BUILTIN_VEC_EXT_V4SI,
29448 IX86_BUILTIN_VEC_EXT_V8HI,
29449 IX86_BUILTIN_VEC_EXT_V2SI,
29450 IX86_BUILTIN_VEC_EXT_V4HI,
29451 IX86_BUILTIN_VEC_EXT_V16QI,
29452 IX86_BUILTIN_VEC_SET_V2DI,
29453 IX86_BUILTIN_VEC_SET_V4SF,
29454 IX86_BUILTIN_VEC_SET_V4SI,
29455 IX86_BUILTIN_VEC_SET_V8HI,
29456 IX86_BUILTIN_VEC_SET_V4HI,
29457 IX86_BUILTIN_VEC_SET_V16QI,
29458 IX86_BUILTIN_GATHERSIV2DF,
29459 IX86_BUILTIN_GATHERSIV4DF,
29460 IX86_BUILTIN_GATHERDIV2DF,
29461 IX86_BUILTIN_GATHERDIV4DF,
29462 IX86_BUILTIN_GATHERSIV4SF,
29463 IX86_BUILTIN_GATHERSIV8SF,
29464 IX86_BUILTIN_GATHERDIV4SF,
29465 IX86_BUILTIN_GATHERDIV8SF,
29466 IX86_BUILTIN_GATHERSIV2DI,
29467 IX86_BUILTIN_GATHERSIV4DI,
29468 IX86_BUILTIN_GATHERDIV2DI,
29469 IX86_BUILTIN_GATHERDIV4DI,
29470 IX86_BUILTIN_GATHERSIV4SI,
29471 IX86_BUILTIN_GATHERSIV8SI,
29472 IX86_BUILTIN_GATHERDIV4SI,
29473 IX86_BUILTIN_GATHERDIV8SI,
29474 IX86_BUILTIN_VFMSUBSD3_MASK3,
29475 IX86_BUILTIN_VFMSUBSS3_MASK3,
29476 IX86_BUILTIN_GATHER3SIV8SF,
29477 IX86_BUILTIN_GATHER3SIV4SF,
29478 IX86_BUILTIN_GATHER3SIV4DF,
29479 IX86_BUILTIN_GATHER3SIV2DF,
29480 IX86_BUILTIN_GATHER3DIV8SF,
29481 IX86_BUILTIN_GATHER3DIV4SF,
29482 IX86_BUILTIN_GATHER3DIV4DF,
29483 IX86_BUILTIN_GATHER3DIV2DF,
29484 IX86_BUILTIN_GATHER3SIV8SI,
29485 IX86_BUILTIN_GATHER3SIV4SI,
29486 IX86_BUILTIN_GATHER3SIV4DI,
29487 IX86_BUILTIN_GATHER3SIV2DI,
29488 IX86_BUILTIN_GATHER3DIV8SI,
29489 IX86_BUILTIN_GATHER3DIV4SI,
29490 IX86_BUILTIN_GATHER3DIV4DI,
29491 IX86_BUILTIN_GATHER3DIV2DI,
29492 IX86_BUILTIN_SCATTERSIV8SF,
29493 IX86_BUILTIN_SCATTERSIV4SF,
29494 IX86_BUILTIN_SCATTERSIV4DF,
29495 IX86_BUILTIN_SCATTERSIV2DF,
29496 IX86_BUILTIN_SCATTERDIV8SF,
29497 IX86_BUILTIN_SCATTERDIV4SF,
29498 IX86_BUILTIN_SCATTERDIV4DF,
29499 IX86_BUILTIN_SCATTERDIV2DF,
29500 IX86_BUILTIN_SCATTERSIV8SI,
29501 IX86_BUILTIN_SCATTERSIV4SI,
29502 IX86_BUILTIN_SCATTERSIV4DI,
29503 IX86_BUILTIN_SCATTERSIV2DI,
29504 IX86_BUILTIN_SCATTERDIV8SI,
29505 IX86_BUILTIN_SCATTERDIV4SI,
29506 IX86_BUILTIN_SCATTERDIV4DI,
29507 IX86_BUILTIN_SCATTERDIV2DI,
29508 /* Alternate 4 and 8 element gather/scatter for the vectorizer
29509 where all operands are 32-byte or 64-byte wide respectively. */
29510 IX86_BUILTIN_GATHERALTSIV4DF,
29511 IX86_BUILTIN_GATHERALTDIV8SF,
29512 IX86_BUILTIN_GATHERALTSIV4DI,
29513 IX86_BUILTIN_GATHERALTDIV8SI,
29514 IX86_BUILTIN_GATHER3ALTDIV16SF,
29515 IX86_BUILTIN_GATHER3ALTDIV16SI,
29516 IX86_BUILTIN_GATHER3ALTSIV4DF,
29517 IX86_BUILTIN_GATHER3ALTDIV8SF,
29518 IX86_BUILTIN_GATHER3ALTSIV4DI,
29519 IX86_BUILTIN_GATHER3ALTDIV8SI,
29520 IX86_BUILTIN_GATHER3ALTSIV8DF,
29521 IX86_BUILTIN_GATHER3ALTSIV8DI,
29522 IX86_BUILTIN_GATHER3DIV16SF,
29523 IX86_BUILTIN_GATHER3DIV16SI,
29524 IX86_BUILTIN_GATHER3DIV8DF,
29525 IX86_BUILTIN_GATHER3DIV8DI,
29526 IX86_BUILTIN_GATHER3SIV16SF,
29527 IX86_BUILTIN_GATHER3SIV16SI,
29528 IX86_BUILTIN_GATHER3SIV8DF,
29529 IX86_BUILTIN_GATHER3SIV8DI,
29530 IX86_BUILTIN_SCATTERALTSIV8DF,
29531 IX86_BUILTIN_SCATTERALTDIV16SF,
29532 IX86_BUILTIN_SCATTERALTSIV8DI,
29533 IX86_BUILTIN_SCATTERALTDIV16SI,
29534 IX86_BUILTIN_SCATTERDIV16SF,
29535 IX86_BUILTIN_SCATTERDIV16SI,
29536 IX86_BUILTIN_SCATTERDIV8DF,
29537 IX86_BUILTIN_SCATTERDIV8DI,
29538 IX86_BUILTIN_SCATTERSIV16SF,
29539 IX86_BUILTIN_SCATTERSIV16SI,
29540 IX86_BUILTIN_SCATTERSIV8DF,
29541 IX86_BUILTIN_SCATTERSIV8DI,
29542 IX86_BUILTIN_GATHERPFQPD,
29543 IX86_BUILTIN_GATHERPFDPS,
29544 IX86_BUILTIN_GATHERPFDPD,
29545 IX86_BUILTIN_GATHERPFQPS,
29546 IX86_BUILTIN_SCATTERPFDPD,
29547 IX86_BUILTIN_SCATTERPFDPS,
29548 IX86_BUILTIN_SCATTERPFQPD,
29549 IX86_BUILTIN_SCATTERPFQPS,
29550 IX86_BUILTIN_CLWB,
29551 IX86_BUILTIN_CLFLUSHOPT,
29552 IX86_BUILTIN_INFQ,
29553 IX86_BUILTIN_HUGE_VALQ,
29554 IX86_BUILTIN_NANQ,
29555 IX86_BUILTIN_NANSQ,
29556 IX86_BUILTIN_XABORT,
29557 IX86_BUILTIN_ADDCARRYX32,
29558 IX86_BUILTIN_ADDCARRYX64,
29559 IX86_BUILTIN_SBB32,
29560 IX86_BUILTIN_SBB64,
29561 IX86_BUILTIN_RDRAND16_STEP,
29562 IX86_BUILTIN_RDRAND32_STEP,
29563 IX86_BUILTIN_RDRAND64_STEP,
29564 IX86_BUILTIN_RDSEED16_STEP,
29565 IX86_BUILTIN_RDSEED32_STEP,
29566 IX86_BUILTIN_RDSEED64_STEP,
29567 IX86_BUILTIN_MONITORX,
29568 IX86_BUILTIN_MWAITX,
29569 IX86_BUILTIN_CFSTRING,
29570 IX86_BUILTIN_CPU_INIT,
29571 IX86_BUILTIN_CPU_IS,
29572 IX86_BUILTIN_CPU_SUPPORTS,
29573 IX86_BUILTIN_READ_FLAGS,
29574 IX86_BUILTIN_WRITE_FLAGS,
29576 /* All the remaining builtins are tracked in bdesc_* arrays in
29577 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
29578 this point. */
29579 #define BDESC(mask, icode, name, code, comparison, flag) \
29580 code,
29581 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29582 code, \
29583 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
29584 #define BDESC_END(kind, next_kind)
29586 #include "i386-builtin.def"
29588 #undef BDESC
29589 #undef BDESC_FIRST
29590 #undef BDESC_END
29592 IX86_BUILTIN_MAX,
29594 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
29596 /* Now just the aliases for bdesc_* start/end. */
29597 #define BDESC(mask, icode, name, code, comparison, flag)
29598 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
29599 #define BDESC_END(kind, next_kind) \
29600 IX86_BUILTIN__BDESC_##kind##_LAST \
29601 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
29603 #include "i386-builtin.def"
29605 #undef BDESC
29606 #undef BDESC_FIRST
29607 #undef BDESC_END
29609 /* Just to make sure there is no comma after the last enumerator. */
29610 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
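/* A rough sketch of how one i386-builtin.def entry lands in the enum
   above (IX86_BUILTIN_FOO stands in for a real code): on the first
   include pass

       BDESC_FIRST (comi, COMI, mask, icode, name, IX86_BUILTIN_FOO, ...)

   expands to

       IX86_BUILTIN_FOO,
       IX86_BUILTIN__BDESC_COMI_FIRST = IX86_BUILTIN_FOO,

   and on the second pass BDESC_END (COMI, PCMPESTR) expands to

       IX86_BUILTIN__BDESC_COMI_LAST
	 = IX86_BUILTIN__BDESC_PCMPESTR_FIRST - 1,

   which is what the BDESC_VERIFYS checks further down rely on.  */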
29613 /* Table for the ix86 builtin decls. */
29614 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29616 /* Table of all of the builtin functions that are possible with different ISAs
29617 but are waiting to be built until a function is declared to use that
29618 ISA. */
29619 struct builtin_isa {
29620 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29621 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
29622 const char *name; /* function name */
29623 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29624 unsigned char const_p:1; /* true if the declaration is constant */
29625 unsigned char pure_p:1; /* true if the declaration has pure attribute */
29626 bool leaf_p; /* true if the declaration has leaf attribute */
29627 bool nothrow_p; /* true if the declaration has nothrow attribute */
29628 bool set_and_not_built_p;
29631 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29633 /* Bits that can still enable inclusion of some builtin. */
29634 static HOST_WIDE_INT deferred_isa_values = 0;
29635 static HOST_WIDE_INT deferred_isa_values2 = 0;
29637 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
29638 of which isa_flags to use in the ix86_builtins_isa array. Stores the
29639 function decl in the ix86_builtins array. Returns the function decl or
29640 NULL_TREE if the builtin was not added.
29642 If the front end has a special hook for builtin functions, delay adding
29643 builtin functions that aren't in the current ISA until the ISA is changed
29644 with function specific optimization. Doing so can save about 300K for the
29645 default compiler. When the builtin is expanded, check at that time whether
29646 it is valid.
29648 If the front end doesn't have a special hook, record all builtins, even if
29649 they aren't in the current ISA, in case the user uses
29650 function specific options for a different ISA, so that we don't get scope
29651 errors if a builtin is added in the middle of a function scope. */
29653 static inline tree
29654 def_builtin (HOST_WIDE_INT mask, const char *name,
29655 enum ix86_builtin_func_type tcode,
29656 enum ix86_builtins code)
29658 tree decl = NULL_TREE;
29660 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29662 ix86_builtins_isa[(int) code].isa = mask;
29664 /* OPTION_MASK_ISA_AVX512VL has a special meaning. Unlike the generic case,
29665 where any set bit means the built-in is enabled, this bit must be *and-ed*
29666 with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
29667 means that *both* cpuid bits must be set for the built-in to be available.
29668 Handle this here. */
29669 if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
29670 mask &= ~OPTION_MASK_ISA_AVX512VL;
29672 mask &= ~OPTION_MASK_ISA_64BIT;
29673 if (mask == 0
29674 || (mask & ix86_isa_flags) != 0
29675 || (lang_hooks.builtin_function
29676 == lang_hooks.builtin_function_ext_scope))
29679 tree type = ix86_get_builtin_func_type (tcode);
29680 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29681 NULL, NULL_TREE);
29682 ix86_builtins[(int) code] = decl;
29683 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29685 else
29687 /* Remember MASK in deferred_isa_values; enabling any of these ISA bits
29688 later can still add this builtin. */
29689 deferred_isa_values |= mask;
29690 ix86_builtins[(int) code] = NULL_TREE;
29691 ix86_builtins_isa[(int) code].tcode = tcode;
29692 ix86_builtins_isa[(int) code].name = name;
29693 ix86_builtins_isa[(int) code].leaf_p = false;
29694 ix86_builtins_isa[(int) code].nothrow_p = false;
29695 ix86_builtins_isa[(int) code].const_p = false;
29696 ix86_builtins_isa[(int) code].pure_p = false;
29697 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29701 return decl;
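/* A concrete use of the helper above is the SSE2 registration further
   down:

       def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
		    VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);

   With -msse2, or when the front end's builtin_function hook is the
   external-scope variant, the decl is built immediately; otherwise the
   mask is parked in deferred_isa_values and the decl is created later
   by ix86_add_new_builtins.  */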
29704 /* Like def_builtin, but also marks the function decl "const". */
29706 static inline tree
29707 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29708 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29710 tree decl = def_builtin (mask, name, tcode, code);
29711 if (decl)
29712 TREE_READONLY (decl) = 1;
29713 else
29714 ix86_builtins_isa[(int) code].const_p = true;
29716 return decl;
29719 /* Like def_builtin, but also marks the function decl "pure". */
29721 static inline tree
29722 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
29723 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29725 tree decl = def_builtin (mask, name, tcode, code);
29726 if (decl)
29727 DECL_PURE_P (decl) = 1;
29728 else
29729 ix86_builtins_isa[(int) code].pure_p = true;
29731 return decl;
29734 /* Like def_builtin, but for additional isa2 flags. */
29736 static inline tree
29737 def_builtin2 (HOST_WIDE_INT mask, const char *name,
29738 enum ix86_builtin_func_type tcode,
29739 enum ix86_builtins code)
29741 tree decl = NULL_TREE;
29743 ix86_builtins_isa[(int) code].isa2 = mask;
29745 if (mask == 0
29746 || (mask & ix86_isa_flags2) != 0
29747 || (lang_hooks.builtin_function
29748 == lang_hooks.builtin_function_ext_scope))
29751 tree type = ix86_get_builtin_func_type (tcode);
29752 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29753 NULL, NULL_TREE);
29754 ix86_builtins[(int) code] = decl;
29755 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29757 else
29759 /* Remember MASK in deferred_isa_values2; enabling any of these ISA bits
29760 later can still add this builtin. */
29761 deferred_isa_values2 |= mask;
29762 ix86_builtins[(int) code] = NULL_TREE;
29763 ix86_builtins_isa[(int) code].tcode = tcode;
29764 ix86_builtins_isa[(int) code].name = name;
29765 ix86_builtins_isa[(int) code].leaf_p = false;
29766 ix86_builtins_isa[(int) code].nothrow_p = false;
29767 ix86_builtins_isa[(int) code].const_p = false;
29768 ix86_builtins_isa[(int) code].pure_p = false;
29769 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29772 return decl;
29775 /* Like def_builtin, but also marks the function decl "const". */
29777 static inline tree
29778 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
29779 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29781 tree decl = def_builtin2 (mask, name, tcode, code);
29782 if (decl)
29783 TREE_READONLY (decl) = 1;
29784 else
29785 ix86_builtins_isa[(int) code].const_p = true;
29787 return decl;
29790 /* Like def_builtin, but also marks the function decl "pure". */
29792 static inline tree
29793 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
29794 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29796 tree decl = def_builtin2 (mask, name, tcode, code);
29797 if (decl)
29798 DECL_PURE_P (decl) = 1;
29799 else
29800 ix86_builtins_isa[(int) code].pure_p = true;
29802 return decl;
29805 /* Add any new builtin functions for a given ISA that may not have been
29806 declared. This saves a bit of space compared to adding all of the
29807 declarations to the tree, even if we didn't use them. */
29809 static void
29810 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
29812 if ((isa & deferred_isa_values) == 0
29813 && (isa2 & deferred_isa_values2) == 0)
29814 return;
29816 /* Bits in ISA value can be removed from potential isa values. */
29817 deferred_isa_values &= ~isa;
29818 deferred_isa_values2 &= ~isa2;
29820 int i;
29821 tree saved_current_target_pragma = current_target_pragma;
29822 current_target_pragma = NULL_TREE;
29824 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29826 if (((ix86_builtins_isa[i].isa & isa) != 0
29827 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
29828 && ix86_builtins_isa[i].set_and_not_built_p)
29830 tree decl, type;
29832 /* Don't define the builtin again. */
29833 ix86_builtins_isa[i].set_and_not_built_p = false;
29835 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29836 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29837 type, i, BUILT_IN_MD, NULL,
29838 NULL_TREE);
29840 ix86_builtins[i] = decl;
29841 if (ix86_builtins_isa[i].const_p)
29842 TREE_READONLY (decl) = 1;
29843 if (ix86_builtins_isa[i].pure_p)
29844 DECL_PURE_P (decl) = 1;
29845 if (ix86_builtins_isa[i].leaf_p)
29846 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
29847 NULL_TREE);
29848 if (ix86_builtins_isa[i].nothrow_p)
29849 TREE_NOTHROW (decl) = 1;
29853 current_target_pragma = saved_current_target_pragma;
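/* Illustrative scenario for the function above (a sketch, not an exact
   trace): with the default ISA the AVX512F gathers such as
   __builtin_ia32_gathersiv8df are merely recorded as deferred; once a
   later

       #pragma GCC target ("avx512f")

   (or an equivalent target attribute) turns the AVX512F bit on, this
   function builds their decls at external scope with the recorded type
   and const/pure/leaf/nothrow bits.  */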
29856 /* Bits for builtin_description.flag. */
29858 /* Set when we don't support the comparison natively, and should
29859 swap_comparison in order to support it. */
29860 #define BUILTIN_DESC_SWAP_OPERANDS 1
29862 struct builtin_description
29864 const HOST_WIDE_INT mask;
29865 const enum insn_code icode;
29866 const char *const name;
29867 const enum ix86_builtins code;
29868 const enum rtx_code comparison;
29869 const int flag;
29872 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
29873 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
29874 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
29875 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
29876 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
29877 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
29878 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
29879 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
29880 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
29881 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
29882 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
29883 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
29884 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
29885 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
29886 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
29887 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
29888 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
29889 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
29890 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
29891 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
29892 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
29893 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
29894 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
29895 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
29896 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
29897 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
29898 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
29899 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
29900 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
29901 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
29902 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
29903 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
29904 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
29905 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
29906 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
29907 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
29908 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
29909 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
29910 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
29911 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
29912 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
29913 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
29914 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
29915 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
29916 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
29917 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
29918 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
29919 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
29920 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
29921 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
29922 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
29923 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
29925 #define BDESC(mask, icode, name, code, comparison, flag) \
29926 { mask, icode, name, code, comparison, flag },
29927 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29928 static const struct builtin_description bdesc_##kind[] = \
29930 BDESC (mask, icode, name, code, comparison, flag)
29931 #define BDESC_END(kind, next_kind) \
29934 #include "i386-builtin.def"
29936 #undef BDESC
29937 #undef BDESC_FIRST
29938 #undef BDESC_END
29940 /* TM vector builtins. */
29942 /* Reuse the existing x86-specific `struct builtin_description' because
29943 we're lazy. Add casts to make them fit. */
29944 static const struct builtin_description bdesc_tm[] =
29946 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29947 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29948 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29949 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29950 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29951 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29952 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29954 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29955 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29956 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29957 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29958 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29959 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29960 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29962 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29963 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29964 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29965 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29966 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29967 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29968 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29970 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
29971 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
29972 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
29975 /* Initialize the transactional memory vector load/store builtins. */
29977 static void
29978 ix86_init_tm_builtins (void)
29980 enum ix86_builtin_func_type ftype;
29981 const struct builtin_description *d;
29982 size_t i;
29983 tree decl;
29984 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
29985 tree attrs_log, attrs_type_log;
29987 if (!flag_tm)
29988 return;
29990 /* If there are no builtins defined, we must be compiling in a
29991 language without trans-mem support. */
29992 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
29993 return;
29995 /* Use whatever attributes a normal TM load has. */
29996 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
29997 attrs_load = DECL_ATTRIBUTES (decl);
29998 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29999 /* Use whatever attributes a normal TM store has. */
30000 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30001 attrs_store = DECL_ATTRIBUTES (decl);
30002 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30003 /* Use whatever attributes a normal TM log has. */
30004 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30005 attrs_log = DECL_ATTRIBUTES (decl);
30006 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30008 for (i = 0, d = bdesc_tm;
30009 i < ARRAY_SIZE (bdesc_tm);
30010 i++, d++)
30012 if ((d->mask & ix86_isa_flags) != 0
30013 || (lang_hooks.builtin_function
30014 == lang_hooks.builtin_function_ext_scope))
30016 tree type, attrs, attrs_type;
30017 enum built_in_function code = (enum built_in_function) d->code;
30019 ftype = (enum ix86_builtin_func_type) d->flag;
30020 type = ix86_get_builtin_func_type (ftype);
30022 if (BUILTIN_TM_LOAD_P (code))
30024 attrs = attrs_load;
30025 attrs_type = attrs_type_load;
30027 else if (BUILTIN_TM_STORE_P (code))
30029 attrs = attrs_store;
30030 attrs_type = attrs_type_store;
30032 else
30034 attrs = attrs_log;
30035 attrs_type = attrs_type_log;
30037 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30038 /* The builtin without the prefix for
30039 calling it directly. */
30040 d->name + strlen ("__builtin_"),
30041 attrs);
30042 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30043 set the TYPE_ATTRIBUTES. */
30044 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30046 set_builtin_decl (code, decl, false);
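/* For example, the bdesc_tm entry "__builtin__ITM_WM128" above is
   registered so that d->name + strlen ("__builtin_") also exposes it
   under the plain name "_ITM_WM128" for direct calls, and, being a TM
   store, it inherits the decl and type attributes copied from
   BUILT_IN_TM_STORE_1.  */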
30051 /* Macros for verification of enum ix86_builtins order. */
30052 #define BDESC_VERIFY(x, y, z) \
30053 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
30054 #define BDESC_VERIFYS(x, y, z) \
30055 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
30057 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30058 IX86_BUILTIN__BDESC_COMI_LAST, 1);
30059 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30060 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
30061 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30062 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
30063 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
30064 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
30065 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30066 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
30067 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
30068 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
30069 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
30070 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
30071 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30072 IX86_BUILTIN__BDESC_MPX_LAST, 1);
30073 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30074 IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
30075 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
30076 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
30077 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30078 IX86_BUILTIN__BDESC_CET_LAST, 1);
30079 BDESC_VERIFYS (IX86_BUILTIN_MAX,
30080 IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
30082 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30083 in the current target ISA, to allow the user to compile particular modules
30084 with target specific options that differ from the command line
30085 options. */
30086 static void
30087 ix86_init_mmx_sse_builtins (void)
30089 const struct builtin_description * d;
30090 enum ix86_builtin_func_type ftype;
30091 size_t i;
30093 /* Add all special builtins with variable number of operands. */
30094 for (i = 0, d = bdesc_special_args;
30095 i < ARRAY_SIZE (bdesc_special_args);
30096 i++, d++)
30098 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
30099 if (d->name == 0)
30100 continue;
30102 ftype = (enum ix86_builtin_func_type) d->flag;
30103 def_builtin (d->mask, d->name, ftype, d->code);
30105 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
30106 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30107 ARRAY_SIZE (bdesc_special_args) - 1);
30109 /* Add all builtins with variable number of operands. */
30110 for (i = 0, d = bdesc_args;
30111 i < ARRAY_SIZE (bdesc_args);
30112 i++, d++)
30114 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
30115 if (d->name == 0)
30116 continue;
30118 ftype = (enum ix86_builtin_func_type) d->flag;
30119 def_builtin_const (d->mask, d->name, ftype, d->code);
30121 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
30122 IX86_BUILTIN__BDESC_ARGS_FIRST,
30123 ARRAY_SIZE (bdesc_args) - 1);
30125 /* Add all builtins with variable number of operands. */
30126 for (i = 0, d = bdesc_args2;
30127 i < ARRAY_SIZE (bdesc_args2);
30128 i++, d++)
30130 if (d->name == 0)
30131 continue;
30133 ftype = (enum ix86_builtin_func_type) d->flag;
30134 def_builtin_const2 (d->mask, d->name, ftype, d->code);
30137 /* Add all builtins with rounding. */
30138 for (i = 0, d = bdesc_round_args;
30139 i < ARRAY_SIZE (bdesc_round_args);
30140 i++, d++)
30142 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
30143 if (d->name == 0)
30144 continue;
30146 ftype = (enum ix86_builtin_func_type) d->flag;
30147 def_builtin_const (d->mask, d->name, ftype, d->code);
30149 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
30150 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30151 ARRAY_SIZE (bdesc_round_args) - 1);
30153 /* pcmpestr[im] insns. */
30154 for (i = 0, d = bdesc_pcmpestr;
30155 i < ARRAY_SIZE (bdesc_pcmpestr);
30156 i++, d++)
30158 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
30159 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30160 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30161 else
30162 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30163 def_builtin_const (d->mask, d->name, ftype, d->code);
30165 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
30166 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30167 ARRAY_SIZE (bdesc_pcmpestr) - 1);
30169 /* pcmpistr[im] insns. */
30170 for (i = 0, d = bdesc_pcmpistr;
30171 i < ARRAY_SIZE (bdesc_pcmpistr);
30172 i++, d++)
30174 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
30175 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30176 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30177 else
30178 ftype = INT_FTYPE_V16QI_V16QI_INT;
30179 def_builtin_const (d->mask, d->name, ftype, d->code);
30181 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
30182 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30183 ARRAY_SIZE (bdesc_pcmpistr) - 1);
30185 /* comi/ucomi insns. */
30186 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30188 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
30189 if (d->mask == OPTION_MASK_ISA_SSE2)
30190 ftype = INT_FTYPE_V2DF_V2DF;
30191 else
30192 ftype = INT_FTYPE_V4SF_V4SF;
30193 def_builtin_const (d->mask, d->name, ftype, d->code);
30195 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
30196 IX86_BUILTIN__BDESC_COMI_FIRST,
30197 ARRAY_SIZE (bdesc_comi) - 1);
30199 /* SSE */
30200 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30201 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30202 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30203 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30205 /* SSE or 3DNow!A */
30206 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30207 /* As it uses V4HImode, we have to require -mmmx too. */
30208 | OPTION_MASK_ISA_MMX,
30209 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30210 IX86_BUILTIN_MASKMOVQ);
30212 /* SSE2 */
30213 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30214 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30216 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30217 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30218 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30219 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30221 /* SSE3. */
30222 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30223 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30224 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30225 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30227 /* AES */
30228 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30229 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30230 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30231 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30232 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30233 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30234 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30235 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30236 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30237 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30238 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30239 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30241 /* PCLMUL */
30242 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30243 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30245 /* RDRND */
30246 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30247 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30248 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30249 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30250 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30251 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30252 IX86_BUILTIN_RDRAND64_STEP);
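/* Typical use of the RDRND step builtins just defined, assuming the
   usual <immintrin.h> wrapper (consume is just a placeholder):

       unsigned int r;
       if (_rdrand32_step (&r))     expands to __builtin_ia32_rdrand32_step
	 consume (r);

   The int result mirrors the carry flag RDRAND sets when valid random
   data was delivered.  */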
30254 /* AVX2 */
30255 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30256 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30257 IX86_BUILTIN_GATHERSIV2DF);
30259 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30260 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30261 IX86_BUILTIN_GATHERSIV4DF);
30263 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30264 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30265 IX86_BUILTIN_GATHERDIV2DF);
30267 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30268 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30269 IX86_BUILTIN_GATHERDIV4DF);
30271 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30272 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30273 IX86_BUILTIN_GATHERSIV4SF);
30275 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30276 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30277 IX86_BUILTIN_GATHERSIV8SF);
30279 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30280 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30281 IX86_BUILTIN_GATHERDIV4SF);
30283 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30284 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30285 IX86_BUILTIN_GATHERDIV8SF);
30287 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30288 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30289 IX86_BUILTIN_GATHERSIV2DI);
30291 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30292 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30293 IX86_BUILTIN_GATHERSIV4DI);
30295 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30296 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30297 IX86_BUILTIN_GATHERDIV2DI);
30299 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30300 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30301 IX86_BUILTIN_GATHERDIV4DI);
30303 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30304 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30305 IX86_BUILTIN_GATHERSIV4SI);
30307 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30308 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30309 IX86_BUILTIN_GATHERSIV8SI);
30311 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30312 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30313 IX86_BUILTIN_GATHERDIV4SI);
30315 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30316 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30317 IX86_BUILTIN_GATHERDIV8SI);
30319 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30320 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30321 IX86_BUILTIN_GATHERALTSIV4DF);
30323 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30324 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30325 IX86_BUILTIN_GATHERALTDIV8SF);
30327 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30328 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30329 IX86_BUILTIN_GATHERALTSIV4DI);
30331 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30332 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30333 IX86_BUILTIN_GATHERALTDIV8SI);
30335 /* AVX512F */
30336 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30337 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
30338 IX86_BUILTIN_GATHER3SIV16SF);
30340 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30341 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
30342 IX86_BUILTIN_GATHER3SIV8DF);
30344 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30345 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
30346 IX86_BUILTIN_GATHER3DIV16SF);
30348 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30349 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
30350 IX86_BUILTIN_GATHER3DIV8DF);
30352 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30353 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
30354 IX86_BUILTIN_GATHER3SIV16SI);
30356 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30357 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
30358 IX86_BUILTIN_GATHER3SIV8DI);
30360 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30361 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
30362 IX86_BUILTIN_GATHER3DIV16SI);
30364 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30365 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
30366 IX86_BUILTIN_GATHER3DIV8DI);
30368 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30369 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30370 IX86_BUILTIN_GATHER3ALTSIV8DF);
30372 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30373 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30374 IX86_BUILTIN_GATHER3ALTDIV16SF);
30376 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30377 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30378 IX86_BUILTIN_GATHER3ALTSIV8DI);
30380 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30381 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30382 IX86_BUILTIN_GATHER3ALTDIV16SI);
30384 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30385 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
30386 IX86_BUILTIN_SCATTERSIV16SF);
30388 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30389 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
30390 IX86_BUILTIN_SCATTERSIV8DF);
30392 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30393 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
30394 IX86_BUILTIN_SCATTERDIV16SF);
30396 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30397 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
30398 IX86_BUILTIN_SCATTERDIV8DF);
30400 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30401 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
30402 IX86_BUILTIN_SCATTERSIV16SI);
30404 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30405 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
30406 IX86_BUILTIN_SCATTERSIV8DI);
30408 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30409 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
30410 IX86_BUILTIN_SCATTERDIV16SI);
30412 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30413 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
30414 IX86_BUILTIN_SCATTERDIV8DI);
30416 /* AVX512VL */
30417 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
30418 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
30419 IX86_BUILTIN_GATHER3SIV2DF);
30421 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
30422 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
30423 IX86_BUILTIN_GATHER3SIV4DF);
30425 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
30426 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
30427 IX86_BUILTIN_GATHER3DIV2DF);
30429 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
30430 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
30431 IX86_BUILTIN_GATHER3DIV4DF);
30433 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
30434 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
30435 IX86_BUILTIN_GATHER3SIV4SF);
30437 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
30438 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
30439 IX86_BUILTIN_GATHER3SIV8SF);
30441 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
30442 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
30443 IX86_BUILTIN_GATHER3DIV4SF);
30445 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
30446 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
30447 IX86_BUILTIN_GATHER3DIV8SF);
30449 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
30450 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
30451 IX86_BUILTIN_GATHER3SIV2DI);
30453 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
30454 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
30455 IX86_BUILTIN_GATHER3SIV4DI);
30457 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
30458 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
30459 IX86_BUILTIN_GATHER3DIV2DI);
30461 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
30462 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
30463 IX86_BUILTIN_GATHER3DIV4DI);
30465 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
30466 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
30467 IX86_BUILTIN_GATHER3SIV4SI);
30469 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
30470 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
30471 IX86_BUILTIN_GATHER3SIV8SI);
30473 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
30474 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
30475 IX86_BUILTIN_GATHER3DIV4SI);
30477 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
30478 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
30479 IX86_BUILTIN_GATHER3DIV8SI);
30481 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
30482 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
30483 IX86_BUILTIN_GATHER3ALTSIV4DF);
30485 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
30486 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
30487 IX86_BUILTIN_GATHER3ALTDIV8SF);
30489 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
30490 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
30491 IX86_BUILTIN_GATHER3ALTSIV4DI);
30493 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
30494 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
30495 IX86_BUILTIN_GATHER3ALTDIV8SI);
30497 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
30498 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
30499 IX86_BUILTIN_SCATTERSIV8SF);
30501 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
30502 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
30503 IX86_BUILTIN_SCATTERSIV4SF);
30505 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
30506 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
30507 IX86_BUILTIN_SCATTERSIV4DF);
30509 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
30510 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
30511 IX86_BUILTIN_SCATTERSIV2DF);
30513 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
30514 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
30515 IX86_BUILTIN_SCATTERDIV8SF);
30517 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
30518 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
30519 IX86_BUILTIN_SCATTERDIV4SF);
30521 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
30522 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
30523 IX86_BUILTIN_SCATTERDIV4DF);
30525 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
30526 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
30527 IX86_BUILTIN_SCATTERDIV2DF);
30529 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
30530 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
30531 IX86_BUILTIN_SCATTERSIV8SI);
30533 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
30534 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
30535 IX86_BUILTIN_SCATTERSIV4SI);
30537 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
30538 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
30539 IX86_BUILTIN_SCATTERSIV4DI);
30541 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
30542 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
30543 IX86_BUILTIN_SCATTERSIV2DI);
30545 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
30546 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
30547 IX86_BUILTIN_SCATTERDIV8SI);
30549 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
30550 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
30551 IX86_BUILTIN_SCATTERDIV4SI);
30553 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
30554 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
30555 IX86_BUILTIN_SCATTERDIV4DI);
30557 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
30558 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
30559 IX86_BUILTIN_SCATTERDIV2DI);
30560 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
30561 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
30562 IX86_BUILTIN_SCATTERALTSIV8DF);
30564 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
30565 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
30566 IX86_BUILTIN_SCATTERALTDIV16SF);
30568 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
30569 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
30570 IX86_BUILTIN_SCATTERALTSIV8DI);
30572 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
30573 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
30574 IX86_BUILTIN_SCATTERALTDIV16SI);
30576 /* AVX512PF */
30577 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30578 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30579 IX86_BUILTIN_GATHERPFDPD);
30580 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30581 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30582 IX86_BUILTIN_GATHERPFDPS);
30583 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
30584 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30585 IX86_BUILTIN_GATHERPFQPD);
30586 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
30587 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30588 IX86_BUILTIN_GATHERPFQPS);
30589 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
30590 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30591 IX86_BUILTIN_SCATTERPFDPD);
30592 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
30593 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30594 IX86_BUILTIN_SCATTERPFDPS);
30595 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
30596 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30597 IX86_BUILTIN_SCATTERPFQPD);
30598 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
30599 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30600 IX86_BUILTIN_SCATTERPFQPS);
30602 /* SHA */
30603 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
30604 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
30605 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
30606 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
30607 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
30608 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
30609 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
30610 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
30611 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
30612 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
30613 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
30614 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
30615 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
30616 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
30618 /* RTM. */
30619 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
30620 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
30622 /* MMX access to the vec_init patterns. */
30623 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
30624 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
30626 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
30627 V4HI_FTYPE_HI_HI_HI_HI,
30628 IX86_BUILTIN_VEC_INIT_V4HI);
30630 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
30631 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
30632 IX86_BUILTIN_VEC_INIT_V8QI);
30634 /* Access to the vec_extract patterns. */
30635 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
30636 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
30637 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
30638 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
30639 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
30640 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
30641 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
30642 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
30643 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
30644 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
30646 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30647 /* As it uses V4HImode, we have to require -mmmx too. */
30648 | OPTION_MASK_ISA_MMX,
30649 "__builtin_ia32_vec_ext_v4hi",
30650 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
30652 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
30653 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
30655 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
30656 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
30658 /* Access to the vec_set patterns. */
30659 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
30660 "__builtin_ia32_vec_set_v2di",
30661 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
30663 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
30664 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
30666 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
30667 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
30669 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
30670 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
30672 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30673 /* As it uses V4HImode, we have to require -mmmx too. */
30674 | OPTION_MASK_ISA_MMX,
30675 "__builtin_ia32_vec_set_v4hi",
30676 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
30678 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
30679 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
30681 /* RDSEED */
30682 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
30683 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
30684 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
30685 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
30686 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
30687 "__builtin_ia32_rdseed_di_step",
30688 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
30690 /* ADCX */
30691 def_builtin (0, "__builtin_ia32_addcarryx_u32",
30692 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
30693 def_builtin (OPTION_MASK_ISA_64BIT,
30694 "__builtin_ia32_addcarryx_u64",
30695 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30696 IX86_BUILTIN_ADDCARRYX64);
30698 /* SBB */
30699 def_builtin (0, "__builtin_ia32_sbb_u32",
30700 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
30701 def_builtin (OPTION_MASK_ISA_64BIT,
30702 "__builtin_ia32_sbb_u64",
30703 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30704 IX86_BUILTIN_SBB64);
30706 /* Read/write FLAGS. */
30707 def_builtin (0, "__builtin_ia32_readeflags_u32",
30708 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30709 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
30710 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30711 def_builtin (0, "__builtin_ia32_writeeflags_u32",
30712 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
30713 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
30714 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
30716 /* CLFLUSHOPT. */
30717 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
30718 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
30720 /* CLWB. */
30721 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
30722 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
30724 /* MONITORX and MWAITX. */
30725 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
30726 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
30727 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
30728 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
30730 /* CLZERO. */
30731 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
30732 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
30734 /* Add FMA4 multi-arg argument instructions */
30735 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
30737 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
30738 if (d->name == 0)
30739 continue;
30741 ftype = (enum ix86_builtin_func_type) d->flag;
30742 def_builtin_const (d->mask, d->name, ftype, d->code);
30744 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
30745 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30746 ARRAY_SIZE (bdesc_multi_arg) - 1);
30748 /* Add CET intrinsics. */
30749 for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
30751 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
30752 if (d->name == 0)
30753 continue;
30755 ftype = (enum ix86_builtin_func_type) d->flag;
30756 def_builtin2 (d->mask, d->name, ftype, d->code);
30758 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
30759 IX86_BUILTIN__BDESC_CET_FIRST,
30760 ARRAY_SIZE (bdesc_cet) - 1);
30762 for (i = 0, d = bdesc_cet_rdssp;
30763 i < ARRAY_SIZE (bdesc_cet_rdssp);
30764 i++, d++)
30766 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
30767 if (d->name == 0)
30768 continue;
30770 ftype = (enum ix86_builtin_func_type) d->flag;
30771 def_builtin2 (d->mask, d->name, ftype, d->code);
30773 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
30774 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30775 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
30778 static void
30779 ix86_init_mpx_builtins ()
30781 const struct builtin_description * d;
30782 enum ix86_builtin_func_type ftype;
30783 tree decl;
30784 size_t i;
30786 for (i = 0, d = bdesc_mpx;
30787 i < ARRAY_SIZE (bdesc_mpx);
30788 i++, d++)
30790 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
30791 if (d->name == 0)
30792 continue;
30794 ftype = (enum ix86_builtin_func_type) d->flag;
30795 decl = def_builtin2 (d->mask, d->name, ftype, d->code);
30797 /* Without the leaf and nothrow flags, abnormal edges
30798 may follow calls to MPX builtins when setjmp is
30799 present in the function. Since there may be many
30800 MPX builtin calls, this creates lots of useless
30801 edges and enormous PHI nodes. To avoid this, mark
30802 MPX builtins as leaf and nothrow. */
30803 if (decl)
30805 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30806 NULL_TREE);
30807 TREE_NOTHROW (decl) = 1;
30809 else
30811 ix86_builtins_isa[(int)d->code].leaf_p = true;
30812 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30815 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
30816 IX86_BUILTIN__BDESC_MPX_FIRST,
30817 ARRAY_SIZE (bdesc_mpx) - 1);
30819 for (i = 0, d = bdesc_mpx_const;
30820 i < ARRAY_SIZE (bdesc_mpx_const);
30821 i++, d++)
30823 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
30824 if (d->name == 0)
30825 continue;
30827 ftype = (enum ix86_builtin_func_type) d->flag;
30828 decl = def_builtin_const2 (d->mask, d->name, ftype, d->code);
30830 if (decl)
30832 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30833 NULL_TREE);
30834 TREE_NOTHROW (decl) = 1;
30836 else
30838 ix86_builtins_isa[(int)d->code].leaf_p = true;
30839 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30842 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
30843 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30844 ARRAY_SIZE (bdesc_mpx_const) - 1);
30846 #undef BDESC_VERIFY
30847 #undef BDESC_VERIFYS
30849 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
30850 to return a pointer to VERSION_DECL if the outcome of the expression
30851 formed by PREDICATE_CHAIN is true. This function will be called during
30852 version dispatch to decide which function version to execute. It returns
30853 the basic block at the end, to which more conditions can be added. */
30855 static basic_block
30856 add_condition_to_bb (tree function_decl, tree version_decl,
30857 tree predicate_chain, basic_block new_bb)
30859 gimple *return_stmt;
30860 tree convert_expr, result_var;
30861 gimple *convert_stmt;
30862 gimple *call_cond_stmt;
30863 gimple *if_else_stmt;
30865 basic_block bb1, bb2, bb3;
30866 edge e12, e23;
30868 tree cond_var, and_expr_var = NULL_TREE;
30869 gimple_seq gseq;
30871 tree predicate_decl, predicate_arg;
30873 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
30875 gcc_assert (new_bb != NULL);
30876 gseq = bb_seq (new_bb);
30879 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
30880 build_fold_addr_expr (version_decl));
30881 result_var = create_tmp_var (ptr_type_node);
30882 convert_stmt = gimple_build_assign (result_var, convert_expr);
30883 return_stmt = gimple_build_return (result_var);
30885 if (predicate_chain == NULL_TREE)
30887 gimple_seq_add_stmt (&gseq, convert_stmt);
30888 gimple_seq_add_stmt (&gseq, return_stmt);
30889 set_bb_seq (new_bb, gseq);
30890 gimple_set_bb (convert_stmt, new_bb);
30891 gimple_set_bb (return_stmt, new_bb);
30892 pop_cfun ();
30893 return new_bb;
30896 while (predicate_chain != NULL)
30898 cond_var = create_tmp_var (integer_type_node);
30899 predicate_decl = TREE_PURPOSE (predicate_chain);
30900 predicate_arg = TREE_VALUE (predicate_chain);
30901 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
30902 gimple_call_set_lhs (call_cond_stmt, cond_var);
30904 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
30905 gimple_set_bb (call_cond_stmt, new_bb);
30906 gimple_seq_add_stmt (&gseq, call_cond_stmt);
30908 predicate_chain = TREE_CHAIN (predicate_chain);
30910 if (and_expr_var == NULL)
30911 and_expr_var = cond_var;
30912 else
30914 gimple *assign_stmt;
30915 /* Use MIN_EXPR to check if any integer is zero:
30916 and_expr_var = min_expr <cond_var, and_expr_var>. */
30917 assign_stmt = gimple_build_assign (and_expr_var,
30918 build2 (MIN_EXPR, integer_type_node,
30919 cond_var, and_expr_var));
30921 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
30922 gimple_set_bb (assign_stmt, new_bb);
30923 gimple_seq_add_stmt (&gseq, assign_stmt);
30927 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
30928 integer_zero_node,
30929 NULL_TREE, NULL_TREE);
30930 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
30931 gimple_set_bb (if_else_stmt, new_bb);
30932 gimple_seq_add_stmt (&gseq, if_else_stmt);
30934 gimple_seq_add_stmt (&gseq, convert_stmt);
30935 gimple_seq_add_stmt (&gseq, return_stmt);
30936 set_bb_seq (new_bb, gseq);
30938 bb1 = new_bb;
30939 e12 = split_block (bb1, if_else_stmt);
30940 bb2 = e12->dest;
30941 e12->flags &= ~EDGE_FALLTHRU;
30942 e12->flags |= EDGE_TRUE_VALUE;
30944 e23 = split_block (bb2, return_stmt);
30946 gimple_set_bb (convert_stmt, bb2);
30947 gimple_set_bb (return_stmt, bb2);
30949 bb3 = e23->dest;
30950 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
30952 remove_edge (e23);
30953 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
30955 pop_cfun ();
30957 return bb3;
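/* Illustrative sketch (not part of the original source): for a version
   guarded by two predicates, the statements added above correspond
   roughly to the following GIMPLE shape, where the predicate calls come
   from PREDICATE_CHAIN and the MIN_EXPR acts as a logical AND of the
   non-zero results:

       cond_1 = __builtin_cpu_is ("haswell");
       cond_2 = __builtin_cpu_supports ("avx2");
       and_tmp = MIN_EXPR <cond_2, cond_1>;
       if (and_tmp > 0)
         return (void *) &VERSION_DECL;
       else
         fall through to the next condition block (bb3)

   The particular predicates above are made-up examples; only the
   control-flow shape follows the code.  */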
30960 /* This parses the attribute arguments to target in DECL and determines
30961 the right builtin to use to match the platform specification.
30962 It returns the priority value for this version decl. If PREDICATE_LIST
30963 is not NULL, it stores the list of cpu features that need to be checked
30964 before dispatching this function. */
30966 static unsigned int
30967 get_builtin_code_for_version (tree decl, tree *predicate_list)
30969 tree attrs;
30970 struct cl_target_option cur_target;
30971 tree target_node;
30972 struct cl_target_option *new_target;
30973 const char *arg_str = NULL;
30974 const char *attrs_str = NULL;
30975 char *tok_str = NULL;
30976 char *token;
30978 /* Priority of i386 features, greater value is higher priority. This is
30979 used to decide the order in which function dispatch must happen. For
30980 instance, a version specialized for SSE4.2 should be checked for dispatch
30981 before a version for SSE3, as SSE4.2 implies SSE3. */
30982 enum feature_priority
30984 P_ZERO = 0,
30985 P_MMX,
30986 P_SSE,
30987 P_SSE2,
30988 P_SSE3,
30989 P_SSSE3,
30990 P_PROC_SSSE3,
30991 P_SSE4_A,
30992 P_PROC_SSE4_A,
30993 P_SSE4_1,
30994 P_SSE4_2,
30995 P_PROC_SSE4_2,
30996 P_POPCNT,
30997 P_AES,
30998 P_PCLMUL,
30999 P_AVX,
31000 P_PROC_AVX,
31001 P_BMI,
31002 P_PROC_BMI,
31003 P_FMA4,
31004 P_XOP,
31005 P_PROC_XOP,
31006 P_FMA,
31007 P_PROC_FMA,
31008 P_BMI2,
31009 P_AVX2,
31010 P_PROC_AVX2,
31011 P_AVX512F,
31012 P_PROC_AVX512F
31015 enum feature_priority priority = P_ZERO;
31017 /* These are the target attribute strings for which a dispatcher is
31018 available, from fold_builtin_cpu. */
31020 static struct _feature_list
31022 const char *const name;
31023 const enum feature_priority priority;
31025 const feature_list[] =
31027 {"mmx", P_MMX},
31028 {"sse", P_SSE},
31029 {"sse2", P_SSE2},
31030 {"sse3", P_SSE3},
31031 {"sse4a", P_SSE4_A},
31032 {"ssse3", P_SSSE3},
31033 {"sse4.1", P_SSE4_1},
31034 {"sse4.2", P_SSE4_2},
31035 {"popcnt", P_POPCNT},
31036 {"aes", P_AES},
31037 {"pclmul", P_PCLMUL},
31038 {"avx", P_AVX},
31039 {"bmi", P_BMI},
31040 {"fma4", P_FMA4},
31041 {"xop", P_XOP},
31042 {"fma", P_FMA},
31043 {"bmi2", P_BMI2},
31044 {"avx2", P_AVX2},
31045 {"avx512f", P_AVX512F}
31049 static unsigned int NUM_FEATURES
31050 = sizeof (feature_list) / sizeof (struct _feature_list);
31052 unsigned int i;
31054 tree predicate_chain = NULL_TREE;
31055 tree predicate_decl, predicate_arg;
31057 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31058 gcc_assert (attrs != NULL);
31060 attrs = TREE_VALUE (TREE_VALUE (attrs));
31062 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31063 attrs_str = TREE_STRING_POINTER (attrs);
31065 /* Return priority zero for default function. */
31066 if (strcmp (attrs_str, "default") == 0)
31067 return 0;
31069 /* Handle arch= if specified. For priority, set it to be 1 more than
31070 the best instruction set the processor can handle. For instance, if
31071 there is a version for atom and a version for ssse3 (the highest ISA
31072 priority for atom), the atom version must be checked for dispatch
31073 before the ssse3 version. */
31074 if (strstr (attrs_str, "arch=") != NULL)
31076 cl_target_option_save (&cur_target, &global_options);
31077 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31078 &global_options_set);
31080 gcc_assert (target_node);
31081 new_target = TREE_TARGET_OPTION (target_node);
31082 gcc_assert (new_target);
31084 if (new_target->arch_specified && new_target->arch > 0)
31086 switch (new_target->arch)
31088 case PROCESSOR_CORE2:
31089 arg_str = "core2";
31090 priority = P_PROC_SSSE3;
31091 break;
31092 case PROCESSOR_NEHALEM:
31093 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31095 arg_str = "westmere";
31096 priority = P_AES;
31098 else
31100 /* We translate "arch=corei7" and "arch=nehalem" to
31101 "corei7" so that it will be mapped to M_INTEL_COREI7
31102 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31103 arg_str = "corei7";
31104 priority = P_PROC_SSE4_2;
31106 break;
31107 case PROCESSOR_SANDYBRIDGE:
31108 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31109 arg_str = "ivybridge";
31110 else
31111 arg_str = "sandybridge";
31112 priority = P_PROC_AVX;
31113 break;
31114 case PROCESSOR_HASWELL:
31115 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
31116 arg_str = "skylake-avx512";
31117 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
31118 arg_str = "skylake";
31119 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31120 arg_str = "broadwell";
31121 else
31122 arg_str = "haswell";
31123 priority = P_PROC_AVX2;
31124 break;
31125 case PROCESSOR_BONNELL:
31126 arg_str = "bonnell";
31127 priority = P_PROC_SSSE3;
31128 break;
31129 case PROCESSOR_KNL:
31130 arg_str = "knl";
31131 priority = P_PROC_AVX512F;
31132 break;
31133 case PROCESSOR_KNM:
31134 arg_str = "knm";
31135 priority = P_PROC_AVX512F;
31136 break;
31137 case PROCESSOR_SILVERMONT:
31138 arg_str = "silvermont";
31139 priority = P_PROC_SSE4_2;
31140 break;
31141 case PROCESSOR_AMDFAM10:
31142 arg_str = "amdfam10h";
31143 priority = P_PROC_SSE4_A;
31144 break;
31145 case PROCESSOR_BTVER1:
31146 arg_str = "btver1";
31147 priority = P_PROC_SSE4_A;
31148 break;
31149 case PROCESSOR_BTVER2:
31150 arg_str = "btver2";
31151 priority = P_PROC_BMI;
31152 break;
31153 case PROCESSOR_BDVER1:
31154 arg_str = "bdver1";
31155 priority = P_PROC_XOP;
31156 break;
31157 case PROCESSOR_BDVER2:
31158 arg_str = "bdver2";
31159 priority = P_PROC_FMA;
31160 break;
31161 case PROCESSOR_BDVER3:
31162 arg_str = "bdver3";
31163 priority = P_PROC_FMA;
31164 break;
31165 case PROCESSOR_BDVER4:
31166 arg_str = "bdver4";
31167 priority = P_PROC_AVX2;
31168 break;
31169 case PROCESSOR_ZNVER1:
31170 arg_str = "znver1";
31171 priority = P_PROC_AVX2;
31172 break;
31176 cl_target_option_restore (&global_options, &cur_target);
31178 if (predicate_list && arg_str == NULL)
31180 error_at (DECL_SOURCE_LOCATION (decl),
31181 "No dispatcher found for the versioning attributes");
31182 return 0;
31185 if (predicate_list)
31187 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31188 /* For a C string literal the length includes the trailing NULL. */
31189 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31190 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31191 predicate_chain);
31195 /* Process feature name. */
31196 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31197 strcpy (tok_str, attrs_str);
31198 token = strtok (tok_str, ",");
31199 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31201 while (token != NULL)
31203 /* Do not process "arch=" */
31204 if (strncmp (token, "arch=", 5) == 0)
31206 token = strtok (NULL, ",");
31207 continue;
31209 for (i = 0; i < NUM_FEATURES; ++i)
31211 if (strcmp (token, feature_list[i].name) == 0)
31213 if (predicate_list)
31215 predicate_arg = build_string_literal (
31216 strlen (feature_list[i].name) + 1,
31217 feature_list[i].name);
31218 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31219 predicate_chain);
31221 /* Find the maximum priority feature. */
31222 if (feature_list[i].priority > priority)
31223 priority = feature_list[i].priority;
31225 break;
31228 if (predicate_list && i == NUM_FEATURES)
31230 error_at (DECL_SOURCE_LOCATION (decl),
31231 "No dispatcher found for %s", token);
31232 return 0;
31234 token = strtok (NULL, ",");
31236 free (tok_str);
31238 if (predicate_list && predicate_chain == NULL_TREE)
31240 error_at (DECL_SOURCE_LOCATION (decl),
31241 "No dispatcher found for the versioning attributes : %s",
31242 attrs_str);
31243 return 0;
31245 else if (predicate_list)
31247 predicate_chain = nreverse (predicate_chain);
31248 *predicate_list = predicate_chain;
31251 return priority;
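/* Illustrative example (not part of the original source): with the
   priorities above, versions declared as

       __attribute__ ((target ("arch=haswell")))   (priority P_PROC_AVX2)
       __attribute__ ((target ("sse4.2")))         (priority P_SSE4_2)
       __attribute__ ((target ("default")))        (priority 0)

   are checked for dispatch in descending priority order: the
   arch=haswell version first, then the sse4.2 version, with the
   default version used only when no other predicate matches.  */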
31254 /* This compares the priority of target features in function DECL1
31255 and DECL2. It returns positive value if DECL1 is higher priority,
31256 negative value if DECL2 is higher priority and 0 if they are the
31257 same. */
31259 static int
31260 ix86_compare_version_priority (tree decl1, tree decl2)
31262 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31263 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31265 return (int)priority1 - (int)priority2;
31268 /* V1 and V2 point to function versions with different priorities
31269 based on the target ISA. This function compares their priorities. */
31271 static int
31272 feature_compare (const void *v1, const void *v2)
31274 typedef struct _function_version_info
31276 tree version_decl;
31277 tree predicate_chain;
31278 unsigned int dispatch_priority;
31279 } function_version_info;
31281 const function_version_info c1 = *(const function_version_info *)v1;
31282 const function_version_info c2 = *(const function_version_info *)v2;
31283 return (c2.dispatch_priority - c1.dispatch_priority);
31286 /* This function generates the dispatch function for
31287 multi-versioned functions. DISPATCH_DECL is the function which will
31288 contain the dispatch logic. FNDECLS are the function choices for
31289 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31290 in DISPATCH_DECL in which the dispatch code is generated. */
31292 static int
31293 dispatch_function_versions (tree dispatch_decl,
31294 void *fndecls_p,
31295 basic_block *empty_bb)
31297 tree default_decl;
31298 gimple *ifunc_cpu_init_stmt;
31299 gimple_seq gseq;
31300 int ix;
31301 tree ele;
31302 vec<tree> *fndecls;
31303 unsigned int num_versions = 0;
31304 unsigned int actual_versions = 0;
31305 unsigned int i;
31307 struct _function_version_info
31309 tree version_decl;
31310 tree predicate_chain;
31311 unsigned int dispatch_priority;
31312 }*function_version_info;
31314 gcc_assert (dispatch_decl != NULL
31315 && fndecls_p != NULL
31316 && empty_bb != NULL);
31318 /* fndecls_p is actually a vector. */
31319 fndecls = static_cast<vec<tree> *> (fndecls_p);
31321 /* At least one more version other than the default. */
31322 num_versions = fndecls->length ();
31323 gcc_assert (num_versions >= 2);
31325 function_version_info = (struct _function_version_info *)
31326 XNEWVEC (struct _function_version_info, (num_versions - 1));
31328 /* The first version in the vector is the default decl. */
31329 default_decl = (*fndecls)[0];
31331 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31333 gseq = bb_seq (*empty_bb);
31334 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31335 constructors, so explicitly call __builtin_cpu_init here. */
31336 ifunc_cpu_init_stmt = gimple_build_call_vec (
31337 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31338 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31339 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31340 set_bb_seq (*empty_bb, gseq);
31342 pop_cfun ();
31345 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31347 tree version_decl = ele;
31348 tree predicate_chain = NULL_TREE;
31349 unsigned int priority;
31350 /* Get attribute string, parse it and find the right predicate decl.
31351 The predicate function could be a lengthy combination of many
31352 features, like arch-type and various isa-variants. */
31353 priority = get_builtin_code_for_version (version_decl,
31354 &predicate_chain);
31356 if (predicate_chain == NULL_TREE)
31357 continue;
31359 function_version_info [actual_versions].version_decl = version_decl;
31360 function_version_info [actual_versions].predicate_chain
31361 = predicate_chain;
31362 function_version_info [actual_versions].dispatch_priority = priority;
31363 actual_versions++;
31366 /* Sort the versions according to descending order of dispatch priority. The
31367 priority is based on the ISA. This is not a perfect solution. There
31368 could still be ambiguity. If more than one function version is suitable
31369 to execute, which one should be dispatched? In future, allow the user
31370 to specify a dispatch priority next to the version. */
31371 qsort (function_version_info, actual_versions,
31372 sizeof (struct _function_version_info), feature_compare);
31374 for (i = 0; i < actual_versions; ++i)
31375 *empty_bb = add_condition_to_bb (dispatch_decl,
31376 function_version_info[i].version_decl,
31377 function_version_info[i].predicate_chain,
31378 *empty_bb);
31380 /* Dispatch the default version at the end. */
31381 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31382 NULL, *empty_bb);
31384 free (function_version_info);
31385 return 0;
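/* Hedged usage example (not from the original source): the dispatcher
   built here backs GCC's function multiversioning for C++, e.g.

       __attribute__ ((target ("default"))) int foo () { return 0; }
       __attribute__ ((target ("avx2")))    int foo () { return 2; }

   A call to foo () resolves through an IFUNC whose resolver body is the
   chain of condition blocks generated by add_condition_to_bb above,
   with the default version emitted last as the unconditional fallback.  */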
31388 /* This function changes the assembler name for functions that are
31389 versions. If DECL is a function version and has a "target"
31390 attribute, it appends the attribute string to its assembler name. */
31392 static tree
31393 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31395 tree version_attr;
31396 const char *orig_name, *version_string;
31397 char *attr_str, *assembler_name;
31399 if (DECL_DECLARED_INLINE_P (decl)
31400 && lookup_attribute ("gnu_inline",
31401 DECL_ATTRIBUTES (decl)))
31402 error_at (DECL_SOURCE_LOCATION (decl),
31403 "Function versions cannot be marked as gnu_inline,"
31404 " bodies have to be generated");
31406 if (DECL_VIRTUAL_P (decl)
31407 || DECL_VINDEX (decl))
31408 sorry ("Virtual function multiversioning not supported");
31410 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31412 /* target attribute string cannot be NULL. */
31413 gcc_assert (version_attr != NULL_TREE);
31415 orig_name = IDENTIFIER_POINTER (id);
31416 version_string
31417 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31419 if (strcmp (version_string, "default") == 0)
31420 return id;
31422 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31423 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31425 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31427 /* Allow assembler name to be modified if already set. */
31428 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31429 SET_DECL_RTL (decl, NULL);
31431 tree ret = get_identifier (assembler_name);
31432 XDELETEVEC (attr_str);
31433 XDELETEVEC (assembler_name);
31434 return ret;
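/* Example (illustration only): with the "%s.%s" format above, a version
   of foo declared with __attribute__ ((target ("avx2"))) gets the
   assembler name "foo.avx2", while the "default" version keeps the
   plain name.  sorted_attr_string is expected to canonicalize the
   attribute string, so that e.g. target ("sse4.2,popcnt") and
   target ("popcnt,sse4.2") yield the same suffix.  */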
31438 static tree
31439 ix86_mangle_decl_assembler_name (tree decl, tree id)
31441 /* For function version, add the target suffix to the assembler name. */
31442 if (TREE_CODE (decl) == FUNCTION_DECL
31443 && DECL_FUNCTION_VERSIONED (decl))
31444 id = ix86_mangle_function_version_assembler_name (decl, id);
31445 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31446 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31447 #endif
31449 return id;
31452 /* Make a dispatcher declaration for the multi-versioned function DECL.
31453 Calls to DECL function will be replaced with calls to the dispatcher
31454 by the front-end. Returns the decl of the dispatcher function. */
31456 static tree
31457 ix86_get_function_versions_dispatcher (void *decl)
31459 tree fn = (tree) decl;
31460 struct cgraph_node *node = NULL;
31461 struct cgraph_node *default_node = NULL;
31462 struct cgraph_function_version_info *node_v = NULL;
31463 struct cgraph_function_version_info *first_v = NULL;
31465 tree dispatch_decl = NULL;
31467 struct cgraph_function_version_info *default_version_info = NULL;
31469 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31471 node = cgraph_node::get (fn);
31472 gcc_assert (node != NULL);
31474 node_v = node->function_version ();
31475 gcc_assert (node_v != NULL);
31477 if (node_v->dispatcher_resolver != NULL)
31478 return node_v->dispatcher_resolver;
31480 /* Find the default version and make it the first node. */
31481 first_v = node_v;
31482 /* Go to the beginning of the chain. */
31483 while (first_v->prev != NULL)
31484 first_v = first_v->prev;
31485 default_version_info = first_v;
31486 while (default_version_info != NULL)
31488 if (is_function_default_version
31489 (default_version_info->this_node->decl))
31490 break;
31491 default_version_info = default_version_info->next;
31494 /* If there is no default node, just return NULL. */
31495 if (default_version_info == NULL)
31496 return NULL;
31498 /* Make default info the first node. */
31499 if (first_v != default_version_info)
31501 default_version_info->prev->next = default_version_info->next;
31502 if (default_version_info->next)
31503 default_version_info->next->prev = default_version_info->prev;
31504 first_v->prev = default_version_info;
31505 default_version_info->next = first_v;
31506 default_version_info->prev = NULL;
31509 default_node = default_version_info->this_node;
31511 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31512 if (targetm.has_ifunc_p ())
31514 struct cgraph_function_version_info *it_v = NULL;
31515 struct cgraph_node *dispatcher_node = NULL;
31516 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31518 /* Right now, the dispatching is done via ifunc. */
31519 dispatch_decl = make_dispatcher_decl (default_node->decl);
31521 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31522 gcc_assert (dispatcher_node != NULL);
31523 dispatcher_node->dispatcher_function = 1;
31524 dispatcher_version_info
31525 = dispatcher_node->insert_new_function_version ();
31526 dispatcher_version_info->next = default_version_info;
31527 dispatcher_node->definition = 1;
31529 /* Set the dispatcher for all the versions. */
31530 it_v = default_version_info;
31531 while (it_v != NULL)
31533 it_v->dispatcher_resolver = dispatch_decl;
31534 it_v = it_v->next;
31537 else
31538 #endif
31540 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31541 "multiversioning needs ifunc which is not supported "
31542 "on this target");
31545 return dispatch_decl;
31548 /* Make the resolver function decl to dispatch the versions of
31549 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
31550 ifunc alias that will point to the created resolver. Create an
31551 empty basic block in the resolver and store the pointer in
31552 EMPTY_BB. Return the decl of the resolver function. */
31554 static tree
31555 make_resolver_func (const tree default_decl,
31556 const tree ifunc_alias_decl,
31557 basic_block *empty_bb)
31559 char *resolver_name;
31560 tree decl, type, decl_name, t;
31562 /* IFUNC's have to be globally visible. So, if the default_decl is
31563 not, then the name of the IFUNC should be made unique. */
31564 if (TREE_PUBLIC (default_decl) == 0)
31566 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
31567 symtab->change_decl_assembler_name (ifunc_alias_decl,
31568 get_identifier (ifunc_name));
31569 XDELETEVEC (ifunc_name);
31572 resolver_name = make_unique_name (default_decl, "resolver", false);
31574 /* The resolver function should return a (void *). */
31575 type = build_function_type_list (ptr_type_node, NULL_TREE);
31577 decl = build_fn_decl (resolver_name, type);
31578 decl_name = get_identifier (resolver_name);
31579 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31581 DECL_NAME (decl) = decl_name;
31582 TREE_USED (decl) = 1;
31583 DECL_ARTIFICIAL (decl) = 1;
31584 DECL_IGNORED_P (decl) = 1;
31585 TREE_PUBLIC (decl) = 0;
31586 DECL_UNINLINABLE (decl) = 1;
31588 /* Resolver is not external, body is generated. */
31589 DECL_EXTERNAL (decl) = 0;
31590 DECL_EXTERNAL (ifunc_alias_decl) = 0;
31592 DECL_CONTEXT (decl) = NULL_TREE;
31593 DECL_INITIAL (decl) = make_node (BLOCK);
31594 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31596 if (DECL_COMDAT_GROUP (default_decl)
31597 || TREE_PUBLIC (default_decl))
31599 /* In this case, each translation unit with a call to this
31600 versioned function will put out a resolver. Ensure it
31601 is comdat to keep just one copy. */
31602 DECL_COMDAT (decl) = 1;
31603 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
31605 /* Build result decl and add to function_decl. */
31606 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31607 DECL_ARTIFICIAL (t) = 1;
31608 DECL_IGNORED_P (t) = 1;
31609 DECL_RESULT (decl) = t;
31611 gimplify_function_tree (decl);
31612 push_cfun (DECL_STRUCT_FUNCTION (decl));
31613 *empty_bb = init_lowered_empty_function (decl, false,
31614 profile_count::uninitialized ());
31616 cgraph_node::add_new_function (decl, true);
31617 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31619 pop_cfun ();
31621 gcc_assert (ifunc_alias_decl != NULL);
31622 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
31623 DECL_ATTRIBUTES (ifunc_alias_decl)
31624 = make_attribute ("ifunc", resolver_name,
31625 DECL_ATTRIBUTES (ifunc_alias_decl));
31627 /* Create the alias for dispatch to resolver here. */
31628 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
31629 XDELETEVEC (resolver_name);
31630 return decl;
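/* Sketch of the result (not part of the original source): at the source
   level the objects created above correspond roughly to

       void *foo.resolver (void);
       int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   i.e. the ifunc alias dispatches through a resolver that returns a
   void * and that the dynamic loader invokes once to select the
   function version.  The name "foo.resolver" is only illustrative;
   the real name comes from make_unique_name.  */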
31633 /* Generate the dispatching code body to dispatch multi-versioned function
31634 DECL. The target hook is called to process the "target" attributes and
31635 provide the code to dispatch the right function at run-time. NODE points
31636 to the dispatcher decl whose body will be created. */
31638 static tree
31639 ix86_generate_version_dispatcher_body (void *node_p)
31641 tree resolver_decl;
31642 basic_block empty_bb;
31643 tree default_ver_decl;
31644 struct cgraph_node *versn;
31645 struct cgraph_node *node;
31647 struct cgraph_function_version_info *node_version_info = NULL;
31648 struct cgraph_function_version_info *versn_info = NULL;
31650 node = (cgraph_node *)node_p;
31652 node_version_info = node->function_version ();
31653 gcc_assert (node->dispatcher_function
31654 && node_version_info != NULL);
31656 if (node_version_info->dispatcher_resolver)
31657 return node_version_info->dispatcher_resolver;
31659 /* The first version in the chain corresponds to the default version. */
31660 default_ver_decl = node_version_info->next->this_node->decl;
31662 /* node is going to be an alias, so remove the finalized bit. */
31663 node->definition = false;
31665 resolver_decl = make_resolver_func (default_ver_decl,
31666 node->decl, &empty_bb);
31668 node_version_info->dispatcher_resolver = resolver_decl;
31670 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
31672 auto_vec<tree, 2> fn_ver_vec;
31674 for (versn_info = node_version_info->next; versn_info;
31675 versn_info = versn_info->next)
31677 versn = versn_info->this_node;
31678 /* Check for virtual functions here again, as by this time it should
31679 have been determined if this function needs a vtable index or
31680 not. This happens for methods in derived classes that override
31681 virtual methods in base classes but are not explicitly marked as
31682 virtual. */
31683 if (DECL_VINDEX (versn->decl))
31684 sorry ("Virtual function multiversioning not supported");
31686 fn_ver_vec.safe_push (versn->decl);
31689 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
31690 cgraph_edge::rebuild_edges ();
31691 pop_cfun ();
31692 return resolver_decl;
31694 /* This builds the processor_model struct type defined in
31695 libgcc/config/i386/cpuinfo.c */
31697 static tree
31698 build_processor_model_struct (void)
31700 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
31701 "__cpu_features"};
31702 tree field = NULL_TREE, field_chain = NULL_TREE;
31703 int i;
31704 tree type = make_node (RECORD_TYPE);
31706 /* The first 3 fields are unsigned int. */
31707 for (i = 0; i < 3; ++i)
31709 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31710 get_identifier (field_name[i]), unsigned_type_node);
31711 if (field_chain != NULL_TREE)
31712 DECL_CHAIN (field) = field_chain;
31713 field_chain = field;
31716 /* The last field is an array of unsigned integers of size one. */
31717 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31718 get_identifier (field_name[3]),
31719 build_array_type (unsigned_type_node,
31720 build_index_type (size_one_node)));
31721 if (field_chain != NULL_TREE)
31722 DECL_CHAIN (field) = field_chain;
31723 field_chain = field;
31725 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
31726 return type;
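/* For reference (an assumption based on the comment above, not part of
   the original source): the layout built here is meant to match the
   struct in libgcc/config/i386/cpuinfo.c, roughly

       struct __processor_model
       {
         unsigned int __cpu_vendor;
         unsigned int __cpu_type;
         unsigned int __cpu_subtype;
         unsigned int __cpu_features[1];
       };

   so that __cpu_model can be referenced directly by the folded
   __builtin_cpu_is / __builtin_cpu_supports checks below.  */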
31729 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
31731 static tree
31732 make_var_decl (tree type, const char *name)
31734 tree new_decl;
31736 new_decl = build_decl (UNKNOWN_LOCATION,
31737 VAR_DECL,
31738 get_identifier(name),
31739 type);
31741 DECL_EXTERNAL (new_decl) = 1;
31742 TREE_STATIC (new_decl) = 1;
31743 TREE_PUBLIC (new_decl) = 1;
31744 DECL_INITIAL (new_decl) = 0;
31745 DECL_ARTIFICIAL (new_decl) = 0;
31746 DECL_PRESERVE_P (new_decl) = 1;
31748 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
31749 assemble_variable (new_decl, 0, 0, 0);
31751 return new_decl;
31754 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
31755 into an integer defined in libgcc/config/i386/cpuinfo.c */
31757 static tree
31758 fold_builtin_cpu (tree fndecl, tree *args)
31760 unsigned int i;
31761 enum ix86_builtins fn_code = (enum ix86_builtins)
31762 DECL_FUNCTION_CODE (fndecl);
31763 tree param_string_cst = NULL;
31765 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
31766 enum processor_features
31768 F_CMOV = 0,
31769 F_MMX,
31770 F_POPCNT,
31771 F_SSE,
31772 F_SSE2,
31773 F_SSE3,
31774 F_SSSE3,
31775 F_SSE4_1,
31776 F_SSE4_2,
31777 F_AVX,
31778 F_AVX2,
31779 F_SSE4_A,
31780 F_FMA4,
31781 F_XOP,
31782 F_FMA,
31783 F_AVX512F,
31784 F_BMI,
31785 F_BMI2,
31786 F_AES,
31787 F_PCLMUL,
31788 F_AVX512VL,
31789 F_AVX512BW,
31790 F_AVX512DQ,
31791 F_AVX512CD,
31792 F_AVX512ER,
31793 F_AVX512PF,
31794 F_AVX512VBMI,
31795 F_AVX512IFMA,
31796 F_AVX5124VNNIW,
31797 F_AVX5124FMAPS,
31798 F_AVX512VPOPCNTDQ,
31799 F_MAX
31802 /* These are the values for vendor types and cpu types and subtypes
31803 in cpuinfo.c. Cpu types and subtypes should be subtracted by
31804 the corresponding start value. */
31805 enum processor_model
31807 M_INTEL = 1,
31808 M_AMD,
31809 M_CPU_TYPE_START,
31810 M_INTEL_BONNELL,
31811 M_INTEL_CORE2,
31812 M_INTEL_COREI7,
31813 M_AMDFAM10H,
31814 M_AMDFAM15H,
31815 M_INTEL_SILVERMONT,
31816 M_INTEL_KNL,
31817 M_AMD_BTVER1,
31818 M_AMD_BTVER2,
31819 M_AMDFAM17H,
31820 M_INTEL_KNM,
31821 M_CPU_SUBTYPE_START,
31822 M_INTEL_COREI7_NEHALEM,
31823 M_INTEL_COREI7_WESTMERE,
31824 M_INTEL_COREI7_SANDYBRIDGE,
31825 M_AMDFAM10H_BARCELONA,
31826 M_AMDFAM10H_SHANGHAI,
31827 M_AMDFAM10H_ISTANBUL,
31828 M_AMDFAM15H_BDVER1,
31829 M_AMDFAM15H_BDVER2,
31830 M_AMDFAM15H_BDVER3,
31831 M_AMDFAM15H_BDVER4,
31832 M_AMDFAM17H_ZNVER1,
31833 M_INTEL_COREI7_IVYBRIDGE,
31834 M_INTEL_COREI7_HASWELL,
31835 M_INTEL_COREI7_BROADWELL,
31836 M_INTEL_COREI7_SKYLAKE,
31837 M_INTEL_COREI7_SKYLAKE_AVX512
31840 static struct _arch_names_table
31842 const char *const name;
31843 const enum processor_model model;
31845 const arch_names_table[] =
31847 {"amd", M_AMD},
31848 {"intel", M_INTEL},
31849 {"atom", M_INTEL_BONNELL},
31850 {"slm", M_INTEL_SILVERMONT},
31851 {"core2", M_INTEL_CORE2},
31852 {"corei7", M_INTEL_COREI7},
31853 {"nehalem", M_INTEL_COREI7_NEHALEM},
31854 {"westmere", M_INTEL_COREI7_WESTMERE},
31855 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
31856 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
31857 {"haswell", M_INTEL_COREI7_HASWELL},
31858 {"broadwell", M_INTEL_COREI7_BROADWELL},
31859 {"skylake", M_INTEL_COREI7_SKYLAKE},
31860 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
31861 {"bonnell", M_INTEL_BONNELL},
31862 {"silvermont", M_INTEL_SILVERMONT},
31863 {"knl", M_INTEL_KNL},
31864 {"knm", M_INTEL_KNM},
31865 {"amdfam10h", M_AMDFAM10H},
31866 {"barcelona", M_AMDFAM10H_BARCELONA},
31867 {"shanghai", M_AMDFAM10H_SHANGHAI},
31868 {"istanbul", M_AMDFAM10H_ISTANBUL},
31869 {"btver1", M_AMD_BTVER1},
31870 {"amdfam15h", M_AMDFAM15H},
31871 {"bdver1", M_AMDFAM15H_BDVER1},
31872 {"bdver2", M_AMDFAM15H_BDVER2},
31873 {"bdver3", M_AMDFAM15H_BDVER3},
31874 {"bdver4", M_AMDFAM15H_BDVER4},
31875 {"btver2", M_AMD_BTVER2},
31876 {"amdfam17h", M_AMDFAM17H},
31877 {"znver1", M_AMDFAM17H_ZNVER1},
31880 static struct _isa_names_table
31882 const char *const name;
31883 const enum processor_features feature;
31885 const isa_names_table[] =
31887 {"cmov", F_CMOV},
31888 {"mmx", F_MMX},
31889 {"popcnt", F_POPCNT},
31890 {"sse", F_SSE},
31891 {"sse2", F_SSE2},
31892 {"sse3", F_SSE3},
31893 {"ssse3", F_SSSE3},
31894 {"sse4a", F_SSE4_A},
31895 {"sse4.1", F_SSE4_1},
31896 {"sse4.2", F_SSE4_2},
31897 {"avx", F_AVX},
31898 {"fma4", F_FMA4},
31899 {"xop", F_XOP},
31900 {"fma", F_FMA},
31901 {"avx2", F_AVX2},
31902 {"avx512f", F_AVX512F},
31903 {"bmi", F_BMI},
31904 {"bmi2", F_BMI2},
31905 {"aes", F_AES},
31906 {"pclmul", F_PCLMUL},
31907 {"avx512vl",F_AVX512VL},
31908 {"avx512bw",F_AVX512BW},
31909 {"avx512dq",F_AVX512DQ},
31910 {"avx512cd",F_AVX512CD},
31911 {"avx512er",F_AVX512ER},
31912 {"avx512pf",F_AVX512PF},
31913 {"avx512vbmi",F_AVX512VBMI},
31914 {"avx512ifma",F_AVX512IFMA},
31915 {"avx5124vnniw",F_AVX5124VNNIW},
31916 {"avx5124fmaps",F_AVX5124FMAPS},
31917 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
31920 tree __processor_model_type = build_processor_model_struct ();
31921 tree __cpu_model_var = make_var_decl (__processor_model_type,
31922 "__cpu_model");
31925 varpool_node::add (__cpu_model_var);
31927 gcc_assert ((args != NULL) && (*args != NULL));
31929 param_string_cst = *args;
31930 while (param_string_cst
31931 && TREE_CODE (param_string_cst) != STRING_CST)
31933 /* *args must be an expr that can contain other EXPRS leading to a
31934 STRING_CST. */
31935 if (!EXPR_P (param_string_cst))
31937 error ("Parameter to builtin must be a string constant or literal");
31938 return integer_zero_node;
31940 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
31943 gcc_assert (param_string_cst);
31945 if (fn_code == IX86_BUILTIN_CPU_IS)
31947 tree ref;
31948 tree field;
31949 tree final;
31951 unsigned int field_val = 0;
31952 unsigned int NUM_ARCH_NAMES
31953 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
31955 for (i = 0; i < NUM_ARCH_NAMES; i++)
31956 if (strcmp (arch_names_table[i].name,
31957 TREE_STRING_POINTER (param_string_cst)) == 0)
31958 break;
31960 if (i == NUM_ARCH_NAMES)
31962 error ("Parameter to builtin not valid: %s",
31963 TREE_STRING_POINTER (param_string_cst));
31964 return integer_zero_node;
31967 field = TYPE_FIELDS (__processor_model_type);
31968 field_val = arch_names_table[i].model;
31970 /* CPU types are stored in the next field. */
31971 if (field_val > M_CPU_TYPE_START
31972 && field_val < M_CPU_SUBTYPE_START)
31974 field = DECL_CHAIN (field);
31975 field_val -= M_CPU_TYPE_START;
31978 /* CPU subtypes are stored in the next field. */
31979 if (field_val > M_CPU_SUBTYPE_START)
31981 field = DECL_CHAIN (DECL_CHAIN (field));
31982 field_val -= M_CPU_SUBTYPE_START;
31985 /* Get the appropriate field in __cpu_model. */
31986 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
31987 field, NULL_TREE);
31989 /* Check the value. */
31990 final = build2 (EQ_EXPR, unsigned_type_node, ref,
31991 build_int_cstu (unsigned_type_node, field_val));
31992 return build1 (CONVERT_EXPR, integer_type_node, final);
31994 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
31996 tree ref;
31997 tree array_elt;
31998 tree field;
31999 tree final;
32001 unsigned int field_val = 0;
32002 unsigned int NUM_ISA_NAMES
32003 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32005 for (i = 0; i < NUM_ISA_NAMES; i++)
32006 if (strcmp (isa_names_table[i].name,
32007 TREE_STRING_POINTER (param_string_cst)) == 0)
32008 break;
32010 if (i == NUM_ISA_NAMES)
32012 error ("Parameter to builtin not valid: %s",
32013 TREE_STRING_POINTER (param_string_cst));
32014 return integer_zero_node;
32017 field = TYPE_FIELDS (__processor_model_type);
32018 /* Get the last field, which is __cpu_features. */
32019 while (DECL_CHAIN (field))
32020 field = DECL_CHAIN (field);
32022 /* Get the appropriate field: __cpu_model.__cpu_features */
32023 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32024 field, NULL_TREE);
32026 /* Access the 0th element of __cpu_features array. */
32027 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32028 integer_zero_node, NULL_TREE, NULL_TREE);
32030 field_val = (1 << isa_names_table[i].feature);
32031 /* Return __cpu_model.__cpu_features[0] & field_val */
32032 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32033 build_int_cstu (unsigned_type_node, field_val));
32034 return build1 (CONVERT_EXPR, integer_type_node, final);
32036 gcc_unreachable ();
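/* Worked example (illustration, not from the original source): after
   folding, a call such as

       __builtin_cpu_supports ("avx2")

   becomes a test of the corresponding feature bit, roughly

       (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   and __builtin_cpu_is ("haswell") becomes a comparison of
   __cpu_model.__cpu_subtype against
   M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START.  */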
32039 static tree
32040 ix86_fold_builtin (tree fndecl, int n_args,
32041 tree *args, bool ignore ATTRIBUTE_UNUSED)
32043 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32045 enum ix86_builtins fn_code = (enum ix86_builtins)
32046 DECL_FUNCTION_CODE (fndecl);
32047 switch (fn_code)
32049 case IX86_BUILTIN_CPU_IS:
32050 case IX86_BUILTIN_CPU_SUPPORTS:
32051 gcc_assert (n_args == 1);
32052 return fold_builtin_cpu (fndecl, args);
32054 case IX86_BUILTIN_NANQ:
32055 case IX86_BUILTIN_NANSQ:
32057 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32058 const char *str = c_getstr (*args);
32059 int quiet = fn_code == IX86_BUILTIN_NANQ;
32060 REAL_VALUE_TYPE real;
32062 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
32063 return build_real (type, real);
32064 return NULL_TREE;
32067 case IX86_BUILTIN_INFQ:
32068 case IX86_BUILTIN_HUGE_VALQ:
32070 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32071 REAL_VALUE_TYPE inf;
32072 real_inf (&inf);
32073 return build_real (type, inf);
32076 case IX86_BUILTIN_TZCNT16:
32077 case IX86_BUILTIN_CTZS:
32078 case IX86_BUILTIN_TZCNT32:
32079 case IX86_BUILTIN_TZCNT64:
32080 gcc_assert (n_args == 1);
32081 if (TREE_CODE (args[0]) == INTEGER_CST)
32083 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32084 tree arg = args[0];
32085 if (fn_code == IX86_BUILTIN_TZCNT16
32086 || fn_code == IX86_BUILTIN_CTZS)
32087 arg = fold_convert (short_unsigned_type_node, arg);
32088 if (integer_zerop (arg))
32089 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32090 else
32091 return fold_const_call (CFN_CTZ, type, arg);
32093 break;
32095 case IX86_BUILTIN_LZCNT16:
32096 case IX86_BUILTIN_CLZS:
32097 case IX86_BUILTIN_LZCNT32:
32098 case IX86_BUILTIN_LZCNT64:
32099 gcc_assert (n_args == 1);
32100 if (TREE_CODE (args[0]) == INTEGER_CST)
32102 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32103 tree arg = args[0];
32104 if (fn_code == IX86_BUILTIN_LZCNT16
32105 || fn_code == IX86_BUILTIN_CLZS)
32106 arg = fold_convert (short_unsigned_type_node, arg);
32107 if (integer_zerop (arg))
32108 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32109 else
32110 return fold_const_call (CFN_CLZ, type, arg);
32112 break;
32114 case IX86_BUILTIN_BEXTR32:
32115 case IX86_BUILTIN_BEXTR64:
32116 case IX86_BUILTIN_BEXTRI32:
32117 case IX86_BUILTIN_BEXTRI64:
32118 gcc_assert (n_args == 2);
32119 if (tree_fits_uhwi_p (args[1]))
32121 unsigned HOST_WIDE_INT res = 0;
32122 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
32123 unsigned int start = tree_to_uhwi (args[1]);
32124 unsigned int len = (start & 0xff00) >> 8;
32125 start &= 0xff;
32126 if (start >= prec || len == 0)
32127 res = 0;
32128 else if (!tree_fits_uhwi_p (args[0]))
32129 break;
32130 else
32131 res = tree_to_uhwi (args[0]) >> start;
32132 if (len > prec)
32133 len = prec;
32134 if (len < HOST_BITS_PER_WIDE_INT)
32135 res &= (HOST_WIDE_INT_1U << len) - 1;
32136 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32138 break;
32140 case IX86_BUILTIN_BZHI32:
32141 case IX86_BUILTIN_BZHI64:
32142 gcc_assert (n_args == 2);
32143 if (tree_fits_uhwi_p (args[1]))
32145 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
32146 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
32147 return args[0];
32148 if (!tree_fits_uhwi_p (args[0]))
32149 break;
32150 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
32151 res &= ~(HOST_WIDE_INT_M1U << idx);
32152 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32154 break;
32156 case IX86_BUILTIN_PDEP32:
32157 case IX86_BUILTIN_PDEP64:
32158 gcc_assert (n_args == 2);
32159 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
32161 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
32162 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
32163 unsigned HOST_WIDE_INT res = 0;
32164 unsigned HOST_WIDE_INT m, k = 1;
32165 for (m = 1; m; m <<= 1)
32166 if ((mask & m) != 0)
32168 if ((src & k) != 0)
32169 res |= m;
32170 k <<= 1;
32172 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32174 break;
32176 case IX86_BUILTIN_PEXT32:
32177 case IX86_BUILTIN_PEXT64:
32178 gcc_assert (n_args == 2);
32179 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
32181 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
32182 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
32183 unsigned HOST_WIDE_INT res = 0;
32184 unsigned HOST_WIDE_INT m, k = 1;
32185 for (m = 1; m; m <<= 1)
32186 if ((mask & m) != 0)
32188 if ((src & m) != 0)
32189 res |= k;
32190 k <<= 1;
32192 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32194 break;
32196 default:
32197 break;
32201 #ifdef SUBTARGET_FOLD_BUILTIN
32202 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32203 #endif
32205 return NULL_TREE;
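/* Worked example (illustration, not from the original source) of the
   PEXT constant folding loop above: with src = 0xb4 (0b10110100) and
   mask = 0xf0 (0b11110000), the set mask bits select bits 7..4 of src
   (1, 0, 1, 1), which are packed into the low bits of the result, so
   __builtin_ia32_pext_si (0xb4, 0xf0) folds to 0xb (0b1011).  PDEP is
   the inverse operation: it scatters the low bits of src into the bit
   positions selected by mask.  */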
32208 /* Fold a MD builtin (use ix86_fold_builtin for folding into
32209 constant) in GIMPLE. */
32211 bool
32212 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
32214 gimple *stmt = gsi_stmt (*gsi);
32215 tree fndecl = gimple_call_fndecl (stmt);
32216 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
32217 int n_args = gimple_call_num_args (stmt);
32218 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
32219 tree decl = NULL_TREE;
32220 tree arg0, arg1;
32222 switch (fn_code)
32224 case IX86_BUILTIN_TZCNT32:
32225 decl = builtin_decl_implicit (BUILT_IN_CTZ);
32226 goto fold_tzcnt_lzcnt;
32228 case IX86_BUILTIN_TZCNT64:
32229 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
32230 goto fold_tzcnt_lzcnt;
32232 case IX86_BUILTIN_LZCNT32:
32233 decl = builtin_decl_implicit (BUILT_IN_CLZ);
32234 goto fold_tzcnt_lzcnt;
32236 case IX86_BUILTIN_LZCNT64:
32237 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
32238 goto fold_tzcnt_lzcnt;
32240 fold_tzcnt_lzcnt:
32241 gcc_assert (n_args == 1);
32242 arg0 = gimple_call_arg (stmt, 0);
32243 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
32245 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
32246 /* If arg0 is provably non-zero, optimize into generic
32247 __builtin_c[tl]z{,ll} function the middle-end handles
32248 better. */
32249 if (!expr_not_equal_to (arg0, wi::zero (prec)))
32250 return false;
32252 location_t loc = gimple_location (stmt);
32253 gimple *g = gimple_build_call (decl, 1, arg0);
32254 gimple_set_location (g, loc);
32255 tree lhs = make_ssa_name (integer_type_node);
32256 gimple_call_set_lhs (g, lhs);
32257 gsi_insert_before (gsi, g, GSI_SAME_STMT);
32258 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
32259 gimple_set_location (g, loc);
32260 gsi_replace (gsi, g, false);
32261 return true;
32263 break;
32265 case IX86_BUILTIN_BZHI32:
32266 case IX86_BUILTIN_BZHI64:
32267 gcc_assert (n_args == 2);
32268 arg1 = gimple_call_arg (stmt, 1);
32269 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
32271 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
32272 arg0 = gimple_call_arg (stmt, 0);
32273 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
32274 break;
32275 location_t loc = gimple_location (stmt);
32276 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
32277 gimple_set_location (g, loc);
32278 gsi_replace (gsi, g, false);
32279 return true;
32281 break;
32283 case IX86_BUILTIN_PDEP32:
32284 case IX86_BUILTIN_PDEP64:
32285 case IX86_BUILTIN_PEXT32:
32286 case IX86_BUILTIN_PEXT64:
32287 gcc_assert (n_args == 2);
32288 arg1 = gimple_call_arg (stmt, 1);
32289 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
32291 location_t loc = gimple_location (stmt);
32292 arg0 = gimple_call_arg (stmt, 0);
32293 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
32294 gimple_set_location (g, loc);
32295 gsi_replace (gsi, g, false);
32296 return true;
32298 break;
32300 default:
32301 break;
32304 return false;
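/* Example of the tzcnt/lzcnt rewrite above (illustration only): if the
   argument is known to be non-zero, e.g.

       if (x != 0)
         n = __builtin_ia32_tzcnt_u32 (x);

   the call inside the branch can be replaced by __builtin_ctz (x),
   which the middle-end optimizes more aggressively.  The zero case has
   to stay on the builtin, because tzcnt defines the result (the operand
   width) for a zero input while __builtin_ctz does not.  */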
32307 /* Make builtins to detect cpu type and features supported. NAME is
32308 the builtin name, CODE is the builtin code, and FTYPE is the function
32309 type of the builtin. */
32311 static void
32312 make_cpu_type_builtin (const char* name, int code,
32313 enum ix86_builtin_func_type ftype, bool is_const)
32315 tree decl;
32316 tree type;
32318 type = ix86_get_builtin_func_type (ftype);
32319 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32320 NULL, NULL_TREE);
32321 gcc_assert (decl != NULL_TREE);
32322 ix86_builtins[(int) code] = decl;
32323 TREE_READONLY (decl) = is_const;
32326 /* Make builtins to get CPU type and features supported. The created
32327 builtins are :
32329 __builtin_cpu_init (), to detect cpu type and features,
32330 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32331 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32334 static void
32335 ix86_init_platform_type_builtins (void)
32337 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32338 INT_FTYPE_VOID, false);
32339 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32340 INT_FTYPE_PCCHAR, true);
32341 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32342 INT_FTYPE_PCCHAR, true);
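/* Usage sketch (not part of the original source): the three builtins
   registered above are what user code calls for runtime CPU detection,
   e.g.

       int
       pick (void)
       {
         __builtin_cpu_init ();
         if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("avx2"))
           return 1;
         return 0;
       }

   Calling __builtin_cpu_init explicitly is only required in code that
   runs before the normal constructors (e.g. an IFUNC resolver);
   otherwise libgcc has already initialized __cpu_model.  */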
32345 /* Internal method for ix86_init_builtins. */
32347 static void
32348 ix86_init_builtins_va_builtins_abi (void)
32350 tree ms_va_ref, sysv_va_ref;
32351 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32352 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32353 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32354 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32356 if (!TARGET_64BIT)
32357 return;
32358 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32359 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32360 ms_va_ref = build_reference_type (ms_va_list_type_node);
32361 sysv_va_ref =
32362 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32364 fnvoid_va_end_ms =
32365 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32366 fnvoid_va_start_ms =
32367 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32368 fnvoid_va_end_sysv =
32369 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32370 fnvoid_va_start_sysv =
32371 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32372 NULL_TREE);
32373 fnvoid_va_copy_ms =
32374 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32375 NULL_TREE);
32376 fnvoid_va_copy_sysv =
32377 build_function_type_list (void_type_node, sysv_va_ref,
32378 sysv_va_ref, NULL_TREE);
32380 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32381 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32382 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32383 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32384 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32385 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32386 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32387 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32388 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32389 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32390 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32391 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32394 static void
32395 ix86_init_builtin_types (void)
32397 tree float80_type_node, const_string_type_node;
32399 /* The __float80 type. */
32400 float80_type_node = long_double_type_node;
32401 if (TYPE_MODE (float80_type_node) != XFmode)
32403 if (float64x_type_node != NULL_TREE
32404 && TYPE_MODE (float64x_type_node) == XFmode)
32405 float80_type_node = float64x_type_node;
32406 else
32408 /* The __float80 type. */
32409 float80_type_node = make_node (REAL_TYPE);
32411 TYPE_PRECISION (float80_type_node) = 80;
32412 layout_type (float80_type_node);
32415 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32417 /* The __float128 type. The node has already been created as
32418 _Float128, so we only need to register the __float128 name for
32419 it. */
32420 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32422 const_string_type_node
32423 = build_pointer_type (build_qualified_type
32424 (char_type_node, TYPE_QUAL_CONST));
32426 /* This macro is built by i386-builtin-types.awk. */
32427 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32430 static void
32431 ix86_init_builtins (void)
32433 tree ftype, decl;
32435 ix86_init_builtin_types ();
32437 /* Builtins to get CPU type and features. */
32438 ix86_init_platform_type_builtins ();
32440 /* TFmode support builtins. */
32441 def_builtin_const (0, "__builtin_infq",
32442 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32443 def_builtin_const (0, "__builtin_huge_valq",
32444 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32446 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
32447 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
32448 BUILT_IN_MD, "nanq", NULL_TREE);
32449 TREE_READONLY (decl) = 1;
32450 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
32452 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
32453 BUILT_IN_MD, "nansq", NULL_TREE);
32454 TREE_READONLY (decl) = 1;
32455 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
32457 /* We will expand them to normal calls if SSE isn't available, since
32458 they are used by libgcc. */
32459 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32460 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
32461 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32462 TREE_READONLY (decl) = 1;
32463 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
32465 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32466 decl = add_builtin_function ("__builtin_copysignq", ftype,
32467 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
32468 "__copysigntf3", NULL_TREE);
32469 TREE_READONLY (decl) = 1;
32470 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
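  /* A minimal sketch of the fallback mentioned above (it assumes libgcc
     provides __fabstf2 and __copysigntf3): without SSE,

	 __float128 x = __builtin_fabsq (y);

     is not expanded inline but emitted as a call to __fabstf2.  */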
32472 ix86_init_tm_builtins ();
32473 ix86_init_mmx_sse_builtins ();
32474 ix86_init_mpx_builtins ();
32476 if (TARGET_LP64)
32477 ix86_init_builtins_va_builtins_abi ();
32479 #ifdef SUBTARGET_INIT_BUILTINS
32480 SUBTARGET_INIT_BUILTINS;
32481 #endif
32484 /* Return the ix86 builtin for CODE. */
32486 static tree
32487 ix86_builtin_decl (unsigned code, bool)
32489 if (code >= IX86_BUILTIN_MAX)
32490 return error_mark_node;
32492 return ix86_builtins[code];
32495 /* Errors in the source file can cause expand_expr to return const0_rtx
32496 where we expect a vector. To avoid crashing, use one of the vector
32497 clear instructions. */
32498 static rtx
32499 safe_vector_operand (rtx x, machine_mode mode)
32501 if (x == const0_rtx)
32502 x = CONST0_RTX (mode);
32503 return x;
32506 /* Fix up modeless constants to fit the required mode. */
32507 static rtx
32508 fixup_modeless_constant (rtx x, machine_mode mode)
32510 if (GET_MODE (x) == VOIDmode)
32511 x = convert_to_mode (mode, x, 1);
32512 return x;
32515 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32517 static rtx
32518 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32520 rtx pat;
32521 tree arg0 = CALL_EXPR_ARG (exp, 0);
32522 tree arg1 = CALL_EXPR_ARG (exp, 1);
32523 rtx op0 = expand_normal (arg0);
32524 rtx op1 = expand_normal (arg1);
32525 machine_mode tmode = insn_data[icode].operand[0].mode;
32526 machine_mode mode0 = insn_data[icode].operand[1].mode;
32527 machine_mode mode1 = insn_data[icode].operand[2].mode;
32529 if (VECTOR_MODE_P (mode0))
32530 op0 = safe_vector_operand (op0, mode0);
32531 if (VECTOR_MODE_P (mode1))
32532 op1 = safe_vector_operand (op1, mode1);
32534 if (optimize || !target
32535 || GET_MODE (target) != tmode
32536 || !insn_data[icode].operand[0].predicate (target, tmode))
32537 target = gen_reg_rtx (tmode);
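  /* Some insn patterns want a TImode operand where the builtin prototype
     passes a 32-bit integer; widen the value through a V4SI load and
     reinterpret its low part as TImode.  */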
32539 if (GET_MODE (op1) == SImode && mode1 == TImode)
32541 rtx x = gen_reg_rtx (V4SImode);
32542 emit_insn (gen_sse2_loadd (x, op1));
32543 op1 = gen_lowpart (TImode, x);
32546 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32547 op0 = copy_to_mode_reg (mode0, op0);
32548 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32549 op1 = copy_to_mode_reg (mode1, op1);
32551 pat = GEN_FCN (icode) (target, op0, op1);
32552 if (! pat)
32553 return 0;
32555 emit_insn (pat);
32557 return target;
32560 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32562 static rtx
32563 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32564 enum ix86_builtin_func_type m_type,
32565 enum rtx_code sub_code)
32567 rtx pat;
32568 int i;
32569 int nargs;
32570 bool comparison_p = false;
32571 bool tf_p = false;
32572 bool last_arg_constant = false;
32573 int num_memory = 0;
32574 struct {
32575 rtx op;
32576 machine_mode mode;
32577 } args[4];
32579 machine_mode tmode = insn_data[icode].operand[0].mode;
32581 switch (m_type)
32583 case MULTI_ARG_4_DF2_DI_I:
32584 case MULTI_ARG_4_DF2_DI_I1:
32585 case MULTI_ARG_4_SF2_SI_I:
32586 case MULTI_ARG_4_SF2_SI_I1:
32587 nargs = 4;
32588 last_arg_constant = true;
32589 break;
32591 case MULTI_ARG_3_SF:
32592 case MULTI_ARG_3_DF:
32593 case MULTI_ARG_3_SF2:
32594 case MULTI_ARG_3_DF2:
32595 case MULTI_ARG_3_DI:
32596 case MULTI_ARG_3_SI:
32597 case MULTI_ARG_3_SI_DI:
32598 case MULTI_ARG_3_HI:
32599 case MULTI_ARG_3_HI_SI:
32600 case MULTI_ARG_3_QI:
32601 case MULTI_ARG_3_DI2:
32602 case MULTI_ARG_3_SI2:
32603 case MULTI_ARG_3_HI2:
32604 case MULTI_ARG_3_QI2:
32605 nargs = 3;
32606 break;
32608 case MULTI_ARG_2_SF:
32609 case MULTI_ARG_2_DF:
32610 case MULTI_ARG_2_DI:
32611 case MULTI_ARG_2_SI:
32612 case MULTI_ARG_2_HI:
32613 case MULTI_ARG_2_QI:
32614 nargs = 2;
32615 break;
32617 case MULTI_ARG_2_DI_IMM:
32618 case MULTI_ARG_2_SI_IMM:
32619 case MULTI_ARG_2_HI_IMM:
32620 case MULTI_ARG_2_QI_IMM:
32621 nargs = 2;
32622 last_arg_constant = true;
32623 break;
32625 case MULTI_ARG_1_SF:
32626 case MULTI_ARG_1_DF:
32627 case MULTI_ARG_1_SF2:
32628 case MULTI_ARG_1_DF2:
32629 case MULTI_ARG_1_DI:
32630 case MULTI_ARG_1_SI:
32631 case MULTI_ARG_1_HI:
32632 case MULTI_ARG_1_QI:
32633 case MULTI_ARG_1_SI_DI:
32634 case MULTI_ARG_1_HI_DI:
32635 case MULTI_ARG_1_HI_SI:
32636 case MULTI_ARG_1_QI_DI:
32637 case MULTI_ARG_1_QI_SI:
32638 case MULTI_ARG_1_QI_HI:
32639 nargs = 1;
32640 break;
32642 case MULTI_ARG_2_DI_CMP:
32643 case MULTI_ARG_2_SI_CMP:
32644 case MULTI_ARG_2_HI_CMP:
32645 case MULTI_ARG_2_QI_CMP:
32646 nargs = 2;
32647 comparison_p = true;
32648 break;
32650 case MULTI_ARG_2_SF_TF:
32651 case MULTI_ARG_2_DF_TF:
32652 case MULTI_ARG_2_DI_TF:
32653 case MULTI_ARG_2_SI_TF:
32654 case MULTI_ARG_2_HI_TF:
32655 case MULTI_ARG_2_QI_TF:
32656 nargs = 2;
32657 tf_p = true;
32658 break;
32660 default:
32661 gcc_unreachable ();
32664 if (optimize || !target
32665 || GET_MODE (target) != tmode
32666 || !insn_data[icode].operand[0].predicate (target, tmode))
32667 target = gen_reg_rtx (tmode);
32668 else if (memory_operand (target, tmode))
32669 num_memory++;
32671 gcc_assert (nargs <= 4);
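  /* For the comparison forms the expanded pattern carries an extra leading
     operand (the comparison rtx built further below), so argument modes are
     looked up at offset i + adjust + 1.  */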
32673 for (i = 0; i < nargs; i++)
32675 tree arg = CALL_EXPR_ARG (exp, i);
32676 rtx op = expand_normal (arg);
32677 int adjust = (comparison_p) ? 1 : 0;
32678 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32680 if (last_arg_constant && i == nargs - 1)
32682 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32684 enum insn_code new_icode = icode;
32685 switch (icode)
32687 case CODE_FOR_xop_vpermil2v2df3:
32688 case CODE_FOR_xop_vpermil2v4sf3:
32689 case CODE_FOR_xop_vpermil2v4df3:
32690 case CODE_FOR_xop_vpermil2v8sf3:
32691 error ("the last argument must be a 2-bit immediate");
32692 return gen_reg_rtx (tmode);
32693 case CODE_FOR_xop_rotlv2di3:
32694 new_icode = CODE_FOR_rotlv2di3;
32695 goto xop_rotl;
32696 case CODE_FOR_xop_rotlv4si3:
32697 new_icode = CODE_FOR_rotlv4si3;
32698 goto xop_rotl;
32699 case CODE_FOR_xop_rotlv8hi3:
32700 new_icode = CODE_FOR_rotlv8hi3;
32701 goto xop_rotl;
32702 case CODE_FOR_xop_rotlv16qi3:
32703 new_icode = CODE_FOR_rotlv16qi3;
32704 xop_rotl:
32705 if (CONST_INT_P (op))
32707 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
32708 op = GEN_INT (INTVAL (op) & mask);
32709 gcc_checking_assert
32710 (insn_data[icode].operand[i + 1].predicate (op, mode));
32712 else
32714 gcc_checking_assert
32715 (nargs == 2
32716 && insn_data[new_icode].operand[0].mode == tmode
32717 && insn_data[new_icode].operand[1].mode == tmode
32718 && insn_data[new_icode].operand[2].mode == mode
32719 && insn_data[new_icode].operand[0].predicate
32720 == insn_data[icode].operand[0].predicate
32721 && insn_data[new_icode].operand[1].predicate
32722 == insn_data[icode].operand[1].predicate);
32723 icode = new_icode;
32724 goto non_constant;
32726 break;
32727 default:
32728 gcc_unreachable ();
32732 else
32734 non_constant:
32735 if (VECTOR_MODE_P (mode))
32736 op = safe_vector_operand (op, mode);
32738 /* If we aren't optimizing, only allow one memory operand to be
32739 generated. */
32740 if (memory_operand (op, mode))
32741 num_memory++;
32743 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32745 if (optimize
32746 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32747 || num_memory > 1)
32748 op = force_reg (mode, op);
32751 args[i].op = op;
32752 args[i].mode = mode;
32755 switch (nargs)
32757 case 1:
32758 pat = GEN_FCN (icode) (target, args[0].op);
32759 break;
32761 case 2:
32762 if (tf_p)
32763 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32764 GEN_INT ((int)sub_code));
32765 else if (! comparison_p)
32766 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32767 else
32769 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32770 args[0].op,
32771 args[1].op);
32773 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32775 break;
32777 case 3:
32778 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32779 break;
32781 case 4:
32782 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32783 break;
32785 default:
32786 gcc_unreachable ();
32789 if (! pat)
32790 return 0;
32792 emit_insn (pat);
32793 return target;
32796 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32797 insns with vec_merge. */
32799 static rtx
32800 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32801 rtx target)
32803 rtx pat;
32804 tree arg0 = CALL_EXPR_ARG (exp, 0);
32805 rtx op1, op0 = expand_normal (arg0);
32806 machine_mode tmode = insn_data[icode].operand[0].mode;
32807 machine_mode mode0 = insn_data[icode].operand[1].mode;
32809 if (optimize || !target
32810 || GET_MODE (target) != tmode
32811 || !insn_data[icode].operand[0].predicate (target, tmode))
32812 target = gen_reg_rtx (tmode);
32814 if (VECTOR_MODE_P (mode0))
32815 op0 = safe_vector_operand (op0, mode0);
32817 if ((optimize && !register_operand (op0, mode0))
32818 || !insn_data[icode].operand[1].predicate (op0, mode0))
32819 op0 = copy_to_mode_reg (mode0, op0);
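  /* The underlying vec_merge pattern takes two vector inputs; for a unary
     builtin both come from the same source, so feed OP0 in twice, copying
     it to a fresh register if the second predicate rejects it.  */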
32821 op1 = op0;
32822 if (!insn_data[icode].operand[2].predicate (op1, mode0))
32823 op1 = copy_to_mode_reg (mode0, op1);
32825 pat = GEN_FCN (icode) (target, op0, op1);
32826 if (! pat)
32827 return 0;
32828 emit_insn (pat);
32829 return target;
32832 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
32834 static rtx
32835 ix86_expand_sse_compare (const struct builtin_description *d,
32836 tree exp, rtx target, bool swap)
32838 rtx pat;
32839 tree arg0 = CALL_EXPR_ARG (exp, 0);
32840 tree arg1 = CALL_EXPR_ARG (exp, 1);
32841 rtx op0 = expand_normal (arg0);
32842 rtx op1 = expand_normal (arg1);
32843 rtx op2;
32844 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32845 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32846 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32847 enum rtx_code comparison = d->comparison;
32849 if (VECTOR_MODE_P (mode0))
32850 op0 = safe_vector_operand (op0, mode0);
32851 if (VECTOR_MODE_P (mode1))
32852 op1 = safe_vector_operand (op1, mode1);
32854 /* Swap operands if we have a comparison that isn't available in
32855 hardware. */
32856 if (swap)
32857 std::swap (op0, op1);
32859 if (optimize || !target
32860 || GET_MODE (target) != tmode
32861 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32862 target = gen_reg_rtx (tmode);
32864 if ((optimize && !register_operand (op0, mode0))
32865 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
32866 op0 = copy_to_mode_reg (mode0, op0);
32867 if ((optimize && !register_operand (op1, mode1))
32868 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
32869 op1 = copy_to_mode_reg (mode1, op1);
32871 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
32872 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32873 if (! pat)
32874 return 0;
32875 emit_insn (pat);
32876 return target;
32879 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
32881 static rtx
32882 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
32883 rtx target)
32885 rtx pat;
32886 tree arg0 = CALL_EXPR_ARG (exp, 0);
32887 tree arg1 = CALL_EXPR_ARG (exp, 1);
32888 rtx op0 = expand_normal (arg0);
32889 rtx op1 = expand_normal (arg1);
32890 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
32891 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
32892 enum rtx_code comparison = d->comparison;
32894 if (VECTOR_MODE_P (mode0))
32895 op0 = safe_vector_operand (op0, mode0);
32896 if (VECTOR_MODE_P (mode1))
32897 op1 = safe_vector_operand (op1, mode1);
32899 /* Swap operands if we have a comparison that isn't available in
32900 hardware. */
32901 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
32902 std::swap (op0, op1);
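  /* Clear a full SImode pseudo and then set only its low byte from the
     flags comparison; reading the result back via SUBREG_REG yields a
     value that is already zero-extended to int.  */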
32904 target = gen_reg_rtx (SImode);
32905 emit_move_insn (target, const0_rtx);
32906 target = gen_rtx_SUBREG (QImode, target, 0);
32908 if ((optimize && !register_operand (op0, mode0))
32909 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32910 op0 = copy_to_mode_reg (mode0, op0);
32911 if ((optimize && !register_operand (op1, mode1))
32912 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32913 op1 = copy_to_mode_reg (mode1, op1);
32915 pat = GEN_FCN (d->icode) (op0, op1);
32916 if (! pat)
32917 return 0;
32918 emit_insn (pat);
32919 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32920 gen_rtx_fmt_ee (comparison, QImode,
32921 SET_DEST (pat),
32922 const0_rtx)));
32924 return SUBREG_REG (target);
32927 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
32929 static rtx
32930 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
32931 rtx target)
32933 rtx pat;
32934 tree arg0 = CALL_EXPR_ARG (exp, 0);
32935 rtx op1, op0 = expand_normal (arg0);
32936 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32937 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32939 if (optimize || target == 0
32940 || GET_MODE (target) != tmode
32941 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32942 target = gen_reg_rtx (tmode);
32944 if (VECTOR_MODE_P (mode0))
32945 op0 = safe_vector_operand (op0, mode0);
32947 if ((optimize && !register_operand (op0, mode0))
32948 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32949 op0 = copy_to_mode_reg (mode0, op0);
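  /* For the round builtins the builtin_description's comparison field is
     (re)used to carry the rounding-mode immediate rather than an rtx
     comparison code.  */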
32951 op1 = GEN_INT (d->comparison);
32953 pat = GEN_FCN (d->icode) (target, op0, op1);
32954 if (! pat)
32955 return 0;
32956 emit_insn (pat);
32957 return target;
32960 static rtx
32961 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
32962 tree exp, rtx target)
32964 rtx pat;
32965 tree arg0 = CALL_EXPR_ARG (exp, 0);
32966 tree arg1 = CALL_EXPR_ARG (exp, 1);
32967 rtx op0 = expand_normal (arg0);
32968 rtx op1 = expand_normal (arg1);
32969 rtx op2;
32970 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32971 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32972 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32974 if (optimize || target == 0
32975 || GET_MODE (target) != tmode
32976 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32977 target = gen_reg_rtx (tmode);
32979 op0 = safe_vector_operand (op0, mode0);
32980 op1 = safe_vector_operand (op1, mode1);
32982 if ((optimize && !register_operand (op0, mode0))
32983 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32984 op0 = copy_to_mode_reg (mode0, op0);
32985 if ((optimize && !register_operand (op1, mode1))
32986 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32987 op1 = copy_to_mode_reg (mode1, op1);
32989 op2 = GEN_INT (d->comparison);
32991 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32992 if (! pat)
32993 return 0;
32994 emit_insn (pat);
32995 return target;
32998 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
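  /* Illustrative mapping (defined in the intrinsic headers, not here):
     _mm_testz_si128 expands to __builtin_ia32_ptestz128, which reaches this
     function and yields an int that is either 0 or 1.  */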
33000 static rtx
33001 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33002 rtx target)
33004 rtx pat;
33005 tree arg0 = CALL_EXPR_ARG (exp, 0);
33006 tree arg1 = CALL_EXPR_ARG (exp, 1);
33007 rtx op0 = expand_normal (arg0);
33008 rtx op1 = expand_normal (arg1);
33009 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33010 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33011 enum rtx_code comparison = d->comparison;
33013 if (VECTOR_MODE_P (mode0))
33014 op0 = safe_vector_operand (op0, mode0);
33015 if (VECTOR_MODE_P (mode1))
33016 op1 = safe_vector_operand (op1, mode1);
33018 target = gen_reg_rtx (SImode);
33019 emit_move_insn (target, const0_rtx);
33020 target = gen_rtx_SUBREG (QImode, target, 0);
33022 if ((optimize && !register_operand (op0, mode0))
33023 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33024 op0 = copy_to_mode_reg (mode0, op0);
33025 if ((optimize && !register_operand (op1, mode1))
33026 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33027 op1 = copy_to_mode_reg (mode1, op1);
33029 pat = GEN_FCN (d->icode) (op0, op1);
33030 if (! pat)
33031 return 0;
33032 emit_insn (pat);
33033 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33034 gen_rtx_fmt_ee (comparison, QImode,
33035 SET_DEST (pat),
33036 const0_rtx)));
33038 return SUBREG_REG (target);
33041 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33043 static rtx
33044 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33045 tree exp, rtx target)
33047 rtx pat;
33048 tree arg0 = CALL_EXPR_ARG (exp, 0);
33049 tree arg1 = CALL_EXPR_ARG (exp, 1);
33050 tree arg2 = CALL_EXPR_ARG (exp, 2);
33051 tree arg3 = CALL_EXPR_ARG (exp, 3);
33052 tree arg4 = CALL_EXPR_ARG (exp, 4);
33053 rtx scratch0, scratch1;
33054 rtx op0 = expand_normal (arg0);
33055 rtx op1 = expand_normal (arg1);
33056 rtx op2 = expand_normal (arg2);
33057 rtx op3 = expand_normal (arg3);
33058 rtx op4 = expand_normal (arg4);
33059 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33061 tmode0 = insn_data[d->icode].operand[0].mode;
33062 tmode1 = insn_data[d->icode].operand[1].mode;
33063 modev2 = insn_data[d->icode].operand[2].mode;
33064 modei3 = insn_data[d->icode].operand[3].mode;
33065 modev4 = insn_data[d->icode].operand[4].mode;
33066 modei5 = insn_data[d->icode].operand[5].mode;
33067 modeimm = insn_data[d->icode].operand[6].mode;
33069 if (VECTOR_MODE_P (modev2))
33070 op0 = safe_vector_operand (op0, modev2);
33071 if (VECTOR_MODE_P (modev4))
33072 op2 = safe_vector_operand (op2, modev4);
33074 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33075 op0 = copy_to_mode_reg (modev2, op0);
33076 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33077 op1 = copy_to_mode_reg (modei3, op1);
33078 if ((optimize && !register_operand (op2, modev4))
33079 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33080 op2 = copy_to_mode_reg (modev4, op2);
33081 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33082 op3 = copy_to_mode_reg (modei5, op3);
33084 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33086 error ("the fifth argument must be an 8-bit immediate");
33087 return const0_rtx;
33090 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33092 if (optimize || !target
33093 || GET_MODE (target) != tmode0
33094 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33095 target = gen_reg_rtx (tmode0);
33097 scratch1 = gen_reg_rtx (tmode1);
33099 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33101 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33103 if (optimize || !target
33104 || GET_MODE (target) != tmode1
33105 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33106 target = gen_reg_rtx (tmode1);
33108 scratch0 = gen_reg_rtx (tmode0);
33110 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33112 else
33114 gcc_assert (d->flag);
33116 scratch0 = gen_reg_rtx (tmode0);
33117 scratch1 = gen_reg_rtx (tmode1);
33119 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33122 if (! pat)
33123 return 0;
33125 emit_insn (pat);
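  /* D->FLAG is nonzero for the flag-returning pcmpestr variants; it encodes
     the machine mode of the flags-register result to test, and the tested
     bit is materialized below as a zero-extended 0/1 value.  */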
33127 if (d->flag)
33129 target = gen_reg_rtx (SImode);
33130 emit_move_insn (target, const0_rtx);
33131 target = gen_rtx_SUBREG (QImode, target, 0);
33133 emit_insn
33134 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33135 gen_rtx_fmt_ee (EQ, QImode,
33136 gen_rtx_REG ((machine_mode) d->flag,
33137 FLAGS_REG),
33138 const0_rtx)));
33139 return SUBREG_REG (target);
33141 else
33142 return target;
33146 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33148 static rtx
33149 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33150 tree exp, rtx target)
33152 rtx pat;
33153 tree arg0 = CALL_EXPR_ARG (exp, 0);
33154 tree arg1 = CALL_EXPR_ARG (exp, 1);
33155 tree arg2 = CALL_EXPR_ARG (exp, 2);
33156 rtx scratch0, scratch1;
33157 rtx op0 = expand_normal (arg0);
33158 rtx op1 = expand_normal (arg1);
33159 rtx op2 = expand_normal (arg2);
33160 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33162 tmode0 = insn_data[d->icode].operand[0].mode;
33163 tmode1 = insn_data[d->icode].operand[1].mode;
33164 modev2 = insn_data[d->icode].operand[2].mode;
33165 modev3 = insn_data[d->icode].operand[3].mode;
33166 modeimm = insn_data[d->icode].operand[4].mode;
33168 if (VECTOR_MODE_P (modev2))
33169 op0 = safe_vector_operand (op0, modev2);
33170 if (VECTOR_MODE_P (modev3))
33171 op1 = safe_vector_operand (op1, modev3);
33173 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33174 op0 = copy_to_mode_reg (modev2, op0);
33175 if ((optimize && !register_operand (op1, modev3))
33176 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33177 op1 = copy_to_mode_reg (modev3, op1);
33179 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33181 error ("the third argument must be an 8-bit immediate");
33182 return const0_rtx;
33185 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33187 if (optimize || !target
33188 || GET_MODE (target) != tmode0
33189 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33190 target = gen_reg_rtx (tmode0);
33192 scratch1 = gen_reg_rtx (tmode1);
33194 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33196 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33198 if (optimize || !target
33199 || GET_MODE (target) != tmode1
33200 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33201 target = gen_reg_rtx (tmode1);
33203 scratch0 = gen_reg_rtx (tmode0);
33205 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33207 else
33209 gcc_assert (d->flag);
33211 scratch0 = gen_reg_rtx (tmode0);
33212 scratch1 = gen_reg_rtx (tmode1);
33214 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33217 if (! pat)
33218 return 0;
33220 emit_insn (pat);
33222 if (d->flag)
33224 target = gen_reg_rtx (SImode);
33225 emit_move_insn (target, const0_rtx);
33226 target = gen_rtx_SUBREG (QImode, target, 0);
33228 emit_insn
33229 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33230 gen_rtx_fmt_ee (EQ, QImode,
33231 gen_rtx_REG ((machine_mode) d->flag,
33232 FLAGS_REG),
33233 const0_rtx)));
33234 return SUBREG_REG (target);
33236 else
33237 return target;
33240 /* Subroutine of ix86_expand_builtin to take care of insns with
33241 a variable number of operands. */
33243 static rtx
33244 ix86_expand_args_builtin (const struct builtin_description *d,
33245 tree exp, rtx target)
33247 rtx pat, real_target;
33248 unsigned int i, nargs;
33249 unsigned int nargs_constant = 0;
33250 unsigned int mask_pos = 0;
33251 int num_memory = 0;
33252 struct
33254 rtx op;
33255 machine_mode mode;
33256 } args[6];
33257 bool second_arg_count = false;
33258 enum insn_code icode = d->icode;
33259 const struct insn_data_d *insn_p = &insn_data[icode];
33260 machine_mode tmode = insn_p->operand[0].mode;
33261 machine_mode rmode = VOIDmode;
33262 bool swap = false;
33263 enum rtx_code comparison = d->comparison;
33265 switch ((enum ix86_builtin_func_type) d->flag)
33267 case V2DF_FTYPE_V2DF_ROUND:
33268 case V4DF_FTYPE_V4DF_ROUND:
33269 case V8DF_FTYPE_V8DF_ROUND:
33270 case V4SF_FTYPE_V4SF_ROUND:
33271 case V8SF_FTYPE_V8SF_ROUND:
33272 case V16SF_FTYPE_V16SF_ROUND:
33273 case V4SI_FTYPE_V4SF_ROUND:
33274 case V8SI_FTYPE_V8SF_ROUND:
33275 case V16SI_FTYPE_V16SF_ROUND:
33276 return ix86_expand_sse_round (d, exp, target);
33277 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33278 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33279 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33280 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33281 case INT_FTYPE_V8SF_V8SF_PTEST:
33282 case INT_FTYPE_V4DI_V4DI_PTEST:
33283 case INT_FTYPE_V4DF_V4DF_PTEST:
33284 case INT_FTYPE_V4SF_V4SF_PTEST:
33285 case INT_FTYPE_V2DI_V2DI_PTEST:
33286 case INT_FTYPE_V2DF_V2DF_PTEST:
33287 return ix86_expand_sse_ptest (d, exp, target);
33288 case FLOAT128_FTYPE_FLOAT128:
33289 case FLOAT_FTYPE_FLOAT:
33290 case INT_FTYPE_INT:
33291 case UINT_FTYPE_UINT:
33292 case UINT16_FTYPE_UINT16:
33293 case UINT64_FTYPE_INT:
33294 case UINT64_FTYPE_UINT64:
33295 case INT64_FTYPE_INT64:
33296 case INT64_FTYPE_V4SF:
33297 case INT64_FTYPE_V2DF:
33298 case INT_FTYPE_V16QI:
33299 case INT_FTYPE_V8QI:
33300 case INT_FTYPE_V8SF:
33301 case INT_FTYPE_V4DF:
33302 case INT_FTYPE_V4SF:
33303 case INT_FTYPE_V2DF:
33304 case INT_FTYPE_V32QI:
33305 case V16QI_FTYPE_V16QI:
33306 case V8SI_FTYPE_V8SF:
33307 case V8SI_FTYPE_V4SI:
33308 case V8HI_FTYPE_V8HI:
33309 case V8HI_FTYPE_V16QI:
33310 case V8QI_FTYPE_V8QI:
33311 case V8SF_FTYPE_V8SF:
33312 case V8SF_FTYPE_V8SI:
33313 case V8SF_FTYPE_V4SF:
33314 case V8SF_FTYPE_V8HI:
33315 case V4SI_FTYPE_V4SI:
33316 case V4SI_FTYPE_V16QI:
33317 case V4SI_FTYPE_V4SF:
33318 case V4SI_FTYPE_V8SI:
33319 case V4SI_FTYPE_V8HI:
33320 case V4SI_FTYPE_V4DF:
33321 case V4SI_FTYPE_V2DF:
33322 case V4HI_FTYPE_V4HI:
33323 case V4DF_FTYPE_V4DF:
33324 case V4DF_FTYPE_V4SI:
33325 case V4DF_FTYPE_V4SF:
33326 case V4DF_FTYPE_V2DF:
33327 case V4SF_FTYPE_V4SF:
33328 case V4SF_FTYPE_V4SI:
33329 case V4SF_FTYPE_V8SF:
33330 case V4SF_FTYPE_V4DF:
33331 case V4SF_FTYPE_V8HI:
33332 case V4SF_FTYPE_V2DF:
33333 case V2DI_FTYPE_V2DI:
33334 case V2DI_FTYPE_V16QI:
33335 case V2DI_FTYPE_V8HI:
33336 case V2DI_FTYPE_V4SI:
33337 case V2DF_FTYPE_V2DF:
33338 case V2DF_FTYPE_V4SI:
33339 case V2DF_FTYPE_V4DF:
33340 case V2DF_FTYPE_V4SF:
33341 case V2DF_FTYPE_V2SI:
33342 case V2SI_FTYPE_V2SI:
33343 case V2SI_FTYPE_V4SF:
33344 case V2SI_FTYPE_V2SF:
33345 case V2SI_FTYPE_V2DF:
33346 case V2SF_FTYPE_V2SF:
33347 case V2SF_FTYPE_V2SI:
33348 case V32QI_FTYPE_V32QI:
33349 case V32QI_FTYPE_V16QI:
33350 case V16HI_FTYPE_V16HI:
33351 case V16HI_FTYPE_V8HI:
33352 case V8SI_FTYPE_V8SI:
33353 case V16HI_FTYPE_V16QI:
33354 case V8SI_FTYPE_V16QI:
33355 case V4DI_FTYPE_V16QI:
33356 case V8SI_FTYPE_V8HI:
33357 case V4DI_FTYPE_V8HI:
33358 case V4DI_FTYPE_V4SI:
33359 case V4DI_FTYPE_V2DI:
33360 case UQI_FTYPE_UQI:
33361 case UHI_FTYPE_UHI:
33362 case USI_FTYPE_USI:
33363 case USI_FTYPE_UQI:
33364 case USI_FTYPE_UHI:
33365 case UDI_FTYPE_UDI:
33366 case UHI_FTYPE_V16QI:
33367 case USI_FTYPE_V32QI:
33368 case UDI_FTYPE_V64QI:
33369 case V16QI_FTYPE_UHI:
33370 case V32QI_FTYPE_USI:
33371 case V64QI_FTYPE_UDI:
33372 case V8HI_FTYPE_UQI:
33373 case V16HI_FTYPE_UHI:
33374 case V32HI_FTYPE_USI:
33375 case V4SI_FTYPE_UQI:
33376 case V8SI_FTYPE_UQI:
33377 case V4SI_FTYPE_UHI:
33378 case V8SI_FTYPE_UHI:
33379 case UQI_FTYPE_V8HI:
33380 case UHI_FTYPE_V16HI:
33381 case USI_FTYPE_V32HI:
33382 case UQI_FTYPE_V4SI:
33383 case UQI_FTYPE_V8SI:
33384 case UHI_FTYPE_V16SI:
33385 case UQI_FTYPE_V2DI:
33386 case UQI_FTYPE_V4DI:
33387 case UQI_FTYPE_V8DI:
33388 case V16SI_FTYPE_UHI:
33389 case V2DI_FTYPE_UQI:
33390 case V4DI_FTYPE_UQI:
33391 case V16SI_FTYPE_INT:
33392 case V16SF_FTYPE_V8SF:
33393 case V16SI_FTYPE_V8SI:
33394 case V16SF_FTYPE_V4SF:
33395 case V16SI_FTYPE_V4SI:
33396 case V16SI_FTYPE_V16SF:
33397 case V16SI_FTYPE_V16SI:
33398 case V16SF_FTYPE_V16SF:
33399 case V8DI_FTYPE_UQI:
33400 case V8DI_FTYPE_V8DI:
33401 case V8DF_FTYPE_V4DF:
33402 case V8DF_FTYPE_V2DF:
33403 case V8DF_FTYPE_V8DF:
33404 nargs = 1;
33405 break;
33406 case V4SF_FTYPE_V4SF_VEC_MERGE:
33407 case V2DF_FTYPE_V2DF_VEC_MERGE:
33408 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33409 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33410 case V16QI_FTYPE_V16QI_V16QI:
33411 case V16QI_FTYPE_V8HI_V8HI:
33412 case V16SF_FTYPE_V16SF_V16SF:
33413 case V8QI_FTYPE_V8QI_V8QI:
33414 case V8QI_FTYPE_V4HI_V4HI:
33415 case V8HI_FTYPE_V8HI_V8HI:
33416 case V8HI_FTYPE_V16QI_V16QI:
33417 case V8HI_FTYPE_V4SI_V4SI:
33418 case V8SF_FTYPE_V8SF_V8SF:
33419 case V8SF_FTYPE_V8SF_V8SI:
33420 case V8DF_FTYPE_V8DF_V8DF:
33421 case V4SI_FTYPE_V4SI_V4SI:
33422 case V4SI_FTYPE_V8HI_V8HI:
33423 case V4SI_FTYPE_V2DF_V2DF:
33424 case V4HI_FTYPE_V4HI_V4HI:
33425 case V4HI_FTYPE_V8QI_V8QI:
33426 case V4HI_FTYPE_V2SI_V2SI:
33427 case V4DF_FTYPE_V4DF_V4DF:
33428 case V4DF_FTYPE_V4DF_V4DI:
33429 case V4SF_FTYPE_V4SF_V4SF:
33430 case V4SF_FTYPE_V4SF_V4SI:
33431 case V4SF_FTYPE_V4SF_V2SI:
33432 case V4SF_FTYPE_V4SF_V2DF:
33433 case V4SF_FTYPE_V4SF_UINT:
33434 case V4SF_FTYPE_V4SF_DI:
33435 case V4SF_FTYPE_V4SF_SI:
33436 case V2DI_FTYPE_V2DI_V2DI:
33437 case V2DI_FTYPE_V16QI_V16QI:
33438 case V2DI_FTYPE_V4SI_V4SI:
33439 case V2DI_FTYPE_V2DI_V16QI:
33440 case V2SI_FTYPE_V2SI_V2SI:
33441 case V2SI_FTYPE_V4HI_V4HI:
33442 case V2SI_FTYPE_V2SF_V2SF:
33443 case V2DF_FTYPE_V2DF_V2DF:
33444 case V2DF_FTYPE_V2DF_V4SF:
33445 case V2DF_FTYPE_V2DF_V2DI:
33446 case V2DF_FTYPE_V2DF_DI:
33447 case V2DF_FTYPE_V2DF_SI:
33448 case V2DF_FTYPE_V2DF_UINT:
33449 case V2SF_FTYPE_V2SF_V2SF:
33450 case V1DI_FTYPE_V1DI_V1DI:
33451 case V1DI_FTYPE_V8QI_V8QI:
33452 case V1DI_FTYPE_V2SI_V2SI:
33453 case V32QI_FTYPE_V16HI_V16HI:
33454 case V16HI_FTYPE_V8SI_V8SI:
33455 case V32QI_FTYPE_V32QI_V32QI:
33456 case V16HI_FTYPE_V32QI_V32QI:
33457 case V16HI_FTYPE_V16HI_V16HI:
33458 case V8SI_FTYPE_V4DF_V4DF:
33459 case V8SI_FTYPE_V8SI_V8SI:
33460 case V8SI_FTYPE_V16HI_V16HI:
33461 case V4DI_FTYPE_V4DI_V4DI:
33462 case V4DI_FTYPE_V8SI_V8SI:
33463 case V8DI_FTYPE_V64QI_V64QI:
33464 if (comparison == UNKNOWN)
33465 return ix86_expand_binop_builtin (icode, exp, target);
33466 nargs = 2;
33467 break;
33468 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33469 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33470 gcc_assert (comparison != UNKNOWN);
33471 nargs = 2;
33472 swap = true;
33473 break;
33474 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33475 case V16HI_FTYPE_V16HI_SI_COUNT:
33476 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33477 case V8SI_FTYPE_V8SI_SI_COUNT:
33478 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33479 case V4DI_FTYPE_V4DI_INT_COUNT:
33480 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33481 case V8HI_FTYPE_V8HI_SI_COUNT:
33482 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33483 case V4SI_FTYPE_V4SI_SI_COUNT:
33484 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33485 case V4HI_FTYPE_V4HI_SI_COUNT:
33486 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33487 case V2DI_FTYPE_V2DI_SI_COUNT:
33488 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33489 case V2SI_FTYPE_V2SI_SI_COUNT:
33490 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33491 case V1DI_FTYPE_V1DI_SI_COUNT:
33492 nargs = 2;
33493 second_arg_count = true;
33494 break;
33495 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
33496 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
33497 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
33498 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
33499 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
33500 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
33501 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
33502 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
33503 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
33504 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
33505 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
33506 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
33507 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
33508 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
33509 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
33510 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
33511 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
33512 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
33513 nargs = 4;
33514 second_arg_count = true;
33515 break;
33516 case UINT64_FTYPE_UINT64_UINT64:
33517 case UINT_FTYPE_UINT_UINT:
33518 case UINT_FTYPE_UINT_USHORT:
33519 case UINT_FTYPE_UINT_UCHAR:
33520 case UINT16_FTYPE_UINT16_INT:
33521 case UINT8_FTYPE_UINT8_INT:
33522 case UQI_FTYPE_UQI_UQI:
33523 case UHI_FTYPE_UHI_UHI:
33524 case USI_FTYPE_USI_USI:
33525 case UDI_FTYPE_UDI_UDI:
33526 case V16SI_FTYPE_V8DF_V8DF:
33527 nargs = 2;
33528 break;
33529 case V2DI_FTYPE_V2DI_INT_CONVERT:
33530 nargs = 2;
33531 rmode = V1TImode;
33532 nargs_constant = 1;
33533 break;
33534 case V4DI_FTYPE_V4DI_INT_CONVERT:
33535 nargs = 2;
33536 rmode = V2TImode;
33537 nargs_constant = 1;
33538 break;
33539 case V8DI_FTYPE_V8DI_INT_CONVERT:
33540 nargs = 2;
33541 rmode = V4TImode;
33542 nargs_constant = 1;
33543 break;
33544 case V8HI_FTYPE_V8HI_INT:
33545 case V8HI_FTYPE_V8SF_INT:
33546 case V16HI_FTYPE_V16SF_INT:
33547 case V8HI_FTYPE_V4SF_INT:
33548 case V8SF_FTYPE_V8SF_INT:
33549 case V4SF_FTYPE_V16SF_INT:
33550 case V16SF_FTYPE_V16SF_INT:
33551 case V4SI_FTYPE_V4SI_INT:
33552 case V4SI_FTYPE_V8SI_INT:
33553 case V4HI_FTYPE_V4HI_INT:
33554 case V4DF_FTYPE_V4DF_INT:
33555 case V4DF_FTYPE_V8DF_INT:
33556 case V4SF_FTYPE_V4SF_INT:
33557 case V4SF_FTYPE_V8SF_INT:
33558 case V2DI_FTYPE_V2DI_INT:
33559 case V2DF_FTYPE_V2DF_INT:
33560 case V2DF_FTYPE_V4DF_INT:
33561 case V16HI_FTYPE_V16HI_INT:
33562 case V8SI_FTYPE_V8SI_INT:
33563 case V16SI_FTYPE_V16SI_INT:
33564 case V4SI_FTYPE_V16SI_INT:
33565 case V4DI_FTYPE_V4DI_INT:
33566 case V2DI_FTYPE_V4DI_INT:
33567 case V4DI_FTYPE_V8DI_INT:
33568 case QI_FTYPE_V4SF_INT:
33569 case QI_FTYPE_V2DF_INT:
33570 case UQI_FTYPE_UQI_UQI_CONST:
33571 case UHI_FTYPE_UHI_UQI:
33572 case USI_FTYPE_USI_UQI:
33573 case UDI_FTYPE_UDI_UQI:
33574 nargs = 2;
33575 nargs_constant = 1;
33576 break;
33577 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33578 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33579 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33580 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33581 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33582 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33583 case UHI_FTYPE_V16SI_V16SI_UHI:
33584 case UQI_FTYPE_V8DI_V8DI_UQI:
33585 case V16HI_FTYPE_V16SI_V16HI_UHI:
33586 case V16QI_FTYPE_V16SI_V16QI_UHI:
33587 case V16QI_FTYPE_V8DI_V16QI_UQI:
33588 case V16SF_FTYPE_V16SF_V16SF_UHI:
33589 case V16SF_FTYPE_V4SF_V16SF_UHI:
33590 case V16SI_FTYPE_SI_V16SI_UHI:
33591 case V16SI_FTYPE_V16HI_V16SI_UHI:
33592 case V16SI_FTYPE_V16QI_V16SI_UHI:
33593 case V8SF_FTYPE_V4SF_V8SF_UQI:
33594 case V4DF_FTYPE_V2DF_V4DF_UQI:
33595 case V8SI_FTYPE_V4SI_V8SI_UQI:
33596 case V8SI_FTYPE_SI_V8SI_UQI:
33597 case V4SI_FTYPE_V4SI_V4SI_UQI:
33598 case V4SI_FTYPE_SI_V4SI_UQI:
33599 case V4DI_FTYPE_V2DI_V4DI_UQI:
33600 case V4DI_FTYPE_DI_V4DI_UQI:
33601 case V2DI_FTYPE_V2DI_V2DI_UQI:
33602 case V2DI_FTYPE_DI_V2DI_UQI:
33603 case V64QI_FTYPE_V64QI_V64QI_UDI:
33604 case V64QI_FTYPE_V16QI_V64QI_UDI:
33605 case V64QI_FTYPE_QI_V64QI_UDI:
33606 case V32QI_FTYPE_V32QI_V32QI_USI:
33607 case V32QI_FTYPE_V16QI_V32QI_USI:
33608 case V32QI_FTYPE_QI_V32QI_USI:
33609 case V16QI_FTYPE_V16QI_V16QI_UHI:
33610 case V16QI_FTYPE_QI_V16QI_UHI:
33611 case V32HI_FTYPE_V8HI_V32HI_USI:
33612 case V32HI_FTYPE_HI_V32HI_USI:
33613 case V16HI_FTYPE_V8HI_V16HI_UHI:
33614 case V16HI_FTYPE_HI_V16HI_UHI:
33615 case V8HI_FTYPE_V8HI_V8HI_UQI:
33616 case V8HI_FTYPE_HI_V8HI_UQI:
33617 case V8SF_FTYPE_V8HI_V8SF_UQI:
33618 case V4SF_FTYPE_V8HI_V4SF_UQI:
33619 case V8SI_FTYPE_V8SF_V8SI_UQI:
33620 case V4SI_FTYPE_V4SF_V4SI_UQI:
33621 case V4DI_FTYPE_V4SF_V4DI_UQI:
33622 case V2DI_FTYPE_V4SF_V2DI_UQI:
33623 case V4SF_FTYPE_V4DI_V4SF_UQI:
33624 case V4SF_FTYPE_V2DI_V4SF_UQI:
33625 case V4DF_FTYPE_V4DI_V4DF_UQI:
33626 case V2DF_FTYPE_V2DI_V2DF_UQI:
33627 case V16QI_FTYPE_V8HI_V16QI_UQI:
33628 case V16QI_FTYPE_V16HI_V16QI_UHI:
33629 case V16QI_FTYPE_V4SI_V16QI_UQI:
33630 case V16QI_FTYPE_V8SI_V16QI_UQI:
33631 case V8HI_FTYPE_V4SI_V8HI_UQI:
33632 case V8HI_FTYPE_V8SI_V8HI_UQI:
33633 case V16QI_FTYPE_V2DI_V16QI_UQI:
33634 case V16QI_FTYPE_V4DI_V16QI_UQI:
33635 case V8HI_FTYPE_V2DI_V8HI_UQI:
33636 case V8HI_FTYPE_V4DI_V8HI_UQI:
33637 case V4SI_FTYPE_V2DI_V4SI_UQI:
33638 case V4SI_FTYPE_V4DI_V4SI_UQI:
33639 case V32QI_FTYPE_V32HI_V32QI_USI:
33640 case UHI_FTYPE_V16QI_V16QI_UHI:
33641 case USI_FTYPE_V32QI_V32QI_USI:
33642 case UDI_FTYPE_V64QI_V64QI_UDI:
33643 case UQI_FTYPE_V8HI_V8HI_UQI:
33644 case UHI_FTYPE_V16HI_V16HI_UHI:
33645 case USI_FTYPE_V32HI_V32HI_USI:
33646 case UQI_FTYPE_V4SI_V4SI_UQI:
33647 case UQI_FTYPE_V8SI_V8SI_UQI:
33648 case UQI_FTYPE_V2DI_V2DI_UQI:
33649 case UQI_FTYPE_V4DI_V4DI_UQI:
33650 case V4SF_FTYPE_V2DF_V4SF_UQI:
33651 case V4SF_FTYPE_V4DF_V4SF_UQI:
33652 case V16SI_FTYPE_V16SI_V16SI_UHI:
33653 case V16SI_FTYPE_V4SI_V16SI_UHI:
33654 case V2DI_FTYPE_V4SI_V2DI_UQI:
33655 case V2DI_FTYPE_V8HI_V2DI_UQI:
33656 case V2DI_FTYPE_V16QI_V2DI_UQI:
33657 case V4DI_FTYPE_V4DI_V4DI_UQI:
33658 case V4DI_FTYPE_V4SI_V4DI_UQI:
33659 case V4DI_FTYPE_V8HI_V4DI_UQI:
33660 case V4DI_FTYPE_V16QI_V4DI_UQI:
33661 case V4DI_FTYPE_V4DF_V4DI_UQI:
33662 case V2DI_FTYPE_V2DF_V2DI_UQI:
33663 case V4SI_FTYPE_V4DF_V4SI_UQI:
33664 case V4SI_FTYPE_V2DF_V4SI_UQI:
33665 case V4SI_FTYPE_V8HI_V4SI_UQI:
33666 case V4SI_FTYPE_V16QI_V4SI_UQI:
33667 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33668 case V8DF_FTYPE_V2DF_V8DF_UQI:
33669 case V8DF_FTYPE_V4DF_V8DF_UQI:
33670 case V8DF_FTYPE_V8DF_V8DF_UQI:
33671 case V8SF_FTYPE_V8SF_V8SF_UQI:
33672 case V8SF_FTYPE_V8SI_V8SF_UQI:
33673 case V4DF_FTYPE_V4DF_V4DF_UQI:
33674 case V4SF_FTYPE_V4SF_V4SF_UQI:
33675 case V2DF_FTYPE_V2DF_V2DF_UQI:
33676 case V2DF_FTYPE_V4SF_V2DF_UQI:
33677 case V2DF_FTYPE_V4SI_V2DF_UQI:
33678 case V4SF_FTYPE_V4SI_V4SF_UQI:
33679 case V4DF_FTYPE_V4SF_V4DF_UQI:
33680 case V4DF_FTYPE_V4SI_V4DF_UQI:
33681 case V8SI_FTYPE_V8SI_V8SI_UQI:
33682 case V8SI_FTYPE_V8HI_V8SI_UQI:
33683 case V8SI_FTYPE_V16QI_V8SI_UQI:
33684 case V8DF_FTYPE_V8SI_V8DF_UQI:
33685 case V8DI_FTYPE_DI_V8DI_UQI:
33686 case V16SF_FTYPE_V8SF_V16SF_UHI:
33687 case V16SI_FTYPE_V8SI_V16SI_UHI:
33688 case V16HI_FTYPE_V16HI_V16HI_UHI:
33689 case V8HI_FTYPE_V16QI_V8HI_UQI:
33690 case V16HI_FTYPE_V16QI_V16HI_UHI:
33691 case V32HI_FTYPE_V32HI_V32HI_USI:
33692 case V32HI_FTYPE_V32QI_V32HI_USI:
33693 case V8DI_FTYPE_V16QI_V8DI_UQI:
33694 case V8DI_FTYPE_V2DI_V8DI_UQI:
33695 case V8DI_FTYPE_V4DI_V8DI_UQI:
33696 case V8DI_FTYPE_V8DI_V8DI_UQI:
33697 case V8DI_FTYPE_V8HI_V8DI_UQI:
33698 case V8DI_FTYPE_V8SI_V8DI_UQI:
33699 case V8HI_FTYPE_V8DI_V8HI_UQI:
33700 case V8SI_FTYPE_V8DI_V8SI_UQI:
33701 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33702 nargs = 3;
33703 break;
33704 case V32QI_FTYPE_V32QI_V32QI_INT:
33705 case V16HI_FTYPE_V16HI_V16HI_INT:
33706 case V16QI_FTYPE_V16QI_V16QI_INT:
33707 case V4DI_FTYPE_V4DI_V4DI_INT:
33708 case V8HI_FTYPE_V8HI_V8HI_INT:
33709 case V8SI_FTYPE_V8SI_V8SI_INT:
33710 case V8SI_FTYPE_V8SI_V4SI_INT:
33711 case V8SF_FTYPE_V8SF_V8SF_INT:
33712 case V8SF_FTYPE_V8SF_V4SF_INT:
33713 case V4SI_FTYPE_V4SI_V4SI_INT:
33714 case V4DF_FTYPE_V4DF_V4DF_INT:
33715 case V16SF_FTYPE_V16SF_V16SF_INT:
33716 case V16SF_FTYPE_V16SF_V4SF_INT:
33717 case V16SI_FTYPE_V16SI_V4SI_INT:
33718 case V4DF_FTYPE_V4DF_V2DF_INT:
33719 case V4SF_FTYPE_V4SF_V4SF_INT:
33720 case V2DI_FTYPE_V2DI_V2DI_INT:
33721 case V4DI_FTYPE_V4DI_V2DI_INT:
33722 case V2DF_FTYPE_V2DF_V2DF_INT:
33723 case UQI_FTYPE_V8DI_V8UDI_INT:
33724 case UQI_FTYPE_V8DF_V8DF_INT:
33725 case UQI_FTYPE_V2DF_V2DF_INT:
33726 case UQI_FTYPE_V4SF_V4SF_INT:
33727 case UHI_FTYPE_V16SI_V16SI_INT:
33728 case UHI_FTYPE_V16SF_V16SF_INT:
33729 case V64QI_FTYPE_V64QI_V64QI_INT:
33730 nargs = 3;
33731 nargs_constant = 1;
33732 break;
33733 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33734 nargs = 3;
33735 rmode = V4DImode;
33736 nargs_constant = 1;
33737 break;
33738 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33739 nargs = 3;
33740 rmode = V2DImode;
33741 nargs_constant = 1;
33742 break;
33743 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33744 nargs = 3;
33745 rmode = DImode;
33746 nargs_constant = 1;
33747 break;
33748 case V2DI_FTYPE_V2DI_UINT_UINT:
33749 nargs = 3;
33750 nargs_constant = 2;
33751 break;
33752 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
33753 nargs = 3;
33754 rmode = V8DImode;
33755 nargs_constant = 1;
33756 break;
33757 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
33758 nargs = 5;
33759 rmode = V8DImode;
33760 mask_pos = 2;
33761 nargs_constant = 1;
33762 break;
33763 case QI_FTYPE_V8DF_INT_UQI:
33764 case QI_FTYPE_V4DF_INT_UQI:
33765 case QI_FTYPE_V2DF_INT_UQI:
33766 case HI_FTYPE_V16SF_INT_UHI:
33767 case QI_FTYPE_V8SF_INT_UQI:
33768 case QI_FTYPE_V4SF_INT_UQI:
33769 nargs = 3;
33770 mask_pos = 1;
33771 nargs_constant = 1;
33772 break;
33773 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
33774 nargs = 5;
33775 rmode = V4DImode;
33776 mask_pos = 2;
33777 nargs_constant = 1;
33778 break;
33779 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
33780 nargs = 5;
33781 rmode = V2DImode;
33782 mask_pos = 2;
33783 nargs_constant = 1;
33784 break;
33785 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
33786 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
33787 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
33788 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
33789 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
33790 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
33791 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
33792 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
33793 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
33794 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
33795 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
33796 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
33797 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
33798 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
33799 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
33800 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
33801 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
33802 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
33803 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
33804 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
33805 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
33806 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
33807 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
33808 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
33809 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
33810 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
33811 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
33812 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
33813 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
33814 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
33815 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
33816 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
33817 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
33818 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
33819 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
33820 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
33821 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
33822 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
33823 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
33824 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
33825 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
33826 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
33827 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
33828 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
33829 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
33830 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
33831 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
33832 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
33833 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
33834 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
33835 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
33836 nargs = 4;
33837 break;
33838 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33839 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33840 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33841 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33842 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33843 nargs = 4;
33844 nargs_constant = 1;
33845 break;
33846 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
33847 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
33848 case QI_FTYPE_V4DF_V4DF_INT_UQI:
33849 case QI_FTYPE_V8SF_V8SF_INT_UQI:
33850 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
33851 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
33852 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
33853 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
33854 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
33855 case USI_FTYPE_V32QI_V32QI_INT_USI:
33856 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
33857 case USI_FTYPE_V32HI_V32HI_INT_USI:
33858 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
33859 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
33860 nargs = 4;
33861 mask_pos = 1;
33862 nargs_constant = 1;
33863 break;
33864 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33865 nargs = 4;
33866 nargs_constant = 2;
33867 break;
33868 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33869 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33870 nargs = 4;
33871 break;
33872 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
33873 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
33874 mask_pos = 1;
33875 nargs = 4;
33876 nargs_constant = 1;
33877 break;
33878 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
33879 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
33880 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
33881 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
33882 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
33883 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
33884 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
33885 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
33886 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
33887 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
33888 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
33889 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
33890 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
33891 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
33892 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
33893 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
33894 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
33895 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
33896 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
33897 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
33898 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
33899 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
33900 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
33901 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
33902 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
33903 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
33904 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
33905 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
33906 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
33907 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
33908 nargs = 4;
33909 mask_pos = 2;
33910 nargs_constant = 1;
33911 break;
33912 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
33913 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
33914 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
33915 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
33916 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
33917 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
33918 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
33919 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
33920 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
33921 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
33922 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
33923 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
33924 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
33925 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
33926 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
33927 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
33928 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
33929 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
33930 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
33931 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
33932 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
33933 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
33934 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
33935 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
33936 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
33937 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
33938 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
33939 nargs = 5;
33940 mask_pos = 2;
33941 nargs_constant = 1;
33942 break;
33943 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
33944 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
33945 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
33946 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
33947 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
33948 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
33949 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
33950 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
33951 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
33952 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
33953 nargs = 5;
33954 mask_pos = 1;
33955 nargs_constant = 1;
33956 break;
33957 case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
33958 case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
33959 case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
33960 nargs = 5;
33961 mask_pos = 1;
33962 nargs_constant = 2;
33963 break;
33965 default:
33966 gcc_unreachable ();
33969 gcc_assert (nargs <= ARRAY_SIZE (args));
33971 if (comparison != UNKNOWN)
33973 gcc_assert (nargs == 2);
33974 return ix86_expand_sse_compare (d, exp, target, swap);
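  /* When RMODE is set and differs from the insn's result mode TMODE, emit
     the insn into a fresh TMODE register (REAL_TARGET) and hand back its
     RMODE lowpart as TARGET.  */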
33977 if (rmode == VOIDmode || rmode == tmode)
33979 if (optimize
33980 || target == 0
33981 || GET_MODE (target) != tmode
33982 || !insn_p->operand[0].predicate (target, tmode))
33983 target = gen_reg_rtx (tmode);
33984 else if (memory_operand (target, tmode))
33985 num_memory++;
33986 real_target = target;
33988 else
33990 real_target = gen_reg_rtx (tmode);
33991 target = lowpart_subreg (rmode, real_target, tmode);
33994 for (i = 0; i < nargs; i++)
33996 tree arg = CALL_EXPR_ARG (exp, i);
33997 rtx op = expand_normal (arg);
33998 machine_mode mode = insn_p->operand[i + 1].mode;
33999 bool match = insn_p->operand[i + 1].predicate (op, mode);
34001 if (second_arg_count && i == 1)
34003 /* SIMD shift insns take either an 8-bit immediate or a
34004 register as the count, but the builtin functions take an
34005 int. If the count doesn't match the predicate, put it in
34006 a register. The instructions use a 64-bit count; if op is
34007 only 32-bit, zero-extend it, since negative shift counts
34008 are undefined behavior and zero-extension is more
34009 efficient. */
34010 if (!match)
34012 if (SCALAR_INT_MODE_P (GET_MODE (op)))
34013 op = convert_modes (mode, GET_MODE (op), op, 1);
34014 else
34015 op = lowpart_subreg (mode, op, GET_MODE (op));
34016 if (!insn_p->operand[i + 1].predicate (op, mode))
34017 op = copy_to_reg (op);
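  /* Immediate (constant) arguments are expected either at the very end of
     the argument list, or just ahead of the trailing mask operands when
     MASK_POS is nonzero; everything else goes through the generic operand
     legitimization further below.  */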
34020 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34021 (!mask_pos && (nargs - i) <= nargs_constant))
34023 if (!match)
34024 switch (icode)
34026 case CODE_FOR_avx_vinsertf128v4di:
34027 case CODE_FOR_avx_vextractf128v4di:
34028 error ("the last argument must be a 1-bit immediate");
34029 return const0_rtx;
34031 case CODE_FOR_avx512f_cmpv8di3_mask:
34032 case CODE_FOR_avx512f_cmpv16si3_mask:
34033 case CODE_FOR_avx512f_ucmpv8di3_mask:
34034 case CODE_FOR_avx512f_ucmpv16si3_mask:
34035 case CODE_FOR_avx512vl_cmpv4di3_mask:
34036 case CODE_FOR_avx512vl_cmpv8si3_mask:
34037 case CODE_FOR_avx512vl_ucmpv4di3_mask:
34038 case CODE_FOR_avx512vl_ucmpv8si3_mask:
34039 case CODE_FOR_avx512vl_cmpv2di3_mask:
34040 case CODE_FOR_avx512vl_cmpv4si3_mask:
34041 case CODE_FOR_avx512vl_ucmpv2di3_mask:
34042 case CODE_FOR_avx512vl_ucmpv4si3_mask:
34043 error ("the last argument must be a 3-bit immediate");
34044 return const0_rtx;
34046 case CODE_FOR_sse4_1_roundsd:
34047 case CODE_FOR_sse4_1_roundss:
34049 case CODE_FOR_sse4_1_roundpd:
34050 case CODE_FOR_sse4_1_roundps:
34051 case CODE_FOR_avx_roundpd256:
34052 case CODE_FOR_avx_roundps256:
34054 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34055 case CODE_FOR_sse4_1_roundps_sfix:
34056 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34057 case CODE_FOR_avx_roundps_sfix256:
34059 case CODE_FOR_sse4_1_blendps:
34060 case CODE_FOR_avx_blendpd256:
34061 case CODE_FOR_avx_vpermilv4df:
34062 case CODE_FOR_avx_vpermilv4df_mask:
34063 case CODE_FOR_avx512f_getmantv8df_mask:
34064 case CODE_FOR_avx512f_getmantv16sf_mask:
34065 case CODE_FOR_avx512vl_getmantv8sf_mask:
34066 case CODE_FOR_avx512vl_getmantv4df_mask:
34067 case CODE_FOR_avx512vl_getmantv4sf_mask:
34068 case CODE_FOR_avx512vl_getmantv2df_mask:
34069 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34070 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34071 case CODE_FOR_avx512dq_rangepv4df_mask:
34072 case CODE_FOR_avx512dq_rangepv8sf_mask:
34073 case CODE_FOR_avx512dq_rangepv2df_mask:
34074 case CODE_FOR_avx512dq_rangepv4sf_mask:
34075 case CODE_FOR_avx_shufpd256_mask:
34076 error ("the last argument must be a 4-bit immediate");
34077 return const0_rtx;
34079 case CODE_FOR_sha1rnds4:
34080 case CODE_FOR_sse4_1_blendpd:
34081 case CODE_FOR_avx_vpermilv2df:
34082 case CODE_FOR_avx_vpermilv2df_mask:
34083 case CODE_FOR_xop_vpermil2v2df3:
34084 case CODE_FOR_xop_vpermil2v4sf3:
34085 case CODE_FOR_xop_vpermil2v4df3:
34086 case CODE_FOR_xop_vpermil2v8sf3:
34087 case CODE_FOR_avx512f_vinsertf32x4_mask:
34088 case CODE_FOR_avx512f_vinserti32x4_mask:
34089 case CODE_FOR_avx512f_vextractf32x4_mask:
34090 case CODE_FOR_avx512f_vextracti32x4_mask:
34091 case CODE_FOR_sse2_shufpd:
34092 case CODE_FOR_sse2_shufpd_mask:
34093 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34094 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34095 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34096 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34097 error ("the last argument must be a 2-bit immediate");
34098 return const0_rtx;
34100 case CODE_FOR_avx_vextractf128v4df:
34101 case CODE_FOR_avx_vextractf128v8sf:
34102 case CODE_FOR_avx_vextractf128v8si:
34103 case CODE_FOR_avx_vinsertf128v4df:
34104 case CODE_FOR_avx_vinsertf128v8sf:
34105 case CODE_FOR_avx_vinsertf128v8si:
34106 case CODE_FOR_avx512f_vinsertf64x4_mask:
34107 case CODE_FOR_avx512f_vinserti64x4_mask:
34108 case CODE_FOR_avx512f_vextractf64x4_mask:
34109 case CODE_FOR_avx512f_vextracti64x4_mask:
34110 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34111 case CODE_FOR_avx512dq_vinserti32x8_mask:
34112 case CODE_FOR_avx512vl_vinsertv4df:
34113 case CODE_FOR_avx512vl_vinsertv4di:
34114 case CODE_FOR_avx512vl_vinsertv8sf:
34115 case CODE_FOR_avx512vl_vinsertv8si:
34116 error ("the last argument must be a 1-bit immediate");
34117 return const0_rtx;
34119 case CODE_FOR_avx_vmcmpv2df3:
34120 case CODE_FOR_avx_vmcmpv4sf3:
34121 case CODE_FOR_avx_cmpv2df3:
34122 case CODE_FOR_avx_cmpv4sf3:
34123 case CODE_FOR_avx_cmpv4df3:
34124 case CODE_FOR_avx_cmpv8sf3:
34125 case CODE_FOR_avx512f_cmpv8df3_mask:
34126 case CODE_FOR_avx512f_cmpv16sf3_mask:
34127 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34128 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34129 error ("the last argument must be a 5-bit immediate");
34130 return const0_rtx;
34132 default:
34133 switch (nargs_constant)
34135 case 2:
34136 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34137 (!mask_pos && (nargs - i) == nargs_constant))
34139 error ("the next to last argument must be an 8-bit immediate");
34140 break;
34142 /* FALLTHRU */
34143 case 1:
34144 error ("the last argument must be an 8-bit immediate");
34145 break;
34146 default:
34147 gcc_unreachable ();
34149 return const0_rtx;
34152 else
34154 if (VECTOR_MODE_P (mode))
34155 op = safe_vector_operand (op, mode);
34157 /* If we aren't optimizing, only allow one memory operand to
34158 be generated. */
34159 if (memory_operand (op, mode))
34160 num_memory++;
34162 op = fixup_modeless_constant (op, mode);
34164 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34166 if (optimize || !match || num_memory > 1)
34167 op = copy_to_mode_reg (mode, op);
34169 else
34171 op = copy_to_reg (op);
34172 op = lowpart_subreg (mode, op, GET_MODE (op));
34176 args[i].op = op;
34177 args[i].mode = mode;
34180 switch (nargs)
34182 case 1:
34183 pat = GEN_FCN (icode) (real_target, args[0].op);
34184 break;
34185 case 2:
34186 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34187 break;
34188 case 3:
34189 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34190 args[2].op);
34191 break;
34192 case 4:
34193 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34194 args[2].op, args[3].op);
34195 break;
34196 case 5:
34197 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34198 args[2].op, args[3].op, args[4].op);
34199 break;
34200 case 6:
34201 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34202 args[2].op, args[3].op, args[4].op,
34203 args[5].op);
34204 break;
34205 default:
34206 gcc_unreachable ();
34209 if (! pat)
34210 return 0;
34212 emit_insn (pat);
34213 return target;
34216 /* Transform a pattern of the following layout:
34217 (set A
34218 (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
34220 into:
34221 (set A B) */
34223 static rtx
34224 ix86_erase_embedded_rounding (rtx pat)
34226 if (GET_CODE (pat) == INSN)
34227 pat = PATTERN (pat);
34229 gcc_assert (GET_CODE (pat) == SET);
34230 rtx src = SET_SRC (pat);
34231 gcc_assert (XVECLEN (src, 0) == 2);
34232 rtx p0 = XVECEXP (src, 0, 0);
34233 gcc_assert (GET_CODE (src) == UNSPEC
34234 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
34235 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
34236 return res;
34239 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34240 with rounding. */
34241 static rtx
34242 ix86_expand_sse_comi_round (const struct builtin_description *d,
34243 tree exp, rtx target)
34245 rtx pat, set_dst;
34246 tree arg0 = CALL_EXPR_ARG (exp, 0);
34247 tree arg1 = CALL_EXPR_ARG (exp, 1);
34248 tree arg2 = CALL_EXPR_ARG (exp, 2);
34249 tree arg3 = CALL_EXPR_ARG (exp, 3);
34250 rtx op0 = expand_normal (arg0);
34251 rtx op1 = expand_normal (arg1);
34252 rtx op2 = expand_normal (arg2);
34253 rtx op3 = expand_normal (arg3);
34254 enum insn_code icode = d->icode;
34255 const struct insn_data_d *insn_p = &insn_data[icode];
34256 machine_mode mode0 = insn_p->operand[0].mode;
34257 machine_mode mode1 = insn_p->operand[1].mode;
34258 enum rtx_code comparison = UNEQ;
34259 bool need_ucomi = false;
34261 /* See avxintrin.h for values. */
34262 enum rtx_code comi_comparisons[32] =
34264 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34265 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34266 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34268 bool need_ucomi_values[32] =
34270 true, false, false, true, true, false, false, true,
34271 true, false, false, true, true, false, false, true,
34272 false, true, true, false, false, true, true, false,
34273 false, true, true, false, false, true, true, false
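  /* Both tables are indexed by the _CMP_* predicate value passed as the
     third argument: comi_comparisons gives the rtx comparison to test on
     the flags result, and need_ucomi selects the non-signalling (ucomi)
     form of the instruction.  */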
34276 if (!CONST_INT_P (op2))
34278 error ("the third argument must be a comparison constant");
34279 return const0_rtx;
34281 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34283 error ("incorrect comparison mode");
34284 return const0_rtx;
34287 if (!insn_p->operand[2].predicate (op3, SImode))
34289 error ("incorrect rounding operand");
34290 return const0_rtx;
34293 comparison = comi_comparisons[INTVAL (op2)];
34294 need_ucomi = need_ucomi_values[INTVAL (op2)];
34296 if (VECTOR_MODE_P (mode0))
34297 op0 = safe_vector_operand (op0, mode0);
34298 if (VECTOR_MODE_P (mode1))
34299 op1 = safe_vector_operand (op1, mode1);
34301 target = gen_reg_rtx (SImode);
34302 emit_move_insn (target, const0_rtx);
34303 target = gen_rtx_SUBREG (QImode, target, 0);
34305 if ((optimize && !register_operand (op0, mode0))
34306 || !insn_p->operand[0].predicate (op0, mode0))
34307 op0 = copy_to_mode_reg (mode0, op0);
34308 if ((optimize && !register_operand (op1, mode1))
34309 || !insn_p->operand[1].predicate (op1, mode1))
34310 op1 = copy_to_mode_reg (mode1, op1);
34312 if (need_ucomi)
34313 icode = icode == CODE_FOR_sse_comi_round
34314 ? CODE_FOR_sse_ucomi_round
34315 : CODE_FOR_sse2_ucomi_round;
34317 pat = GEN_FCN (icode) (op0, op1, op3);
34318 if (! pat)
34319 return 0;
34321 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34322 if (INTVAL (op3) == NO_ROUND)
34324 pat = ix86_erase_embedded_rounding (pat);
34325 if (! pat)
34326 return 0;
34328 set_dst = SET_DEST (pat);
34330 else
34332 gcc_assert (GET_CODE (pat) == SET);
34333 set_dst = SET_DEST (pat);
34336 emit_insn (pat);
34337 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34338 gen_rtx_fmt_ee (comparison, QImode,
34339 set_dst,
34340 const0_rtx)));
34342 return SUBREG_REG (target);
34345 static rtx
34346 ix86_expand_round_builtin (const struct builtin_description *d,
34347 tree exp, rtx target)
34349 rtx pat;
34350 unsigned int i, nargs;
34351 struct
34353 rtx op;
34354 machine_mode mode;
34355 } args[6];
34356 enum insn_code icode = d->icode;
34357 const struct insn_data_d *insn_p = &insn_data[icode];
34358 machine_mode tmode = insn_p->operand[0].mode;
34359 unsigned int nargs_constant = 0;
34360 unsigned int redundant_embed_rnd = 0;
34362 switch ((enum ix86_builtin_func_type) d->flag)
34364 case UINT64_FTYPE_V2DF_INT:
34365 case UINT64_FTYPE_V4SF_INT:
34366 case UINT_FTYPE_V2DF_INT:
34367 case UINT_FTYPE_V4SF_INT:
34368 case INT64_FTYPE_V2DF_INT:
34369 case INT64_FTYPE_V4SF_INT:
34370 case INT_FTYPE_V2DF_INT:
34371 case INT_FTYPE_V4SF_INT:
34372 nargs = 2;
34373 break;
34374 case V4SF_FTYPE_V4SF_UINT_INT:
34375 case V4SF_FTYPE_V4SF_UINT64_INT:
34376 case V2DF_FTYPE_V2DF_UINT64_INT:
34377 case V4SF_FTYPE_V4SF_INT_INT:
34378 case V4SF_FTYPE_V4SF_INT64_INT:
34379 case V2DF_FTYPE_V2DF_INT64_INT:
34380 case V4SF_FTYPE_V4SF_V4SF_INT:
34381 case V2DF_FTYPE_V2DF_V2DF_INT:
34382 case V4SF_FTYPE_V4SF_V2DF_INT:
34383 case V2DF_FTYPE_V2DF_V4SF_INT:
34384 nargs = 3;
34385 break;
34386 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34387 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34388 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34389 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
34390 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
34391 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
34392 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34393 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
34394 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34395 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34396 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34397 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34398 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34399 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34400 nargs = 4;
34401 break;
34402 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34403 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34404 nargs_constant = 2;
34405 nargs = 4;
34406 break;
34407 case INT_FTYPE_V4SF_V4SF_INT_INT:
34408 case INT_FTYPE_V2DF_V2DF_INT_INT:
34409 return ix86_expand_sse_comi_round (d, exp, target);
34410 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
34411 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
34412 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
34413 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34414 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34415 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34416 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34417 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34418 nargs = 5;
34419 break;
34420 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34421 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34422 nargs_constant = 4;
34423 nargs = 5;
34424 break;
34425 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
34426 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
34427 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
34428 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
34429 nargs_constant = 3;
34430 nargs = 5;
34431 break;
34432 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
34433 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
34434 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34435 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34436 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
34437 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
34438 nargs = 6;
34439 nargs_constant = 4;
34440 break;
34441 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34442 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34443 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34444 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34445 nargs = 6;
34446 nargs_constant = 3;
34447 break;
34448 default:
34449 gcc_unreachable ();
34451 gcc_assert (nargs <= ARRAY_SIZE (args));
34453 if (optimize
34454 || target == 0
34455 || GET_MODE (target) != tmode
34456 || !insn_p->operand[0].predicate (target, tmode))
34457 target = gen_reg_rtx (tmode);
34459 for (i = 0; i < nargs; i++)
34461 tree arg = CALL_EXPR_ARG (exp, i);
34462 rtx op = expand_normal (arg);
34463 machine_mode mode = insn_p->operand[i + 1].mode;
34464 bool match = insn_p->operand[i + 1].predicate (op, mode);
34466 if (i == nargs - nargs_constant)
34468 if (!match)
34470 switch (icode)
34472 case CODE_FOR_avx512f_getmantv8df_mask_round:
34473 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34474 case CODE_FOR_avx512f_vgetmantv2df_round:
34475 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
34476 case CODE_FOR_avx512f_vgetmantv4sf_round:
34477 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
34478 error ("the immediate argument must be a 4-bit immediate");
34479 return const0_rtx;
34480 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34481 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34482 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34483 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34484 error ("the immediate argument must be a 5-bit immediate");
34485 return const0_rtx;
34486 default:
34487 error ("the immediate argument must be an 8-bit immediate");
34488 return const0_rtx;
34492 else if (i == nargs-1)
34494 if (!insn_p->operand[nargs].predicate (op, SImode))
34496 error ("incorrect rounding operand");
34497 return const0_rtx;
34500 	      /* If there is no rounding, use the normal version of the pattern.  */
34501 if (INTVAL (op) == NO_ROUND)
34502 redundant_embed_rnd = 1;
34504 else
34506 if (VECTOR_MODE_P (mode))
34507 op = safe_vector_operand (op, mode);
34509 op = fixup_modeless_constant (op, mode);
34511 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34513 if (optimize || !match)
34514 op = copy_to_mode_reg (mode, op);
34516 else
34518 op = copy_to_reg (op);
34519 op = lowpart_subreg (mode, op, GET_MODE (op));
34523 args[i].op = op;
34524 args[i].mode = mode;
34527 switch (nargs)
34529 case 1:
34530 pat = GEN_FCN (icode) (target, args[0].op);
34531 break;
34532 case 2:
34533 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34534 break;
34535 case 3:
34536 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34537 args[2].op);
34538 break;
34539 case 4:
34540 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34541 args[2].op, args[3].op);
34542 break;
34543 case 5:
34544 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34545 args[2].op, args[3].op, args[4].op);
34546 break;
34547 case 6:
34548 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34549 args[2].op, args[3].op, args[4].op,
34550 args[5].op);
34551 break;
34552 default:
34553 gcc_unreachable ();
34556 if (!pat)
34557 return 0;
34559 if (redundant_embed_rnd)
34560 pat = ix86_erase_embedded_rounding (pat);
34562 emit_insn (pat);
34563 return target;
34566 /* Subroutine of ix86_expand_builtin to take care of special insns
34567 with variable number of operands. */
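/* Expository note: KLASS below distinguishes loads from stores.  For a
   store, the first call argument provides the destination (a memory
   operand when MEMORY is set, otherwise a register) and the expansion
   returns no value; for a load, the usual TARGET register is used.  */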
34569 static rtx
34570 ix86_expand_special_args_builtin (const struct builtin_description *d,
34571 tree exp, rtx target)
34573 tree arg;
34574 rtx pat, op;
34575 unsigned int i, nargs, arg_adjust, memory;
34576 bool aligned_mem = false;
34577 struct
34579 rtx op;
34580 machine_mode mode;
34581 } args[3];
34582 enum insn_code icode = d->icode;
34583 bool last_arg_constant = false;
34584 const struct insn_data_d *insn_p = &insn_data[icode];
34585 machine_mode tmode = insn_p->operand[0].mode;
34586 enum { load, store } klass;
34588 switch ((enum ix86_builtin_func_type) d->flag)
34590 case VOID_FTYPE_VOID:
34591 emit_insn (GEN_FCN (icode) (target));
34592 return 0;
34593 case VOID_FTYPE_UINT64:
34594 case VOID_FTYPE_UNSIGNED:
34595 nargs = 0;
34596 klass = store;
34597 memory = 0;
34598 break;
34600 case INT_FTYPE_VOID:
34601 case USHORT_FTYPE_VOID:
34602 case UINT64_FTYPE_VOID:
34603 case UNSIGNED_FTYPE_VOID:
34604 nargs = 0;
34605 klass = load;
34606 memory = 0;
34607 break;
34608 case UINT64_FTYPE_PUNSIGNED:
34609 case V2DI_FTYPE_PV2DI:
34610 case V4DI_FTYPE_PV4DI:
34611 case V32QI_FTYPE_PCCHAR:
34612 case V16QI_FTYPE_PCCHAR:
34613 case V8SF_FTYPE_PCV4SF:
34614 case V8SF_FTYPE_PCFLOAT:
34615 case V4SF_FTYPE_PCFLOAT:
34616 case V4DF_FTYPE_PCV2DF:
34617 case V4DF_FTYPE_PCDOUBLE:
34618 case V2DF_FTYPE_PCDOUBLE:
34619 case VOID_FTYPE_PVOID:
34620 case V8DI_FTYPE_PV8DI:
34621 nargs = 1;
34622 klass = load;
34623 memory = 0;
34624 switch (icode)
34626 case CODE_FOR_sse4_1_movntdqa:
34627 case CODE_FOR_avx2_movntdqa:
34628 case CODE_FOR_avx512f_movntdqa:
34629 aligned_mem = true;
34630 break;
34631 default:
34632 break;
34634 break;
34635 case VOID_FTYPE_PV2SF_V4SF:
34636 case VOID_FTYPE_PV8DI_V8DI:
34637 case VOID_FTYPE_PV4DI_V4DI:
34638 case VOID_FTYPE_PV2DI_V2DI:
34639 case VOID_FTYPE_PCHAR_V32QI:
34640 case VOID_FTYPE_PCHAR_V16QI:
34641 case VOID_FTYPE_PFLOAT_V16SF:
34642 case VOID_FTYPE_PFLOAT_V8SF:
34643 case VOID_FTYPE_PFLOAT_V4SF:
34644 case VOID_FTYPE_PDOUBLE_V8DF:
34645 case VOID_FTYPE_PDOUBLE_V4DF:
34646 case VOID_FTYPE_PDOUBLE_V2DF:
34647 case VOID_FTYPE_PLONGLONG_LONGLONG:
34648 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34649 case VOID_FTYPE_PINT_INT:
34650 nargs = 1;
34651 klass = store;
34652 /* Reserve memory operand for target. */
34653 memory = ARRAY_SIZE (args);
34654 switch (icode)
34656 /* These builtins and instructions require the memory
34657 to be properly aligned. */
34658 case CODE_FOR_avx_movntv4di:
34659 case CODE_FOR_sse2_movntv2di:
34660 case CODE_FOR_avx_movntv8sf:
34661 case CODE_FOR_sse_movntv4sf:
34662 case CODE_FOR_sse4a_vmmovntv4sf:
34663 case CODE_FOR_avx_movntv4df:
34664 case CODE_FOR_sse2_movntv2df:
34665 case CODE_FOR_sse4a_vmmovntv2df:
34666 case CODE_FOR_sse2_movntidi:
34667 case CODE_FOR_sse_movntq:
34668 case CODE_FOR_sse2_movntisi:
34669 case CODE_FOR_avx512f_movntv16sf:
34670 case CODE_FOR_avx512f_movntv8df:
34671 case CODE_FOR_avx512f_movntv8di:
34672 aligned_mem = true;
34673 break;
34674 default:
34675 break;
34677 break;
34678 case V4SF_FTYPE_V4SF_PCV2SF:
34679 case V2DF_FTYPE_V2DF_PCDOUBLE:
34680 nargs = 2;
34681 klass = load;
34682 memory = 1;
34683 break;
34684 case V8SF_FTYPE_PCV8SF_V8SI:
34685 case V4DF_FTYPE_PCV4DF_V4DI:
34686 case V4SF_FTYPE_PCV4SF_V4SI:
34687 case V2DF_FTYPE_PCV2DF_V2DI:
34688 case V8SI_FTYPE_PCV8SI_V8SI:
34689 case V4DI_FTYPE_PCV4DI_V4DI:
34690 case V4SI_FTYPE_PCV4SI_V4SI:
34691 case V2DI_FTYPE_PCV2DI_V2DI:
34692 case VOID_FTYPE_INT_INT64:
34693 nargs = 2;
34694 klass = load;
34695 memory = 0;
34696 break;
34697 case VOID_FTYPE_PV8DF_V8DF_UQI:
34698 case VOID_FTYPE_PV4DF_V4DF_UQI:
34699 case VOID_FTYPE_PV2DF_V2DF_UQI:
34700 case VOID_FTYPE_PV16SF_V16SF_UHI:
34701 case VOID_FTYPE_PV8SF_V8SF_UQI:
34702 case VOID_FTYPE_PV4SF_V4SF_UQI:
34703 case VOID_FTYPE_PV8DI_V8DI_UQI:
34704 case VOID_FTYPE_PV4DI_V4DI_UQI:
34705 case VOID_FTYPE_PV2DI_V2DI_UQI:
34706 case VOID_FTYPE_PV16SI_V16SI_UHI:
34707 case VOID_FTYPE_PV8SI_V8SI_UQI:
34708 case VOID_FTYPE_PV4SI_V4SI_UQI:
34709 switch (icode)
34711 /* These builtins and instructions require the memory
34712 to be properly aligned. */
34713 case CODE_FOR_avx512f_storev16sf_mask:
34714 case CODE_FOR_avx512f_storev16si_mask:
34715 case CODE_FOR_avx512f_storev8df_mask:
34716 case CODE_FOR_avx512f_storev8di_mask:
34717 case CODE_FOR_avx512vl_storev8sf_mask:
34718 case CODE_FOR_avx512vl_storev8si_mask:
34719 case CODE_FOR_avx512vl_storev4df_mask:
34720 case CODE_FOR_avx512vl_storev4di_mask:
34721 case CODE_FOR_avx512vl_storev4sf_mask:
34722 case CODE_FOR_avx512vl_storev4si_mask:
34723 case CODE_FOR_avx512vl_storev2df_mask:
34724 case CODE_FOR_avx512vl_storev2di_mask:
34725 aligned_mem = true;
34726 break;
34727 default:
34728 break;
34730 /* FALLTHRU */
34731 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34732 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34733 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34734 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34735 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34736 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34737 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34738 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34739 case VOID_FTYPE_PV8SI_V8DI_UQI:
34740 case VOID_FTYPE_PV8HI_V8DI_UQI:
34741 case VOID_FTYPE_PV16HI_V16SI_UHI:
34742 case VOID_FTYPE_PV16QI_V8DI_UQI:
34743 case VOID_FTYPE_PV16QI_V16SI_UHI:
34744 case VOID_FTYPE_PV4SI_V4DI_UQI:
34745 case VOID_FTYPE_PV4SI_V2DI_UQI:
34746 case VOID_FTYPE_PV8HI_V4DI_UQI:
34747 case VOID_FTYPE_PV8HI_V2DI_UQI:
34748 case VOID_FTYPE_PV8HI_V8SI_UQI:
34749 case VOID_FTYPE_PV8HI_V4SI_UQI:
34750 case VOID_FTYPE_PV16QI_V4DI_UQI:
34751 case VOID_FTYPE_PV16QI_V2DI_UQI:
34752 case VOID_FTYPE_PV16QI_V8SI_UQI:
34753 case VOID_FTYPE_PV16QI_V4SI_UQI:
34754 case VOID_FTYPE_PCHAR_V64QI_UDI:
34755 case VOID_FTYPE_PCHAR_V32QI_USI:
34756 case VOID_FTYPE_PCHAR_V16QI_UHI:
34757 case VOID_FTYPE_PSHORT_V32HI_USI:
34758 case VOID_FTYPE_PSHORT_V16HI_UHI:
34759 case VOID_FTYPE_PSHORT_V8HI_UQI:
34760 case VOID_FTYPE_PINT_V16SI_UHI:
34761 case VOID_FTYPE_PINT_V8SI_UQI:
34762 case VOID_FTYPE_PINT_V4SI_UQI:
34763 case VOID_FTYPE_PINT64_V8DI_UQI:
34764 case VOID_FTYPE_PINT64_V4DI_UQI:
34765 case VOID_FTYPE_PINT64_V2DI_UQI:
34766 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
34767 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
34768 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
34769 case VOID_FTYPE_PFLOAT_V16SF_UHI:
34770 case VOID_FTYPE_PFLOAT_V8SF_UQI:
34771 case VOID_FTYPE_PFLOAT_V4SF_UQI:
34772 case VOID_FTYPE_PV32QI_V32HI_USI:
34773 case VOID_FTYPE_PV16QI_V16HI_UHI:
34774 case VOID_FTYPE_PV8QI_V8HI_UQI:
34775 nargs = 2;
34776 klass = store;
34777 /* Reserve memory operand for target. */
34778 memory = ARRAY_SIZE (args);
34779 break;
34780 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
34781 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
34782 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
34783 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
34784 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
34785 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
34786 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
34787 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
34788 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
34789 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
34790 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
34791 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
34792 switch (icode)
34794 /* These builtins and instructions require the memory
34795 to be properly aligned. */
34796 case CODE_FOR_avx512f_loadv16sf_mask:
34797 case CODE_FOR_avx512f_loadv16si_mask:
34798 case CODE_FOR_avx512f_loadv8df_mask:
34799 case CODE_FOR_avx512f_loadv8di_mask:
34800 case CODE_FOR_avx512vl_loadv8sf_mask:
34801 case CODE_FOR_avx512vl_loadv8si_mask:
34802 case CODE_FOR_avx512vl_loadv4df_mask:
34803 case CODE_FOR_avx512vl_loadv4di_mask:
34804 case CODE_FOR_avx512vl_loadv4sf_mask:
34805 case CODE_FOR_avx512vl_loadv4si_mask:
34806 case CODE_FOR_avx512vl_loadv2df_mask:
34807 case CODE_FOR_avx512vl_loadv2di_mask:
34808 case CODE_FOR_avx512bw_loadv64qi_mask:
34809 case CODE_FOR_avx512vl_loadv32qi_mask:
34810 case CODE_FOR_avx512vl_loadv16qi_mask:
34811 case CODE_FOR_avx512bw_loadv32hi_mask:
34812 case CODE_FOR_avx512vl_loadv16hi_mask:
34813 case CODE_FOR_avx512vl_loadv8hi_mask:
34814 aligned_mem = true;
34815 break;
34816 default:
34817 break;
34819 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
34820 case V32QI_FTYPE_PCCHAR_V32QI_USI:
34821 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
34822 case V32HI_FTYPE_PCSHORT_V32HI_USI:
34823 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
34824 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
34825 case V16SI_FTYPE_PCINT_V16SI_UHI:
34826 case V8SI_FTYPE_PCINT_V8SI_UQI:
34827 case V4SI_FTYPE_PCINT_V4SI_UQI:
34828 case V8DI_FTYPE_PCINT64_V8DI_UQI:
34829 case V4DI_FTYPE_PCINT64_V4DI_UQI:
34830 case V2DI_FTYPE_PCINT64_V2DI_UQI:
34831 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
34832 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
34833 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
34834 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
34835 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
34836 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
34837 nargs = 3;
34838 klass = load;
34839 memory = 0;
34840 break;
34841 case VOID_FTYPE_UINT_UINT_UINT:
34842 case VOID_FTYPE_UINT64_UINT_UINT:
34843 case UCHAR_FTYPE_UINT_UINT_UINT:
34844 case UCHAR_FTYPE_UINT64_UINT_UINT:
34845 nargs = 3;
34846 klass = load;
34847 memory = ARRAY_SIZE (args);
34848 last_arg_constant = true;
34849 break;
34850 default:
34851 gcc_unreachable ();
34854 gcc_assert (nargs <= ARRAY_SIZE (args));
34856 if (klass == store)
34858 arg = CALL_EXPR_ARG (exp, 0);
34859 op = expand_normal (arg);
34860 gcc_assert (target == 0);
34861 if (memory)
34863 op = ix86_zero_extend_to_Pmode (op);
34864 target = gen_rtx_MEM (tmode, op);
34865 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34866 on it. Try to improve it using get_pointer_alignment,
34867 and if the special builtin is one that requires strict
34868 	     mode alignment, also from its GET_MODE_ALIGNMENT.
34869 Failure to do so could lead to ix86_legitimate_combined_insn
34870 rejecting all changes to such insns. */
34871 unsigned int align = get_pointer_alignment (arg);
34872 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34873 align = GET_MODE_ALIGNMENT (tmode);
34874 if (MEM_ALIGN (target) < align)
34875 set_mem_align (target, align);
34877 else
34878 target = force_reg (tmode, op);
34879 arg_adjust = 1;
34881 else
34883 arg_adjust = 0;
34884 if (optimize
34885 || target == 0
34886 || !register_operand (target, tmode)
34887 || GET_MODE (target) != tmode)
34888 target = gen_reg_rtx (tmode);
34891 for (i = 0; i < nargs; i++)
34893 machine_mode mode = insn_p->operand[i + 1].mode;
34894 bool match;
34896 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34897 op = expand_normal (arg);
34898 match = insn_p->operand[i + 1].predicate (op, mode);
34900 if (last_arg_constant && (i + 1) == nargs)
34902 if (!match)
34904 if (icode == CODE_FOR_lwp_lwpvalsi3
34905 || icode == CODE_FOR_lwp_lwpinssi3
34906 || icode == CODE_FOR_lwp_lwpvaldi3
34907 || icode == CODE_FOR_lwp_lwpinsdi3)
34908 error ("the last argument must be a 32-bit immediate");
34909 else
34910 error ("the last argument must be an 8-bit immediate");
34911 return const0_rtx;
34914 else
34916 if (i == memory)
34918 /* This must be the memory operand. */
34919 op = ix86_zero_extend_to_Pmode (op);
34920 op = gen_rtx_MEM (mode, op);
34921 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34922 on it. Try to improve it using get_pointer_alignment,
34923 and if the special builtin is one that requires strict
34924 	     mode alignment, also from its GET_MODE_ALIGNMENT.
34925 Failure to do so could lead to ix86_legitimate_combined_insn
34926 rejecting all changes to such insns. */
34927 unsigned int align = get_pointer_alignment (arg);
34928 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34929 align = GET_MODE_ALIGNMENT (mode);
34930 if (MEM_ALIGN (op) < align)
34931 set_mem_align (op, align);
34933 else
34935 	  /* This must be a register operand.  */
34936 if (VECTOR_MODE_P (mode))
34937 op = safe_vector_operand (op, mode);
34939 op = fixup_modeless_constant (op, mode);
34941 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34942 op = copy_to_mode_reg (mode, op);
34943 else
34945 op = copy_to_reg (op);
34946 op = lowpart_subreg (mode, op, GET_MODE (op));
34951 args[i].op = op;
34952 args[i].mode = mode;
34955 switch (nargs)
34957 case 0:
34958 pat = GEN_FCN (icode) (target);
34959 break;
34960 case 1:
34961 pat = GEN_FCN (icode) (target, args[0].op);
34962 break;
34963 case 2:
34964 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34965 break;
34966 case 3:
34967 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34968 break;
34969 default:
34970 gcc_unreachable ();
34973 if (! pat)
34974 return 0;
34975 emit_insn (pat);
34976 return klass == store ? 0 : target;
34979 /* Return the integer constant in ARG. Constrain it to be in the range
34980 of the subparts of VEC_TYPE; issue an error if not. */
34982 static int
34983 get_element_number (tree vec_type, tree arg)
34985 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34987 if (!tree_fits_uhwi_p (arg)
34988 || (elt = tree_to_uhwi (arg), elt > max))
34990 error ("selector must be an integer constant in the range 0..%wi", max);
34991 return 0;
34994 return elt;
34997 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34998 ix86_expand_vector_init. We DO have language-level syntax for this, in
34999 the form of (type){ init-list }. Except that since we can't place emms
35000 instructions from inside the compiler, we can't allow the use of MMX
35001 registers unless the user explicitly asks for it. So we do *not* define
35002 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35003    we have builtins invoked by mmintrin.h that give us license to emit
35004 these sorts of instructions. */
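/* For example (illustrative, not from the original source): _mm_set_pi32
   in mmintrin.h is implemented with __builtin_ia32_vec_init_v2si, which
   is routed to the function below.  */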
35006 static rtx
35007 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35009 machine_mode tmode = TYPE_MODE (type);
35010 machine_mode inner_mode = GET_MODE_INNER (tmode);
35011 int i, n_elt = GET_MODE_NUNITS (tmode);
35012 rtvec v = rtvec_alloc (n_elt);
35014 gcc_assert (VECTOR_MODE_P (tmode));
35015 gcc_assert (call_expr_nargs (exp) == n_elt);
35017 for (i = 0; i < n_elt; ++i)
35019 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35020 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35023 if (!target || !register_operand (target, tmode))
35024 target = gen_reg_rtx (tmode);
35026 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35027 return target;
35030 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35031 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35032 had a language-level syntax for referencing vector elements. */
35034 static rtx
35035 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35037 machine_mode tmode, mode0;
35038 tree arg0, arg1;
35039 int elt;
35040 rtx op0;
35042 arg0 = CALL_EXPR_ARG (exp, 0);
35043 arg1 = CALL_EXPR_ARG (exp, 1);
35045 op0 = expand_normal (arg0);
35046 elt = get_element_number (TREE_TYPE (arg0), arg1);
35048 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35049 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35050 gcc_assert (VECTOR_MODE_P (mode0));
35052 op0 = force_reg (mode0, op0);
35054 if (optimize || !target || !register_operand (target, tmode))
35055 target = gen_reg_rtx (tmode);
35057 ix86_expand_vector_extract (true, target, op0, elt);
35059 return target;
35062 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35063 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35064 a language-level syntax for referencing vector elements. */
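/* For example (illustrative): _mm_extract_epi16 in emmintrin.h uses
   __builtin_ia32_vec_ext_v8hi and ends up in the function below.  */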
35066 static rtx
35067 ix86_expand_vec_set_builtin (tree exp)
35069 machine_mode tmode, mode1;
35070 tree arg0, arg1, arg2;
35071 int elt;
35072 rtx op0, op1, target;
35074 arg0 = CALL_EXPR_ARG (exp, 0);
35075 arg1 = CALL_EXPR_ARG (exp, 1);
35076 arg2 = CALL_EXPR_ARG (exp, 2);
35078 tmode = TYPE_MODE (TREE_TYPE (arg0));
35079 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35080 gcc_assert (VECTOR_MODE_P (tmode));
35082 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35083 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35084 elt = get_element_number (TREE_TYPE (arg0), arg2);
35086 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35087 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35089 op0 = force_reg (tmode, op0);
35090 op1 = force_reg (mode1, op1);
35092 /* OP0 is the source of these builtin functions and shouldn't be
35093 modified. Create a copy, use it and return it as target. */
35094 target = gen_reg_rtx (tmode);
35095 emit_move_insn (target, op0);
35096 ix86_expand_vector_set (true, target, op1, elt);
35098 return target;
35101 /* Emit conditional move of SRC to DST with condition
35102 OP1 CODE OP2. */
35103 static void
35104 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
35106 rtx t;
35108 if (TARGET_CMOVE)
35110 t = ix86_expand_compare (code, op1, op2);
35111 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
35112 src, dst)));
35114 else
35116 rtx_code_label *nomove = gen_label_rtx ();
35117 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
35118 const0_rtx, GET_MODE (op1), 1, nomove);
35119 emit_move_insn (dst, src);
35120 emit_label (nomove);
35125 /* Choose the max of DST and SRC and put it in DST.  */
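/* Expository note: the LTU condition makes this an unsigned maximum.  The
   MPX expansions below use it both for lower bounds and for upper bounds;
   the latter works because upper bounds are kept in one's complement form,
   as noted in the comments further down.  */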
35125 static void
35126 ix86_emit_move_max (rtx dst, rtx src)
35128 ix86_emit_cmove (dst, src, LTU, dst, src);
35131 /* Expand an expression EXP that calls a built-in function,
35132 with result going to TARGET if that's convenient
35133 (and in mode MODE if that's convenient).
35134 SUBTARGET may be used as the target for computing one of EXP's operands.
35135 IGNORE is nonzero if the value is to be ignored. */
35137 static rtx
35138 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35139 machine_mode mode, int ignore)
35141 size_t i;
35142 enum insn_code icode, icode2;
35143 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35144 tree arg0, arg1, arg2, arg3, arg4;
35145 rtx op0, op1, op2, op3, op4, pat, pat2, insn;
35146 machine_mode mode0, mode1, mode2, mode3, mode4;
35147 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35149 /* For CPU builtins that can be folded, fold first and expand the fold. */
35150 switch (fcode)
35152 case IX86_BUILTIN_CPU_INIT:
35154 /* Make it call __cpu_indicator_init in libgcc. */
35155 tree call_expr, fndecl, type;
35156 type = build_function_type_list (integer_type_node, NULL_TREE);
35157 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35158 call_expr = build_call_expr (fndecl, 0);
35159 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35161 case IX86_BUILTIN_CPU_IS:
35162 case IX86_BUILTIN_CPU_SUPPORTS:
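	/* Illustrative note: a typical call is
	   __builtin_cpu_supports ("avx2").  fold_builtin_cpu rewrites such
	   calls into tests of the __cpu_model data that __cpu_indicator_init
	   (see the case above) initializes in libgcc, and the folded
	   expression is simply expanded here.  */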
35164 tree arg0 = CALL_EXPR_ARG (exp, 0);
35165 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35166 gcc_assert (fold_expr != NULL_TREE);
35167 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35171 /* Determine whether the builtin function is available under the current ISA.
35172 Originally the builtin was not created if it wasn't applicable to the
35173 current ISA based on the command line switches. With function specific
35174 options, we need to check in the context of the function making the call
35175 whether it is supported. Treat AVX512VL and MMX specially. For other flags,
35176      if isa includes more than one ISA bit, treat those as requiring any
35177 of them. For AVX512VL, require both AVX512VL and the non-AVX512VL
35178 ISAs. Likewise for MMX, require both MMX and the non-MMX ISAs.
35179 Similarly for 64BIT, but we shouldn't be building such builtins
35180 at all, -m64 is a whole TU option. */
35181 if (((ix86_builtins_isa[fcode].isa
35182 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
35183 | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI))
35184 && !(ix86_builtins_isa[fcode].isa
35185 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
35186 | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI)
35187 & ix86_isa_flags))
35188 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
35189 && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
35190 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_MMX)
35191 && !(ix86_isa_flags & OPTION_MASK_ISA_MMX))
35192 || (ix86_builtins_isa[fcode].isa2
35193 && !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
35195 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa,
35196 ix86_builtins_isa[fcode].isa2, 0, 0,
35197 NULL, NULL, (enum fpmath_unit) 0,
35198 false);
35199 if (!opts)
35200 error ("%qE needs unknown isa option", fndecl);
35201 else
35203 gcc_assert (opts != NULL);
35204 error ("%qE needs isa option %s", fndecl, opts);
35205 free (opts);
35207 return expand_call (exp, target, ignore);
35210 switch (fcode)
35212 case IX86_BUILTIN_BNDMK:
35213 if (!target
35214 || GET_MODE (target) != BNDmode
35215 || !register_operand (target, BNDmode))
35216 target = gen_reg_rtx (BNDmode);
35218 arg0 = CALL_EXPR_ARG (exp, 0);
35219 arg1 = CALL_EXPR_ARG (exp, 1);
35221 op0 = expand_normal (arg0);
35222 op1 = expand_normal (arg1);
35224 if (!register_operand (op0, Pmode))
35225 op0 = ix86_zero_extend_to_Pmode (op0);
35226 if (!register_operand (op1, Pmode))
35227 op1 = ix86_zero_extend_to_Pmode (op1);
35229       /* Builtin arg1 is the size of the block, but the instruction's op1 should
35230 	 be (size - 1).  */
35231 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
35232 NULL_RTX, 1, OPTAB_DIRECT);
35234 emit_insn (BNDmode == BND64mode
35235 ? gen_bnd64_mk (target, op0, op1)
35236 : gen_bnd32_mk (target, op0, op1));
35237 return target;
35239 case IX86_BUILTIN_BNDSTX:
35240 arg0 = CALL_EXPR_ARG (exp, 0);
35241 arg1 = CALL_EXPR_ARG (exp, 1);
35242 arg2 = CALL_EXPR_ARG (exp, 2);
35244 op0 = expand_normal (arg0);
35245 op1 = expand_normal (arg1);
35246 op2 = expand_normal (arg2);
35248 if (!register_operand (op0, Pmode))
35249 op0 = ix86_zero_extend_to_Pmode (op0);
35250 if (!register_operand (op1, BNDmode))
35251 op1 = copy_to_mode_reg (BNDmode, op1);
35252 if (!register_operand (op2, Pmode))
35253 op2 = ix86_zero_extend_to_Pmode (op2);
35255 emit_insn (BNDmode == BND64mode
35256 ? gen_bnd64_stx (op2, op0, op1)
35257 : gen_bnd32_stx (op2, op0, op1));
35258 return 0;
35260 case IX86_BUILTIN_BNDLDX:
35261 if (!target
35262 || GET_MODE (target) != BNDmode
35263 || !register_operand (target, BNDmode))
35264 target = gen_reg_rtx (BNDmode);
35266 arg0 = CALL_EXPR_ARG (exp, 0);
35267 arg1 = CALL_EXPR_ARG (exp, 1);
35269 op0 = expand_normal (arg0);
35270 op1 = expand_normal (arg1);
35272 if (!register_operand (op0, Pmode))
35273 op0 = ix86_zero_extend_to_Pmode (op0);
35274 if (!register_operand (op1, Pmode))
35275 op1 = ix86_zero_extend_to_Pmode (op1);
35277 emit_insn (BNDmode == BND64mode
35278 ? gen_bnd64_ldx (target, op0, op1)
35279 : gen_bnd32_ldx (target, op0, op1));
35280 return target;
35282 case IX86_BUILTIN_BNDCL:
35283 arg0 = CALL_EXPR_ARG (exp, 0);
35284 arg1 = CALL_EXPR_ARG (exp, 1);
35286 op0 = expand_normal (arg0);
35287 op1 = expand_normal (arg1);
35289 if (!register_operand (op0, Pmode))
35290 op0 = ix86_zero_extend_to_Pmode (op0);
35291 if (!register_operand (op1, BNDmode))
35292 op1 = copy_to_mode_reg (BNDmode, op1);
35294 emit_insn (BNDmode == BND64mode
35295 ? gen_bnd64_cl (op1, op0)
35296 : gen_bnd32_cl (op1, op0));
35297 return 0;
35299 case IX86_BUILTIN_BNDCU:
35300 arg0 = CALL_EXPR_ARG (exp, 0);
35301 arg1 = CALL_EXPR_ARG (exp, 1);
35303 op0 = expand_normal (arg0);
35304 op1 = expand_normal (arg1);
35306 if (!register_operand (op0, Pmode))
35307 op0 = ix86_zero_extend_to_Pmode (op0);
35308 if (!register_operand (op1, BNDmode))
35309 op1 = copy_to_mode_reg (BNDmode, op1);
35311 emit_insn (BNDmode == BND64mode
35312 ? gen_bnd64_cu (op1, op0)
35313 : gen_bnd32_cu (op1, op0));
35314 return 0;
35316 case IX86_BUILTIN_BNDRET:
35317 arg0 = CALL_EXPR_ARG (exp, 0);
35318 target = chkp_get_rtl_bounds (arg0);
35320       /* If no bounds were specified for the returned value,
35321 	 then use INIT bounds.  This usually happens when
35322 	 some built-in function is expanded.  */
35323 if (!target)
35325 rtx t1 = gen_reg_rtx (Pmode);
35326 rtx t2 = gen_reg_rtx (Pmode);
35327 target = gen_reg_rtx (BNDmode);
35328 emit_move_insn (t1, const0_rtx);
35329 emit_move_insn (t2, constm1_rtx);
35330 emit_insn (BNDmode == BND64mode
35331 ? gen_bnd64_mk (target, t1, t2)
35332 : gen_bnd32_mk (target, t1, t2));
35335 gcc_assert (target && REG_P (target));
35336 return target;
35338 case IX86_BUILTIN_BNDNARROW:
35340 rtx m1, m1h1, m1h2, lb, ub, t1;
35342 /* Return value and lb. */
35343 arg0 = CALL_EXPR_ARG (exp, 0);
35344 /* Bounds. */
35345 arg1 = CALL_EXPR_ARG (exp, 1);
35346 /* Size. */
35347 arg2 = CALL_EXPR_ARG (exp, 2);
35349 lb = expand_normal (arg0);
35350 op1 = expand_normal (arg1);
35351 op2 = expand_normal (arg2);
35353 	/* The size was passed, but we need to use (size - 1), as for bndmk.  */
35354 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
35355 NULL_RTX, 1, OPTAB_DIRECT);
35357 	/* Add LB to the size and invert the result to get UB.  */
35358 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
35359 op2, 1, OPTAB_DIRECT);
35360 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
35362 if (!register_operand (lb, Pmode))
35363 lb = ix86_zero_extend_to_Pmode (lb);
35364 if (!register_operand (ub, Pmode))
35365 ub = ix86_zero_extend_to_Pmode (ub);
35367 /* We need to move bounds to memory before any computations. */
35368 if (MEM_P (op1))
35369 m1 = op1;
35370 else
35372 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
35373 emit_move_insn (m1, op1);
35376 /* Generate mem expression to be used for access to LB and UB. */
35377 m1h1 = adjust_address (m1, Pmode, 0);
35378 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
35380 t1 = gen_reg_rtx (Pmode);
35382 /* Compute LB. */
35383 emit_move_insn (t1, m1h1);
35384 ix86_emit_move_max (t1, lb);
35385 emit_move_insn (m1h1, t1);
35387 /* Compute UB. UB is stored in 1's complement form. Therefore
35388 we also use max here. */
35389 emit_move_insn (t1, m1h2);
35390 ix86_emit_move_max (t1, ub);
35391 emit_move_insn (m1h2, t1);
35393 op2 = gen_reg_rtx (BNDmode);
35394 emit_move_insn (op2, m1);
35396 return chkp_join_splitted_slot (lb, op2);
35399 case IX86_BUILTIN_BNDINT:
35401 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
35403 if (!target
35404 || GET_MODE (target) != BNDmode
35405 || !register_operand (target, BNDmode))
35406 target = gen_reg_rtx (BNDmode);
35408 arg0 = CALL_EXPR_ARG (exp, 0);
35409 arg1 = CALL_EXPR_ARG (exp, 1);
35411 op0 = expand_normal (arg0);
35412 op1 = expand_normal (arg1);
35414 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
35415 rh1 = adjust_address (res, Pmode, 0);
35416 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
35418 /* Put first bounds to temporaries. */
35419 lb1 = gen_reg_rtx (Pmode);
35420 ub1 = gen_reg_rtx (Pmode);
35421 if (MEM_P (op0))
35423 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
35424 emit_move_insn (ub1, adjust_address (op0, Pmode,
35425 GET_MODE_SIZE (Pmode)));
35427 else
35429 emit_move_insn (res, op0);
35430 emit_move_insn (lb1, rh1);
35431 emit_move_insn (ub1, rh2);
35434 /* Put second bounds to temporaries. */
35435 lb2 = gen_reg_rtx (Pmode);
35436 ub2 = gen_reg_rtx (Pmode);
35437 if (MEM_P (op1))
35439 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
35440 emit_move_insn (ub2, adjust_address (op1, Pmode,
35441 GET_MODE_SIZE (Pmode)));
35443 else
35445 emit_move_insn (res, op1);
35446 emit_move_insn (lb2, rh1);
35447 emit_move_insn (ub2, rh2);
35450 /* Compute LB. */
35451 ix86_emit_move_max (lb1, lb2);
35452 emit_move_insn (rh1, lb1);
35454 /* Compute UB. UB is stored in 1's complement form. Therefore
35455 we also use max here. */
35456 ix86_emit_move_max (ub1, ub2);
35457 emit_move_insn (rh2, ub1);
35459 emit_move_insn (target, res);
35461 return target;
35464 case IX86_BUILTIN_SIZEOF:
35466 tree name;
35467 rtx symbol;
35469 if (!target
35470 || GET_MODE (target) != Pmode
35471 || !register_operand (target, Pmode))
35472 target = gen_reg_rtx (Pmode);
35474 arg0 = CALL_EXPR_ARG (exp, 0);
35475 gcc_assert (VAR_P (arg0));
35477 name = DECL_ASSEMBLER_NAME (arg0);
35478 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
35480 emit_insn (Pmode == SImode
35481 ? gen_move_size_reloc_si (target, symbol)
35482 : gen_move_size_reloc_di (target, symbol));
35484 return target;
35487 case IX86_BUILTIN_BNDLOWER:
35489 rtx mem, hmem;
35491 if (!target
35492 || GET_MODE (target) != Pmode
35493 || !register_operand (target, Pmode))
35494 target = gen_reg_rtx (Pmode);
35496 arg0 = CALL_EXPR_ARG (exp, 0);
35497 op0 = expand_normal (arg0);
35499 /* We need to move bounds to memory first. */
35500 if (MEM_P (op0))
35501 mem = op0;
35502 else
35504 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35505 emit_move_insn (mem, op0);
35508 /* Generate mem expression to access LB and load it. */
35509 hmem = adjust_address (mem, Pmode, 0);
35510 emit_move_insn (target, hmem);
35512 return target;
35515 case IX86_BUILTIN_BNDUPPER:
35517 rtx mem, hmem, res;
35519 if (!target
35520 || GET_MODE (target) != Pmode
35521 || !register_operand (target, Pmode))
35522 target = gen_reg_rtx (Pmode);
35524 arg0 = CALL_EXPR_ARG (exp, 0);
35525 op0 = expand_normal (arg0);
35527 /* We need to move bounds to memory first. */
35528 if (MEM_P (op0))
35529 mem = op0;
35530 else
35532 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35533 emit_move_insn (mem, op0);
35536 /* Generate mem expression to access UB. */
35537 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
35539 	/* We need to invert all bits of UB.  */
35540 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
35542 if (res != target)
35543 emit_move_insn (target, res);
35545 return target;
35548 case IX86_BUILTIN_MASKMOVQ:
35549 case IX86_BUILTIN_MASKMOVDQU:
35550 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35551 ? CODE_FOR_mmx_maskmovq
35552 : CODE_FOR_sse2_maskmovdqu);
35553 /* Note the arg order is different from the operand order. */
35554 arg1 = CALL_EXPR_ARG (exp, 0);
35555 arg2 = CALL_EXPR_ARG (exp, 1);
35556 arg0 = CALL_EXPR_ARG (exp, 2);
35557 op0 = expand_normal (arg0);
35558 op1 = expand_normal (arg1);
35559 op2 = expand_normal (arg2);
35560 mode0 = insn_data[icode].operand[0].mode;
35561 mode1 = insn_data[icode].operand[1].mode;
35562 mode2 = insn_data[icode].operand[2].mode;
35564 op0 = ix86_zero_extend_to_Pmode (op0);
35565 op0 = gen_rtx_MEM (mode1, op0);
35567 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35568 op0 = copy_to_mode_reg (mode0, op0);
35569 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35570 op1 = copy_to_mode_reg (mode1, op1);
35571 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35572 op2 = copy_to_mode_reg (mode2, op2);
35573 pat = GEN_FCN (icode) (op0, op1, op2);
35574 if (! pat)
35575 return 0;
35576 emit_insn (pat);
35577 return 0;
35579 case IX86_BUILTIN_LDMXCSR:
35580 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35581 target = assign_386_stack_local (SImode, SLOT_TEMP);
35582 emit_move_insn (target, op0);
35583 emit_insn (gen_sse_ldmxcsr (target));
35584 return 0;
35586 case IX86_BUILTIN_STMXCSR:
35587 target = assign_386_stack_local (SImode, SLOT_TEMP);
35588 emit_insn (gen_sse_stmxcsr (target));
35589 return copy_to_mode_reg (SImode, target);
35591 case IX86_BUILTIN_CLFLUSH:
35592 arg0 = CALL_EXPR_ARG (exp, 0);
35593 op0 = expand_normal (arg0);
35594 icode = CODE_FOR_sse2_clflush;
35595 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35596 op0 = ix86_zero_extend_to_Pmode (op0);
35598 emit_insn (gen_sse2_clflush (op0));
35599 return 0;
35601 case IX86_BUILTIN_CLWB:
35602 arg0 = CALL_EXPR_ARG (exp, 0);
35603 op0 = expand_normal (arg0);
35604 icode = CODE_FOR_clwb;
35605 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35606 op0 = ix86_zero_extend_to_Pmode (op0);
35608 emit_insn (gen_clwb (op0));
35609 return 0;
35611 case IX86_BUILTIN_CLFLUSHOPT:
35612 arg0 = CALL_EXPR_ARG (exp, 0);
35613 op0 = expand_normal (arg0);
35614 icode = CODE_FOR_clflushopt;
35615 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35616 op0 = ix86_zero_extend_to_Pmode (op0);
35618 emit_insn (gen_clflushopt (op0));
35619 return 0;
35621 case IX86_BUILTIN_MONITOR:
35622 case IX86_BUILTIN_MONITORX:
35623 arg0 = CALL_EXPR_ARG (exp, 0);
35624 arg1 = CALL_EXPR_ARG (exp, 1);
35625 arg2 = CALL_EXPR_ARG (exp, 2);
35626 op0 = expand_normal (arg0);
35627 op1 = expand_normal (arg1);
35628 op2 = expand_normal (arg2);
35629 if (!REG_P (op0))
35630 op0 = ix86_zero_extend_to_Pmode (op0);
35631 if (!REG_P (op1))
35632 op1 = copy_to_mode_reg (SImode, op1);
35633 if (!REG_P (op2))
35634 op2 = copy_to_mode_reg (SImode, op2);
35636 emit_insn (fcode == IX86_BUILTIN_MONITOR
35637 ? ix86_gen_monitor (op0, op1, op2)
35638 : ix86_gen_monitorx (op0, op1, op2));
35639 return 0;
35641 case IX86_BUILTIN_MWAIT:
35642 arg0 = CALL_EXPR_ARG (exp, 0);
35643 arg1 = CALL_EXPR_ARG (exp, 1);
35644 op0 = expand_normal (arg0);
35645 op1 = expand_normal (arg1);
35646 if (!REG_P (op0))
35647 op0 = copy_to_mode_reg (SImode, op0);
35648 if (!REG_P (op1))
35649 op1 = copy_to_mode_reg (SImode, op1);
35650 emit_insn (gen_sse3_mwait (op0, op1));
35651 return 0;
35653 case IX86_BUILTIN_MWAITX:
35654 arg0 = CALL_EXPR_ARG (exp, 0);
35655 arg1 = CALL_EXPR_ARG (exp, 1);
35656 arg2 = CALL_EXPR_ARG (exp, 2);
35657 op0 = expand_normal (arg0);
35658 op1 = expand_normal (arg1);
35659 op2 = expand_normal (arg2);
35660 if (!REG_P (op0))
35661 op0 = copy_to_mode_reg (SImode, op0);
35662 if (!REG_P (op1))
35663 op1 = copy_to_mode_reg (SImode, op1);
35664 if (!REG_P (op2))
35665 op2 = copy_to_mode_reg (SImode, op2);
35666 emit_insn (gen_mwaitx (op0, op1, op2));
35667 return 0;
35669 case IX86_BUILTIN_CLZERO:
35670 arg0 = CALL_EXPR_ARG (exp, 0);
35671 op0 = expand_normal (arg0);
35672 if (!REG_P (op0))
35673 op0 = ix86_zero_extend_to_Pmode (op0);
35674 emit_insn (ix86_gen_clzero (op0));
35675 return 0;
35677 case IX86_BUILTIN_VEC_INIT_V2SI:
35678 case IX86_BUILTIN_VEC_INIT_V4HI:
35679 case IX86_BUILTIN_VEC_INIT_V8QI:
35680 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35682 case IX86_BUILTIN_VEC_EXT_V2DF:
35683 case IX86_BUILTIN_VEC_EXT_V2DI:
35684 case IX86_BUILTIN_VEC_EXT_V4SF:
35685 case IX86_BUILTIN_VEC_EXT_V4SI:
35686 case IX86_BUILTIN_VEC_EXT_V8HI:
35687 case IX86_BUILTIN_VEC_EXT_V2SI:
35688 case IX86_BUILTIN_VEC_EXT_V4HI:
35689 case IX86_BUILTIN_VEC_EXT_V16QI:
35690 return ix86_expand_vec_ext_builtin (exp, target);
35692 case IX86_BUILTIN_VEC_SET_V2DI:
35693 case IX86_BUILTIN_VEC_SET_V4SF:
35694 case IX86_BUILTIN_VEC_SET_V4SI:
35695 case IX86_BUILTIN_VEC_SET_V8HI:
35696 case IX86_BUILTIN_VEC_SET_V4HI:
35697 case IX86_BUILTIN_VEC_SET_V16QI:
35698 return ix86_expand_vec_set_builtin (exp);
35700 case IX86_BUILTIN_NANQ:
35701 case IX86_BUILTIN_NANSQ:
35702 return expand_call (exp, target, ignore);
35704 case IX86_BUILTIN_RDPMC:
35705 case IX86_BUILTIN_RDTSC:
35706 case IX86_BUILTIN_RDTSCP:
35707 case IX86_BUILTIN_XGETBV:
35709 op0 = gen_reg_rtx (DImode);
35710 op1 = gen_reg_rtx (DImode);
35712 if (fcode == IX86_BUILTIN_RDPMC)
35714 arg0 = CALL_EXPR_ARG (exp, 0);
35715 op2 = expand_normal (arg0);
35716 if (!register_operand (op2, SImode))
35717 op2 = copy_to_mode_reg (SImode, op2);
35719 insn = (TARGET_64BIT
35720 ? gen_rdpmc_rex64 (op0, op1, op2)
35721 : gen_rdpmc (op0, op2));
35722 emit_insn (insn);
35724 else if (fcode == IX86_BUILTIN_XGETBV)
35726 arg0 = CALL_EXPR_ARG (exp, 0);
35727 op2 = expand_normal (arg0);
35728 if (!register_operand (op2, SImode))
35729 op2 = copy_to_mode_reg (SImode, op2);
35731 insn = (TARGET_64BIT
35732 ? gen_xgetbv_rex64 (op0, op1, op2)
35733 : gen_xgetbv (op0, op2));
35734 emit_insn (insn);
35736 else if (fcode == IX86_BUILTIN_RDTSC)
35738 insn = (TARGET_64BIT
35739 ? gen_rdtsc_rex64 (op0, op1)
35740 : gen_rdtsc (op0));
35741 emit_insn (insn);
35743 else
35745 op2 = gen_reg_rtx (SImode);
35747 insn = (TARGET_64BIT
35748 ? gen_rdtscp_rex64 (op0, op1, op2)
35749 : gen_rdtscp (op0, op2));
35750 emit_insn (insn);
35752 arg0 = CALL_EXPR_ARG (exp, 0);
35753 op4 = expand_normal (arg0);
35754 if (!address_operand (op4, VOIDmode))
35756 op4 = convert_memory_address (Pmode, op4);
35757 op4 = copy_addr_to_reg (op4);
35759 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35762 if (target == 0)
35764 /* mode is VOIDmode if __builtin_rd* has been called
35765 without lhs. */
35766 if (mode == VOIDmode)
35767 return target;
35768 target = gen_reg_rtx (mode);
35771 if (TARGET_64BIT)
35773 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35774 op1, 1, OPTAB_DIRECT);
35775 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35776 op0, 1, OPTAB_DIRECT);
35779 emit_move_insn (target, op0);
35780 return target;
35782 case IX86_BUILTIN_FXSAVE:
35783 case IX86_BUILTIN_FXRSTOR:
35784 case IX86_BUILTIN_FXSAVE64:
35785 case IX86_BUILTIN_FXRSTOR64:
35786 case IX86_BUILTIN_FNSTENV:
35787 case IX86_BUILTIN_FLDENV:
35788 mode0 = BLKmode;
35789 switch (fcode)
35791 case IX86_BUILTIN_FXSAVE:
35792 icode = CODE_FOR_fxsave;
35793 break;
35794 case IX86_BUILTIN_FXRSTOR:
35795 icode = CODE_FOR_fxrstor;
35796 break;
35797 case IX86_BUILTIN_FXSAVE64:
35798 icode = CODE_FOR_fxsave64;
35799 break;
35800 case IX86_BUILTIN_FXRSTOR64:
35801 icode = CODE_FOR_fxrstor64;
35802 break;
35803 case IX86_BUILTIN_FNSTENV:
35804 icode = CODE_FOR_fnstenv;
35805 break;
35806 case IX86_BUILTIN_FLDENV:
35807 icode = CODE_FOR_fldenv;
35808 break;
35809 default:
35810 gcc_unreachable ();
35813 arg0 = CALL_EXPR_ARG (exp, 0);
35814 op0 = expand_normal (arg0);
35816 if (!address_operand (op0, VOIDmode))
35818 op0 = convert_memory_address (Pmode, op0);
35819 op0 = copy_addr_to_reg (op0);
35821 op0 = gen_rtx_MEM (mode0, op0);
35823 pat = GEN_FCN (icode) (op0);
35824 if (pat)
35825 emit_insn (pat);
35826 return 0;
35828 case IX86_BUILTIN_XSETBV:
35829 arg0 = CALL_EXPR_ARG (exp, 0);
35830 arg1 = CALL_EXPR_ARG (exp, 1);
35831 op0 = expand_normal (arg0);
35832 op1 = expand_normal (arg1);
35834 if (!REG_P (op0))
35835 op0 = copy_to_mode_reg (SImode, op0);
35837 if (TARGET_64BIT)
35839 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35840 NULL, 1, OPTAB_DIRECT);
35842 op2 = gen_lowpart (SImode, op2);
35843 op1 = gen_lowpart (SImode, op1);
35844 if (!REG_P (op1))
35845 op1 = copy_to_mode_reg (SImode, op1);
35846 if (!REG_P (op2))
35847 op2 = copy_to_mode_reg (SImode, op2);
35848 icode = CODE_FOR_xsetbv_rex64;
35849 pat = GEN_FCN (icode) (op0, op1, op2);
35851 else
35853 if (!REG_P (op1))
35854 op1 = copy_to_mode_reg (DImode, op1);
35855 icode = CODE_FOR_xsetbv;
35856 pat = GEN_FCN (icode) (op0, op1);
35858 if (pat)
35859 emit_insn (pat);
35860 return 0;
35862 case IX86_BUILTIN_XSAVE:
35863 case IX86_BUILTIN_XRSTOR:
35864 case IX86_BUILTIN_XSAVE64:
35865 case IX86_BUILTIN_XRSTOR64:
35866 case IX86_BUILTIN_XSAVEOPT:
35867 case IX86_BUILTIN_XSAVEOPT64:
35868 case IX86_BUILTIN_XSAVES:
35869 case IX86_BUILTIN_XRSTORS:
35870 case IX86_BUILTIN_XSAVES64:
35871 case IX86_BUILTIN_XRSTORS64:
35872 case IX86_BUILTIN_XSAVEC:
35873 case IX86_BUILTIN_XSAVEC64:
35874 arg0 = CALL_EXPR_ARG (exp, 0);
35875 arg1 = CALL_EXPR_ARG (exp, 1);
35876 op0 = expand_normal (arg0);
35877 op1 = expand_normal (arg1);
35879 if (!address_operand (op0, VOIDmode))
35881 op0 = convert_memory_address (Pmode, op0);
35882 op0 = copy_addr_to_reg (op0);
35884 op0 = gen_rtx_MEM (BLKmode, op0);
35886 op1 = force_reg (DImode, op1);
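      /* Expository note: in 64-bit mode the DImode mask is split below into
	 two SImode halves (low part in op1, high part in op2), matching the
	 EDX:EAX mask convention of the xsave/xrstor family of instructions.  */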
35888 if (TARGET_64BIT)
35890 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35891 NULL, 1, OPTAB_DIRECT);
35892 switch (fcode)
35894 case IX86_BUILTIN_XSAVE:
35895 icode = CODE_FOR_xsave_rex64;
35896 break;
35897 case IX86_BUILTIN_XRSTOR:
35898 icode = CODE_FOR_xrstor_rex64;
35899 break;
35900 case IX86_BUILTIN_XSAVE64:
35901 icode = CODE_FOR_xsave64;
35902 break;
35903 case IX86_BUILTIN_XRSTOR64:
35904 icode = CODE_FOR_xrstor64;
35905 break;
35906 case IX86_BUILTIN_XSAVEOPT:
35907 icode = CODE_FOR_xsaveopt_rex64;
35908 break;
35909 case IX86_BUILTIN_XSAVEOPT64:
35910 icode = CODE_FOR_xsaveopt64;
35911 break;
35912 case IX86_BUILTIN_XSAVES:
35913 icode = CODE_FOR_xsaves_rex64;
35914 break;
35915 case IX86_BUILTIN_XRSTORS:
35916 icode = CODE_FOR_xrstors_rex64;
35917 break;
35918 case IX86_BUILTIN_XSAVES64:
35919 icode = CODE_FOR_xsaves64;
35920 break;
35921 case IX86_BUILTIN_XRSTORS64:
35922 icode = CODE_FOR_xrstors64;
35923 break;
35924 case IX86_BUILTIN_XSAVEC:
35925 icode = CODE_FOR_xsavec_rex64;
35926 break;
35927 case IX86_BUILTIN_XSAVEC64:
35928 icode = CODE_FOR_xsavec64;
35929 break;
35930 default:
35931 gcc_unreachable ();
35934 op2 = gen_lowpart (SImode, op2);
35935 op1 = gen_lowpart (SImode, op1);
35936 pat = GEN_FCN (icode) (op0, op1, op2);
35938 else
35940 switch (fcode)
35942 case IX86_BUILTIN_XSAVE:
35943 icode = CODE_FOR_xsave;
35944 break;
35945 case IX86_BUILTIN_XRSTOR:
35946 icode = CODE_FOR_xrstor;
35947 break;
35948 case IX86_BUILTIN_XSAVEOPT:
35949 icode = CODE_FOR_xsaveopt;
35950 break;
35951 case IX86_BUILTIN_XSAVES:
35952 icode = CODE_FOR_xsaves;
35953 break;
35954 case IX86_BUILTIN_XRSTORS:
35955 icode = CODE_FOR_xrstors;
35956 break;
35957 case IX86_BUILTIN_XSAVEC:
35958 icode = CODE_FOR_xsavec;
35959 break;
35960 default:
35961 gcc_unreachable ();
35963 pat = GEN_FCN (icode) (op0, op1);
35966 if (pat)
35967 emit_insn (pat);
35968 return 0;
35970 case IX86_BUILTIN_LLWPCB:
35971 arg0 = CALL_EXPR_ARG (exp, 0);
35972 op0 = expand_normal (arg0);
35973 icode = CODE_FOR_lwp_llwpcb;
35974 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35975 op0 = ix86_zero_extend_to_Pmode (op0);
35976 emit_insn (gen_lwp_llwpcb (op0));
35977 return 0;
35979 case IX86_BUILTIN_SLWPCB:
35980 icode = CODE_FOR_lwp_slwpcb;
35981 if (!target
35982 || !insn_data[icode].operand[0].predicate (target, Pmode))
35983 target = gen_reg_rtx (Pmode);
35984 emit_insn (gen_lwp_slwpcb (target));
35985 return target;
35987 case IX86_BUILTIN_BEXTRI32:
35988 case IX86_BUILTIN_BEXTRI64:
35989 arg0 = CALL_EXPR_ARG (exp, 0);
35990 arg1 = CALL_EXPR_ARG (exp, 1);
35991 op0 = expand_normal (arg0);
35992 op1 = expand_normal (arg1);
35993 icode = (fcode == IX86_BUILTIN_BEXTRI32
35994 ? CODE_FOR_tbm_bextri_si
35995 : CODE_FOR_tbm_bextri_di);
35996 if (!CONST_INT_P (op1))
35998 error ("last argument must be an immediate");
35999 return const0_rtx;
36001 else
36003 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
36004 unsigned char lsb_index = INTVAL (op1) & 0xFF;
36005 op1 = GEN_INT (length);
36006 op2 = GEN_INT (lsb_index);
36007 pat = GEN_FCN (icode) (target, op0, op1, op2);
36008 if (pat)
36009 emit_insn (pat);
36010 return target;
36013 case IX86_BUILTIN_RDRAND16_STEP:
36014 icode = CODE_FOR_rdrandhi_1;
36015 mode0 = HImode;
36016 goto rdrand_step;
36018 case IX86_BUILTIN_RDRAND32_STEP:
36019 icode = CODE_FOR_rdrandsi_1;
36020 mode0 = SImode;
36021 goto rdrand_step;
36023 case IX86_BUILTIN_RDRAND64_STEP:
36024 icode = CODE_FOR_rdranddi_1;
36025 mode0 = DImode;
36027 rdrand_step:
36028 arg0 = CALL_EXPR_ARG (exp, 0);
36029 op1 = expand_normal (arg0);
36030 if (!address_operand (op1, VOIDmode))
36032 op1 = convert_memory_address (Pmode, op1);
36033 op1 = copy_addr_to_reg (op1);
36036 op0 = gen_reg_rtx (mode0);
36037 emit_insn (GEN_FCN (icode) (op0));
36039 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36041 op1 = gen_reg_rtx (SImode);
36042 emit_move_insn (op1, CONST1_RTX (SImode));
36044 /* Emit SImode conditional move. */
36045 if (mode0 == HImode)
36047 if (TARGET_ZERO_EXTEND_WITH_AND
36048 && optimize_function_for_speed_p (cfun))
36050 op2 = force_reg (SImode, const0_rtx);
36052 emit_insn (gen_movstricthi
36053 (gen_lowpart (HImode, op2), op0));
36055 else
36057 op2 = gen_reg_rtx (SImode);
36059 emit_insn (gen_zero_extendhisi2 (op2, op0));
36062 else if (mode0 == SImode)
36063 op2 = op0;
36064 else
36065 op2 = gen_rtx_SUBREG (SImode, op0, 0);
36067 if (target == 0
36068 || !register_operand (target, SImode))
36069 target = gen_reg_rtx (SImode);
36071 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
36072 const0_rtx);
36073 emit_insn (gen_rtx_SET (target,
36074 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
36075 return target;
36077 case IX86_BUILTIN_RDSEED16_STEP:
36078 icode = CODE_FOR_rdseedhi_1;
36079 mode0 = HImode;
36080 goto rdseed_step;
36082 case IX86_BUILTIN_RDSEED32_STEP:
36083 icode = CODE_FOR_rdseedsi_1;
36084 mode0 = SImode;
36085 goto rdseed_step;
36087 case IX86_BUILTIN_RDSEED64_STEP:
36088 icode = CODE_FOR_rdseeddi_1;
36089 mode0 = DImode;
36091 rdseed_step:
36092 arg0 = CALL_EXPR_ARG (exp, 0);
36093 op1 = expand_normal (arg0);
36094 if (!address_operand (op1, VOIDmode))
36096 op1 = convert_memory_address (Pmode, op1);
36097 op1 = copy_addr_to_reg (op1);
36100 op0 = gen_reg_rtx (mode0);
36101 emit_insn (GEN_FCN (icode) (op0));
36103 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36105 op2 = gen_reg_rtx (QImode);
36107 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
36108 const0_rtx);
36109 emit_insn (gen_rtx_SET (op2, pat));
36111 if (target == 0
36112 || !register_operand (target, SImode))
36113 target = gen_reg_rtx (SImode);
36115 emit_insn (gen_zero_extendqisi2 (target, op2));
36116 return target;
36118 case IX86_BUILTIN_SBB32:
36119 icode = CODE_FOR_subborrowsi;
36120 icode2 = CODE_FOR_subborrowsi_0;
36121 mode0 = SImode;
36122 mode1 = DImode;
36123 mode2 = CCmode;
36124 goto handlecarry;
36126 case IX86_BUILTIN_SBB64:
36127 icode = CODE_FOR_subborrowdi;
36128 icode2 = CODE_FOR_subborrowdi_0;
36129 mode0 = DImode;
36130 mode1 = TImode;
36131 mode2 = CCmode;
36132 goto handlecarry;
36134 case IX86_BUILTIN_ADDCARRYX32:
36135 icode = CODE_FOR_addcarrysi;
36136 icode2 = CODE_FOR_addcarrysi_0;
36137 mode0 = SImode;
36138 mode1 = DImode;
36139 mode2 = CCCmode;
36140 goto handlecarry;
36142 case IX86_BUILTIN_ADDCARRYX64:
36143 icode = CODE_FOR_addcarrydi;
36144 icode2 = CODE_FOR_addcarrydi_0;
36145 mode0 = DImode;
36146 mode1 = TImode;
36147 mode2 = CCCmode;
36149 handlecarry:
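      /* Expository note: these cases expand the add-with-carry /
	 subtract-with-borrow builtins: the two word-sized inputs are combined
	 with the incoming carry (or borrow) c_in, the low word is stored
	 through *sum_out, and the new carry (or borrow) is returned.  An
	 illustrative use (assuming GCC's adxintrin.h) is
	     unsigned int sum;
	     unsigned char c = _addcarry_u32 (0, a, b, &sum);
	 which is expected to map onto IX86_BUILTIN_ADDCARRYX32.  */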
36150 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
36151 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
36152 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
36153 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
36155 op1 = expand_normal (arg0);
36156 if (!integer_zerop (arg0))
36157 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
36159 op2 = expand_normal (arg1);
36160 if (!register_operand (op2, mode0))
36161 op2 = copy_to_mode_reg (mode0, op2);
36163 op3 = expand_normal (arg2);
36164 if (!register_operand (op3, mode0))
36165 op3 = copy_to_mode_reg (mode0, op3);
36167 op4 = expand_normal (arg3);
36168 if (!address_operand (op4, VOIDmode))
36170 op4 = convert_memory_address (Pmode, op4);
36171 op4 = copy_addr_to_reg (op4);
36174 op0 = gen_reg_rtx (mode0);
36175 if (integer_zerop (arg0))
36177 	  /* If arg0 is 0, optimize right away into an add or sub
36178 	     instruction that sets the CCCmode flags.  */
36179 op1 = gen_rtx_REG (mode2, FLAGS_REG);
36180 emit_insn (GEN_FCN (icode2) (op0, op2, op3));
36182 else
36184 /* Generate CF from input operand. */
36185 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
36187 /* Generate instruction that consumes CF. */
36188 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
36189 pat = gen_rtx_LTU (mode1, op1, const0_rtx);
36190 pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
36191 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
36194 /* Return current CF value. */
36195 if (target == 0)
36196 target = gen_reg_rtx (QImode);
36198 pat = gen_rtx_LTU (QImode, op1, const0_rtx);
36199 emit_insn (gen_rtx_SET (target, pat));
36201 /* Store the result. */
36202 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
36204 return target;
36206 case IX86_BUILTIN_READ_FLAGS:
36207 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
36209 if (optimize
36210 || target == NULL_RTX
36211 || !nonimmediate_operand (target, word_mode)
36212 || GET_MODE (target) != word_mode)
36213 target = gen_reg_rtx (word_mode);
36215 emit_insn (gen_pop (target));
36216 return target;
36218 case IX86_BUILTIN_WRITE_FLAGS:
36220 arg0 = CALL_EXPR_ARG (exp, 0);
36221 op0 = expand_normal (arg0);
36222 if (!general_no_elim_operand (op0, word_mode))
36223 op0 = copy_to_mode_reg (word_mode, op0);
36225 emit_insn (gen_push (op0));
36226 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
36227 return 0;
36229 case IX86_BUILTIN_KTESTC8:
36230 icode = CODE_FOR_ktestqi;
36231 mode3 = CCCmode;
36232 goto kortest;
36234 case IX86_BUILTIN_KTESTZ8:
36235 icode = CODE_FOR_ktestqi;
36236 mode3 = CCZmode;
36237 goto kortest;
36239 case IX86_BUILTIN_KTESTC16:
36240 icode = CODE_FOR_ktesthi;
36241 mode3 = CCCmode;
36242 goto kortest;
36244 case IX86_BUILTIN_KTESTZ16:
36245 icode = CODE_FOR_ktesthi;
36246 mode3 = CCZmode;
36247 goto kortest;
36249 case IX86_BUILTIN_KTESTC32:
36250 icode = CODE_FOR_ktestsi;
36251 mode3 = CCCmode;
36252 goto kortest;
36254 case IX86_BUILTIN_KTESTZ32:
36255 icode = CODE_FOR_ktestsi;
36256 mode3 = CCZmode;
36257 goto kortest;
36259 case IX86_BUILTIN_KTESTC64:
36260 icode = CODE_FOR_ktestdi;
36261 mode3 = CCCmode;
36262 goto kortest;
36264 case IX86_BUILTIN_KTESTZ64:
36265 icode = CODE_FOR_ktestdi;
36266 mode3 = CCZmode;
36267 goto kortest;
36269 case IX86_BUILTIN_KORTESTC8:
36270 icode = CODE_FOR_kortestqi;
36271 mode3 = CCCmode;
36272 goto kortest;
36274 case IX86_BUILTIN_KORTESTZ8:
36275 icode = CODE_FOR_kortestqi;
36276 mode3 = CCZmode;
36277 goto kortest;
36279 case IX86_BUILTIN_KORTESTC16:
36280 icode = CODE_FOR_kortesthi;
36281 mode3 = CCCmode;
36282 goto kortest;
36284 case IX86_BUILTIN_KORTESTZ16:
36285 icode = CODE_FOR_kortesthi;
36286 mode3 = CCZmode;
36287 goto kortest;
36289 case IX86_BUILTIN_KORTESTC32:
36290 icode = CODE_FOR_kortestsi;
36291 mode3 = CCCmode;
36292 goto kortest;
36294 case IX86_BUILTIN_KORTESTZ32:
36295 icode = CODE_FOR_kortestsi;
36296 mode3 = CCZmode;
36297 goto kortest;
36299 case IX86_BUILTIN_KORTESTC64:
36300 icode = CODE_FOR_kortestdi;
36301 mode3 = CCCmode;
36302 goto kortest;
36304 case IX86_BUILTIN_KORTESTZ64:
36305 icode = CODE_FOR_kortestdi;
36306 mode3 = CCZmode;
36308 kortest:
36309 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
36310 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
36311 op0 = expand_normal (arg0);
36312 op1 = expand_normal (arg1);
36314 mode0 = insn_data[icode].operand[0].mode;
36315 mode1 = insn_data[icode].operand[1].mode;
36317 if (GET_MODE (op0) != VOIDmode)
36318 op0 = force_reg (GET_MODE (op0), op0);
36320 op0 = gen_lowpart (mode0, op0);
36322 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36323 op0 = copy_to_mode_reg (mode0, op0);
36325 if (GET_MODE (op1) != VOIDmode)
36326 op1 = force_reg (GET_MODE (op1), op1);
36328 op1 = gen_lowpart (mode1, op1);
36330 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36331 op1 = copy_to_mode_reg (mode1, op1);
36333 target = gen_reg_rtx (QImode);
36335 /* Emit kortest. */
36336 emit_insn (GEN_FCN (icode) (op0, op1));
36337 /* And use setcc to return result from flags. */
36338 ix86_expand_setcc (target, EQ,
36339 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
36340 return target;
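	  /* For illustration: these expand the mask-register test
	     intrinsics, e.g. roughly (intrinsic spelling assumed from
	     <immintrin.h>):

	       __mmask16 a, b;
	       int all_zero = _mm512_kortestz (a, b);

	     the flag selected via mode3 (CCZmode or CCCmode) is read back
	     into the QImode result by the setcc emitted above.  */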
36342 case IX86_BUILTIN_GATHERSIV2DF:
36343 icode = CODE_FOR_avx2_gathersiv2df;
36344 goto gather_gen;
36345 case IX86_BUILTIN_GATHERSIV4DF:
36346 icode = CODE_FOR_avx2_gathersiv4df;
36347 goto gather_gen;
36348 case IX86_BUILTIN_GATHERDIV2DF:
36349 icode = CODE_FOR_avx2_gatherdiv2df;
36350 goto gather_gen;
36351 case IX86_BUILTIN_GATHERDIV4DF:
36352 icode = CODE_FOR_avx2_gatherdiv4df;
36353 goto gather_gen;
36354 case IX86_BUILTIN_GATHERSIV4SF:
36355 icode = CODE_FOR_avx2_gathersiv4sf;
36356 goto gather_gen;
36357 case IX86_BUILTIN_GATHERSIV8SF:
36358 icode = CODE_FOR_avx2_gathersiv8sf;
36359 goto gather_gen;
36360 case IX86_BUILTIN_GATHERDIV4SF:
36361 icode = CODE_FOR_avx2_gatherdiv4sf;
36362 goto gather_gen;
36363 case IX86_BUILTIN_GATHERDIV8SF:
36364 icode = CODE_FOR_avx2_gatherdiv8sf;
36365 goto gather_gen;
36366 case IX86_BUILTIN_GATHERSIV2DI:
36367 icode = CODE_FOR_avx2_gathersiv2di;
36368 goto gather_gen;
36369 case IX86_BUILTIN_GATHERSIV4DI:
36370 icode = CODE_FOR_avx2_gathersiv4di;
36371 goto gather_gen;
36372 case IX86_BUILTIN_GATHERDIV2DI:
36373 icode = CODE_FOR_avx2_gatherdiv2di;
36374 goto gather_gen;
36375 case IX86_BUILTIN_GATHERDIV4DI:
36376 icode = CODE_FOR_avx2_gatherdiv4di;
36377 goto gather_gen;
36378 case IX86_BUILTIN_GATHERSIV4SI:
36379 icode = CODE_FOR_avx2_gathersiv4si;
36380 goto gather_gen;
36381 case IX86_BUILTIN_GATHERSIV8SI:
36382 icode = CODE_FOR_avx2_gathersiv8si;
36383 goto gather_gen;
36384 case IX86_BUILTIN_GATHERDIV4SI:
36385 icode = CODE_FOR_avx2_gatherdiv4si;
36386 goto gather_gen;
36387 case IX86_BUILTIN_GATHERDIV8SI:
36388 icode = CODE_FOR_avx2_gatherdiv8si;
36389 goto gather_gen;
36390 case IX86_BUILTIN_GATHERALTSIV4DF:
36391 icode = CODE_FOR_avx2_gathersiv4df;
36392 goto gather_gen;
36393 case IX86_BUILTIN_GATHERALTDIV8SF:
36394 icode = CODE_FOR_avx2_gatherdiv8sf;
36395 goto gather_gen;
36396 case IX86_BUILTIN_GATHERALTSIV4DI:
36397 icode = CODE_FOR_avx2_gathersiv4di;
36398 goto gather_gen;
36399 case IX86_BUILTIN_GATHERALTDIV8SI:
36400 icode = CODE_FOR_avx2_gatherdiv8si;
36401 goto gather_gen;
36402 case IX86_BUILTIN_GATHER3SIV16SF:
36403 icode = CODE_FOR_avx512f_gathersiv16sf;
36404 goto gather_gen;
36405 case IX86_BUILTIN_GATHER3SIV8DF:
36406 icode = CODE_FOR_avx512f_gathersiv8df;
36407 goto gather_gen;
36408 case IX86_BUILTIN_GATHER3DIV16SF:
36409 icode = CODE_FOR_avx512f_gatherdiv16sf;
36410 goto gather_gen;
36411 case IX86_BUILTIN_GATHER3DIV8DF:
36412 icode = CODE_FOR_avx512f_gatherdiv8df;
36413 goto gather_gen;
36414 case IX86_BUILTIN_GATHER3SIV16SI:
36415 icode = CODE_FOR_avx512f_gathersiv16si;
36416 goto gather_gen;
36417 case IX86_BUILTIN_GATHER3SIV8DI:
36418 icode = CODE_FOR_avx512f_gathersiv8di;
36419 goto gather_gen;
36420 case IX86_BUILTIN_GATHER3DIV16SI:
36421 icode = CODE_FOR_avx512f_gatherdiv16si;
36422 goto gather_gen;
36423 case IX86_BUILTIN_GATHER3DIV8DI:
36424 icode = CODE_FOR_avx512f_gatherdiv8di;
36425 goto gather_gen;
36426 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36427 icode = CODE_FOR_avx512f_gathersiv8df;
36428 goto gather_gen;
36429 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36430 icode = CODE_FOR_avx512f_gatherdiv16sf;
36431 goto gather_gen;
36432 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36433 icode = CODE_FOR_avx512f_gathersiv8di;
36434 goto gather_gen;
36435 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36436 icode = CODE_FOR_avx512f_gatherdiv16si;
36437 goto gather_gen;
36438 case IX86_BUILTIN_GATHER3SIV2DF:
36439 icode = CODE_FOR_avx512vl_gathersiv2df;
36440 goto gather_gen;
36441 case IX86_BUILTIN_GATHER3SIV4DF:
36442 icode = CODE_FOR_avx512vl_gathersiv4df;
36443 goto gather_gen;
36444 case IX86_BUILTIN_GATHER3DIV2DF:
36445 icode = CODE_FOR_avx512vl_gatherdiv2df;
36446 goto gather_gen;
36447 case IX86_BUILTIN_GATHER3DIV4DF:
36448 icode = CODE_FOR_avx512vl_gatherdiv4df;
36449 goto gather_gen;
36450 case IX86_BUILTIN_GATHER3SIV4SF:
36451 icode = CODE_FOR_avx512vl_gathersiv4sf;
36452 goto gather_gen;
36453 case IX86_BUILTIN_GATHER3SIV8SF:
36454 icode = CODE_FOR_avx512vl_gathersiv8sf;
36455 goto gather_gen;
36456 case IX86_BUILTIN_GATHER3DIV4SF:
36457 icode = CODE_FOR_avx512vl_gatherdiv4sf;
36458 goto gather_gen;
36459 case IX86_BUILTIN_GATHER3DIV8SF:
36460 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36461 goto gather_gen;
36462 case IX86_BUILTIN_GATHER3SIV2DI:
36463 icode = CODE_FOR_avx512vl_gathersiv2di;
36464 goto gather_gen;
36465 case IX86_BUILTIN_GATHER3SIV4DI:
36466 icode = CODE_FOR_avx512vl_gathersiv4di;
36467 goto gather_gen;
36468 case IX86_BUILTIN_GATHER3DIV2DI:
36469 icode = CODE_FOR_avx512vl_gatherdiv2di;
36470 goto gather_gen;
36471 case IX86_BUILTIN_GATHER3DIV4DI:
36472 icode = CODE_FOR_avx512vl_gatherdiv4di;
36473 goto gather_gen;
36474 case IX86_BUILTIN_GATHER3SIV4SI:
36475 icode = CODE_FOR_avx512vl_gathersiv4si;
36476 goto gather_gen;
36477 case IX86_BUILTIN_GATHER3SIV8SI:
36478 icode = CODE_FOR_avx512vl_gathersiv8si;
36479 goto gather_gen;
36480 case IX86_BUILTIN_GATHER3DIV4SI:
36481 icode = CODE_FOR_avx512vl_gatherdiv4si;
36482 goto gather_gen;
36483 case IX86_BUILTIN_GATHER3DIV8SI:
36484 icode = CODE_FOR_avx512vl_gatherdiv8si;
36485 goto gather_gen;
36486 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36487 icode = CODE_FOR_avx512vl_gathersiv4df;
36488 goto gather_gen;
36489 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36490 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36491 goto gather_gen;
36492 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36493 icode = CODE_FOR_avx512vl_gathersiv4di;
36494 goto gather_gen;
36495 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36496 icode = CODE_FOR_avx512vl_gatherdiv8si;
36497 goto gather_gen;
36498 case IX86_BUILTIN_SCATTERSIV16SF:
36499 icode = CODE_FOR_avx512f_scattersiv16sf;
36500 goto scatter_gen;
36501 case IX86_BUILTIN_SCATTERSIV8DF:
36502 icode = CODE_FOR_avx512f_scattersiv8df;
36503 goto scatter_gen;
36504 case IX86_BUILTIN_SCATTERDIV16SF:
36505 icode = CODE_FOR_avx512f_scatterdiv16sf;
36506 goto scatter_gen;
36507 case IX86_BUILTIN_SCATTERDIV8DF:
36508 icode = CODE_FOR_avx512f_scatterdiv8df;
36509 goto scatter_gen;
36510 case IX86_BUILTIN_SCATTERSIV16SI:
36511 icode = CODE_FOR_avx512f_scattersiv16si;
36512 goto scatter_gen;
36513 case IX86_BUILTIN_SCATTERSIV8DI:
36514 icode = CODE_FOR_avx512f_scattersiv8di;
36515 goto scatter_gen;
36516 case IX86_BUILTIN_SCATTERDIV16SI:
36517 icode = CODE_FOR_avx512f_scatterdiv16si;
36518 goto scatter_gen;
36519 case IX86_BUILTIN_SCATTERDIV8DI:
36520 icode = CODE_FOR_avx512f_scatterdiv8di;
36521 goto scatter_gen;
36522 case IX86_BUILTIN_SCATTERSIV8SF:
36523 icode = CODE_FOR_avx512vl_scattersiv8sf;
36524 goto scatter_gen;
36525 case IX86_BUILTIN_SCATTERSIV4SF:
36526 icode = CODE_FOR_avx512vl_scattersiv4sf;
36527 goto scatter_gen;
36528 case IX86_BUILTIN_SCATTERSIV4DF:
36529 icode = CODE_FOR_avx512vl_scattersiv4df;
36530 goto scatter_gen;
36531 case IX86_BUILTIN_SCATTERSIV2DF:
36532 icode = CODE_FOR_avx512vl_scattersiv2df;
36533 goto scatter_gen;
36534 case IX86_BUILTIN_SCATTERDIV8SF:
36535 icode = CODE_FOR_avx512vl_scatterdiv8sf;
36536 goto scatter_gen;
36537 case IX86_BUILTIN_SCATTERDIV4SF:
36538 icode = CODE_FOR_avx512vl_scatterdiv4sf;
36539 goto scatter_gen;
36540 case IX86_BUILTIN_SCATTERDIV4DF:
36541 icode = CODE_FOR_avx512vl_scatterdiv4df;
36542 goto scatter_gen;
36543 case IX86_BUILTIN_SCATTERDIV2DF:
36544 icode = CODE_FOR_avx512vl_scatterdiv2df;
36545 goto scatter_gen;
36546 case IX86_BUILTIN_SCATTERSIV8SI:
36547 icode = CODE_FOR_avx512vl_scattersiv8si;
36548 goto scatter_gen;
36549 case IX86_BUILTIN_SCATTERSIV4SI:
36550 icode = CODE_FOR_avx512vl_scattersiv4si;
36551 goto scatter_gen;
36552 case IX86_BUILTIN_SCATTERSIV4DI:
36553 icode = CODE_FOR_avx512vl_scattersiv4di;
36554 goto scatter_gen;
36555 case IX86_BUILTIN_SCATTERSIV2DI:
36556 icode = CODE_FOR_avx512vl_scattersiv2di;
36557 goto scatter_gen;
36558 case IX86_BUILTIN_SCATTERDIV8SI:
36559 icode = CODE_FOR_avx512vl_scatterdiv8si;
36560 goto scatter_gen;
36561 case IX86_BUILTIN_SCATTERDIV4SI:
36562 icode = CODE_FOR_avx512vl_scatterdiv4si;
36563 goto scatter_gen;
36564 case IX86_BUILTIN_SCATTERDIV4DI:
36565 icode = CODE_FOR_avx512vl_scatterdiv4di;
36566 goto scatter_gen;
36567 case IX86_BUILTIN_SCATTERDIV2DI:
36568 icode = CODE_FOR_avx512vl_scatterdiv2di;
36569 goto scatter_gen;
36570 case IX86_BUILTIN_GATHERPFDPD:
36571 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36572 goto vec_prefetch_gen;
36573 case IX86_BUILTIN_SCATTERALTSIV8DF:
36574 icode = CODE_FOR_avx512f_scattersiv8df;
36575 goto scatter_gen;
36576 case IX86_BUILTIN_SCATTERALTDIV16SF:
36577 icode = CODE_FOR_avx512f_scatterdiv16sf;
36578 goto scatter_gen;
36579 case IX86_BUILTIN_SCATTERALTSIV8DI:
36580 icode = CODE_FOR_avx512f_scattersiv8di;
36581 goto scatter_gen;
36582 case IX86_BUILTIN_SCATTERALTDIV16SI:
36583 icode = CODE_FOR_avx512f_scatterdiv16si;
36584 goto scatter_gen;
36585 case IX86_BUILTIN_GATHERPFDPS:
36586 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36587 goto vec_prefetch_gen;
36588 case IX86_BUILTIN_GATHERPFQPD:
36589 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36590 goto vec_prefetch_gen;
36591 case IX86_BUILTIN_GATHERPFQPS:
36592 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36593 goto vec_prefetch_gen;
36594 case IX86_BUILTIN_SCATTERPFDPD:
36595 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36596 goto vec_prefetch_gen;
36597 case IX86_BUILTIN_SCATTERPFDPS:
36598 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36599 goto vec_prefetch_gen;
36600 case IX86_BUILTIN_SCATTERPFQPD:
36601 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36602 goto vec_prefetch_gen;
36603 case IX86_BUILTIN_SCATTERPFQPS:
36604 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36605 goto vec_prefetch_gen;
36607 gather_gen:
36608 rtx half;
36609 rtx (*gen) (rtx, rtx);
36611 arg0 = CALL_EXPR_ARG (exp, 0);
36612 arg1 = CALL_EXPR_ARG (exp, 1);
36613 arg2 = CALL_EXPR_ARG (exp, 2);
36614 arg3 = CALL_EXPR_ARG (exp, 3);
36615 arg4 = CALL_EXPR_ARG (exp, 4);
36616 op0 = expand_normal (arg0);
36617 op1 = expand_normal (arg1);
36618 op2 = expand_normal (arg2);
36619 op3 = expand_normal (arg3);
36620 op4 = expand_normal (arg4);
36621 /* Note the arg order is different from the operand order. */
36622 mode0 = insn_data[icode].operand[1].mode;
36623 mode2 = insn_data[icode].operand[3].mode;
36624 mode3 = insn_data[icode].operand[4].mode;
36625 mode4 = insn_data[icode].operand[5].mode;
36627 if (target == NULL_RTX
36628 || GET_MODE (target) != insn_data[icode].operand[0].mode
36629 || !insn_data[icode].operand[0].predicate (target,
36630 GET_MODE (target)))
36631 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36632 else
36633 subtarget = target;
36635 switch (fcode)
36637 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36638 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36639 half = gen_reg_rtx (V8SImode);
36640 if (!nonimmediate_operand (op2, V16SImode))
36641 op2 = copy_to_mode_reg (V16SImode, op2);
36642 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36643 op2 = half;
36644 break;
36645 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36646 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36647 case IX86_BUILTIN_GATHERALTSIV4DF:
36648 case IX86_BUILTIN_GATHERALTSIV4DI:
36649 half = gen_reg_rtx (V4SImode);
36650 if (!nonimmediate_operand (op2, V8SImode))
36651 op2 = copy_to_mode_reg (V8SImode, op2);
36652 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36653 op2 = half;
36654 break;
36655 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36656 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36657 half = gen_reg_rtx (mode0);
36658 if (mode0 == V8SFmode)
36659 gen = gen_vec_extract_lo_v16sf;
36660 else
36661 gen = gen_vec_extract_lo_v16si;
36662 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36663 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36664 emit_insn (gen (half, op0));
36665 op0 = half;
36666 if (GET_MODE (op3) != VOIDmode)
36668 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36669 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36670 emit_insn (gen (half, op3));
36671 op3 = half;
36673 break;
36674 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36675 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36676 case IX86_BUILTIN_GATHERALTDIV8SF:
36677 case IX86_BUILTIN_GATHERALTDIV8SI:
36678 half = gen_reg_rtx (mode0);
36679 if (mode0 == V4SFmode)
36680 gen = gen_vec_extract_lo_v8sf;
36681 else
36682 gen = gen_vec_extract_lo_v8si;
36683 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36684 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36685 emit_insn (gen (half, op0));
36686 op0 = half;
36687 if (GET_MODE (op3) != VOIDmode)
36689 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36690 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36691 emit_insn (gen (half, op3));
36692 op3 = half;
36694 break;
36695 default:
36696 break;
36699 	  /* Force the memory operand into a base register here.  We
36700 	     don't want to do that for the memory operands of other builtin
36701 	     functions. */
36702 op1 = ix86_zero_extend_to_Pmode (op1);
36704 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36705 op0 = copy_to_mode_reg (mode0, op0);
36706 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36707 op1 = copy_to_mode_reg (Pmode, op1);
36708 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36709 op2 = copy_to_mode_reg (mode2, op2);
36711 op3 = fixup_modeless_constant (op3, mode3);
36713 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36715 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36716 op3 = copy_to_mode_reg (mode3, op3);
36718 else
36720 op3 = copy_to_reg (op3);
36721 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
36723 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36725 error ("the last argument must be scale 1, 2, 4, 8");
36726 return const0_rtx;
36729 /* Optimize. If mask is known to have all high bits set,
36730 replace op0 with pc_rtx to signal that the instruction
36731 overwrites the whole destination and doesn't use its
36732 previous contents. */
36733 if (optimize)
36735 if (TREE_CODE (arg3) == INTEGER_CST)
36737 if (integer_all_onesp (arg3))
36738 op0 = pc_rtx;
36740 else if (TREE_CODE (arg3) == VECTOR_CST)
36742 unsigned int negative = 0;
36743 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36745 tree cst = VECTOR_CST_ELT (arg3, i);
36746 if (TREE_CODE (cst) == INTEGER_CST
36747 && tree_int_cst_sign_bit (cst))
36748 negative++;
36749 else if (TREE_CODE (cst) == REAL_CST
36750 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36751 negative++;
36753 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36754 op0 = pc_rtx;
36756 else if (TREE_CODE (arg3) == SSA_NAME
36757 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36759 /* Recognize also when mask is like:
36760 __v2df src = _mm_setzero_pd ();
36761 __v2df mask = _mm_cmpeq_pd (src, src);
36763 __v8sf src = _mm256_setzero_ps ();
36764 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36765 as that is a cheaper way to load all ones into
36766 a register than having to load a constant from
36767 memory. */
36768 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
36769 if (is_gimple_call (def_stmt))
36771 tree fndecl = gimple_call_fndecl (def_stmt);
36772 if (fndecl
36773 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36774 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36776 case IX86_BUILTIN_CMPPD:
36777 case IX86_BUILTIN_CMPPS:
36778 case IX86_BUILTIN_CMPPD256:
36779 case IX86_BUILTIN_CMPPS256:
36780 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36781 break;
36782 /* FALLTHRU */
36783 case IX86_BUILTIN_CMPEQPD:
36784 case IX86_BUILTIN_CMPEQPS:
36785 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36786 && initializer_zerop (gimple_call_arg (def_stmt,
36787 1)))
36788 op0 = pc_rtx;
36789 break;
36790 default:
36791 break;
36797 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36798 if (! pat)
36799 return const0_rtx;
36800 emit_insn (pat);
36802 switch (fcode)
36804 case IX86_BUILTIN_GATHER3DIV16SF:
36805 if (target == NULL_RTX)
36806 target = gen_reg_rtx (V8SFmode);
36807 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36808 break;
36809 case IX86_BUILTIN_GATHER3DIV16SI:
36810 if (target == NULL_RTX)
36811 target = gen_reg_rtx (V8SImode);
36812 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36813 break;
36814 case IX86_BUILTIN_GATHER3DIV8SF:
36815 case IX86_BUILTIN_GATHERDIV8SF:
36816 if (target == NULL_RTX)
36817 target = gen_reg_rtx (V4SFmode);
36818 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36819 break;
36820 case IX86_BUILTIN_GATHER3DIV8SI:
36821 case IX86_BUILTIN_GATHERDIV8SI:
36822 if (target == NULL_RTX)
36823 target = gen_reg_rtx (V4SImode);
36824 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36825 break;
36826 default:
36827 target = subtarget;
36828 break;
36830 return target;
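	  /* For illustration: this path expands the AVX2/AVX-512 gather
	     intrinsics, e.g. roughly (intrinsic spelling assumed from
	     <immintrin.h>):

	       __m256d v = _mm256_i32gather_pd (base, vindex, 8);

	     The *DIV16SF/*DIV8SF cases just above return only the low half
	     of the gathered vector, since a DImode index vector addresses
	     half as many elements as the destination holds.  */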
36832 scatter_gen:
36833 arg0 = CALL_EXPR_ARG (exp, 0);
36834 arg1 = CALL_EXPR_ARG (exp, 1);
36835 arg2 = CALL_EXPR_ARG (exp, 2);
36836 arg3 = CALL_EXPR_ARG (exp, 3);
36837 arg4 = CALL_EXPR_ARG (exp, 4);
36838 op0 = expand_normal (arg0);
36839 op1 = expand_normal (arg1);
36840 op2 = expand_normal (arg2);
36841 op3 = expand_normal (arg3);
36842 op4 = expand_normal (arg4);
36843 mode1 = insn_data[icode].operand[1].mode;
36844 mode2 = insn_data[icode].operand[2].mode;
36845 mode3 = insn_data[icode].operand[3].mode;
36846 mode4 = insn_data[icode].operand[4].mode;
36848 /* Scatter instruction stores operand op3 to memory with
36849 indices from op2 and scale from op4 under writemask op1.
36850 	     If index operand op2 has more elements than source operand
36851 	     op3, one needs to use only its low half.  And vice versa. */
36852 switch (fcode)
36854 case IX86_BUILTIN_SCATTERALTSIV8DF:
36855 case IX86_BUILTIN_SCATTERALTSIV8DI:
36856 half = gen_reg_rtx (V8SImode);
36857 if (!nonimmediate_operand (op2, V16SImode))
36858 op2 = copy_to_mode_reg (V16SImode, op2);
36859 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36860 op2 = half;
36861 break;
36862 case IX86_BUILTIN_SCATTERALTDIV16SF:
36863 case IX86_BUILTIN_SCATTERALTDIV16SI:
36864 half = gen_reg_rtx (mode3);
36865 if (mode3 == V8SFmode)
36866 gen = gen_vec_extract_lo_v16sf;
36867 else
36868 gen = gen_vec_extract_lo_v16si;
36869 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36870 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36871 emit_insn (gen (half, op3));
36872 op3 = half;
36873 break;
36874 default:
36875 break;
36878 	  /* Force the memory operand into a base register here.  We
36879 	     don't want to do that for the memory operands of other builtin
36880 	     functions. */
36881 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36883 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36884 op0 = copy_to_mode_reg (Pmode, op0);
36886 op1 = fixup_modeless_constant (op1, mode1);
36888 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36890 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36891 op1 = copy_to_mode_reg (mode1, op1);
36893 else
36895 op1 = copy_to_reg (op1);
36896 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
36899 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36900 op2 = copy_to_mode_reg (mode2, op2);
36902 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36903 op3 = copy_to_mode_reg (mode3, op3);
36905 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36907 error ("the last argument must be scale 1, 2, 4, 8");
36908 return const0_rtx;
36911 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36912 if (! pat)
36913 return const0_rtx;
36915 emit_insn (pat);
36916 return 0;
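	  /* For illustration: the scatter path is the store-side
	     counterpart, e.g. roughly (intrinsic spelling assumed from
	     <immintrin.h>):

	       _mm512_i32scatter_ps (base, vindex, v, 4);

	     i.e. source vector op3 is stored to base + vindex * scale
	     under writemask op1, as the comment before the switch above
	     describes.  */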
36918 vec_prefetch_gen:
36919 arg0 = CALL_EXPR_ARG (exp, 0);
36920 arg1 = CALL_EXPR_ARG (exp, 1);
36921 arg2 = CALL_EXPR_ARG (exp, 2);
36922 arg3 = CALL_EXPR_ARG (exp, 3);
36923 arg4 = CALL_EXPR_ARG (exp, 4);
36924 op0 = expand_normal (arg0);
36925 op1 = expand_normal (arg1);
36926 op2 = expand_normal (arg2);
36927 op3 = expand_normal (arg3);
36928 op4 = expand_normal (arg4);
36929 mode0 = insn_data[icode].operand[0].mode;
36930 mode1 = insn_data[icode].operand[1].mode;
36931 mode3 = insn_data[icode].operand[3].mode;
36932 mode4 = insn_data[icode].operand[4].mode;
36934 op0 = fixup_modeless_constant (op0, mode0);
36936 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
36938 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36939 op0 = copy_to_mode_reg (mode0, op0);
36941 else
36943 op0 = copy_to_reg (op0);
36944 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
36947 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36948 op1 = copy_to_mode_reg (mode1, op1);
36950 	  /* Force the memory operand into a base register here.  We
36951 	     don't want to do that for the memory operands of other builtin
36952 	     functions. */
36953 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36955 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36956 op2 = copy_to_mode_reg (Pmode, op2);
36958 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36960 	      error ("the fourth argument must be scale 1, 2, 4, 8");
36961 return const0_rtx;
36964 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36966 error ("incorrect hint operand");
36967 return const0_rtx;
36970 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36971 if (! pat)
36972 return const0_rtx;
36974 emit_insn (pat);
36976 return 0;
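	  /* For illustration: this expands the AVX-512PF gather/scatter
	     prefetch intrinsics, e.g. roughly (intrinsic spelling assumed
	     from <immintrin.h>):

	       _mm512_prefetch_i32gather_ps (vindex, base, 4, _MM_HINT_T0);

	     both the scale and the hint must be immediates, hence the two
	     predicate checks with error () above.  */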
36978 case IX86_BUILTIN_XABORT:
36979 icode = CODE_FOR_xabort;
36980 arg0 = CALL_EXPR_ARG (exp, 0);
36981 op0 = expand_normal (arg0);
36982 mode0 = insn_data[icode].operand[0].mode;
36983 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36985 error ("the xabort's argument must be an 8-bit immediate");
36986 return const0_rtx;
36988 emit_insn (gen_xabort (op0));
36989 return 0;
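	  /* For illustration: the RTM intrinsic _xabort (0xff) (spelling
	     assumed from <immintrin.h>) reaches this case; its status
	     argument must be a compile-time 8-bit immediate, which the
	     predicate check above enforces.  */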
36991 case IX86_BUILTIN_RSTORSSP:
36992 case IX86_BUILTIN_CLRSSBSY:
36993 arg0 = CALL_EXPR_ARG (exp, 0);
36994 op0 = expand_normal (arg0);
36995 icode = (fcode == IX86_BUILTIN_RSTORSSP
36996 ? CODE_FOR_rstorssp
36997 : CODE_FOR_clrssbsy);
36998 if (!address_operand (op0, VOIDmode))
37000 op1 = convert_memory_address (Pmode, op0);
37001 op0 = copy_addr_to_reg (op1);
37003 emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
37004 return 0;
37006 case IX86_BUILTIN_WRSSD:
37007 case IX86_BUILTIN_WRSSQ:
37008 case IX86_BUILTIN_WRUSSD:
37009 case IX86_BUILTIN_WRUSSQ:
37010 arg0 = CALL_EXPR_ARG (exp, 0);
37011 op0 = expand_normal (arg0);
37012 arg1 = CALL_EXPR_ARG (exp, 1);
37013 op1 = expand_normal (arg1);
37014 switch (fcode)
37016 case IX86_BUILTIN_WRSSD:
37017 icode = CODE_FOR_wrsssi;
37018 mode = SImode;
37019 break;
37020 case IX86_BUILTIN_WRSSQ:
37021 icode = CODE_FOR_wrssdi;
37022 mode = DImode;
37023 break;
37024 case IX86_BUILTIN_WRUSSD:
37025 icode = CODE_FOR_wrusssi;
37026 mode = SImode;
37027 break;
37028 case IX86_BUILTIN_WRUSSQ:
37029 icode = CODE_FOR_wrussdi;
37030 mode = DImode;
37031 break;
37033 op0 = force_reg (mode, op0);
37034 if (!address_operand (op1, VOIDmode))
37036 op2 = convert_memory_address (Pmode, op1);
37037 op1 = copy_addr_to_reg (op2);
37039 emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
37040 return 0;
37042 default:
37043 break;
37046 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
37047 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
37049 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
37050 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
37051 target);
37054 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
37055 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
37057 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
37058 switch (fcode)
37060 case IX86_BUILTIN_FABSQ:
37061 case IX86_BUILTIN_COPYSIGNQ:
37062 if (!TARGET_SSE)
37063 /* Emit a normal call if SSE isn't available. */
37064 return expand_call (exp, target, ignore);
37065 /* FALLTHRU */
37066 default:
37067 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
37071 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
37072 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
37074 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
37075 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
37076 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
37077 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
37078 int masked = 1;
37079 machine_mode mode, wide_mode, nar_mode;
37081 nar_mode = V4SFmode;
37082 mode = V16SFmode;
37083 wide_mode = V64SFmode;
37084 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
37085 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
37087 switch (fcode)
37089 case IX86_BUILTIN_4FMAPS:
37090 fcn = gen_avx5124fmaddps_4fmaddps;
37091 masked = 0;
37092 goto v4fma_expand;
37094 case IX86_BUILTIN_4DPWSSD:
37095 nar_mode = V4SImode;
37096 mode = V16SImode;
37097 wide_mode = V64SImode;
37098 fcn = gen_avx5124vnniw_vp4dpwssd;
37099 masked = 0;
37100 goto v4fma_expand;
37102 case IX86_BUILTIN_4DPWSSDS:
37103 nar_mode = V4SImode;
37104 mode = V16SImode;
37105 wide_mode = V64SImode;
37106 fcn = gen_avx5124vnniw_vp4dpwssds;
37107 masked = 0;
37108 goto v4fma_expand;
37110 case IX86_BUILTIN_4FNMAPS:
37111 fcn = gen_avx5124fmaddps_4fnmaddps;
37112 masked = 0;
37113 goto v4fma_expand;
37115 case IX86_BUILTIN_4FNMAPS_MASK:
37116 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
37117 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
37118 goto v4fma_expand;
37120 case IX86_BUILTIN_4DPWSSD_MASK:
37121 nar_mode = V4SImode;
37122 mode = V16SImode;
37123 wide_mode = V64SImode;
37124 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
37125 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
37126 goto v4fma_expand;
37128 case IX86_BUILTIN_4DPWSSDS_MASK:
37129 nar_mode = V4SImode;
37130 mode = V16SImode;
37131 wide_mode = V64SImode;
37132 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
37133 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
37134 goto v4fma_expand;
37136 case IX86_BUILTIN_4FMAPS_MASK:
37138 tree args[4];
37139 rtx ops[4];
37140 rtx wide_reg;
37141 rtx accum;
37142 rtx addr;
37143 rtx mem;
37145 v4fma_expand:
37146 wide_reg = gen_reg_rtx (wide_mode);
37147 for (i = 0; i < 4; i++)
37149 args[i] = CALL_EXPR_ARG (exp, i);
37150 ops[i] = expand_normal (args[i]);
37152 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
37153 ops[i]);
37156 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
37157 accum = force_reg (mode, accum);
37159 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
37160 addr = force_reg (Pmode, addr);
37162 mem = gen_rtx_MEM (nar_mode, addr);
37164 target = gen_reg_rtx (mode);
37166 emit_move_insn (target, accum);
37168 if (! masked)
37169 emit_insn (fcn (target, accum, wide_reg, mem));
37170 else
37172 rtx merge, mask;
37173 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
37175 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
37177 if (CONST_INT_P (mask))
37178 mask = fixup_modeless_constant (mask, HImode);
37180 mask = force_reg (HImode, mask);
37182 if (GET_MODE (mask) != HImode)
37183 mask = gen_rtx_SUBREG (HImode, mask, 0);
37185 	    /* If merge is 0 then we're about to emit the z-masked variant. */
37186 if (const0_operand (merge, mode))
37187 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
37188 	    /* If merge is the same as accum then emit the merge-masked variant. */
37189 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
37191 merge = force_reg (mode, merge);
37192 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
37194 	    /* Merging with something unknown can happen if we z-mask at -O0. */
37195 else
37197 target = gen_reg_rtx (mode);
37198 emit_move_insn (target, merge);
37199 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
37202 return target;
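	  /* For illustration: this block expands the AVX512_4FMAPS and
	     AVX512_4VNNIW multi-register intrinsics (names such as
	     _mm512_4fmadd_ps are assumed from <immintrin.h>): the four
	     vector arguments are packed into one wide V64SF/V64SI register
	     above, and the masked variants choose between the zeroing and
	     merging patterns based on the merge operand.  */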
37205 case IX86_BUILTIN_4FNMASS:
37206 fcn = gen_avx5124fmaddps_4fnmaddss;
37207 masked = 0;
37208 goto s4fma_expand;
37210 case IX86_BUILTIN_4FMASS:
37211 fcn = gen_avx5124fmaddps_4fmaddss;
37212 masked = 0;
37213 goto s4fma_expand;
37215 case IX86_BUILTIN_4FNMASS_MASK:
37216 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
37217 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
37218 goto s4fma_expand;
37220 case IX86_BUILTIN_4FMASS_MASK:
37222 tree args[4];
37223 rtx ops[4];
37224 rtx wide_reg;
37225 rtx accum;
37226 rtx addr;
37227 rtx mem;
37229 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
37230 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
37232 s4fma_expand:
37233 mode = V4SFmode;
37234 wide_reg = gen_reg_rtx (V64SFmode);
37235 for (i = 0; i < 4; i++)
37237 rtx tmp;
37238 args[i] = CALL_EXPR_ARG (exp, i);
37239 ops[i] = expand_normal (args[i]);
37241 tmp = gen_reg_rtx (SFmode);
37242 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
37244 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
37245 gen_rtx_SUBREG (V16SFmode, tmp, 0));
37248 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
37249 accum = force_reg (V4SFmode, accum);
37251 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
37252 addr = force_reg (Pmode, addr);
37254 mem = gen_rtx_MEM (V4SFmode, addr);
37256 target = gen_reg_rtx (V4SFmode);
37258 emit_move_insn (target, accum);
37260 if (! masked)
37261 emit_insn (fcn (target, accum, wide_reg, mem));
37262 else
37264 rtx merge, mask;
37265 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
37267 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
37269 if (CONST_INT_P (mask))
37270 mask = fixup_modeless_constant (mask, QImode);
37272 mask = force_reg (QImode, mask);
37274 if (GET_MODE (mask) != QImode)
37275 mask = gen_rtx_SUBREG (QImode, mask, 0);
37277 	    /* If merge is 0 then we're about to emit the z-masked variant. */
37278 if (const0_operand (merge, mode))
37279 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
37280 	    /* If merge is the same as accum then emit the merge-masked
37281 	       variant. */
37282 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
37284 merge = force_reg (mode, merge);
37285 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
37287 	    /* Merging with something unknown can happen if we z-mask
37288 	       at -O0. */
37289 else
37291 target = gen_reg_rtx (mode);
37292 emit_move_insn (target, merge);
37293 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
37296 return target;
37298 case IX86_BUILTIN_RDPID:
37299 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
37300 target);
37301 default:
37302 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
37306 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
37307 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
37309 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
37310 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
37313 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
37314 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
37316 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
37317 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
37320 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
37321 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
37323 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
37324 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
37327 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
37328 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
37330 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
37331 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
37334 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
37335 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
37337 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
37338 const struct builtin_description *d = bdesc_multi_arg + i;
37339 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
37340 (enum ix86_builtin_func_type)
37341 d->flag, d->comparison);
37344 if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
37345 && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
37347 i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
37348 return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
37349 target);
37352 if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
37353 && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
37355 i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
37356 return ix86_expand_args_builtin (bdesc_cet_rdssp + i, exp,
37357 target);
37360 gcc_unreachable ();
37363 /* This returns the target-specific builtin with code CODE if
37364 current_function_decl has visibility on this builtin, which is checked
37365 using isa flags. Returns NULL_TREE otherwise. */
37367 static tree ix86_get_builtin (enum ix86_builtins code)
37369 struct cl_target_option *opts;
37370 tree target_tree = NULL_TREE;
37372 /* Determine the isa flags of current_function_decl. */
37374 if (current_function_decl)
37375 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
37377 if (target_tree == NULL)
37378 target_tree = target_option_default_node;
37380 opts = TREE_TARGET_OPTION (target_tree);
37382 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
37383 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
37384 return ix86_builtin_decl (code, true);
37385 else
37386 return NULL_TREE;
37389 /* Return the function decl for the target-specific builtin
37390 for the given MPX builtin passed in FCODE. */
37391 static tree
37392 ix86_builtin_mpx_function (unsigned fcode)
37394 switch (fcode)
37396 case BUILT_IN_CHKP_BNDMK:
37397 return ix86_builtins[IX86_BUILTIN_BNDMK];
37399 case BUILT_IN_CHKP_BNDSTX:
37400 return ix86_builtins[IX86_BUILTIN_BNDSTX];
37402 case BUILT_IN_CHKP_BNDLDX:
37403 return ix86_builtins[IX86_BUILTIN_BNDLDX];
37405 case BUILT_IN_CHKP_BNDCL:
37406 return ix86_builtins[IX86_BUILTIN_BNDCL];
37408 case BUILT_IN_CHKP_BNDCU:
37409 return ix86_builtins[IX86_BUILTIN_BNDCU];
37411 case BUILT_IN_CHKP_BNDRET:
37412 return ix86_builtins[IX86_BUILTIN_BNDRET];
37414 case BUILT_IN_CHKP_INTERSECT:
37415 return ix86_builtins[IX86_BUILTIN_BNDINT];
37417 case BUILT_IN_CHKP_NARROW:
37418 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
37420 case BUILT_IN_CHKP_SIZEOF:
37421 return ix86_builtins[IX86_BUILTIN_SIZEOF];
37423 case BUILT_IN_CHKP_EXTRACT_LOWER:
37424 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
37426 case BUILT_IN_CHKP_EXTRACT_UPPER:
37427 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
37429 default:
37430 return NULL_TREE;
37433 gcc_unreachable ();
37436 /* Helper function for ix86_load_bounds and ix86_store_bounds.
37438 Return an address to be used to load/store bounds for pointer
37439 passed in SLOT.
37441 SLOT_NO is an integer constant holding number of a target
37442 dependent special slot to be used in case SLOT is not a memory.
37444 SPECIAL_BASE is a pointer to be used as a base of fake address
37445 to access special slots in Bounds Table. SPECIAL_BASE[-1],
37446 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
37448 static rtx
37449 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
37451 rtx addr = NULL;
37453 /* NULL slot means we pass bounds for pointer not passed to the
37454 function at all. Register slot means we pass pointer in a
37455 register. In both these cases bounds are passed via Bounds
37456 Table. Since we do not have actual pointer stored in memory,
37457 we have to use fake addresses to access Bounds Table. We
37458 start with (special_base - sizeof (void*)) and decrease this
37459 address by pointer size to get addresses for other slots. */
37460 if (!slot || REG_P (slot))
37462 gcc_assert (CONST_INT_P (slot_no));
37463 addr = plus_constant (Pmode, special_base,
37464 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
37466 /* If pointer is passed in a memory then its address is used to
37467 access Bounds Table. */
37468 else if (MEM_P (slot))
37470 addr = XEXP (slot, 0);
37471 if (!register_operand (addr, Pmode))
37472 addr = copy_addr_to_reg (addr);
37474 else
37475 gcc_unreachable ();
37477 return addr;
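   /* Worked example of the fake-address scheme above, assuming 64-bit
      Pmode: slot_no 0 yields SPECIAL_BASE - 8, slot_no 1 yields
      SPECIAL_BASE - 16, and so on, i.e. SPECIAL_BASE[-1], SPECIAL_BASE[-2]
      as described in the function comment.  */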
37480 /* Expand pass uses this hook to load bounds for function parameter
37481 PTR passed in SLOT in case its bounds are not passed in a register.
37483 If SLOT is a memory, then bounds are loaded as for regular pointer
37484 loaded from memory. PTR may be NULL in case SLOT is a memory.
37485 In such case value of PTR (if required) may be loaded from SLOT.
37487 If SLOT is NULL or a register then SLOT_NO is an integer constant
37488 holding number of the target dependent special slot which should be
37489 used to obtain bounds.
37491 Return loaded bounds. */
37493 static rtx
37494 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
37496 rtx reg = gen_reg_rtx (BNDmode);
37497 rtx addr;
37499 /* Get address to be used to access Bounds Table. Special slots start
37500 at the location of return address of the current function. */
37501 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
37503 /* Load pointer value from a memory if we don't have it. */
37504 if (!ptr)
37506 gcc_assert (MEM_P (slot));
37507 ptr = copy_addr_to_reg (slot);
37510 if (!register_operand (ptr, Pmode))
37511 ptr = ix86_zero_extend_to_Pmode (ptr);
37513 emit_insn (BNDmode == BND64mode
37514 ? gen_bnd64_ldx (reg, addr, ptr)
37515 : gen_bnd32_ldx (reg, addr, ptr));
37517 return reg;
37520 /* Expand pass uses this hook to store BOUNDS for call argument PTR
37521 passed in SLOT in case BOUNDS are not passed in a register.
37523 If SLOT is a memory, then BOUNDS are stored as for regular pointer
37524 stored in memory. PTR may be NULL in case SLOT is a memory.
37525 In such case value of PTR (if required) may be loaded from SLOT.
37527 If SLOT is NULL or a register then SLOT_NO is an integer constant
37528 holding number of the target dependent special slot which should be
37529 used to store BOUNDS. */
37531 static void
37532 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
37534 rtx addr;
37536 /* Get address to be used to access Bounds Table. Special slots start
37537 at the location of return address of a called function. */
37538 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
37540 /* Load pointer value from a memory if we don't have it. */
37541 if (!ptr)
37543 gcc_assert (MEM_P (slot));
37544 ptr = copy_addr_to_reg (slot);
37547 if (!register_operand (ptr, Pmode))
37548 ptr = ix86_zero_extend_to_Pmode (ptr);
37550 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
37551 if (!register_operand (bounds, BNDmode))
37552 bounds = copy_to_mode_reg (BNDmode, bounds);
37554 emit_insn (BNDmode == BND64mode
37555 ? gen_bnd64_stx (addr, ptr, bounds)
37556 : gen_bnd32_stx (addr, ptr, bounds));
37559 /* Load and return bounds returned by function in SLOT. */
37561 static rtx
37562 ix86_load_returned_bounds (rtx slot)
37564 rtx res;
37566 gcc_assert (REG_P (slot));
37567 res = gen_reg_rtx (BNDmode);
37568 emit_move_insn (res, slot);
37570 return res;
37573 /* Store BOUNDS returned by function into SLOT. */
37575 static void
37576 ix86_store_returned_bounds (rtx slot, rtx bounds)
37578 gcc_assert (REG_P (slot));
37579 emit_move_insn (slot, bounds);
37582 /* Returns a function decl for a vectorized version of the combined function
37583 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
37584 if it is not available. */
37586 static tree
37587 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
37588 tree type_in)
37590 machine_mode in_mode, out_mode;
37591 int in_n, out_n;
37593 if (TREE_CODE (type_out) != VECTOR_TYPE
37594 || TREE_CODE (type_in) != VECTOR_TYPE)
37595 return NULL_TREE;
37597 out_mode = TYPE_MODE (TREE_TYPE (type_out));
37598 out_n = TYPE_VECTOR_SUBPARTS (type_out);
37599 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37600 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37602 switch (fn)
37604 CASE_CFN_EXP2:
37605 if (out_mode == SFmode && in_mode == SFmode)
37607 if (out_n == 16 && in_n == 16)
37608 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
37610 break;
37612 CASE_CFN_IFLOOR:
37613 CASE_CFN_LFLOOR:
37614 CASE_CFN_LLFLOOR:
37615 /* The round insn does not trap on denormals. */
37616 if (flag_trapping_math || !TARGET_SSE4_1)
37617 break;
37619 if (out_mode == SImode && in_mode == DFmode)
37621 if (out_n == 4 && in_n == 2)
37622 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
37623 else if (out_n == 8 && in_n == 4)
37624 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
37625 else if (out_n == 16 && in_n == 8)
37626 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
37628 if (out_mode == SImode && in_mode == SFmode)
37630 if (out_n == 4 && in_n == 4)
37631 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
37632 else if (out_n == 8 && in_n == 8)
37633 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
37634 else if (out_n == 16 && in_n == 16)
37635 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
37637 break;
37639 CASE_CFN_ICEIL:
37640 CASE_CFN_LCEIL:
37641 CASE_CFN_LLCEIL:
37642 /* The round insn does not trap on denormals. */
37643 if (flag_trapping_math || !TARGET_SSE4_1)
37644 break;
37646 if (out_mode == SImode && in_mode == DFmode)
37648 if (out_n == 4 && in_n == 2)
37649 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
37650 else if (out_n == 8 && in_n == 4)
37651 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
37652 else if (out_n == 16 && in_n == 8)
37653 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
37655 if (out_mode == SImode && in_mode == SFmode)
37657 if (out_n == 4 && in_n == 4)
37658 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
37659 else if (out_n == 8 && in_n == 8)
37660 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
37661 else if (out_n == 16 && in_n == 16)
37662 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
37664 break;
37666 CASE_CFN_IRINT:
37667 CASE_CFN_LRINT:
37668 CASE_CFN_LLRINT:
37669 if (out_mode == SImode && in_mode == DFmode)
37671 if (out_n == 4 && in_n == 2)
37672 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
37673 else if (out_n == 8 && in_n == 4)
37674 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
37675 else if (out_n == 16 && in_n == 8)
37676 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
37678 if (out_mode == SImode && in_mode == SFmode)
37680 if (out_n == 4 && in_n == 4)
37681 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
37682 else if (out_n == 8 && in_n == 8)
37683 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
37684 else if (out_n == 16 && in_n == 16)
37685 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
37687 break;
37689 CASE_CFN_IROUND:
37690 CASE_CFN_LROUND:
37691 CASE_CFN_LLROUND:
37692 /* The round insn does not trap on denormals. */
37693 if (flag_trapping_math || !TARGET_SSE4_1)
37694 break;
37696 if (out_mode == SImode && in_mode == DFmode)
37698 if (out_n == 4 && in_n == 2)
37699 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
37700 else if (out_n == 8 && in_n == 4)
37701 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
37702 else if (out_n == 16 && in_n == 8)
37703 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
37705 if (out_mode == SImode && in_mode == SFmode)
37707 if (out_n == 4 && in_n == 4)
37708 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
37709 else if (out_n == 8 && in_n == 8)
37710 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
37711 else if (out_n == 16 && in_n == 16)
37712 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
37714 break;
37716 CASE_CFN_FLOOR:
37717 /* The round insn does not trap on denormals. */
37718 if (flag_trapping_math || !TARGET_SSE4_1)
37719 break;
37721 if (out_mode == DFmode && in_mode == DFmode)
37723 if (out_n == 2 && in_n == 2)
37724 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
37725 else if (out_n == 4 && in_n == 4)
37726 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
37727 else if (out_n == 8 && in_n == 8)
37728 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
37730 if (out_mode == SFmode && in_mode == SFmode)
37732 if (out_n == 4 && in_n == 4)
37733 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
37734 else if (out_n == 8 && in_n == 8)
37735 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
37736 else if (out_n == 16 && in_n == 16)
37737 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
37739 break;
37741 CASE_CFN_CEIL:
37742 /* The round insn does not trap on denormals. */
37743 if (flag_trapping_math || !TARGET_SSE4_1)
37744 break;
37746 if (out_mode == DFmode && in_mode == DFmode)
37748 if (out_n == 2 && in_n == 2)
37749 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
37750 else if (out_n == 4 && in_n == 4)
37751 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
37752 else if (out_n == 8 && in_n == 8)
37753 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
37755 if (out_mode == SFmode && in_mode == SFmode)
37757 if (out_n == 4 && in_n == 4)
37758 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
37759 else if (out_n == 8 && in_n == 8)
37760 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
37761 else if (out_n == 16 && in_n == 16)
37762 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
37764 break;
37766 CASE_CFN_TRUNC:
37767 /* The round insn does not trap on denormals. */
37768 if (flag_trapping_math || !TARGET_SSE4_1)
37769 break;
37771 if (out_mode == DFmode && in_mode == DFmode)
37773 if (out_n == 2 && in_n == 2)
37774 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
37775 else if (out_n == 4 && in_n == 4)
37776 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
37777 else if (out_n == 8 && in_n == 8)
37778 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
37780 if (out_mode == SFmode && in_mode == SFmode)
37782 if (out_n == 4 && in_n == 4)
37783 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
37784 else if (out_n == 8 && in_n == 8)
37785 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
37786 else if (out_n == 16 && in_n == 16)
37787 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
37789 break;
37791 CASE_CFN_RINT:
37792 /* The round insn does not trap on denormals. */
37793 if (flag_trapping_math || !TARGET_SSE4_1)
37794 break;
37796 if (out_mode == DFmode && in_mode == DFmode)
37798 if (out_n == 2 && in_n == 2)
37799 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
37800 else if (out_n == 4 && in_n == 4)
37801 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
37803 if (out_mode == SFmode && in_mode == SFmode)
37805 if (out_n == 4 && in_n == 4)
37806 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
37807 else if (out_n == 8 && in_n == 8)
37808 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
37810 break;
37812 CASE_CFN_FMA:
37813 if (out_mode == DFmode && in_mode == DFmode)
37815 if (out_n == 2 && in_n == 2)
37816 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
37817 if (out_n == 4 && in_n == 4)
37818 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
37820 if (out_mode == SFmode && in_mode == SFmode)
37822 if (out_n == 4 && in_n == 4)
37823 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
37824 if (out_n == 8 && in_n == 8)
37825 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
37827 break;
37829 default:
37830 break;
37833 /* Dispatch to a handler for a vectorization library. */
37834 if (ix86_veclib_handler)
37835 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
37837 return NULL_TREE;
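   /* For illustration: a floor () call vectorized over V4DF resolves via
      CASE_CFN_FLOOR above to ix86_get_builtin (IX86_BUILTIN_FLOORPD256),
      provided SSE4.1 is enabled and -ftrapping-math is off.  */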
37840 /* Handler for an SVML-style interface to
37841 a library with vectorized intrinsics. */
37843 static tree
37844 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
37846 char name[20];
37847 tree fntype, new_fndecl, args;
37848 unsigned arity;
37849 const char *bname;
37850 machine_mode el_mode, in_mode;
37851 int n, in_n;
37853 /* The SVML is suitable for unsafe math only. */
37854 if (!flag_unsafe_math_optimizations)
37855 return NULL_TREE;
37857 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37858 n = TYPE_VECTOR_SUBPARTS (type_out);
37859 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37860 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37861 if (el_mode != in_mode
37862 || n != in_n)
37863 return NULL_TREE;
37865 switch (fn)
37867 CASE_CFN_EXP:
37868 CASE_CFN_LOG:
37869 CASE_CFN_LOG10:
37870 CASE_CFN_POW:
37871 CASE_CFN_TANH:
37872 CASE_CFN_TAN:
37873 CASE_CFN_ATAN:
37874 CASE_CFN_ATAN2:
37875 CASE_CFN_ATANH:
37876 CASE_CFN_CBRT:
37877 CASE_CFN_SINH:
37878 CASE_CFN_SIN:
37879 CASE_CFN_ASINH:
37880 CASE_CFN_ASIN:
37881 CASE_CFN_COSH:
37882 CASE_CFN_COS:
37883 CASE_CFN_ACOSH:
37884 CASE_CFN_ACOS:
37885 if ((el_mode != DFmode || n != 2)
37886 && (el_mode != SFmode || n != 4))
37887 return NULL_TREE;
37888 break;
37890 default:
37891 return NULL_TREE;
37894 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
37895 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
37897 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
37898 strcpy (name, "vmlsLn4");
37899 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
37900 strcpy (name, "vmldLn2");
37901 else if (n == 4)
37903 sprintf (name, "vmls%s", bname+10);
37904 name[strlen (name)-1] = '4';
37906 else
37907 sprintf (name, "vmld%s2", bname+10);
37909 /* Convert to uppercase. */
37910 name[4] &= ~0x20;
37912 arity = 0;
37913 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
37914 arity++;
37916 if (arity == 1)
37917 fntype = build_function_type_list (type_out, type_in, NULL);
37918 else
37919 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37921 /* Build a function declaration for the vectorized function. */
37922 new_fndecl = build_decl (BUILTINS_LOCATION,
37923 FUNCTION_DECL, get_identifier (name), fntype);
37924 TREE_PUBLIC (new_fndecl) = 1;
37925 DECL_EXTERNAL (new_fndecl) = 1;
37926 DECL_IS_NOVOPS (new_fndecl) = 1;
37927 TREE_READONLY (new_fndecl) = 1;
37929 return new_fndecl;
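   /* Naming example, derived from the mangling above: a 4-wide SFmode
      sinf call becomes "vmlsSin4", a 2-wide DFmode sin call becomes
      "vmldSin2", and log/logf are special-cased to "vmldLn2"/"vmlsLn4".  */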
37932 /* Handler for an ACML-style interface to
37933 a library with vectorized intrinsics. */
37935 static tree
37936 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
37938 char name[20] = "__vr.._";
37939 tree fntype, new_fndecl, args;
37940 unsigned arity;
37941 const char *bname;
37942 machine_mode el_mode, in_mode;
37943 int n, in_n;
37945 /* The ACML is 64-bit only and suitable for unsafe math only, as
37946 it does not correctly support parts of IEEE with the required
37947 precision such as denormals. */
37948 if (!TARGET_64BIT
37949 || !flag_unsafe_math_optimizations)
37950 return NULL_TREE;
37952 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37953 n = TYPE_VECTOR_SUBPARTS (type_out);
37954 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37955 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37956 if (el_mode != in_mode
37957 || n != in_n)
37958 return NULL_TREE;
37960 switch (fn)
37962 CASE_CFN_SIN:
37963 CASE_CFN_COS:
37964 CASE_CFN_EXP:
37965 CASE_CFN_LOG:
37966 CASE_CFN_LOG2:
37967 CASE_CFN_LOG10:
37968 if (el_mode == DFmode && n == 2)
37970 name[4] = 'd';
37971 name[5] = '2';
37973 else if (el_mode == SFmode && n == 4)
37975 name[4] = 's';
37976 name[5] = '4';
37978 else
37979 return NULL_TREE;
37980 break;
37982 default:
37983 return NULL_TREE;
37986 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
37987 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
37988 sprintf (name + 7, "%s", bname+10);
37990 arity = 0;
37991 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
37992 arity++;
37994 if (arity == 1)
37995 fntype = build_function_type_list (type_out, type_in, NULL);
37996 else
37997 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37999 /* Build a function declaration for the vectorized function. */
38000 new_fndecl = build_decl (BUILTINS_LOCATION,
38001 FUNCTION_DECL, get_identifier (name), fntype);
38002 TREE_PUBLIC (new_fndecl) = 1;
38003 DECL_EXTERNAL (new_fndecl) = 1;
38004 DECL_IS_NOVOPS (new_fndecl) = 1;
38005 TREE_READONLY (new_fndecl) = 1;
38007 return new_fndecl;
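   /* Naming example, derived from the mangling above: a 2-wide DFmode sin
      call becomes "__vrd2_sin" and a 4-wide SFmode sinf call becomes
      "__vrs4_sinf".  */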
38010 /* Returns a decl of a function that implements gather load with
38011 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
38012 Return NULL_TREE if it is not available. */
38014 static tree
38015 ix86_vectorize_builtin_gather (const_tree mem_vectype,
38016 const_tree index_type, int scale)
38018 bool si;
38019 enum ix86_builtins code;
38021 if (! TARGET_AVX2)
38022 return NULL_TREE;
38024 if ((TREE_CODE (index_type) != INTEGER_TYPE
38025 && !POINTER_TYPE_P (index_type))
38026 || (TYPE_MODE (index_type) != SImode
38027 && TYPE_MODE (index_type) != DImode))
38028 return NULL_TREE;
38030 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
38031 return NULL_TREE;
38033 /* v*gather* insn sign extends index to pointer mode. */
38034 if (TYPE_PRECISION (index_type) < POINTER_SIZE
38035 && TYPE_UNSIGNED (index_type))
38036 return NULL_TREE;
38038 if (scale <= 0
38039 || scale > 8
38040 || (scale & (scale - 1)) != 0)
38041 return NULL_TREE;
38043 si = TYPE_MODE (index_type) == SImode;
38044 switch (TYPE_MODE (mem_vectype))
38046 case E_V2DFmode:
38047 if (TARGET_AVX512VL)
38048 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
38049 else
38050 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
38051 break;
38052 case E_V4DFmode:
38053 if (TARGET_AVX512VL)
38054 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
38055 else
38056 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
38057 break;
38058 case E_V2DImode:
38059 if (TARGET_AVX512VL)
38060 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
38061 else
38062 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
38063 break;
38064 case E_V4DImode:
38065 if (TARGET_AVX512VL)
38066 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
38067 else
38068 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
38069 break;
38070 case E_V4SFmode:
38071 if (TARGET_AVX512VL)
38072 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
38073 else
38074 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
38075 break;
38076 case E_V8SFmode:
38077 if (TARGET_AVX512VL)
38078 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
38079 else
38080 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
38081 break;
38082 case E_V4SImode:
38083 if (TARGET_AVX512VL)
38084 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
38085 else
38086 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
38087 break;
38088 case E_V8SImode:
38089 if (TARGET_AVX512VL)
38090 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
38091 else
38092 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
38093 break;
38094 case E_V8DFmode:
38095 if (TARGET_AVX512F)
38096 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
38097 else
38098 return NULL_TREE;
38099 break;
38100 case E_V8DImode:
38101 if (TARGET_AVX512F)
38102 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
38103 else
38104 return NULL_TREE;
38105 break;
38106 case E_V16SFmode:
38107 if (TARGET_AVX512F)
38108 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
38109 else
38110 return NULL_TREE;
38111 break;
38112 case E_V16SImode:
38113 if (TARGET_AVX512F)
38114 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
38115 else
38116 return NULL_TREE;
38117 break;
38118 default:
38119 return NULL_TREE;
38122 return ix86_get_builtin (code);
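/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name below is invented).  The scale accepted by
   the gather and scatter expanders above reduces to this standalone
   predicate: a power of two between 1 and 8, the only scales the
   gather/scatter addressing forms can encode.  */
#if 0
#include <stdbool.h>

static bool
valid_gather_scale (int scale)
{
  /* 1, 2, 4 and 8: positive, at most 8, exactly one bit set.  */
  return scale > 0 && scale <= 8 && (scale & (scale - 1)) == 0;
}
#endif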
38125 /* Returns a decl of a function that implements scatter store with
38126 register type VECTYPE, index type INDEX_TYPE and scale SCALE.
38127 Return NULL_TREE if it is not available. */
38129 static tree
38130 ix86_vectorize_builtin_scatter (const_tree vectype,
38131 const_tree index_type, int scale)
38133 bool si;
38134 enum ix86_builtins code;
38136 if (!TARGET_AVX512F)
38137 return NULL_TREE;
38139 if ((TREE_CODE (index_type) != INTEGER_TYPE
38140 && !POINTER_TYPE_P (index_type))
38141 || (TYPE_MODE (index_type) != SImode
38142 && TYPE_MODE (index_type) != DImode))
38143 return NULL_TREE;
38145 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
38146 return NULL_TREE;
38148 /* v*scatter* insn sign extends index to pointer mode. */
38149 if (TYPE_PRECISION (index_type) < POINTER_SIZE
38150 && TYPE_UNSIGNED (index_type))
38151 return NULL_TREE;
38153 /* Scale can be 1, 2, 4 or 8. */
38154 if (scale <= 0
38155 || scale > 8
38156 || (scale & (scale - 1)) != 0)
38157 return NULL_TREE;
38159 si = TYPE_MODE (index_type) == SImode;
38160 switch (TYPE_MODE (vectype))
38162 case E_V8DFmode:
38163 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
38164 break;
38165 case E_V8DImode:
38166 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
38167 break;
38168 case E_V16SFmode:
38169 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
38170 break;
38171 case E_V16SImode:
38172 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
38173 break;
38174 default:
38175 return NULL_TREE;
38178 return ix86_builtins[code];
38181 /* Return true if it is safe to use the rsqrt optabs to optimize
38182 1.0/sqrt. */
38184 static bool
38185 use_rsqrt_p ()
38187 return (TARGET_SSE_MATH
38188 && flag_finite_math_only
38189 && !flag_trapping_math
38190 && flag_unsafe_math_optimizations);
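/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  The rsqrt path approximates
   1.0f / sqrtf (x) with the hardware reciprocal-square-root estimate
   refined by one Newton-Raphson step, which is only a valid
   transformation under the unsafe/finite/non-trapping math flags
   tested above.  */
#if 0
static float
rsqrt_refine (float x, float y0 /* hardware rsqrt estimate of x */)
{
  /* One Newton-Raphson step: y1 = y0 * (1.5 - 0.5 * x * y0 * y0).  */
  return y0 * (1.5f - 0.5f * x * y0 * y0);
}
#endif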
38193 /* Returns a decl of a target-specific builtin that implements
38194 the reciprocal of the function, or NULL_TREE if not available. */
38196 static tree
38197 ix86_builtin_reciprocal (tree fndecl)
38199 switch (DECL_FUNCTION_CODE (fndecl))
38201 /* Vectorized version of sqrt to rsqrt conversion. */
38202 case IX86_BUILTIN_SQRTPS_NR:
38203 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
38205 case IX86_BUILTIN_SQRTPS_NR256:
38206 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
38208 default:
38209 return NULL_TREE;
38213 /* Helper for avx_vpermilps256_operand et al. This is also used by
38214 the expansion functions to turn the parallel back into a mask.
38215 The return value is 0 for no match and the imm8+1 for a match. */
38217 int
38218 avx_vpermilp_parallel (rtx par, machine_mode mode)
38220 unsigned i, nelt = GET_MODE_NUNITS (mode);
38221 unsigned mask = 0;
38222 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
38224 if (XVECLEN (par, 0) != (int) nelt)
38225 return 0;
38227 /* Validate that all of the elements are constants, and not totally
38228 out of range. Copy the data into an integral array to make the
38229 subsequent checks easier. */
38230 for (i = 0; i < nelt; ++i)
38232 rtx er = XVECEXP (par, 0, i);
38233 unsigned HOST_WIDE_INT ei;
38235 if (!CONST_INT_P (er))
38236 return 0;
38237 ei = INTVAL (er);
38238 if (ei >= nelt)
38239 return 0;
38240 ipar[i] = ei;
38243 switch (mode)
38245 case E_V8DFmode:
38246 /* In the 512-bit DFmode case, we can only move elements within
38247 a 128-bit lane. First fill the second part of the mask,
38248 then fallthru. */
38249 for (i = 4; i < 6; ++i)
38251 if (ipar[i] < 4 || ipar[i] >= 6)
38252 return 0;
38253 mask |= (ipar[i] - 4) << i;
38255 for (i = 6; i < 8; ++i)
38257 if (ipar[i] < 6)
38258 return 0;
38259 mask |= (ipar[i] - 6) << i;
38261 /* FALLTHRU */
38263 case E_V4DFmode:
38264 /* In the 256-bit DFmode case, we can only move elements within
38265 a 128-bit lane. */
38266 for (i = 0; i < 2; ++i)
38268 if (ipar[i] >= 2)
38269 return 0;
38270 mask |= ipar[i] << i;
38272 for (i = 2; i < 4; ++i)
38274 if (ipar[i] < 2)
38275 return 0;
38276 mask |= (ipar[i] - 2) << i;
38278 break;
38280 case E_V16SFmode:
38281 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
38282 must mirror the permutation in the lower 256 bits. */
38283 for (i = 0; i < 8; ++i)
38284 if (ipar[i] + 8 != ipar[i + 8])
38285 return 0;
38286 /* FALLTHRU */
38288 case E_V8SFmode:
38289 /* In the 256-bit SFmode case, we have full freedom of
38290 movement within the low 128-bit lane, but the high 128-bit
38291 lane must mirror the exact same pattern. */
38292 for (i = 0; i < 4; ++i)
38293 if (ipar[i] + 4 != ipar[i + 4])
38294 return 0;
38295 nelt = 4;
38296 /* FALLTHRU */
38298 case E_V2DFmode:
38299 case E_V4SFmode:
38300 /* In the 128-bit case, we have full freedom in the placement of
38301 the elements from the source operand. */
38302 for (i = 0; i < nelt; ++i)
38303 mask |= ipar[i] << (i * (nelt / 2));
38304 break;
38306 default:
38307 gcc_unreachable ();
38310 /* Make sure success has a non-zero value by adding one. */
38311 return mask + 1;
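/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  For the 128-bit V4SF case
   handled above, the VPERMILPS immediate simply packs one 2-bit source
   selector per destination element, which is what the mask loop with
   nelt / 2 == 2 computes.  */
#if 0
static unsigned
vpermilps_v4sf_imm8 (const unsigned char sel[4])
{
  unsigned imm = 0;
  for (unsigned i = 0; i < 4; ++i)
    imm |= (unsigned) (sel[i] & 3) << (i * 2);    /* 2 bits per element.  */
  return imm;
}
#endif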
38314 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
38315 the expansion functions to turn the parallel back into a mask.
38316 The return value is 0 for no match and the imm8+1 for a match. */
38318 int
38319 avx_vperm2f128_parallel (rtx par, machine_mode mode)
38321 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
38322 unsigned mask = 0;
38323 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
38325 if (XVECLEN (par, 0) != (int) nelt)
38326 return 0;
38328 /* Validate that all of the elements are constants, and not totally
38329 out of range. Copy the data into an integral array to make the
38330 subsequent checks easier. */
38331 for (i = 0; i < nelt; ++i)
38333 rtx er = XVECEXP (par, 0, i);
38334 unsigned HOST_WIDE_INT ei;
38336 if (!CONST_INT_P (er))
38337 return 0;
38338 ei = INTVAL (er);
38339 if (ei >= 2 * nelt)
38340 return 0;
38341 ipar[i] = ei;
38344 /* Validate that each half of the permute selects consecutive elements. */
38345 for (i = 0; i < nelt2 - 1; ++i)
38346 if (ipar[i] + 1 != ipar[i + 1])
38347 return 0;
38348 for (i = nelt2; i < nelt - 1; ++i)
38349 if (ipar[i] + 1 != ipar[i + 1])
38350 return 0;
38352 /* Reconstruct the mask. */
38353 for (i = 0; i < 2; ++i)
38355 unsigned e = ipar[i * nelt2];
38356 if (e % nelt2)
38357 return 0;
38358 e /= nelt2;
38359 mask |= e << (i * 4);
38362 /* Make sure success has a non-zero value by adding one. */
38363 return mask + 1;
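/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  VPERM2F128 picks one 128-bit
   half (0-3, counting both source operands) for each half of the
   destination; the selector for the low half goes in bits 0-1 and the
   selector for the high half in bits 4-5, exactly the mask rebuilt
   above.  */
#if 0
static unsigned
vperm2f128_imm8 (unsigned low_half_sel, unsigned high_half_sel)
{
  return (low_half_sel & 3u) | ((high_half_sel & 3u) << 4);
}
#endif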
38366 /* Return a register priority for hard reg REGNO. */
38367 static int
38368 ix86_register_priority (int hard_regno)
38370 /* ebp and r13 as the base always want a displacement, r12 as the
38371 base always wants an index. So discourage their usage in an
38372 address. */
38373 if (hard_regno == R12_REG || hard_regno == R13_REG)
38374 return 0;
38375 if (hard_regno == BP_REG)
38376 return 1;
38377 /* New x86-64 int registers result in bigger code size. Discourage
38378 them. */
38379 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
38380 return 2;
38381 /* New x86-64 SSE registers result in bigger code size. Discourage
38382 them. */
38383 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
38384 return 2;
38385 /* Usage of AX register results in smaller code. Prefer it. */
38386 if (hard_regno == AX_REG)
38387 return 4;
38388 return 3;
38391 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
38393 Put float CONST_DOUBLE in the constant pool instead of fp regs.
38394 QImode must go into class Q_REGS.
38395 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
38396 movdf to do mem-to-mem moves through integer regs. */
38398 static reg_class_t
38399 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
38401 machine_mode mode = GET_MODE (x);
38403 /* We're only allowed to return a subclass of CLASS. Many of the
38404 following checks fail for NO_REGS, so eliminate that early. */
38405 if (regclass == NO_REGS)
38406 return NO_REGS;
38408 /* All classes can load zeros. */
38409 if (x == CONST0_RTX (mode))
38410 return regclass;
38412 /* Force constants into memory if we are loading a (nonzero) constant into
38413 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
38414 instructions to load from a constant. */
38415 if (CONSTANT_P (x)
38416 && (MAYBE_MMX_CLASS_P (regclass)
38417 || MAYBE_SSE_CLASS_P (regclass)
38418 || MAYBE_MASK_CLASS_P (regclass)))
38419 return NO_REGS;
38421 /* Floating-point constants need more complex checks. */
38422 if (CONST_DOUBLE_P (x))
38424 /* General regs can load everything. */
38425 if (INTEGER_CLASS_P (regclass))
38426 return regclass;
38428 /* Floats can load 0 and 1 plus some others. Note that we eliminated
38429 zero above. We only want to wind up preferring 80387 registers if
38430 we plan on doing computation with them. */
38431 if (IS_STACK_MODE (mode)
38432 && standard_80387_constant_p (x) > 0)
38434 /* Limit class to FP regs. */
38435 if (FLOAT_CLASS_P (regclass))
38436 return FLOAT_REGS;
38437 else if (regclass == FP_TOP_SSE_REGS)
38438 return FP_TOP_REG;
38439 else if (regclass == FP_SECOND_SSE_REGS)
38440 return FP_SECOND_REG;
38443 return NO_REGS;
38446 /* Prefer SSE regs only, if we can use them for math. */
38447 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38448 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
38450 /* Generally when we see PLUS here, it's the function invariant
38451 (plus soft-fp const_int), which can only be computed into general
38452 regs. */
38453 if (GET_CODE (x) == PLUS)
38454 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
38456 /* QImode constants are easy to load, but non-constant QImode data
38457 must go into Q_REGS. */
38458 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
38460 if (Q_CLASS_P (regclass))
38461 return regclass;
38462 else if (reg_class_subset_p (Q_REGS, regclass))
38463 return Q_REGS;
38464 else
38465 return NO_REGS;
38468 return regclass;
38471 /* Discourage putting floating-point values in SSE registers unless
38472 SSE math is being used, and likewise for the 387 registers. */
38473 static reg_class_t
38474 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
38476 machine_mode mode = GET_MODE (x);
38478 /* Restrict the output reload class to the register bank that we are doing
38479 math on. If we would like not to return a subset of CLASS, reject this
38480 alternative: if reload cannot do this, it will still use its choice. */
38481 mode = GET_MODE (x);
38482 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38483 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
38485 if (IS_STACK_MODE (mode))
38487 if (regclass == FP_TOP_SSE_REGS)
38488 return FP_TOP_REG;
38489 else if (regclass == FP_SECOND_SSE_REGS)
38490 return FP_SECOND_REG;
38491 else
38492 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
38495 return regclass;
38498 static reg_class_t
38499 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
38500 machine_mode mode, secondary_reload_info *sri)
38502 /* Double-word spills from general registers to non-offsettable memory
38503 references (zero-extended addresses) require special handling. */
38504 if (TARGET_64BIT
38505 && MEM_P (x)
38506 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
38507 && INTEGER_CLASS_P (rclass)
38508 && !offsettable_memref_p (x))
38510 sri->icode = (in_p
38511 ? CODE_FOR_reload_noff_load
38512 : CODE_FOR_reload_noff_store);
38513 /* Add the cost of moving address to a temporary. */
38514 sri->extra_cost = 1;
38516 return NO_REGS;
38519 /* QImode spills from non-QI registers require
38520 an intermediate register on 32-bit targets. */
38521 if (mode == QImode
38522 && ((!TARGET_64BIT && !in_p
38523 && INTEGER_CLASS_P (rclass)
38524 && MAYBE_NON_Q_CLASS_P (rclass))
38525 || (!TARGET_AVX512DQ
38526 && MAYBE_MASK_CLASS_P (rclass))))
38528 int regno = true_regnum (x);
38530 /* Return Q_REGS if the operand is in memory. */
38531 if (regno == -1)
38532 return Q_REGS;
38534 return NO_REGS;
38537 /* This condition handles corner case where an expression involving
38538 pointers gets vectorized. We're trying to use the address of a
38539 stack slot as a vector initializer.
38541 (set (reg:V2DI 74 [ vect_cst_.2 ])
38542 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
38544 Eventually frame gets turned into sp+offset like this:
38546 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38547 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38548 (const_int 392 [0x188]))))
38550 That later gets turned into:
38552 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38553 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38554 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
38556 We'll have the following reload recorded:
38558 Reload 0: reload_in (DI) =
38559 (plus:DI (reg/f:DI 7 sp)
38560 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
38561 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38562 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
38563 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
38564 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38565 reload_reg_rtx: (reg:V2DI 22 xmm1)
38567 Which isn't going to work since SSE instructions can't handle scalar
38568 additions. Returning GENERAL_REGS forces the addition into integer
38569 register and reload can handle subsequent reloads without problems. */
38571 if (in_p && GET_CODE (x) == PLUS
38572 && SSE_CLASS_P (rclass)
38573 && SCALAR_INT_MODE_P (mode))
38574 return GENERAL_REGS;
38576 return NO_REGS;
38579 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
38581 static bool
38582 ix86_class_likely_spilled_p (reg_class_t rclass)
38584 switch (rclass)
38586 case AREG:
38587 case DREG:
38588 case CREG:
38589 case BREG:
38590 case AD_REGS:
38591 case SIREG:
38592 case DIREG:
38593 case SSE_FIRST_REG:
38594 case FP_TOP_REG:
38595 case FP_SECOND_REG:
38596 case BND_REGS:
38597 return true;
38599 default:
38600 break;
38603 return false;
38606 /* If we are copying between registers from different register sets
38607 (e.g. FP and integer), we may need a memory location.
38609 The function can't work reliably when one of the CLASSES is a class
38610 containing registers from multiple sets. We avoid this by never combining
38611 different sets in a single alternative in the machine description.
38612 Ensure that this constraint holds to avoid unexpected surprises.
38614 When STRICT is false, we are being called from REGISTER_MOVE_COST,
38615 so do not enforce these sanity checks.
38617 To optimize register_move_cost performance, define inline variant. */
38619 static inline bool
38620 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38621 reg_class_t class2, int strict)
38623 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
38624 return false;
38626 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
38627 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
38628 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
38629 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
38630 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
38631 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
38632 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
38633 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
38635 gcc_assert (!strict || lra_in_progress);
38636 return true;
38639 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
38640 return true;
38642 /* Between mask and general, we have moves no larger than word size. */
38643 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
38644 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
38645 return true;
38647 /* ??? This is a lie. We do have moves between mmx/general, and for
38648 mmx/sse2. But by saying we need secondary memory we discourage the
38649 register allocator from using the mmx registers unless needed. */
38650 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
38651 return true;
38653 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38655 /* SSE1 doesn't have any direct moves from other classes. */
38656 if (!TARGET_SSE2)
38657 return true;
38659 /* If the target says that inter-unit moves are more expensive
38660 than moving through memory, then don't generate them. */
38661 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
38662 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
38663 return true;
38665 /* Between SSE and general, we have moves no larger than word size. */
38666 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38667 return true;
38670 return false;
38673 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
38675 static bool
38676 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38677 reg_class_t class2)
38679 return inline_secondary_memory_needed (mode, class1, class2, true);
38682 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
38684 get_secondary_mem widens integral modes to BITS_PER_WORD.
38685 There is no need to emit full 64 bit move on 64 bit targets
38686 for integral modes that can be moved using 32 bit move. */
38688 static machine_mode
38689 ix86_secondary_memory_needed_mode (machine_mode mode)
38691 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
38692 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
38693 return mode;
38696 /* Implement the TARGET_CLASS_MAX_NREGS hook.
38698 On the 80386, this is the size of MODE in words,
38699 except in the FP regs, where a single reg is always enough. */
38701 static unsigned char
38702 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
38704 if (MAYBE_INTEGER_CLASS_P (rclass))
38706 if (mode == XFmode)
38707 return (TARGET_64BIT ? 2 : 3);
38708 else if (mode == XCmode)
38709 return (TARGET_64BIT ? 4 : 6);
38710 else
38711 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38713 else
38715 if (COMPLEX_MODE_P (mode))
38716 return 2;
38717 else
38718 return 1;
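/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  For the integer classes the
   register count above is just the mode size rounded up to whole words;
   XFmode is special-cased because its 80 bits are padded to 12 bytes
   (three words) on 32-bit targets and 16 bytes (two words) in 64-bit
   mode.  */
#if 0
static unsigned
gpr_nregs_for_size (unsigned mode_size_bytes, unsigned word_size_bytes)
{
  return (mode_size_bytes + word_size_bytes - 1) / word_size_bytes;
}
#endif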
38722 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
38724 static bool
38725 ix86_can_change_mode_class (machine_mode from, machine_mode to,
38726 reg_class_t regclass)
38728 if (from == to)
38729 return true;
38731 /* x87 registers can't do subreg at all, as all values are reformatted
38732 to extended precision. */
38733 if (MAYBE_FLOAT_CLASS_P (regclass))
38734 return false;
38736 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
38738 /* Vector registers do not support QI or HImode loads. If we don't
38739 disallow a change to these modes, reload will assume it's ok to
38740 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
38741 the vec_dupv4hi pattern. */
38742 if (GET_MODE_SIZE (from) < 4)
38743 return false;
38746 return true;
38749 /* Return index of MODE in the sse load/store tables. */
38751 static inline int
38752 sse_store_index (machine_mode mode)
38754 switch (GET_MODE_SIZE (mode))
38756 case 4:
38757 return 0;
38758 case 8:
38759 return 1;
38760 case 16:
38761 return 2;
38762 case 32:
38763 return 3;
38764 case 64:
38765 return 4;
38766 default:
38767 return -1;
38771 /* Return the cost of moving data of mode M between a
38772 register and memory. A value of 2 is the default; this cost is
38773 relative to those in `REGISTER_MOVE_COST'.
38775 This function is used extensively by register_move_cost that is used to
38776 build tables at startup. Make it inline in this case.
38777 When IN is 2, return maximum of in and out move cost.
38779 If moving between registers and memory is more expensive than
38780 between two registers, you should define this macro to express the
38781 relative cost.
38783 Model also increased moving costs of QImode registers in non
38784 Q_REGS classes. */
38786 static inline int
38787 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
38788 int in)
38790 int cost;
38791 if (FLOAT_CLASS_P (regclass))
38793 int index;
38794 switch (mode)
38796 case E_SFmode:
38797 index = 0;
38798 break;
38799 case E_DFmode:
38800 index = 1;
38801 break;
38802 case E_XFmode:
38803 index = 2;
38804 break;
38805 default:
38806 return 100;
38808 if (in == 2)
38809 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
38810 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
38812 if (SSE_CLASS_P (regclass))
38814 int index = sse_store_index (mode);
38815 if (index == -1)
38816 return 100;
38817 if (in == 2)
38818 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
38819 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
38821 if (MMX_CLASS_P (regclass))
38823 int index;
38824 switch (GET_MODE_SIZE (mode))
38826 case 4:
38827 index = 0;
38828 break;
38829 case 8:
38830 index = 1;
38831 break;
38832 default:
38833 return 100;
38835 if (in == 2)
38836 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
38837 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
38839 switch (GET_MODE_SIZE (mode))
38841 case 1:
38842 if (Q_CLASS_P (regclass) || TARGET_64BIT)
38844 if (!in)
38845 return ix86_cost->int_store[0];
38846 if (TARGET_PARTIAL_REG_DEPENDENCY
38847 && optimize_function_for_speed_p (cfun))
38848 cost = ix86_cost->movzbl_load;
38849 else
38850 cost = ix86_cost->int_load[0];
38851 if (in == 2)
38852 return MAX (cost, ix86_cost->int_store[0]);
38853 return cost;
38855 else
38857 if (in == 2)
38858 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
38859 if (in)
38860 return ix86_cost->movzbl_load;
38861 else
38862 return ix86_cost->int_store[0] + 4;
38864 break;
38865 case 2:
38866 if (in == 2)
38867 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
38868 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
38869 default:
38870 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
38871 if (mode == TFmode)
38872 mode = XFmode;
38873 if (in == 2)
38874 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
38875 else if (in)
38876 cost = ix86_cost->int_load[2];
38877 else
38878 cost = ix86_cost->int_store[2];
38879 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
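/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  The default branch above
   charges one word-sized move per UNITS_PER_WORD chunk, so e.g. a
   DImode spill to memory on a 32-bit target costs two int_store[2]
   moves.  */
#if 0
static int
multiword_move_cost (int per_word_cost, unsigned mode_size_bytes,
		     unsigned word_size_bytes)
{
  unsigned pieces
    = (mode_size_bytes + word_size_bytes - 1) / word_size_bytes;
  return per_word_cost * (int) pieces;
}
#endif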
38883 static int
38884 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
38885 bool in)
38887 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
38891 /* Return the cost of moving data from a register in class CLASS1 to
38892 one in class CLASS2.
38894 It is not required that the cost always equal 2 when FROM is the same as TO;
38895 on some machines it is expensive to move between registers if they are not
38896 general registers. */
38898 static int
38899 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
38900 reg_class_t class2_i)
38902 enum reg_class class1 = (enum reg_class) class1_i;
38903 enum reg_class class2 = (enum reg_class) class2_i;
38905 /* In case we require secondary memory, compute cost of the store followed
38906 by load. In order to avoid bad register allocation choices, we need
38907 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
38909 if (inline_secondary_memory_needed (mode, class1, class2, false))
38911 int cost = 1;
38913 cost += inline_memory_move_cost (mode, class1, 2);
38914 cost += inline_memory_move_cost (mode, class2, 2);
38916 /* In case of copying from general_purpose_register we may emit multiple
38917 stores followed by single load causing memory size mismatch stall.
38918 Count this as arbitrarily high cost of 20. */
38919 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
38920 && TARGET_MEMORY_MISMATCH_STALL
38921 && targetm.class_max_nregs (class1, mode)
38922 > targetm.class_max_nregs (class2, mode))
38923 cost += 20;
38925 /* In the case of FP/MMX moves, the registers actually overlap, and we
38926 have to switch modes in order to treat them differently. */
38927 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
38928 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
38929 cost += 20;
38931 return cost;
38934 /* Moves between SSE/MMX and integer unit are expensive. */
38935 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
38936 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38938 /* ??? By keeping returned value relatively high, we limit the number
38939 of moves between integer and MMX/SSE registers for all targets.
38940 Additionally, high value prevents problem with x86_modes_tieable_p(),
38941 where integer modes in MMX/SSE registers are not tieable
38942 because of missing QImode and HImode moves to, from or between
38943 MMX/SSE registers. */
38944 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
38945 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
38947 if (MAYBE_FLOAT_CLASS_P (class1))
38948 return ix86_cost->fp_move;
38949 if (MAYBE_SSE_CLASS_P (class1))
38951 if (GET_MODE_BITSIZE (mode) <= 128)
38952 return ix86_cost->xmm_move;
38953 if (GET_MODE_BITSIZE (mode) <= 256)
38954 return ix86_cost->ymm_move;
38955 return ix86_cost->zmm_move;
38957 if (MAYBE_MMX_CLASS_P (class1))
38958 return ix86_cost->mmx_move;
38959 return 2;
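/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  When the copy has to go
   through memory (see inline_secondary_memory_needed), the cost used
   above is essentially one setup insn plus a store from CLASS1 plus a
   load into CLASS2, with two optional +20 penalties for the
   store-forwarding size mismatch and the FP/MMX mode switch.  */
#if 0
static int
secondary_memory_copy_cost (int class1_mem_cost, int class2_mem_cost,
			    int mismatch_stall, int fp_mmx_overlap)
{
  int cost = 1 + class1_mem_cost + class2_mem_cost;
  if (mismatch_stall)
    cost += 20;
  if (fp_mmx_overlap)
    cost += 20;
  return cost;
}
#endif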
38962 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
38963 words of a value of mode MODE but can be less for certain modes in
38964 special long registers.
38966 Actually there are no two word move instructions for consecutive
38967 registers. And only registers 0-3 may have mov byte instructions
38968 applied to them. */
38970 static unsigned int
38971 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
38973 if (GENERAL_REGNO_P (regno))
38975 if (mode == XFmode)
38976 return TARGET_64BIT ? 2 : 3;
38977 if (mode == XCmode)
38978 return TARGET_64BIT ? 4 : 6;
38979 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38981 if (COMPLEX_MODE_P (mode))
38982 return 2;
38983 if (mode == V64SFmode || mode == V64SImode)
38984 return 4;
38985 return 1;
38988 /* Implement TARGET_HARD_REGNO_MODE_OK. */
38990 static bool
38991 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
38993 /* Flags and only flags can only hold CCmode values. */
38994 if (CC_REGNO_P (regno))
38995 return GET_MODE_CLASS (mode) == MODE_CC;
38996 if (GET_MODE_CLASS (mode) == MODE_CC
38997 || GET_MODE_CLASS (mode) == MODE_RANDOM
38998 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
38999 return false;
39000 if (STACK_REGNO_P (regno))
39001 return VALID_FP_MODE_P (mode);
39002 if (MASK_REGNO_P (regno))
39003 return (VALID_MASK_REG_MODE (mode)
39004 || (TARGET_AVX512BW
39005 && VALID_MASK_AVX512BW_MODE (mode)));
39006 if (BND_REGNO_P (regno))
39007 return VALID_BND_REG_MODE (mode);
39008 if (SSE_REGNO_P (regno))
39010 /* We implement the move patterns for all vector modes into and
39011 out of SSE registers, even when no operation instructions
39012 are available. */
39014 /* For AVX-512 we allow, regardless of regno:
39015 - XI mode
39016 - any 512-bit wide vector mode
39017 - any scalar mode. */
39018 if (TARGET_AVX512F
39019 && (mode == XImode
39020 || VALID_AVX512F_REG_MODE (mode)
39021 || VALID_AVX512F_SCALAR_MODE (mode)))
39022 return true;
39024 /* For AVX-5124FMAPS allow V64SFmode for special regnos. */
39025 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
39026 && MOD4_SSE_REGNO_P (regno)
39027 && mode == V64SFmode)
39028 return true;
39030 /* For AVX-5124VNNIW allow V64SImode for special regnos. */
39031 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
39032 && MOD4_SSE_REGNO_P (regno)
39033 && mode == V64SImode)
39034 return true;
39036 /* TODO check for QI/HI scalars. */
39037 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
39038 if (TARGET_AVX512VL
39039 && (mode == OImode
39040 || mode == TImode
39041 || VALID_AVX256_REG_MODE (mode)
39042 || VALID_AVX512VL_128_REG_MODE (mode)))
39043 return true;
39045 /* xmm16-xmm31 are only available for AVX-512. */
39046 if (EXT_REX_SSE_REGNO_P (regno))
39047 return false;
39049 /* OImode and AVX modes are available only when AVX is enabled. */
39050 return ((TARGET_AVX
39051 && VALID_AVX256_REG_OR_OI_MODE (mode))
39052 || VALID_SSE_REG_MODE (mode)
39053 || VALID_SSE2_REG_MODE (mode)
39054 || VALID_MMX_REG_MODE (mode)
39055 || VALID_MMX_REG_MODE_3DNOW (mode));
39057 if (MMX_REGNO_P (regno))
39059 /* We implement the move patterns for 3DNOW modes even in MMX mode,
39060 so if the register is available at all, then we can move data of
39061 the given mode into or out of it. */
39062 return (VALID_MMX_REG_MODE (mode)
39063 || VALID_MMX_REG_MODE_3DNOW (mode));
39066 if (mode == QImode)
39068 /* Take care for QImode values - they can be in non-QI regs,
39069 but then they do cause partial register stalls. */
39070 if (ANY_QI_REGNO_P (regno))
39071 return true;
39072 if (!TARGET_PARTIAL_REG_STALL)
39073 return true;
39074 /* LRA checks if the hard register is OK for the given mode.
39075 QImode values can live in non-QI regs, so we allow all
39076 registers here. */
39077 if (lra_in_progress)
39078 return true;
39079 return !can_create_pseudo_p ();
39081 /* We handle both integer and floats in the general purpose registers. */
39082 else if (VALID_INT_MODE_P (mode))
39083 return true;
39084 else if (VALID_FP_MODE_P (mode))
39085 return true;
39086 else if (VALID_DFP_MODE_P (mode))
39087 return true;
39088 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
39089 on to use that value in smaller contexts, this can easily force a
39090 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
39091 supporting DImode, allow it. */
39092 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
39093 return true;
39095 return false;
39098 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
39099 saves SSE registers across calls is Win64 (thus no need to check the
39100 current ABI here), and with AVX enabled Win64 only guarantees that
39101 the low 16 bytes are saved. */
39103 static bool
39104 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
39106 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
39109 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
39110 tieable integer mode. */
39112 static bool
39113 ix86_tieable_integer_mode_p (machine_mode mode)
39115 switch (mode)
39117 case E_HImode:
39118 case E_SImode:
39119 return true;
39121 case E_QImode:
39122 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
39124 case E_DImode:
39125 return TARGET_64BIT;
39127 default:
39128 return false;
39132 /* Implement TARGET_MODES_TIEABLE_P.
39134 Return true if MODE1 is accessible in a register that can hold MODE2
39135 without copying. That is, all register classes that can hold MODE2
39136 can also hold MODE1. */
39138 static bool
39139 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
39141 if (mode1 == mode2)
39142 return true;
39144 if (ix86_tieable_integer_mode_p (mode1)
39145 && ix86_tieable_integer_mode_p (mode2))
39146 return true;
39148 /* MODE2 being XFmode implies fp stack or general regs, which means we
39149 can tie any smaller floating point modes to it. Note that we do not
39150 tie this with TFmode. */
39151 if (mode2 == XFmode)
39152 return mode1 == SFmode || mode1 == DFmode;
39154 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
39155 that we can tie it with SFmode. */
39156 if (mode2 == DFmode)
39157 return mode1 == SFmode;
39159 /* If MODE2 is only appropriate for an SSE register, then tie with
39160 any other mode acceptable to SSE registers. */
39161 if (GET_MODE_SIZE (mode2) == 32
39162 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
39163 return (GET_MODE_SIZE (mode1) == 32
39164 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
39165 if (GET_MODE_SIZE (mode2) == 16
39166 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
39167 return (GET_MODE_SIZE (mode1) == 16
39168 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
39170 /* If MODE2 is appropriate for an MMX register, then tie
39171 with any other mode acceptable to MMX registers. */
39172 if (GET_MODE_SIZE (mode2) == 8
39173 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
39174 return (GET_MODE_SIZE (mode1) == 8
39175 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
39177 return false;
39180 /* Return the cost of moving between two registers of mode MODE. */
39182 static int
39183 ix86_set_reg_reg_cost (machine_mode mode)
39185 unsigned int units = UNITS_PER_WORD;
39187 switch (GET_MODE_CLASS (mode))
39189 default:
39190 break;
39192 case MODE_CC:
39193 units = GET_MODE_SIZE (CCmode);
39194 break;
39196 case MODE_FLOAT:
39197 if ((TARGET_SSE && mode == TFmode)
39198 || (TARGET_80387 && mode == XFmode)
39199 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
39200 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
39201 units = GET_MODE_SIZE (mode);
39202 break;
39204 case MODE_COMPLEX_FLOAT:
39205 if ((TARGET_SSE && mode == TCmode)
39206 || (TARGET_80387 && mode == XCmode)
39207 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
39208 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
39209 units = GET_MODE_SIZE (mode);
39210 break;
39212 case MODE_VECTOR_INT:
39213 case MODE_VECTOR_FLOAT:
39214 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
39215 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
39216 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
39217 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
39218 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
39219 units = GET_MODE_SIZE (mode);
39222 /* Return the cost of moving between two registers of mode MODE,
39223 assuming that the move will be in pieces of at most UNITS bytes. */
39224 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
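/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  A register-to-register set is
   charged COSTS_N_INSNS (1), i.e. 4, per piece of at most UNITS bytes:
   a DImode copy on a 32-bit target (units == 4) counts as two insns,
   while a V8SF copy with AVX enabled (units == 32) counts as one.  */
#if 0
static int
reg_reg_set_cost_sketch (unsigned mode_size_bytes, unsigned units)
{
  unsigned pieces = (mode_size_bytes + units - 1) / units;
  return 4 * (int) pieces;    /* COSTS_N_INSNS (pieces).  */
}
#endif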
39227 /* Return cost of vector operation in MODE given that scalar version has
39228 COST. If PARALLEL is true assume that CPU has more than one unit
39229 performing the operation. */
39231 static int
39232 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
39234 if (!VECTOR_MODE_P (mode))
39235 return cost;
39237 if (!parallel)
39238 return cost * GET_MODE_NUNITS (mode);
39239 if (GET_MODE_BITSIZE (mode) == 128
39240 && TARGET_SSE_SPLIT_REGS)
39241 return cost * 2;
39242 if (GET_MODE_BITSIZE (mode) > 128
39243 && TARGET_AVX128_OPTIMAL)
39244 return cost * GET_MODE_BITSIZE (mode) / 128;
39245 return cost;
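/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  The scaling above: without
   parallel execution units the scalar cost is multiplied by the element
   count; with them it is only multiplied when the hardware actually
   splits the vector (128-bit split-register targets, or wider-than-128
   operations on AVX128-preferring targets).  */
#if 0
static int
vec_cost_sketch (int scalar_cost, unsigned nunits, unsigned bitsize,
		 int parallel, int split_128_regs, int avx128_optimal)
{
  if (!parallel)
    return scalar_cost * (int) nunits;
  if (bitsize == 128 && split_128_regs)
    return scalar_cost * 2;
  if (bitsize > 128 && avx128_optimal)
    return scalar_cost * (int) (bitsize / 128);
  return scalar_cost;
}
#endif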
39248 /* Compute a (partial) cost for rtx X. Return true if the complete
39249 cost has been computed, and false if subexpressions should be
39250 scanned. In either case, *TOTAL contains the cost result. */
39252 static bool
39253 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
39254 int *total, bool speed)
39256 rtx mask;
39257 enum rtx_code code = GET_CODE (x);
39258 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
39259 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
39260 int src_cost;
39261 machine_mode inner_mode = mode;
39262 if (VECTOR_MODE_P (mode))
39263 inner_mode = GET_MODE_INNER (mode);
39265 switch (code)
39267 case SET:
39268 if (register_operand (SET_DEST (x), VOIDmode)
39269 && reg_or_0_operand (SET_SRC (x), VOIDmode))
39271 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
39272 return true;
39275 if (register_operand (SET_SRC (x), VOIDmode))
39276 /* Avoid potentially incorrect high cost from rtx_costs
39277 for non-tieable SUBREGs. */
39278 src_cost = 0;
39279 else
39281 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
39283 if (CONSTANT_P (SET_SRC (x)))
39284 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
39285 a small value, possibly zero for cheap constants. */
39286 src_cost += COSTS_N_INSNS (1);
39289 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
39290 return true;
39292 case CONST_INT:
39293 case CONST:
39294 case LABEL_REF:
39295 case SYMBOL_REF:
39296 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
39297 *total = 3;
39298 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
39299 *total = 2;
39300 else if (flag_pic && SYMBOLIC_CONST (x)
39301 && !(TARGET_64BIT
39302 && (GET_CODE (x) == LABEL_REF
39303 || (GET_CODE (x) == SYMBOL_REF
39304 && SYMBOL_REF_LOCAL_P (x))))
39305 /* Use 0 cost for CONST to improve its propagation. */
39306 && (TARGET_64BIT || GET_CODE (x) != CONST))
39307 *total = 1;
39308 else
39309 *total = 0;
39310 return true;
39312 case CONST_DOUBLE:
39313 if (IS_STACK_MODE (mode))
39314 switch (standard_80387_constant_p (x))
39316 case -1:
39317 case 0:
39318 break;
39319 case 1: /* 0.0 */
39320 *total = 1;
39321 return true;
39322 default: /* Other constants */
39323 *total = 2;
39324 return true;
39326 /* FALLTHRU */
39328 case CONST_VECTOR:
39329 switch (standard_sse_constant_p (x, mode))
39331 case 0:
39332 break;
39333 case 1: /* 0: xor eliminates false dependency */
39334 *total = 0;
39335 return true;
39336 default: /* -1: cmp contains false dependency */
39337 *total = 1;
39338 return true;
39340 /* FALLTHRU */
39342 case CONST_WIDE_INT:
39343 /* Fall back to (MEM (SYMBOL_REF)), since that's where
39344 it'll probably end up. Add a penalty for size. */
39345 *total = (COSTS_N_INSNS (1)
39346 + (!TARGET_64BIT && flag_pic)
39347 + (GET_MODE_SIZE (mode) <= 4
39348 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
39349 return true;
39351 case ZERO_EXTEND:
39352 /* Zero extension is often completely free on x86_64, so make
39353 it as cheap as possible. */
39354 if (TARGET_64BIT && mode == DImode
39355 && GET_MODE (XEXP (x, 0)) == SImode)
39356 *total = 1;
39357 else if (TARGET_ZERO_EXTEND_WITH_AND)
39358 *total = cost->add;
39359 else
39360 *total = cost->movzx;
39361 return false;
39363 case SIGN_EXTEND:
39364 *total = cost->movsx;
39365 return false;
39367 case ASHIFT:
39368 if (SCALAR_INT_MODE_P (mode)
39369 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
39370 && CONST_INT_P (XEXP (x, 1)))
39372 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39373 if (value == 1)
39375 *total = cost->add;
39376 return false;
39378 if ((value == 2 || value == 3)
39379 && cost->lea <= cost->shift_const)
39381 *total = cost->lea;
39382 return false;
39385 /* FALLTHRU */
39387 case ROTATE:
39388 case ASHIFTRT:
39389 case LSHIFTRT:
39390 case ROTATERT:
39391 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39393 /* ??? Should be SSE vector operation cost. */
39394 /* At least for published AMD latencies, this really is the same
39395 as the latency for a simple fpu operation like fabs. */
39396 /* V*QImode is emulated with 1-11 insns. */
39397 if (mode == V16QImode || mode == V32QImode)
39399 int count = 11;
39400 if (TARGET_XOP && mode == V16QImode)
39402 /* For XOP we use vpshab, which requires a broadcast of the
39403 value to the variable shift insn. For constants this
39404 means a V16Q const in mem; even when we can perform the
39405 shift with one insn set the cost to prefer paddb. */
39406 if (CONSTANT_P (XEXP (x, 1)))
39408 *total = ix86_vec_cost (mode,
39409 cost->sse_op
39410 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
39411 + (speed ? 2 : COSTS_N_BYTES (16)), true);
39412 return true;
39414 count = 3;
39416 else if (TARGET_SSSE3)
39417 count = 7;
39418 *total = ix86_vec_cost (mode, cost->sse_op * count, true);
39420 else
39421 *total = ix86_vec_cost (mode, cost->sse_op, true);
39423 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39425 if (CONST_INT_P (XEXP (x, 1)))
39427 if (INTVAL (XEXP (x, 1)) > 32)
39428 *total = cost->shift_const + COSTS_N_INSNS (2);
39429 else
39430 *total = cost->shift_const * 2;
39432 else
39434 if (GET_CODE (XEXP (x, 1)) == AND)
39435 *total = cost->shift_var * 2;
39436 else
39437 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
39440 else
39442 if (CONST_INT_P (XEXP (x, 1)))
39443 *total = cost->shift_const;
39444 else if (SUBREG_P (XEXP (x, 1))
39445 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
39447 /* Return the cost after shift-and truncation. */
39448 *total = cost->shift_var;
39449 return true;
39451 else
39452 *total = cost->shift_var;
39454 return false;
39456 case FMA:
39458 rtx sub;
39460 gcc_assert (FLOAT_MODE_P (mode));
39461 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
39463 *total = ix86_vec_cost (mode,
39464 mode == SFmode ? cost->fmass : cost->fmasd,
39465 true);
39466 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
39468 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
39469 sub = XEXP (x, 0);
39470 if (GET_CODE (sub) == NEG)
39471 sub = XEXP (sub, 0);
39472 *total += rtx_cost (sub, mode, FMA, 0, speed);
39474 sub = XEXP (x, 2);
39475 if (GET_CODE (sub) == NEG)
39476 sub = XEXP (sub, 0);
39477 *total += rtx_cost (sub, mode, FMA, 2, speed);
39478 return true;
39481 case MULT:
39482 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39484 *total = inner_mode == DFmode ? cost->mulsd : cost->mulss;
39485 return false;
39487 else if (X87_FLOAT_MODE_P (mode))
39489 *total = cost->fmul;
39490 return false;
39492 else if (FLOAT_MODE_P (mode))
39494 *total = ix86_vec_cost (mode,
39495 inner_mode == DFmode
39496 ? cost->mulsd : cost->mulss, true);
39497 return false;
39499 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39501 /* V*QImode is emulated with 7-13 insns. */
39502 if (mode == V16QImode || mode == V32QImode)
39504 int extra = 11;
39505 if (TARGET_XOP && mode == V16QImode)
39506 extra = 5;
39507 else if (TARGET_SSSE3)
39508 extra = 6;
39509 *total = ix86_vec_cost (mode,
39510 cost->mulss * 2 + cost->sse_op * extra,
39511 true);
39513 /* V*DImode is emulated with 5-8 insns. */
39514 else if (mode == V2DImode || mode == V4DImode)
39516 if (TARGET_XOP && mode == V2DImode)
39517 *total = ix86_vec_cost (mode,
39518 cost->mulss * 2 + cost->sse_op * 3,
39519 true);
39520 else
39521 *total = ix86_vec_cost (mode,
39522 cost->mulss * 3 + cost->sse_op * 5,
39523 true);
39525 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
39526 insns, including two PMULUDQ. */
39527 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
39528 *total = ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
39529 true);
39530 else
39531 *total = ix86_vec_cost (mode, cost->mulss, true);
39532 return false;
39534 else
39536 rtx op0 = XEXP (x, 0);
39537 rtx op1 = XEXP (x, 1);
39538 int nbits;
39539 if (CONST_INT_P (XEXP (x, 1)))
39541 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39542 for (nbits = 0; value != 0; value &= value - 1)
39543 nbits++;
39545 else
39546 /* This is arbitrary. */
39547 nbits = 7;
39549 /* Compute costs correctly for widening multiplication. */
39550 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
39551 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
39552 == GET_MODE_SIZE (mode))
39554 int is_mulwiden = 0;
39555 machine_mode inner_mode = GET_MODE (op0);
39557 if (GET_CODE (op0) == GET_CODE (op1))
39558 is_mulwiden = 1, op1 = XEXP (op1, 0);
39559 else if (CONST_INT_P (op1))
39561 if (GET_CODE (op0) == SIGN_EXTEND)
39562 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
39563 == INTVAL (op1);
39564 else
39565 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
39568 if (is_mulwiden)
39569 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
39572 *total = (cost->mult_init[MODE_INDEX (mode)]
39573 + nbits * cost->mult_bit
39574 + rtx_cost (op0, mode, outer_code, opno, speed)
39575 + rtx_cost (op1, mode, outer_code, opno, speed));
39577 return true;
39580 case DIV:
39581 case UDIV:
39582 case MOD:
39583 case UMOD:
39584 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39585 *total = inner_mode == DFmode ? cost->divsd : cost->divss;
39586 else if (X87_FLOAT_MODE_P (mode))
39587 *total = cost->fdiv;
39588 else if (FLOAT_MODE_P (mode))
39589 *total = ix86_vec_cost (mode,
39590 inner_mode == DFmode ? cost->divsd : cost->divss,
39591 true);
39592 else
39593 *total = cost->divide[MODE_INDEX (mode)];
39594 return false;
39596 case PLUS:
39597 if (GET_MODE_CLASS (mode) == MODE_INT
39598 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
39600 if (GET_CODE (XEXP (x, 0)) == PLUS
39601 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
39602 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
39603 && CONSTANT_P (XEXP (x, 1)))
39605 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
39606 if (val == 2 || val == 4 || val == 8)
39608 *total = cost->lea;
39609 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39610 outer_code, opno, speed);
39611 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
39612 outer_code, opno, speed);
39613 *total += rtx_cost (XEXP (x, 1), mode,
39614 outer_code, opno, speed);
39615 return true;
39618 else if (GET_CODE (XEXP (x, 0)) == MULT
39619 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
39621 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
39622 if (val == 2 || val == 4 || val == 8)
39624 *total = cost->lea;
39625 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39626 outer_code, opno, speed);
39627 *total += rtx_cost (XEXP (x, 1), mode,
39628 outer_code, opno, speed);
39629 return true;
39632 else if (GET_CODE (XEXP (x, 0)) == PLUS)
39634 /* Add with carry, ignore the cost of adding a carry flag. */
39635 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
39636 *total = cost->add;
39637 else
39639 *total = cost->lea;
39640 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39641 outer_code, opno, speed);
39644 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39645 outer_code, opno, speed);
39646 *total += rtx_cost (XEXP (x, 1), mode,
39647 outer_code, opno, speed);
39648 return true;
39651 /* FALLTHRU */
39653 case MINUS:
39654 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
39655 if (GET_MODE_CLASS (mode) == MODE_INT
39656 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
39657 && GET_CODE (XEXP (x, 0)) == MINUS
39658 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
39660 *total = cost->add;
39661 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39662 outer_code, opno, speed);
39663 *total += rtx_cost (XEXP (x, 1), mode,
39664 outer_code, opno, speed);
39665 return true;
39668 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39670 *total = cost->addss;
39671 return false;
39673 else if (X87_FLOAT_MODE_P (mode))
39675 *total = cost->fadd;
39676 return false;
39678 else if (FLOAT_MODE_P (mode))
39680 *total = ix86_vec_cost (mode, cost->addss, true);
39681 return false;
39683 /* FALLTHRU */
39685 case AND:
39686 case IOR:
39687 case XOR:
39688 if (GET_MODE_CLASS (mode) == MODE_INT
39689 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39691 *total = (cost->add * 2
39692 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
39693 << (GET_MODE (XEXP (x, 0)) != DImode))
39694 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
39695 << (GET_MODE (XEXP (x, 1)) != DImode)));
39696 return true;
39698 /* FALLTHRU */
39700 case NEG:
39701 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39703 *total = cost->sse_op;
39704 return false;
39706 else if (X87_FLOAT_MODE_P (mode))
39708 *total = cost->fchs;
39709 return false;
39711 else if (FLOAT_MODE_P (mode))
39713 *total = ix86_vec_cost (mode, cost->sse_op, true);
39714 return false;
39716 /* FALLTHRU */
39718 case NOT:
39719 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39720 *total = ix86_vec_cost (mode, cost->sse_op, true);
39721 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39722 *total = cost->add * 2;
39723 else
39724 *total = cost->add;
39725 return false;
39727 case COMPARE:
39728 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
39729 && XEXP (XEXP (x, 0), 1) == const1_rtx
39730 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
39731 && XEXP (x, 1) == const0_rtx)
39733 /* This kind of construct is implemented using test[bwl].
39734 Treat it as if we had an AND. */
39735 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
39736 *total = (cost->add
39737 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
39738 opno, speed)
39739 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
39740 return true;
39743 /* The embedded comparison operand is completely free. */
39744 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
39745 && XEXP (x, 1) == const0_rtx)
39746 *total = 0;
39748 return false;
39750 case FLOAT_EXTEND:
39751 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39752 *total = 0;
39753 else
39754 *total = ix86_vec_cost (mode, cost->addss, true);
39755 return false;
39757 case FLOAT_TRUNCATE:
39758 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39759 *total = cost->fadd;
39760 else
39761 *total = ix86_vec_cost (mode, cost->addss, true);
39762 return false;
39764 case ABS:
39765 /* SSE requires memory load for the constant operand. It may make
39766 sense to account for this. Of course the constant operand may or
39767 may not be reused. */
39768 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39769 *total = cost->sse_op;
39770 else if (X87_FLOAT_MODE_P (mode))
39771 *total = cost->fabs;
39772 else if (FLOAT_MODE_P (mode))
39773 *total = ix86_vec_cost (mode, cost->sse_op, true);
39774 return false;
39776 case SQRT:
39777 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39778 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
39779 else if (X87_FLOAT_MODE_P (mode))
39780 *total = cost->fsqrt;
39781 else if (FLOAT_MODE_P (mode))
39782 *total = ix86_vec_cost (mode,
39783 mode == SFmode ? cost->sqrtss : cost->sqrtsd,
39784 true);
39785 return false;
39787 case UNSPEC:
39788 if (XINT (x, 1) == UNSPEC_TP)
39789 *total = 0;
39790 return false;
39792 case VEC_SELECT:
39793 case VEC_CONCAT:
39794 case VEC_DUPLICATE:
39795 /* ??? Assume all of these vector manipulation patterns are
39796 recognizable, in which case they all pretty much have the
39797 same cost. */
39798 *total = cost->sse_op;
39799 return true;
39800 case VEC_MERGE:
39801 mask = XEXP (x, 2);
39802 /* This is a masked instruction; assume the same cost as
39803 the nonmasked variant. */
39804 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
39805 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
39806 else
39807 *total = cost->sse_op;
39808 return true;
39810 default:
39811 return false;
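/* Illustrative sketch added for exposition (not part of the original
   sources; the helper name is invented).  The MULT case above estimates
   multiplication by a constant from the number of set bits in the
   multiplier, using Kernighan's clear-lowest-set-bit loop; each set bit
   contributes mult_bit to the cost.  */
#if 0
static int
popcount_sketch (unsigned long long value)
{
  int nbits = 0;
  for (; value != 0; value &= value - 1)
    nbits++;
  return nbits;
}
#endif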
39815 #if TARGET_MACHO
39817 static int current_machopic_label_num;
39819 /* Given a symbol name and its associated stub, write out the
39820 definition of the stub. */
39822 void
39823 machopic_output_stub (FILE *file, const char *symb, const char *stub)
39825 unsigned int length;
39826 char *binder_name, *symbol_name, lazy_ptr_name[32];
39827 int label = ++current_machopic_label_num;
39829 /* For 64-bit we shouldn't get here. */
39830 gcc_assert (!TARGET_64BIT);
39832 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
39833 symb = targetm.strip_name_encoding (symb);
39835 length = strlen (stub);
39836 binder_name = XALLOCAVEC (char, length + 32);
39837 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
39839 length = strlen (symb);
39840 symbol_name = XALLOCAVEC (char, length + 32);
39841 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
39843 sprintf (lazy_ptr_name, "L%d$lz", label);
39845 if (MACHOPIC_ATT_STUB)
39846 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
39847 else if (MACHOPIC_PURE)
39848 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
39849 else
39850 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
39852 fprintf (file, "%s:\n", stub);
39853 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39855 if (MACHOPIC_ATT_STUB)
39857 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
39859 else if (MACHOPIC_PURE)
39861 /* PIC stub. */
39862 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39863 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
39864 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
39865 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
39866 label, lazy_ptr_name, label);
39867 fprintf (file, "\tjmp\t*%%ecx\n");
39869 else
39870 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
39872 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
39873 it needs no stub-binding-helper. */
39874 if (MACHOPIC_ATT_STUB)
39875 return;
39877 fprintf (file, "%s:\n", binder_name);
39879 if (MACHOPIC_PURE)
39881 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
39882 fprintf (file, "\tpushl\t%%ecx\n");
39884 else
39885 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
39887 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
39889 /* N.B. Keep the correspondence of these
39890 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
39891 old-pic/new-pic/non-pic stubs; altering this will break
39892 compatibility with existing dylibs. */
39893 if (MACHOPIC_PURE)
39895 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39896 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
39898 else
39899 /* 16-byte -mdynamic-no-pic stub. */
39900 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
39902 fprintf (file, "%s:\n", lazy_ptr_name);
39903 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39904 fprintf (file, ASM_LONG "%s\n", binder_name);
39906 #endif /* TARGET_MACHO */
39908 /* Order the registers for register allocator. */
39910 void
39911 x86_order_regs_for_local_alloc (void)
39913 int pos = 0;
39914 int i;
39916 /* First allocate the local general purpose registers. */
39917 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39918 if (GENERAL_REGNO_P (i) && call_used_regs[i])
39919 reg_alloc_order [pos++] = i;
39921 /* Global general purpose registers. */
39922 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39923 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
39924 reg_alloc_order [pos++] = i;
39926 /* x87 registers come first in case we are doing FP math
39927 using them. */
39928 if (!TARGET_SSE_MATH)
39929 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39930 reg_alloc_order [pos++] = i;
39932 /* SSE registers. */
39933 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
39934 reg_alloc_order [pos++] = i;
39935 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
39936 reg_alloc_order [pos++] = i;
39938 /* Extended REX SSE registers. */
39939 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
39940 reg_alloc_order [pos++] = i;
39942 /* Mask registers. */
39943 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
39944 reg_alloc_order [pos++] = i;
39946 /* MPX bound registers. */
39947 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
39948 reg_alloc_order [pos++] = i;
39950 /* x87 registers. */
39951 if (TARGET_SSE_MATH)
39952 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39953 reg_alloc_order [pos++] = i;
39955 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
39956 reg_alloc_order [pos++] = i;
39958 /* Initialize the rest of the array, as we do not allocate some registers
39959 at all. */
39960 while (pos < FIRST_PSEUDO_REGISTER)
39961 reg_alloc_order [pos++] = 0;
39964 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
39965 in struct attribute_spec.handler. */
39966 static tree
39967 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
39968 tree args,
39969 int,
39970 bool *no_add_attrs)
39972 if (TREE_CODE (*node) != FUNCTION_TYPE
39973 && TREE_CODE (*node) != METHOD_TYPE
39974 && TREE_CODE (*node) != FIELD_DECL
39975 && TREE_CODE (*node) != TYPE_DECL)
39977 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39978 name);
39979 *no_add_attrs = true;
39980 return NULL_TREE;
39982 if (TARGET_64BIT)
39984 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
39985 name);
39986 *no_add_attrs = true;
39987 return NULL_TREE;
39989 if (is_attribute_p ("callee_pop_aggregate_return", name))
39991 tree cst;
39993 cst = TREE_VALUE (args);
39994 if (TREE_CODE (cst) != INTEGER_CST)
39996 warning (OPT_Wattributes,
39997 "%qE attribute requires an integer constant argument",
39998 name);
39999 *no_add_attrs = true;
40001 else if (compare_tree_int (cst, 0) != 0
40002 && compare_tree_int (cst, 1) != 0)
40004 warning (OPT_Wattributes,
40005 "argument to %qE attribute is neither zero, nor one",
40006 name);
40007 *no_add_attrs = true;
40010 return NULL_TREE;
40013 return NULL_TREE;
40016 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
40017 struct attribute_spec.handler. */
40018 static tree
40019 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
40020 bool *no_add_attrs)
40022 if (TREE_CODE (*node) != FUNCTION_TYPE
40023 && TREE_CODE (*node) != METHOD_TYPE
40024 && TREE_CODE (*node) != FIELD_DECL
40025 && TREE_CODE (*node) != TYPE_DECL)
40027 warning (OPT_Wattributes, "%qE attribute only applies to functions",
40028 name);
40029 *no_add_attrs = true;
40030 return NULL_TREE;
40033 /* Can combine regparm with all attributes but fastcall. */
40034 if (is_attribute_p ("ms_abi", name))
40036 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
40038 error ("ms_abi and sysv_abi attributes are not compatible");
40041 return NULL_TREE;
40043 else if (is_attribute_p ("sysv_abi", name))
40045 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
40047 error ("ms_abi and sysv_abi attributes are not compatible");
40050 return NULL_TREE;
40053 return NULL_TREE;
40056 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
40057 struct attribute_spec.handler. */
40058 static tree
40059 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
40060 bool *no_add_attrs)
40062 tree *type = NULL;
40063 if (DECL_P (*node))
40065 if (TREE_CODE (*node) == TYPE_DECL)
40066 type = &TREE_TYPE (*node);
40068 else
40069 type = node;
40071 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
40073 warning (OPT_Wattributes, "%qE attribute ignored",
40074 name);
40075 *no_add_attrs = true;
40078 else if ((is_attribute_p ("ms_struct", name)
40079 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
40080 || ((is_attribute_p ("gcc_struct", name)
40081 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
40083 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
40084 name);
40085 *no_add_attrs = true;
40088 return NULL_TREE;
40091 static tree
40092 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
40093 bool *no_add_attrs)
40095 if (TREE_CODE (*node) != FUNCTION_DECL)
40097 warning (OPT_Wattributes, "%qE attribute only applies to functions",
40098 name);
40099 *no_add_attrs = true;
40101 return NULL_TREE;
40104 static tree
40105 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
40106 int, bool *)
40108 return NULL_TREE;
40111 static tree
40112 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
40114 /* DECL_RESULT and DECL_ARGUMENTS do not exist here yet,
40115 but the function type contains the argument and return type data. */
40116 tree func_type = *node;
40117 tree return_type = TREE_TYPE (func_type);
40119 int nargs = 0;
40120 tree current_arg_type = TYPE_ARG_TYPES (func_type);
40121 while (current_arg_type
40122 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
40124 if (nargs == 0)
40126 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
40127 error ("interrupt service routine should have a pointer "
40128 "as the first argument");
40130 else if (nargs == 1)
40132 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
40133 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
40134 error ("interrupt service routine should have unsigned %s"
40135 "int as the second argument",
40136 TARGET_64BIT
40137 ? (TARGET_X32 ? "long long " : "long ")
40138 : "");
40140 nargs++;
40141 current_arg_type = TREE_CHAIN (current_arg_type);
40143 if (!nargs || nargs > 2)
40144 error ("interrupt service routine can only have a pointer argument "
40145 "and an optional integer argument");
40146 if (! VOID_TYPE_P (return_type))
40147 error ("interrupt service routine can't have non-void return value");
40149 return NULL_TREE;
40152 static bool
40153 ix86_ms_bitfield_layout_p (const_tree record_type)
40155 return ((TARGET_MS_BITFIELD_LAYOUT
40156 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
40157 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
40160 /* Returns an expression indicating where the this parameter is
40161 located on entry to the FUNCTION. */
40163 static rtx
40164 x86_this_parameter (tree function)
40166 tree type = TREE_TYPE (function);
40167 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
40168 int nregs;
40170 if (TARGET_64BIT)
40172 const int *parm_regs;
40174 if (ix86_function_type_abi (type) == MS_ABI)
40175 parm_regs = x86_64_ms_abi_int_parameter_registers;
40176 else
40177 parm_regs = x86_64_int_parameter_registers;
40178 return gen_rtx_REG (Pmode, parm_regs[aggr]);
40181 nregs = ix86_function_regparm (type, function);
40183 if (nregs > 0 && !stdarg_p (type))
40185 int regno;
40186 unsigned int ccvt = ix86_get_callcvt (type);
40188 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
40189 regno = aggr ? DX_REG : CX_REG;
40190 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
40192 regno = CX_REG;
40193 if (aggr)
40194 return gen_rtx_MEM (SImode,
40195 plus_constant (Pmode, stack_pointer_rtx, 4));
40197 else
40199 regno = AX_REG;
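/* With plain regparm, an aggregate return value is passed through a hidden pointer in %eax, so the this pointer moves to %edx, or to the first stack argument slot when only one register is available. */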
40200 if (aggr)
40202 regno = DX_REG;
40203 if (nregs == 1)
40204 return gen_rtx_MEM (SImode,
40205 plus_constant (Pmode,
40206 stack_pointer_rtx, 4));
40209 return gen_rtx_REG (SImode, regno);
40212 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
40213 aggr ? 8 : 4));
40216 /* Determine whether x86_output_mi_thunk can succeed. */
40218 static bool
40219 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
40220 const_tree function)
40222 /* 64-bit can handle anything. */
40223 if (TARGET_64BIT)
40224 return true;
40226 /* For 32-bit, everything's fine if we have one free register. */
40227 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
40228 return true;
40230 /* Need a free register for vcall_offset. */
40231 if (vcall_offset)
40232 return false;
40234 /* Need a free register for GOT references. */
40235 if (flag_pic && !targetm.binds_local_p (function))
40236 return false;
40238 /* Otherwise ok. */
40239 return true;
40242 /* Output the assembler code for a thunk function. THUNK_DECL is the
40243 declaration for the thunk function itself, FUNCTION is the decl for
40244 the target function. DELTA is an immediate constant offset to be
40245 added to THIS. If VCALL_OFFSET is nonzero, the word at
40246 *(*this + vcall_offset) should be added to THIS. */
40248 static void
40249 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
40250 HOST_WIDE_INT vcall_offset, tree function)
40252 rtx this_param = x86_this_parameter (function);
40253 rtx this_reg, tmp, fnaddr;
40254 unsigned int tmp_regno;
40255 rtx_insn *insn;
40257 if (TARGET_64BIT)
40258 tmp_regno = R10_REG;
40259 else
40261 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
40262 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
40263 tmp_regno = AX_REG;
40264 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
40265 tmp_regno = DX_REG;
40266 else
40267 tmp_regno = CX_REG;
40270 emit_note (NOTE_INSN_PROLOGUE_END);
40272 /* If CET is enabled, insert an ENDBR instruction. */
40273 if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
40274 emit_insn (gen_nop_endbr ());
40276 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
40277 pull it in now and let DELTA benefit. */
40278 if (REG_P (this_param))
40279 this_reg = this_param;
40280 else if (vcall_offset)
40282 /* Put the this parameter into %eax. */
40283 this_reg = gen_rtx_REG (Pmode, AX_REG);
40284 emit_move_insn (this_reg, this_param);
40286 else
40287 this_reg = NULL_RTX;
40289 /* Adjust the this parameter by a fixed constant. */
40290 if (delta)
40292 rtx delta_rtx = GEN_INT (delta);
40293 rtx delta_dst = this_reg ? this_reg : this_param;
40295 if (TARGET_64BIT)
40297 if (!x86_64_general_operand (delta_rtx, Pmode))
40299 tmp = gen_rtx_REG (Pmode, tmp_regno);
40300 emit_move_insn (tmp, delta_rtx);
40301 delta_rtx = tmp;
40305 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
40308 /* Adjust the this parameter by a value stored in the vtable. */
40309 if (vcall_offset)
40311 rtx vcall_addr, vcall_mem, this_mem;
40313 tmp = gen_rtx_REG (Pmode, tmp_regno);
40315 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
40316 if (Pmode != ptr_mode)
40317 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
40318 emit_move_insn (tmp, this_mem);
40320 /* Adjust the this parameter. */
40321 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
40322 if (TARGET_64BIT
40323 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
40325 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
40326 emit_move_insn (tmp2, GEN_INT (vcall_offset));
40327 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
40330 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
40331 if (Pmode != ptr_mode)
40332 emit_insn (gen_addsi_1_zext (this_reg,
40333 gen_rtx_REG (ptr_mode,
40334 REGNO (this_reg)),
40335 vcall_mem));
40336 else
40337 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
40340 /* If necessary, drop THIS back to its stack slot. */
40341 if (this_reg && this_reg != this_param)
40342 emit_move_insn (this_param, this_reg);
40344 fnaddr = XEXP (DECL_RTL (function), 0);
40345 if (TARGET_64BIT)
40347 if (!flag_pic || targetm.binds_local_p (function)
40348 || TARGET_PECOFF)
40350 else
40352 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
40353 tmp = gen_rtx_CONST (Pmode, tmp);
40354 fnaddr = gen_const_mem (Pmode, tmp);
40357 else
40359 if (!flag_pic || targetm.binds_local_p (function))
40361 #if TARGET_MACHO
40362 else if (TARGET_MACHO)
40364 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
40365 fnaddr = XEXP (fnaddr, 0);
40367 #endif /* TARGET_MACHO */
40368 else
40370 tmp = gen_rtx_REG (Pmode, CX_REG);
40371 output_set_got (tmp, NULL_RTX);
40373 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
40374 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
40375 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
40376 fnaddr = gen_const_mem (Pmode, fnaddr);
40380 /* Our sibling call patterns do not allow memories, because we have no
40381 predicate that can distinguish between frame and non-frame memory.
40382 For our purposes here, we can get away with (ab)using a jump pattern,
40383 because we're going to do no optimization. */
40384 if (MEM_P (fnaddr))
40386 if (sibcall_insn_operand (fnaddr, word_mode))
40388 fnaddr = XEXP (DECL_RTL (function), 0);
40389 tmp = gen_rtx_MEM (QImode, fnaddr);
40390 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
40391 tmp = emit_call_insn (tmp);
40392 SIBLING_CALL_P (tmp) = 1;
40394 else
40395 emit_jump_insn (gen_indirect_jump (fnaddr));
40397 else
40399 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
40401 /* CM_LARGE_PIC always uses a pseudo PIC register, which is
40402 uninitialized. Since FUNCTION is local and calling it
40403 doesn't go through PLT, we use scratch register %r11 as
40404 PIC register and initialize it here. */
40405 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
40406 ix86_init_large_pic_reg (tmp_regno);
40407 fnaddr = legitimize_pic_address (fnaddr,
40408 gen_rtx_REG (Pmode, tmp_regno));
40411 if (!sibcall_insn_operand (fnaddr, word_mode))
40413 tmp = gen_rtx_REG (word_mode, tmp_regno);
40414 if (GET_MODE (fnaddr) != word_mode)
40415 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
40416 emit_move_insn (tmp, fnaddr);
40417 fnaddr = tmp;
40420 tmp = gen_rtx_MEM (QImode, fnaddr);
40421 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
40422 tmp = emit_call_insn (tmp);
40423 SIBLING_CALL_P (tmp) = 1;
40425 emit_barrier ();
40427 /* Emit just enough of rest_of_compilation to get the insns emitted.
40428 Note that use_thunk calls assemble_start_function et al. */
40429 insn = get_insns ();
40430 shorten_branches (insn);
40431 final_start_function (insn, file, 1);
40432 final (insn, file, 1);
40433 final_end_function ();
40436 static void
40437 x86_file_start (void)
40439 default_file_start ();
40440 if (TARGET_16BIT)
40441 fputs ("\t.code16gcc\n", asm_out_file);
40442 #if TARGET_MACHO
40443 darwin_file_start ();
40444 #endif
40445 if (X86_FILE_START_VERSION_DIRECTIVE)
40446 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
40447 if (X86_FILE_START_FLTUSED)
40448 fputs ("\t.global\t__fltused\n", asm_out_file);
40449 if (ix86_asm_dialect == ASM_INTEL)
40450 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
40453 int
40454 x86_field_alignment (tree type, int computed)
40456 machine_mode mode;
40458 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
40459 return computed;
40460 if (TARGET_IAMCU)
40461 return iamcu_alignment (type, computed);
40462 mode = TYPE_MODE (strip_array_types (type));
40463 if (mode == DFmode || mode == DCmode
40464 || GET_MODE_CLASS (mode) == MODE_INT
40465 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
40466 return MIN (32, computed);
40467 return computed;
40470 /* Print call to TARGET to FILE. */
40472 static void
40473 x86_print_call_or_nop (FILE *file, const char *target)
40475 if (flag_nop_mcount)
40476 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
40477 else
40478 fprintf (file, "1:\tcall\t%s\n", target);
40481 /* Output assembler code to FILE to increment profiler label # LABELNO
40482 for profiling a function entry. */
40483 void
40484 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
40486 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
40487 : MCOUNT_NAME);
40488 if (TARGET_64BIT)
40490 #ifndef NO_PROFILE_COUNTERS
40491 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
40492 #endif
40494 if (!TARGET_PECOFF && flag_pic)
40495 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
40496 else
40497 x86_print_call_or_nop (file, mcount_name);
40499 else if (flag_pic)
40501 #ifndef NO_PROFILE_COUNTERS
40502 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
40503 LPREFIX, labelno);
40504 #endif
40505 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
40507 else
40509 #ifndef NO_PROFILE_COUNTERS
40510 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
40511 LPREFIX, labelno);
40512 #endif
40513 x86_print_call_or_nop (file, mcount_name);
40516 if (flag_record_mcount)
40518 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
40519 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
40520 fprintf (file, "\t.previous\n");
40524 /* We don't have exact information about the insn sizes, but we may assume
40525 quite safely that we are informed about all 1 byte insns and memory
40526 address sizes. This is enough to eliminate unnecessary padding in
40527 99% of cases. */
40530 ix86_min_insn_size (rtx_insn *insn)
40532 int l = 0, len;
40534 if (!INSN_P (insn) || !active_insn_p (insn))
40535 return 0;
40537 /* Discard alignments we've emitted, and jump instructions. */
40538 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
40539 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
40540 return 0;
40542 /* Important case - calls are always 5 bytes.
40543 It is common to have many calls in a row. */
40544 if (CALL_P (insn)
40545 && symbolic_reference_mentioned_p (PATTERN (insn))
40546 && !SIBLING_CALL_P (insn))
40547 return 5;
40548 len = get_attr_length (insn);
40549 if (len <= 1)
40550 return 1;
40552 /* For normal instructions we rely on get_attr_length being exact,
40553 with a few exceptions. */
40554 if (!JUMP_P (insn))
40556 enum attr_type type = get_attr_type (insn);
40558 switch (type)
40560 case TYPE_MULTI:
40561 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
40562 || asm_noperands (PATTERN (insn)) >= 0)
40563 return 0;
40564 break;
40565 case TYPE_OTHER:
40566 case TYPE_FCMP:
40567 break;
40568 default:
40569 /* Otherwise trust get_attr_length. */
40570 return len;
40573 l = get_attr_length_address (insn);
40574 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
40575 l = 4;
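/* Estimate one opcode byte plus the address bytes; when no address length is known, assume a two byte instruction. */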
40577 if (l)
40578 return 1+l;
40579 else
40580 return 2;
40583 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
40585 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
40586 window. */
40588 static void
40589 ix86_avoid_jump_mispredicts (void)
40591 rtx_insn *insn, *start = get_insns ();
40592 int nbytes = 0, njumps = 0;
40593 bool isjump = false;
40595 /* Look for all minimal intervals of instructions containing 4 jumps.
40596 The intervals are bounded by START and INSN. NBYTES is the total
40597 size of the instructions in the interval, including INSN and not including
40598 START. When NBYTES is smaller than 16, it is possible
40599 that the ends of START and INSN land in the same 16 byte page.
40601 The smallest offset in the page at which INSN can start is the case where START
40602 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
40603 We add a p2align to the 16 byte window with max_skip 15 - NBYTES + sizeof (INSN).
40605 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
40606 have to, since control transfer to its label(s) can be performed through other
40607 means; also, we estimate the minimum length of all asm stmts as 0. */
40608 for (insn = start; insn; insn = NEXT_INSN (insn))
40610 int min_size;
40612 if (LABEL_P (insn))
40614 int align = label_to_alignment (insn);
40615 int max_skip = label_to_max_skip (insn);
40617 if (max_skip > 15)
40618 max_skip = 15;
40619 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
40620 already in the current 16 byte page, because otherwise
40621 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
40622 bytes to reach 16 byte boundary. */
40623 if (align <= 0
40624 || (align <= 3 && max_skip != (1 << align) - 1))
40625 max_skip = 0;
40626 if (dump_file)
40627 fprintf (dump_file, "Label %i with max_skip %i\n",
40628 INSN_UID (insn), max_skip);
40629 if (max_skip)
40631 while (nbytes + max_skip >= 16)
40633 start = NEXT_INSN (start);
40634 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40635 || CALL_P (start))
40636 njumps--, isjump = true;
40637 else
40638 isjump = false;
40639 nbytes -= ix86_min_insn_size (start);
40642 continue;
40645 min_size = ix86_min_insn_size (insn);
40646 nbytes += min_size;
40647 if (dump_file)
40648 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
40649 INSN_UID (insn), min_size);
40650 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
40651 || CALL_P (insn))
40652 njumps++;
40653 else
40654 continue;
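/* INSN is the fourth jump in the window; shrink the window from the left until at most three jumps remain, remembering whether the last insn dropped was itself a jump or call. */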
40656 while (njumps > 3)
40658 start = NEXT_INSN (start);
40659 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40660 || CALL_P (start))
40661 njumps--, isjump = true;
40662 else
40663 isjump = false;
40664 nbytes -= ix86_min_insn_size (start);
40666 gcc_assert (njumps >= 0);
40667 if (dump_file)
40668 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
40669 INSN_UID (start), INSN_UID (insn), nbytes);
40671 if (njumps == 3 && isjump && nbytes < 16)
40673 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
40675 if (dump_file)
40676 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
40677 INSN_UID (insn), padsize);
40678 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
40682 #endif
40684 /* AMD Athlon works faster
40685 when RET is not the destination of a conditional jump or directly preceded
40686 by another jump instruction. We avoid the penalty by replacing such RET
40687 instructions with the longer "rep; ret" form. */
40688 static void
40689 ix86_pad_returns (void)
40691 edge e;
40692 edge_iterator ei;
40694 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40696 basic_block bb = e->src;
40697 rtx_insn *ret = BB_END (bb);
40698 rtx_insn *prev;
40699 bool replace = false;
40701 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
40702 || optimize_bb_for_size_p (bb))
40703 continue;
40704 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
40705 if (active_insn_p (prev) || LABEL_P (prev))
40706 break;
40707 if (prev && LABEL_P (prev))
40709 edge e;
40710 edge_iterator ei;
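/* The return is directly preceded by a label; if any predecessor reaches that label via a taken branch, the return is effectively a branch target and needs the padding. */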
40712 FOR_EACH_EDGE (e, ei, bb->preds)
40713 if (EDGE_FREQUENCY (e) && e->src->index >= 0
40714 && !(e->flags & EDGE_FALLTHRU))
40716 replace = true;
40717 break;
40720 if (!replace)
40722 prev = prev_active_insn (ret);
40723 if (prev
40724 && ((JUMP_P (prev) && any_condjump_p (prev))
40725 || CALL_P (prev)))
40726 replace = true;
40727 /* Empty functions get a branch mispredict even when
40728 the jump destination is not visible to us. */
40729 if (!prev && !optimize_function_for_size_p (cfun))
40730 replace = true;
40732 if (replace)
40734 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
40735 delete_insn (ret);
40740 /* Count the minimum number of instructions in BB. Return 4 if the
40741 number of instructions >= 4. */
40743 static int
40744 ix86_count_insn_bb (basic_block bb)
40746 rtx_insn *insn;
40747 int insn_count = 0;
40749 /* Count number of instructions in this block. Return 4 if the number
40750 of instructions >= 4. */
40751 FOR_BB_INSNS (bb, insn)
40753 /* This only happens in exit blocks. */
40754 if (JUMP_P (insn)
40755 && ANY_RETURN_P (PATTERN (insn)))
40756 break;
40758 if (NONDEBUG_INSN_P (insn)
40759 && GET_CODE (PATTERN (insn)) != USE
40760 && GET_CODE (PATTERN (insn)) != CLOBBER)
40762 insn_count++;
40763 if (insn_count >= 4)
40764 return insn_count;
40768 return insn_count;
40772 /* Count the minimum number of instructions in a code path ending in BB.
40773 Return 4 if the number of instructions >= 4. */
40775 static int
40776 ix86_count_insn (basic_block bb)
40778 edge e;
40779 edge_iterator ei;
40780 int min_prev_count;
40782 /* Only bother counting instructions along paths with no
40783 more than 2 basic blocks between entry and exit. Given
40784 that BB has an edge to exit, determine if a predecessor
40785 of BB has an edge from entry. If so, compute the number
40786 of instructions in the predecessor block. If there
40787 happen to be multiple such blocks, compute the minimum. */
40788 min_prev_count = 4;
40789 FOR_EACH_EDGE (e, ei, bb->preds)
40791 edge prev_e;
40792 edge_iterator prev_ei;
40794 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40796 min_prev_count = 0;
40797 break;
40799 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
40801 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40803 int count = ix86_count_insn_bb (e->src);
40804 if (count < min_prev_count)
40805 min_prev_count = count;
40806 break;
40811 if (min_prev_count < 4)
40812 min_prev_count += ix86_count_insn_bb (bb);
40814 return min_prev_count;
40817 /* Pad short functions to 4 instructions. */
40819 static void
40820 ix86_pad_short_function (void)
40822 edge e;
40823 edge_iterator ei;
40825 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40827 rtx_insn *ret = BB_END (e->src);
40828 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
40830 int insn_count = ix86_count_insn (e->src);
40832 /* Pad short function. */
40833 if (insn_count < 4)
40835 rtx_insn *insn = ret;
40837 /* Find epilogue. */
40838 while (insn
40839 && (!NOTE_P (insn)
40840 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
40841 insn = PREV_INSN (insn);
40843 if (!insn)
40844 insn = ret;
40846 /* Two NOPs count as one instruction. */
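/* E.g. a path with a single instruction gets 2 * (4 - 1) = 6 NOPs emitted before the epilogue, bringing the count up to four. */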
40847 insn_count = 2 * (4 - insn_count);
40848 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
40854 /* Fix up a Windows system unwinder issue. If an EH region falls through into
40855 the epilogue, the Windows system unwinder will apply epilogue logic and
40856 produce incorrect offsets. This can be avoided by adding a nop between
40857 the last insn that can throw and the first insn of the epilogue. */
40859 static void
40860 ix86_seh_fixup_eh_fallthru (void)
40862 edge e;
40863 edge_iterator ei;
40865 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40867 rtx_insn *insn, *next;
40869 /* Find the beginning of the epilogue. */
40870 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
40871 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
40872 break;
40873 if (insn == NULL)
40874 continue;
40876 /* We only care about preceding insns that can throw. */
40877 insn = prev_active_insn (insn);
40878 if (insn == NULL || !can_throw_internal (insn))
40879 continue;
40881 /* Do not separate calls from their debug information. */
40882 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
40883 if (NOTE_P (next)
40884 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
40885 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
40886 insn = next;
40887 else
40888 break;
40890 emit_insn_after (gen_nops (const1_rtx), insn);
40894 /* Given a register number BASE, the lowest of a group of registers, update
40895 regsets IN and OUT with the registers that should be avoided in input
40896 and output operands respectively when trying to avoid generating a modr/m
40897 byte for -fmitigate-rop. */
40899 static void
40900 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
40902 SET_HARD_REG_BIT (out, base);
40903 SET_HARD_REG_BIT (out, base + 1);
40904 SET_HARD_REG_BIT (in, base + 2);
40905 SET_HARD_REG_BIT (in, base + 3);
40908 /* Called if -fmitigate-rop is in effect. Try to rewrite instructions so
40909 that certain encodings of modr/m bytes do not occur. */
40910 static void
40911 ix86_mitigate_rop (void)
40913 HARD_REG_SET input_risky;
40914 HARD_REG_SET output_risky;
40915 HARD_REG_SET inout_risky;
40917 CLEAR_HARD_REG_SET (output_risky);
40918 CLEAR_HARD_REG_SET (input_risky);
40919 SET_HARD_REG_BIT (output_risky, AX_REG);
40920 SET_HARD_REG_BIT (output_risky, CX_REG);
40921 SET_HARD_REG_BIT (input_risky, BX_REG);
40922 SET_HARD_REG_BIT (input_risky, DX_REG);
40923 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
40924 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
40925 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
40926 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
40927 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
40928 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
40929 COPY_HARD_REG_SET (inout_risky, input_risky);
40930 IOR_HARD_REG_SET (inout_risky, output_risky);
40932 df_note_add_problem ();
40933 /* Fix up what stack-regs did. */
40934 df_insn_rescan_all ();
40935 df_analyze ();
40937 regrename_init (true);
40938 regrename_analyze (NULL);
40940 auto_vec<du_head_p> cands;
40942 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
40944 if (!NONDEBUG_INSN_P (insn))
40945 continue;
40947 if (GET_CODE (PATTERN (insn)) == USE
40948 || GET_CODE (PATTERN (insn)) == CLOBBER)
40949 continue;
40951 extract_insn (insn);
40953 int opno0, opno1;
40954 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
40955 recog_data.n_operands, &opno0,
40956 &opno1);
40958 if (!ix86_rop_should_change_byte_p (modrm))
40959 continue;
40961 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
40963 /* This happens when regrename has to fail a block. */
40964 if (!info->op_info)
40965 continue;
40967 if (info->op_info[opno0].n_chains != 0)
40969 gcc_assert (info->op_info[opno0].n_chains == 1);
40970 du_head_p op0c;
40971 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
40972 if (op0c->target_data_1 + op0c->target_data_2 == 0
40973 && !op0c->cannot_rename)
40974 cands.safe_push (op0c);
40976 op0c->target_data_1++;
40978 if (info->op_info[opno1].n_chains != 0)
40980 gcc_assert (info->op_info[opno1].n_chains == 1);
40981 du_head_p op1c;
40982 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
40983 if (op1c->target_data_1 + op1c->target_data_2 == 0
40984 && !op1c->cannot_rename)
40985 cands.safe_push (op1c);
40987 op1c->target_data_2++;
40991 int i;
40992 du_head_p head;
40993 FOR_EACH_VEC_ELT (cands, i, head)
40995 int old_reg, best_reg;
40996 HARD_REG_SET unavailable;
40998 CLEAR_HARD_REG_SET (unavailable);
40999 if (head->target_data_1)
41000 IOR_HARD_REG_SET (unavailable, output_risky);
41001 if (head->target_data_2)
41002 IOR_HARD_REG_SET (unavailable, input_risky);
41004 int n_uses;
41005 reg_class superclass = regrename_find_superclass (head, &n_uses,
41006 &unavailable);
41007 old_reg = head->regno;
41008 best_reg = find_rename_reg (head, superclass, &unavailable,
41009 old_reg, false);
41010 bool ok = regrename_do_replace (head, best_reg);
41011 gcc_assert (ok);
41012 if (dump_file)
41013 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
41014 reg_names[best_reg], reg_class_names[superclass]);
41018 regrename_finish ();
41020 df_analyze ();
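/* Renaming whole chains did not remove every risky encoding. Walk the insns again and, where a problematic modr/m byte would still be generated, copy the input operand into a safe register immediately before the insn. */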
41022 basic_block bb;
41023 regset_head live;
41025 INIT_REG_SET (&live);
41027 FOR_EACH_BB_FN (bb, cfun)
41029 rtx_insn *insn;
41031 COPY_REG_SET (&live, DF_LR_OUT (bb));
41032 df_simulate_initialize_backwards (bb, &live);
41034 FOR_BB_INSNS_REVERSE (bb, insn)
41036 if (!NONDEBUG_INSN_P (insn))
41037 continue;
41039 df_simulate_one_insn_backwards (bb, insn, &live);
41041 if (GET_CODE (PATTERN (insn)) == USE
41042 || GET_CODE (PATTERN (insn)) == CLOBBER)
41043 continue;
41045 extract_insn (insn);
41046 constrain_operands_cached (insn, reload_completed);
41047 int opno0, opno1;
41048 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
41049 recog_data.n_operands, &opno0,
41050 &opno1);
41051 if (modrm < 0
41052 || !ix86_rop_should_change_byte_p (modrm)
41053 || opno0 == opno1)
41054 continue;
41056 rtx oldreg = recog_data.operand[opno1];
41057 preprocess_constraints (insn);
41058 const operand_alternative *alt = which_op_alt ();
41060 int i;
41061 for (i = 0; i < recog_data.n_operands; i++)
41062 if (i != opno1
41063 && alt[i].earlyclobber
41064 && reg_overlap_mentioned_p (recog_data.operand[i],
41065 oldreg))
41066 break;
41068 if (i < recog_data.n_operands)
41069 continue;
41071 if (dump_file)
41072 fprintf (dump_file,
41073 "attempting to fix modrm byte in insn %d:"
41074 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
41075 reg_class_names[alt[opno1].cl]);
41077 HARD_REG_SET unavailable;
41078 REG_SET_TO_HARD_REG_SET (unavailable, &live);
41079 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
41080 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
41081 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
41082 IOR_HARD_REG_SET (unavailable, output_risky);
41083 IOR_COMPL_HARD_REG_SET (unavailable,
41084 reg_class_contents[alt[opno1].cl]);
41086 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41087 if (!TEST_HARD_REG_BIT (unavailable, i))
41088 break;
41089 if (i == FIRST_PSEUDO_REGISTER)
41091 if (dump_file)
41092 fprintf (dump_file, ", none available\n");
41093 continue;
41095 if (dump_file)
41096 fprintf (dump_file, " -> %d\n", i);
41097 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
41098 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
41099 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
41104 /* Implement machine specific optimizations. We implement padding of returns
41105 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
41106 static void
41107 ix86_reorg (void)
41109 /* We are freeing block_for_insn in the toplev to keep compatibility
41110 with old MDEP_REORGS that are not CFG based. Recompute it now. */
41111 compute_bb_for_insn ();
41113 if (flag_mitigate_rop)
41114 ix86_mitigate_rop ();
41116 if (TARGET_SEH && current_function_has_exception_handlers ())
41117 ix86_seh_fixup_eh_fallthru ();
41119 if (optimize && optimize_function_for_speed_p (cfun))
41121 if (TARGET_PAD_SHORT_FUNCTION)
41122 ix86_pad_short_function ();
41123 else if (TARGET_PAD_RETURNS)
41124 ix86_pad_returns ();
41125 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
41126 if (TARGET_FOUR_JUMP_LIMIT)
41127 ix86_avoid_jump_mispredicts ();
41128 #endif
41132 /* Return nonzero when a QImode register that must be represented via a REX
41133 prefix is used. */
41134 bool
41135 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
41137 int i;
41138 extract_insn_cached (insn);
41139 for (i = 0; i < recog_data.n_operands; i++)
41140 if (GENERAL_REG_P (recog_data.operand[i])
41141 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
41142 return true;
41143 return false;
41146 /* Return true when INSN mentions a register that must be encoded using a
41147 REX prefix. */
41148 bool
41149 x86_extended_reg_mentioned_p (rtx insn)
41151 subrtx_iterator::array_type array;
41152 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
41154 const_rtx x = *iter;
41155 if (REG_P (x)
41156 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
41157 return true;
41159 return false;
41162 /* If profitable, negate (without causing overflow) integer constant
41163 of mode MODE at location LOC. Return true in this case. */
41164 bool
41165 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
41167 HOST_WIDE_INT val;
41169 if (!CONST_INT_P (*loc))
41170 return false;
41172 switch (mode)
41174 case E_DImode:
41175 /* DImode x86_64 constants must fit in 32 bits. */
41176 gcc_assert (x86_64_immediate_operand (*loc, mode));
41178 mode = SImode;
41179 break;
41181 case E_SImode:
41182 case E_HImode:
41183 case E_QImode:
41184 break;
41186 default:
41187 gcc_unreachable ();
41190 /* Avoid overflows. */
41191 if (mode_signbit_p (mode, *loc))
41192 return false;
41194 val = INTVAL (*loc);
41196 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
41197 Exceptions: -128 encodes smaller than 128, so swap sign and operation. */
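/* For example, `addl $128, %ecx' needs a 32-bit immediate while the equivalent `subl $-128, %ecx' fits in a sign-extended 8-bit immediate; conversely `addl $-128, %ecx' is already short and is left alone. */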
41198 if ((val < 0 && val != -128)
41199 || val == 128)
41201 *loc = GEN_INT (-val);
41202 return true;
41205 return false;
41208 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
41209 optabs would emit if we didn't have TFmode patterns. */
41211 void
41212 x86_emit_floatuns (rtx operands[2])
41214 rtx_code_label *neglab, *donelab;
41215 rtx i0, i1, f0, in, out;
41216 machine_mode mode, inmode;
41218 inmode = GET_MODE (operands[1]);
41219 gcc_assert (inmode == SImode || inmode == DImode);
41221 out = operands[0];
41222 in = force_reg (inmode, operands[1]);
41223 mode = GET_MODE (out);
41224 neglab = gen_label_rtx ();
41225 donelab = gen_label_rtx ();
41226 f0 = gen_reg_rtx (mode);
41228 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
41230 expand_float (out, in, 0);
41232 emit_jump_insn (gen_jump (donelab));
41233 emit_barrier ();
41235 emit_label (neglab);
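/* The input has its high bit set, so it does not fit the signed conversion. Compute (in >> 1) | (in & 1): this halves the value while keeping the low bit as a sticky bit for correct rounding; the converted result is then doubled below. */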
41237 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
41238 1, OPTAB_DIRECT);
41239 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
41240 1, OPTAB_DIRECT);
41241 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
41243 expand_float (f0, i0, 0);
41245 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
41247 emit_label (donelab);
41250 static bool canonicalize_perm (struct expand_vec_perm_d *d);
41251 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
41252 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
41253 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
41255 /* Get a vector mode of the same size as the original but with elements
41256 twice as wide. This is only guaranteed to apply to integral vectors. */
41258 static inline machine_mode
41259 get_mode_wider_vector (machine_mode o)
41261 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
41262 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
41263 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
41264 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
41265 return n;
41268 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
41269 fill target with val via vec_duplicate. */
41271 static bool
41272 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
41274 bool ok;
41275 rtx_insn *insn;
41276 rtx dup;
41278 /* First attempt to recognize VAL as-is. */
41279 dup = gen_vec_duplicate (mode, val);
41280 insn = emit_insn (gen_rtx_SET (target, dup));
41281 if (recog_memoized (insn) < 0)
41283 rtx_insn *seq;
41284 machine_mode innermode = GET_MODE_INNER (mode);
41285 rtx reg;
41287 /* If that fails, force VAL into a register. */
41289 start_sequence ();
41290 reg = force_reg (innermode, val);
41291 if (GET_MODE (reg) != innermode)
41292 reg = gen_lowpart (innermode, reg);
41293 SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
41294 seq = get_insns ();
41295 end_sequence ();
41296 if (seq)
41297 emit_insn_before (seq, insn);
41299 ok = recog_memoized (insn) >= 0;
41300 gcc_assert (ok);
41302 return true;
41305 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41306 with all elements equal to VAR. Return true if successful. */
41308 static bool
41309 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
41310 rtx target, rtx val)
41312 bool ok;
41314 switch (mode)
41316 case E_V2SImode:
41317 case E_V2SFmode:
41318 if (!mmx_ok)
41319 return false;
41320 /* FALLTHRU */
41322 case E_V4DFmode:
41323 case E_V4DImode:
41324 case E_V8SFmode:
41325 case E_V8SImode:
41326 case E_V2DFmode:
41327 case E_V2DImode:
41328 case E_V4SFmode:
41329 case E_V4SImode:
41330 case E_V16SImode:
41331 case E_V8DImode:
41332 case E_V16SFmode:
41333 case E_V8DFmode:
41334 return ix86_vector_duplicate_value (mode, target, val);
41336 case E_V4HImode:
41337 if (!mmx_ok)
41338 return false;
41339 if (TARGET_SSE || TARGET_3DNOW_A)
41341 rtx x;
41343 val = gen_lowpart (SImode, val);
41344 x = gen_rtx_TRUNCATE (HImode, val);
41345 x = gen_rtx_VEC_DUPLICATE (mode, x);
41346 emit_insn (gen_rtx_SET (target, x));
41347 return true;
41349 goto widen;
41351 case E_V8QImode:
41352 if (!mmx_ok)
41353 return false;
41354 goto widen;
41356 case E_V8HImode:
41357 if (TARGET_AVX2)
41358 return ix86_vector_duplicate_value (mode, target, val);
41360 if (TARGET_SSE2)
41362 struct expand_vec_perm_d dperm;
41363 rtx tmp1, tmp2;
41365 permute:
41366 memset (&dperm, 0, sizeof (dperm));
41367 dperm.target = target;
41368 dperm.vmode = mode;
41369 dperm.nelt = GET_MODE_NUNITS (mode);
41370 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
41371 dperm.one_operand_p = true;
41373 /* Extend to SImode using a paradoxical SUBREG. */
41374 tmp1 = gen_reg_rtx (SImode);
41375 emit_move_insn (tmp1, gen_lowpart (SImode, val));
41377 /* Insert the SImode value as low element of a V4SImode vector. */
41378 tmp2 = gen_reg_rtx (V4SImode);
41379 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
41380 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
41382 ok = (expand_vec_perm_1 (&dperm)
41383 || expand_vec_perm_broadcast_1 (&dperm));
41384 gcc_assert (ok);
41385 return ok;
41387 goto widen;
41389 case E_V16QImode:
41390 if (TARGET_AVX2)
41391 return ix86_vector_duplicate_value (mode, target, val);
41393 if (TARGET_SSE2)
41394 goto permute;
41395 goto widen;
41397 widen:
41398 /* Replicate the value once into the next wider mode and recurse. */
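/* E.g. an HImode broadcast of X becomes an SImode broadcast of (X << 16) | X, which covers both halves of each wider element. */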
41400 machine_mode smode, wsmode, wvmode;
41401 rtx x;
41403 smode = GET_MODE_INNER (mode);
41404 wvmode = get_mode_wider_vector (mode);
41405 wsmode = GET_MODE_INNER (wvmode);
41407 val = convert_modes (wsmode, smode, val, true);
41408 x = expand_simple_binop (wsmode, ASHIFT, val,
41409 GEN_INT (GET_MODE_BITSIZE (smode)),
41410 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41411 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
41413 x = gen_reg_rtx (wvmode);
41414 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
41415 gcc_assert (ok);
41416 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
41417 return ok;
41420 case E_V16HImode:
41421 case E_V32QImode:
41422 if (TARGET_AVX2)
41423 return ix86_vector_duplicate_value (mode, target, val);
41424 else
41426 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
41427 rtx x = gen_reg_rtx (hvmode);
41429 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
41430 gcc_assert (ok);
41432 x = gen_rtx_VEC_CONCAT (mode, x, x);
41433 emit_insn (gen_rtx_SET (target, x));
41435 return true;
41437 case E_V64QImode:
41438 case E_V32HImode:
41439 if (TARGET_AVX512BW)
41440 return ix86_vector_duplicate_value (mode, target, val);
41441 else
41443 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
41444 rtx x = gen_reg_rtx (hvmode);
41446 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
41447 gcc_assert (ok);
41449 x = gen_rtx_VEC_CONCAT (mode, x, x);
41450 emit_insn (gen_rtx_SET (target, x));
41452 return true;
41454 default:
41455 return false;
41459 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41460 whose ONE_VAR element is VAR, and other elements are zero. Return true
41461 if successful. */
41463 static bool
41464 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
41465 rtx target, rtx var, int one_var)
41467 machine_mode vsimode;
41468 rtx new_target;
41469 rtx x, tmp;
41470 bool use_vector_set = false;
41472 switch (mode)
41474 case E_V2DImode:
41475 /* For SSE4.1, we normally use vector set. But if the second
41476 element is zero and inter-unit moves are OK, we use movq
41477 instead. */
41478 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
41479 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
41480 && one_var == 0));
41481 break;
41482 case E_V16QImode:
41483 case E_V4SImode:
41484 case E_V4SFmode:
41485 use_vector_set = TARGET_SSE4_1;
41486 break;
41487 case E_V8HImode:
41488 use_vector_set = TARGET_SSE2;
41489 break;
41490 case E_V4HImode:
41491 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
41492 break;
41493 case E_V32QImode:
41494 case E_V16HImode:
41495 case E_V8SImode:
41496 case E_V8SFmode:
41497 case E_V4DFmode:
41498 use_vector_set = TARGET_AVX;
41499 break;
41500 case E_V4DImode:
41501 /* Use ix86_expand_vector_set in 64bit mode only. */
41502 use_vector_set = TARGET_AVX && TARGET_64BIT;
41503 break;
41504 default:
41505 break;
41508 if (use_vector_set)
41510 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
41511 var = force_reg (GET_MODE_INNER (mode), var);
41512 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41513 return true;
41516 switch (mode)
41518 case E_V2SFmode:
41519 case E_V2SImode:
41520 if (!mmx_ok)
41521 return false;
41522 /* FALLTHRU */
41524 case E_V2DFmode:
41525 case E_V2DImode:
41526 if (one_var != 0)
41527 return false;
41528 var = force_reg (GET_MODE_INNER (mode), var);
41529 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
41530 emit_insn (gen_rtx_SET (target, x));
41531 return true;
41533 case E_V4SFmode:
41534 case E_V4SImode:
41535 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
41536 new_target = gen_reg_rtx (mode);
41537 else
41538 new_target = target;
41539 var = force_reg (GET_MODE_INNER (mode), var);
41540 x = gen_rtx_VEC_DUPLICATE (mode, var);
41541 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
41542 emit_insn (gen_rtx_SET (new_target, x));
41543 if (one_var != 0)
41545 /* We need to shuffle the value to the correct position, so
41546 create a new pseudo to store the intermediate result. */
41548 /* With SSE2, we can use the integer shuffle insns. */
41549 if (mode != V4SFmode && TARGET_SSE2)
41551 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
41552 const1_rtx,
41553 GEN_INT (one_var == 1 ? 0 : 1),
41554 GEN_INT (one_var == 2 ? 0 : 1),
41555 GEN_INT (one_var == 3 ? 0 : 1)));
41556 if (target != new_target)
41557 emit_move_insn (target, new_target);
41558 return true;
41561 /* Otherwise convert the intermediate result to V4SFmode and
41562 use the SSE1 shuffle instructions. */
41563 if (mode != V4SFmode)
41565 tmp = gen_reg_rtx (V4SFmode);
41566 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
41568 else
41569 tmp = new_target;
41571 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
41572 const1_rtx,
41573 GEN_INT (one_var == 1 ? 0 : 1),
41574 GEN_INT (one_var == 2 ? 0+4 : 1+4),
41575 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
41577 if (mode != V4SFmode)
41578 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
41579 else if (tmp != target)
41580 emit_move_insn (target, tmp);
41582 else if (target != new_target)
41583 emit_move_insn (target, new_target);
41584 return true;
41586 case E_V8HImode:
41587 case E_V16QImode:
41588 vsimode = V4SImode;
41589 goto widen;
41590 case E_V4HImode:
41591 case E_V8QImode:
41592 if (!mmx_ok)
41593 return false;
41594 vsimode = V2SImode;
41595 goto widen;
41596 widen:
41597 if (one_var != 0)
41598 return false;
41600 /* Zero extend the variable element to SImode and recurse. */
41601 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
41603 x = gen_reg_rtx (vsimode);
41604 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
41605 var, one_var))
41606 gcc_unreachable ();
41608 emit_move_insn (target, gen_lowpart (mode, x));
41609 return true;
41611 default:
41612 return false;
41616 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41617 consisting of the values in VALS. It is known that all elements
41618 except ONE_VAR are constants. Return true if successful. */
41620 static bool
41621 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
41622 rtx target, rtx vals, int one_var)
41624 rtx var = XVECEXP (vals, 0, one_var);
41625 machine_mode wmode;
41626 rtx const_vec, x;
41628 const_vec = copy_rtx (vals);
41629 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
41630 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
41632 switch (mode)
41634 case E_V2DFmode:
41635 case E_V2DImode:
41636 case E_V2SFmode:
41637 case E_V2SImode:
41638 /* For the two element vectors, it's just as easy to use
41639 the general case. */
41640 return false;
41642 case E_V4DImode:
41643 /* Use ix86_expand_vector_set in 64bit mode only. */
41644 if (!TARGET_64BIT)
41645 return false;
41646 /* FALLTHRU */
41647 case E_V4DFmode:
41648 case E_V8SFmode:
41649 case E_V8SImode:
41650 case E_V16HImode:
41651 case E_V32QImode:
41652 case E_V4SFmode:
41653 case E_V4SImode:
41654 case E_V8HImode:
41655 case E_V4HImode:
41656 break;
41658 case E_V16QImode:
41659 if (TARGET_SSE4_1)
41660 break;
41661 wmode = V8HImode;
41662 goto widen;
41663 case E_V8QImode:
41664 wmode = V4HImode;
41665 goto widen;
41666 widen:
41667 /* There's no way to set one QImode entry easily. Combine
41668 the variable value with its adjacent constant value, and
41669 promote to an HImode set. */
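/* E.g. for ONE_VAR == 5 in V16QImode, bytes 4 and 5 are packed into HImode element 2, and that element is then set in a V8HImode view of the constant vector. */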
41670 x = XVECEXP (vals, 0, one_var ^ 1);
41671 if (one_var & 1)
41673 var = convert_modes (HImode, QImode, var, true);
41674 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
41675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41676 x = GEN_INT (INTVAL (x) & 0xff);
41678 else
41680 var = convert_modes (HImode, QImode, var, true);
41681 x = gen_int_mode (INTVAL (x) << 8, HImode);
41683 if (x != const0_rtx)
41684 var = expand_simple_binop (HImode, IOR, var, x, var,
41685 1, OPTAB_LIB_WIDEN);
41687 x = gen_reg_rtx (wmode);
41688 emit_move_insn (x, gen_lowpart (wmode, const_vec));
41689 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
41691 emit_move_insn (target, gen_lowpart (mode, x));
41692 return true;
41694 default:
41695 return false;
41698 emit_move_insn (target, const_vec);
41699 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41700 return true;
41703 /* A subroutine of ix86_expand_vector_init_general. Use vector
41704 concatenate to handle the most general case: all values variable,
41705 and none identical. */
41707 static void
41708 ix86_expand_vector_init_concat (machine_mode mode,
41709 rtx target, rtx *ops, int n)
41711 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
41712 rtx first[16], second[8], third[4];
41713 rtvec v;
41714 int i, j;
41716 switch (n)
41718 case 2:
41719 switch (mode)
41721 case E_V16SImode:
41722 cmode = V8SImode;
41723 break;
41724 case E_V16SFmode:
41725 cmode = V8SFmode;
41726 break;
41727 case E_V8DImode:
41728 cmode = V4DImode;
41729 break;
41730 case E_V8DFmode:
41731 cmode = V4DFmode;
41732 break;
41733 case E_V8SImode:
41734 cmode = V4SImode;
41735 break;
41736 case E_V8SFmode:
41737 cmode = V4SFmode;
41738 break;
41739 case E_V4DImode:
41740 cmode = V2DImode;
41741 break;
41742 case E_V4DFmode:
41743 cmode = V2DFmode;
41744 break;
41745 case E_V4SImode:
41746 cmode = V2SImode;
41747 break;
41748 case E_V4SFmode:
41749 cmode = V2SFmode;
41750 break;
41751 case E_V2DImode:
41752 cmode = DImode;
41753 break;
41754 case E_V2SImode:
41755 cmode = SImode;
41756 break;
41757 case E_V2DFmode:
41758 cmode = DFmode;
41759 break;
41760 case E_V2SFmode:
41761 cmode = SFmode;
41762 break;
41763 default:
41764 gcc_unreachable ();
41767 if (!register_operand (ops[1], cmode))
41768 ops[1] = force_reg (cmode, ops[1]);
41769 if (!register_operand (ops[0], cmode))
41770 ops[0] = force_reg (cmode, ops[0]);
41771 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
41772 ops[1])));
41773 break;
41775 case 4:
41776 switch (mode)
41778 case E_V4DImode:
41779 cmode = V2DImode;
41780 break;
41781 case E_V4DFmode:
41782 cmode = V2DFmode;
41783 break;
41784 case E_V4SImode:
41785 cmode = V2SImode;
41786 break;
41787 case E_V4SFmode:
41788 cmode = V2SFmode;
41789 break;
41790 default:
41791 gcc_unreachable ();
41793 goto half;
41795 case 8:
41796 switch (mode)
41798 case E_V8DImode:
41799 cmode = V2DImode;
41800 hmode = V4DImode;
41801 break;
41802 case E_V8DFmode:
41803 cmode = V2DFmode;
41804 hmode = V4DFmode;
41805 break;
41806 case E_V8SImode:
41807 cmode = V2SImode;
41808 hmode = V4SImode;
41809 break;
41810 case E_V8SFmode:
41811 cmode = V2SFmode;
41812 hmode = V4SFmode;
41813 break;
41814 default:
41815 gcc_unreachable ();
41817 goto half;
41819 case 16:
41820 switch (mode)
41822 case E_V16SImode:
41823 cmode = V2SImode;
41824 hmode = V4SImode;
41825 gmode = V8SImode;
41826 break;
41827 case E_V16SFmode:
41828 cmode = V2SFmode;
41829 hmode = V4SFmode;
41830 gmode = V8SFmode;
41831 break;
41832 default:
41833 gcc_unreachable ();
41835 goto half;
41837 half:
41838 /* FIXME: We process inputs backward to help RA. PR 36222. */
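/* Pair up adjacent inputs into two-element CMODE vectors, then keep concatenating pairs of the results (halves, and quarters for n == 16) until the full vector is built. */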
41839 i = n - 1;
41840 j = (n >> 1) - 1;
41841 for (; i > 0; i -= 2, j--)
41843 first[j] = gen_reg_rtx (cmode);
41844 v = gen_rtvec (2, ops[i - 1], ops[i]);
41845 ix86_expand_vector_init (false, first[j],
41846 gen_rtx_PARALLEL (cmode, v));
41849 n >>= 1;
41850 if (n > 4)
41852 gcc_assert (hmode != VOIDmode);
41853 gcc_assert (gmode != VOIDmode);
41854 for (i = j = 0; i < n; i += 2, j++)
41856 second[j] = gen_reg_rtx (hmode);
41857 ix86_expand_vector_init_concat (hmode, second [j],
41858 &first [i], 2);
41860 n >>= 1;
41861 for (i = j = 0; i < n; i += 2, j++)
41863 third[j] = gen_reg_rtx (gmode);
41864 ix86_expand_vector_init_concat (gmode, third[j],
41865 &second[i], 2);
41867 n >>= 1;
41868 ix86_expand_vector_init_concat (mode, target, third, n);
41870 else if (n > 2)
41872 gcc_assert (hmode != VOIDmode);
41873 for (i = j = 0; i < n; i += 2, j++)
41875 second[j] = gen_reg_rtx (hmode);
41876 ix86_expand_vector_init_concat (hmode, second [j],
41877 &first [i], 2);
41879 n >>= 1;
41880 ix86_expand_vector_init_concat (mode, target, second, n);
41882 else
41883 ix86_expand_vector_init_concat (mode, target, first, n);
41884 break;
41886 default:
41887 gcc_unreachable ();
41891 /* A subroutine of ix86_expand_vector_init_general. Use vector
41892 interleave to handle the most general case: all values variable,
41893 and none identical. */
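/* Each adjacent pair of scalars is first packed into one vector: the first scalar of the pair goes into element 0 and the second into element 1; successive interleave-low (punpckl*) operations then merge these partial vectors into the final result. */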
41895 static void
41896 ix86_expand_vector_init_interleave (machine_mode mode,
41897 rtx target, rtx *ops, int n)
41899 machine_mode first_imode, second_imode, third_imode, inner_mode;
41900 int i, j;
41901 rtx op0, op1;
41902 rtx (*gen_load_even) (rtx, rtx, rtx);
41903 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
41904 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
41906 switch (mode)
41908 case E_V8HImode:
41909 gen_load_even = gen_vec_setv8hi;
41910 gen_interleave_first_low = gen_vec_interleave_lowv4si;
41911 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41912 inner_mode = HImode;
41913 first_imode = V4SImode;
41914 second_imode = V2DImode;
41915 third_imode = VOIDmode;
41916 break;
41917 case E_V16QImode:
41918 gen_load_even = gen_vec_setv16qi;
41919 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
41920 gen_interleave_second_low = gen_vec_interleave_lowv4si;
41921 inner_mode = QImode;
41922 first_imode = V8HImode;
41923 second_imode = V4SImode;
41924 third_imode = V2DImode;
41925 break;
41926 default:
41927 gcc_unreachable ();
41930 for (i = 0; i < n; i++)
41932 /* Extend the odd element to SImode using a paradoxical SUBREG. */
41933 op0 = gen_reg_rtx (SImode);
41934 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
41936 /* Insert the SImode value as low element of V4SImode vector. */
41937 op1 = gen_reg_rtx (V4SImode);
41938 op0 = gen_rtx_VEC_MERGE (V4SImode,
41939 gen_rtx_VEC_DUPLICATE (V4SImode,
41940 op0),
41941 CONST0_RTX (V4SImode),
41942 const1_rtx);
41943 emit_insn (gen_rtx_SET (op1, op0));
41945 /* Cast the V4SImode vector back to a vector in the original mode. */
41946 op0 = gen_reg_rtx (mode);
41947 emit_move_insn (op0, gen_lowpart (mode, op1));
41949 /* Load even elements into the second position. */
41950 emit_insn (gen_load_even (op0,
41951 force_reg (inner_mode,
41952 ops [i + i + 1]),
41953 const1_rtx));
41955 /* Cast vector to FIRST_IMODE vector. */
41956 ops[i] = gen_reg_rtx (first_imode);
41957 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
41960 /* Interleave low FIRST_IMODE vectors. */
41961 for (i = j = 0; i < n; i += 2, j++)
41963 op0 = gen_reg_rtx (first_imode);
41964 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
41966 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
41967 ops[j] = gen_reg_rtx (second_imode);
41968 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
41971 /* Interleave low SECOND_IMODE vectors. */
41972 switch (second_imode)
41974 case E_V4SImode:
41975 for (i = j = 0; i < n / 2; i += 2, j++)
41977 op0 = gen_reg_rtx (second_imode);
41978 emit_insn (gen_interleave_second_low (op0, ops[i],
41979 ops[i + 1]));
41981 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
41982 vector. */
41983 ops[j] = gen_reg_rtx (third_imode);
41984 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
41986 second_imode = V2DImode;
41987 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41988 /* FALLTHRU */
41990 case E_V2DImode:
41991 op0 = gen_reg_rtx (second_imode);
41992 emit_insn (gen_interleave_second_low (op0, ops[0],
41993 ops[1]));
41995 /* Cast the SECOND_IMODE vector back to a vector in the original
41996 mode. */
41997 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
41998 break;
42000 default:
42001 gcc_unreachable ();
42005 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
42006 all values variable, and none identical. */
42008 static void
42009 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
42010 rtx target, rtx vals)
42012 rtx ops[64], op0, op1, op2, op3, op4, op5;
42013 machine_mode half_mode = VOIDmode;
42014 machine_mode quarter_mode = VOIDmode;
42015 int n, i;
42017 switch (mode)
42019 case E_V2SFmode:
42020 case E_V2SImode:
42021 if (!mmx_ok && !TARGET_SSE)
42022 break;
42023 /* FALLTHRU */
42025 case E_V16SImode:
42026 case E_V16SFmode:
42027 case E_V8DFmode:
42028 case E_V8DImode:
42029 case E_V8SFmode:
42030 case E_V8SImode:
42031 case E_V4DFmode:
42032 case E_V4DImode:
42033 case E_V4SFmode:
42034 case E_V4SImode:
42035 case E_V2DFmode:
42036 case E_V2DImode:
42037 n = GET_MODE_NUNITS (mode);
42038 for (i = 0; i < n; i++)
42039 ops[i] = XVECEXP (vals, 0, i);
42040 ix86_expand_vector_init_concat (mode, target, ops, n);
42041 return;
42043 case E_V2TImode:
42044 for (i = 0; i < 2; i++)
42045 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
42046 op0 = gen_reg_rtx (V4DImode);
42047 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
42048 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
42049 return;
42051 case E_V4TImode:
42052 for (i = 0; i < 4; i++)
42053 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
42054 ops[4] = gen_reg_rtx (V4DImode);
42055 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
42056 ops[5] = gen_reg_rtx (V4DImode);
42057 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
42058 op0 = gen_reg_rtx (V8DImode);
42059 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
42060 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
42061 return;
42063 case E_V32QImode:
42064 half_mode = V16QImode;
42065 goto half;
42067 case E_V16HImode:
42068 half_mode = V8HImode;
42069 goto half;
42071 half:
42072 n = GET_MODE_NUNITS (mode);
42073 for (i = 0; i < n; i++)
42074 ops[i] = XVECEXP (vals, 0, i);
42075 op0 = gen_reg_rtx (half_mode);
42076 op1 = gen_reg_rtx (half_mode);
42077 ix86_expand_vector_init_interleave (half_mode, op0, ops,
42078 n >> 2);
42079 ix86_expand_vector_init_interleave (half_mode, op1,
42080 &ops [n >> 1], n >> 2);
42081 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
42082 return;
42084 case E_V64QImode:
42085 quarter_mode = V16QImode;
42086 half_mode = V32QImode;
42087 goto quarter;
42089 case E_V32HImode:
42090 quarter_mode = V8HImode;
42091 half_mode = V16HImode;
42092 goto quarter;
42094 quarter:
42095 n = GET_MODE_NUNITS (mode);
42096 for (i = 0; i < n; i++)
42097 ops[i] = XVECEXP (vals, 0, i);
42098 op0 = gen_reg_rtx (quarter_mode);
42099 op1 = gen_reg_rtx (quarter_mode);
42100 op2 = gen_reg_rtx (quarter_mode);
42101 op3 = gen_reg_rtx (quarter_mode);
42102 op4 = gen_reg_rtx (half_mode);
42103 op5 = gen_reg_rtx (half_mode);
42104 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
42105 n >> 3);
42106 ix86_expand_vector_init_interleave (quarter_mode, op1,
42107 &ops [n >> 2], n >> 3);
42108 ix86_expand_vector_init_interleave (quarter_mode, op2,
42109 &ops [n >> 1], n >> 3);
42110 ix86_expand_vector_init_interleave (quarter_mode, op3,
42111 &ops [(n >> 1) | (n >> 2)], n >> 3);
42112 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
42113 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
42114 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
42115 return;
42117 case E_V16QImode:
42118 if (!TARGET_SSE4_1)
42119 break;
42120 /* FALLTHRU */
42122 case E_V8HImode:
42123 if (!TARGET_SSE2)
42124 break;
42126 /* Don't use ix86_expand_vector_init_interleave if we can't
42127 move from GPR to SSE register directly. */
42128 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
42129 break;
42131 n = GET_MODE_NUNITS (mode);
42132 for (i = 0; i < n; i++)
42133 ops[i] = XVECEXP (vals, 0, i);
42134 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
42135 return;
42137 case E_V4HImode:
42138 case E_V8QImode:
42139 break;
42141 default:
42142 gcc_unreachable ();
42146 int i, j, n_elts, n_words, n_elt_per_word;
42147 machine_mode inner_mode;
42148 rtx words[4], shift;
42150 inner_mode = GET_MODE_INNER (mode);
42151 n_elts = GET_MODE_NUNITS (mode);
42152 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
42153 n_elt_per_word = n_elts / n_words;
42154 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
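/* Fallback for the cases above that "break" rather than "return": pack
the elements into word_mode chunks.  Each word is built from its
highest-numbered element downwards, shifting the accumulated value left
by one element width and IORing the next element in, so element 0 of
the word lands in the low-order bits; the words are then assembled
into TARGET below.  */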
42156 for (i = 0; i < n_words; ++i)
42158 rtx word = NULL_RTX;
42160 for (j = 0; j < n_elt_per_word; ++j)
42162 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
42163 elt = convert_modes (word_mode, inner_mode, elt, true);
42165 if (j == 0)
42166 word = elt;
42167 else
42169 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
42170 word, 1, OPTAB_LIB_WIDEN);
42171 word = expand_simple_binop (word_mode, IOR, word, elt,
42172 word, 1, OPTAB_LIB_WIDEN);
42176 words[i] = word;
42179 if (n_words == 1)
42180 emit_move_insn (target, gen_lowpart (mode, words[0]));
42181 else if (n_words == 2)
42183 rtx tmp = gen_reg_rtx (mode);
42184 emit_clobber (tmp);
42185 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
42186 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
42187 emit_move_insn (target, tmp);
42189 else if (n_words == 4)
42191 rtx tmp = gen_reg_rtx (V4SImode);
42192 gcc_assert (word_mode == SImode);
42193 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
42194 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
42195 emit_move_insn (target, gen_lowpart (mode, tmp));
42197 else
42198 gcc_unreachable ();
42202 /* Initialize vector TARGET via VALS. Suppress the use of MMX
42203 instructions unless MMX_OK is true. */
42205 void
42206 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
42208 machine_mode mode = GET_MODE (target);
42209 machine_mode inner_mode = GET_MODE_INNER (mode);
42210 int n_elts = GET_MODE_NUNITS (mode);
42211 int n_var = 0, one_var = -1;
42212 bool all_same = true, all_const_zero = true;
42213 int i;
42214 rtx x;
42216 /* Handle first initialization from vector elts. */
42217 if (n_elts != XVECLEN (vals, 0))
42219 rtx subtarget = target;
42220 x = XVECEXP (vals, 0, 0);
42221 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
42222 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
42224 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
42225 if (inner_mode == QImode || inner_mode == HImode)
42227 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
42228 mode = mode_for_vector (SImode, n_bits / 4).require ();
42229 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
42230 ops[0] = gen_lowpart (inner_mode, ops[0]);
42231 ops[1] = gen_lowpart (inner_mode, ops[1]);
42232 subtarget = gen_reg_rtx (mode);
42234 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
42235 if (subtarget != target)
42236 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
42237 return;
42239 gcc_unreachable ();
42242 for (i = 0; i < n_elts; ++i)
42244 x = XVECEXP (vals, 0, i);
42245 if (!(CONST_SCALAR_INT_P (x)
42246 || CONST_DOUBLE_P (x)
42247 || CONST_FIXED_P (x)))
42248 n_var++, one_var = i;
42249 else if (x != CONST0_RTX (inner_mode))
42250 all_const_zero = false;
42251 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
42252 all_same = false;
42255 /* Constants are best loaded from the constant pool. */
42256 if (n_var == 0)
42258 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
42259 return;
42262 /* If all values are identical, broadcast the value. */
42263 if (all_same
42264 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
42265 XVECEXP (vals, 0, 0)))
42266 return;
42268 /* Values where only one field is non-constant are best loaded from
42269 the pool and overwritten via move later. */
42270 if (n_var == 1)
42272 if (all_const_zero
42273 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
42274 XVECEXP (vals, 0, one_var),
42275 one_var))
42276 return;
42278 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
42279 return;
42282 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
42285 void
42286 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
42288 machine_mode mode = GET_MODE (target);
42289 machine_mode inner_mode = GET_MODE_INNER (mode);
42290 machine_mode half_mode;
42291 bool use_vec_merge = false;
42292 rtx tmp;
42293 static rtx (*gen_extract[6][2]) (rtx, rtx)
42295 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
42296 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
42297 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
42298 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
42299 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
42300 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
42302 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
42304 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
42305 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
42306 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
42307 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
42308 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
42309 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
42311 int i, j, n;
42312 machine_mode mmode = VOIDmode;
42313 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
42315 switch (mode)
42317 case E_V2SFmode:
42318 case E_V2SImode:
42319 if (mmx_ok)
42321 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
42322 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
42323 if (elt == 0)
42324 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
42325 else
42326 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
42327 emit_insn (gen_rtx_SET (target, tmp));
42328 return;
42330 break;
42332 case E_V2DImode:
42333 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
42334 if (use_vec_merge)
42335 break;
42337 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
42338 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
42339 if (elt == 0)
42340 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
42341 else
42342 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
42343 emit_insn (gen_rtx_SET (target, tmp));
42344 return;
42346 case E_V2DFmode:
42348 rtx op0, op1;
42350 /* For the two element vectors, we implement a VEC_CONCAT with
42351 the extraction of the other element. */
42353 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
42354 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
42356 if (elt == 0)
42357 op0 = val, op1 = tmp;
42358 else
42359 op0 = tmp, op1 = val;
42361 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
42362 emit_insn (gen_rtx_SET (target, tmp));
42364 return;
42366 case E_V4SFmode:
42367 use_vec_merge = TARGET_SSE4_1;
42368 if (use_vec_merge)
42369 break;
42371 switch (elt)
42373 case 0:
42374 use_vec_merge = true;
42375 break;
42377 case 1:
42378 /* tmp = target = A B C D */
42379 tmp = copy_to_reg (target);
42380 /* target = A A B B */
42381 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
42382 /* target = X A B B */
42383 ix86_expand_vector_set (false, target, val, 0);
42384 /* target = A X C D */
42385 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42386 const1_rtx, const0_rtx,
42387 GEN_INT (2+4), GEN_INT (3+4)));
42388 return;
42390 case 2:
42391 /* tmp = target = A B C D */
42392 tmp = copy_to_reg (target);
42393 /* tmp = X B C D */
42394 ix86_expand_vector_set (false, tmp, val, 0);
42395 /* target = A B X D */
42396 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42397 const0_rtx, const1_rtx,
42398 GEN_INT (0+4), GEN_INT (3+4)));
42399 return;
42401 case 3:
42402 /* tmp = target = A B C D */
42403 tmp = copy_to_reg (target);
42404 /* tmp = X B C D */
42405 ix86_expand_vector_set (false, tmp, val, 0);
42406 /* target = A B X D */
42407 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42408 const0_rtx, const1_rtx,
42409 GEN_INT (2+4), GEN_INT (0+4)));
42410 return;
42412 default:
42413 gcc_unreachable ();
42415 break;
42417 case E_V4SImode:
42418 use_vec_merge = TARGET_SSE4_1;
42419 if (use_vec_merge)
42420 break;
42422 /* Element 0 handled by vec_merge below. */
42423 if (elt == 0)
42425 use_vec_merge = true;
42426 break;
42429 if (TARGET_SSE2)
42431 /* With SSE2, use integer shuffles to swap element 0 and ELT,
42432 store into element 0, then shuffle them back. */
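/* For example, with ELT == 2 and target = {A B C D}: the first pshufd
uses lane order {2,1,0,3} and yields {C B A D}, the recursive call
stores VAL into lane 0 giving {X B A D}, and repeating the same swap
permutation restores the order as {A B X D}.  */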
42434 rtx order[4];
42436 order[0] = GEN_INT (elt);
42437 order[1] = const1_rtx;
42438 order[2] = const2_rtx;
42439 order[3] = GEN_INT (3);
42440 order[elt] = const0_rtx;
42442 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42443 order[1], order[2], order[3]));
42445 ix86_expand_vector_set (false, target, val, 0);
42447 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42448 order[1], order[2], order[3]));
42450 else
42452 /* For SSE1, we have to reuse the V4SF code. */
42453 rtx t = gen_reg_rtx (V4SFmode);
42454 emit_move_insn (t, gen_lowpart (V4SFmode, target));
42455 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
42456 emit_move_insn (target, gen_lowpart (mode, t));
42458 return;
42460 case E_V8HImode:
42461 use_vec_merge = TARGET_SSE2;
42462 break;
42463 case E_V4HImode:
42464 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42465 break;
42467 case E_V16QImode:
42468 use_vec_merge = TARGET_SSE4_1;
42469 break;
42471 case E_V8QImode:
42472 break;
42474 case E_V32QImode:
42475 half_mode = V16QImode;
42476 j = 0;
42477 n = 16;
42478 goto half;
42480 case E_V16HImode:
42481 half_mode = V8HImode;
42482 j = 1;
42483 n = 8;
42484 goto half;
42486 case E_V8SImode:
42487 half_mode = V4SImode;
42488 j = 2;
42489 n = 4;
42490 goto half;
42492 case E_V4DImode:
42493 half_mode = V2DImode;
42494 j = 3;
42495 n = 2;
42496 goto half;
42498 case E_V8SFmode:
42499 half_mode = V4SFmode;
42500 j = 4;
42501 n = 4;
42502 goto half;
42504 case E_V4DFmode:
42505 half_mode = V2DFmode;
42506 j = 5;
42507 n = 2;
42508 goto half;
42510 half:
42511 /* Compute offset. */
42512 i = elt / n;
42513 elt %= n;
42515 gcc_assert (i <= 1);
42517 /* Extract the half. */
42518 tmp = gen_reg_rtx (half_mode);
42519 emit_insn (gen_extract[j][i] (tmp, target));
42521 /* Put val in tmp at elt. */
42522 ix86_expand_vector_set (false, tmp, val, elt);
42524 /* Put it back. */
42525 emit_insn (gen_insert[j][i] (target, target, tmp));
42526 return;
42528 case E_V8DFmode:
42529 if (TARGET_AVX512F)
42531 mmode = QImode;
42532 gen_blendm = gen_avx512f_blendmv8df;
42534 break;
42536 case E_V8DImode:
42537 if (TARGET_AVX512F)
42539 mmode = QImode;
42540 gen_blendm = gen_avx512f_blendmv8di;
42542 break;
42544 case E_V16SFmode:
42545 if (TARGET_AVX512F)
42547 mmode = HImode;
42548 gen_blendm = gen_avx512f_blendmv16sf;
42550 break;
42552 case E_V16SImode:
42553 if (TARGET_AVX512F)
42555 mmode = HImode;
42556 gen_blendm = gen_avx512f_blendmv16si;
42558 break;
42560 case E_V32HImode:
42561 if (TARGET_AVX512F && TARGET_AVX512BW)
42563 mmode = SImode;
42564 gen_blendm = gen_avx512bw_blendmv32hi;
42566 break;
42568 case E_V64QImode:
42569 if (TARGET_AVX512F && TARGET_AVX512BW)
42571 mmode = DImode;
42572 gen_blendm = gen_avx512bw_blendmv64qi;
42574 break;
42576 default:
42577 break;
42580 if (mmode != VOIDmode)
42582 tmp = gen_reg_rtx (mode);
42583 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
42584 /* The avx512*_blendm<mode> expanders have different operand order
42585 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
42586 elements where the mask is set and second input operand otherwise,
42587 in {sse,avx}*_*blend* the first input operand is used for elements
42588 where the mask is clear and second input operand otherwise. */
42589 emit_insn (gen_blendm (target, target, tmp,
42590 force_reg (mmode,
42591 gen_int_mode (1 << elt, mmode))));
42593 else if (use_vec_merge)
42595 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
42596 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
42597 emit_insn (gen_rtx_SET (target, tmp));
42599 else
42601 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42603 emit_move_insn (mem, target);
42605 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42606 emit_move_insn (tmp, val);
42608 emit_move_insn (target, mem);
42612 void
42613 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
42615 machine_mode mode = GET_MODE (vec);
42616 machine_mode inner_mode = GET_MODE_INNER (mode);
42617 bool use_vec_extr = false;
42618 rtx tmp;
42620 switch (mode)
42622 case E_V2SImode:
42623 case E_V2SFmode:
42624 if (!mmx_ok)
42625 break;
42626 /* FALLTHRU */
42628 case E_V2DFmode:
42629 case E_V2DImode:
42630 case E_V2TImode:
42631 case E_V4TImode:
42632 use_vec_extr = true;
42633 break;
42635 case E_V4SFmode:
42636 use_vec_extr = TARGET_SSE4_1;
42637 if (use_vec_extr)
42638 break;
42640 switch (elt)
42642 case 0:
42643 tmp = vec;
42644 break;
42646 case 1:
42647 case 3:
42648 tmp = gen_reg_rtx (mode);
42649 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
42650 GEN_INT (elt), GEN_INT (elt),
42651 GEN_INT (elt+4), GEN_INT (elt+4)));
42652 break;
42654 case 2:
42655 tmp = gen_reg_rtx (mode);
42656 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
42657 break;
42659 default:
42660 gcc_unreachable ();
42662 vec = tmp;
42663 use_vec_extr = true;
42664 elt = 0;
42665 break;
42667 case E_V4SImode:
42668 use_vec_extr = TARGET_SSE4_1;
42669 if (use_vec_extr)
42670 break;
42672 if (TARGET_SSE2)
42674 switch (elt)
42676 case 0:
42677 tmp = vec;
42678 break;
42680 case 1:
42681 case 3:
42682 tmp = gen_reg_rtx (mode);
42683 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
42684 GEN_INT (elt), GEN_INT (elt),
42685 GEN_INT (elt), GEN_INT (elt)));
42686 break;
42688 case 2:
42689 tmp = gen_reg_rtx (mode);
42690 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
42691 break;
42693 default:
42694 gcc_unreachable ();
42696 vec = tmp;
42697 use_vec_extr = true;
42698 elt = 0;
42700 else
42702 /* For SSE1, we have to reuse the V4SF code. */
42703 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
42704 gen_lowpart (V4SFmode, vec), elt);
42705 return;
42707 break;
42709 case E_V8HImode:
42710 use_vec_extr = TARGET_SSE2;
42711 break;
42712 case E_V4HImode:
42713 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42714 break;
42716 case E_V16QImode:
42717 use_vec_extr = TARGET_SSE4_1;
42718 break;
42720 case E_V8SFmode:
42721 if (TARGET_AVX)
42723 tmp = gen_reg_rtx (V4SFmode);
42724 if (elt < 4)
42725 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
42726 else
42727 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
42728 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42729 return;
42731 break;
42733 case E_V4DFmode:
42734 if (TARGET_AVX)
42736 tmp = gen_reg_rtx (V2DFmode);
42737 if (elt < 2)
42738 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
42739 else
42740 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
42741 ix86_expand_vector_extract (false, target, tmp, elt & 1);
42742 return;
42744 break;
42746 case E_V32QImode:
42747 if (TARGET_AVX)
42749 tmp = gen_reg_rtx (V16QImode);
42750 if (elt < 16)
42751 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
42752 else
42753 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
42754 ix86_expand_vector_extract (false, target, tmp, elt & 15);
42755 return;
42757 break;
42759 case E_V16HImode:
42760 if (TARGET_AVX)
42762 tmp = gen_reg_rtx (V8HImode);
42763 if (elt < 8)
42764 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
42765 else
42766 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
42767 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42768 return;
42770 break;
42772 case E_V8SImode:
42773 if (TARGET_AVX)
42775 tmp = gen_reg_rtx (V4SImode);
42776 if (elt < 4)
42777 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
42778 else
42779 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
42780 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42781 return;
42783 break;
42785 case E_V4DImode:
42786 if (TARGET_AVX)
42788 tmp = gen_reg_rtx (V2DImode);
42789 if (elt < 2)
42790 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
42791 else
42792 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
42793 ix86_expand_vector_extract (false, target, tmp, elt & 1);
42794 return;
42796 break;
42798 case E_V32HImode:
42799 if (TARGET_AVX512BW)
42801 tmp = gen_reg_rtx (V16HImode);
42802 if (elt < 16)
42803 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
42804 else
42805 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
42806 ix86_expand_vector_extract (false, target, tmp, elt & 15);
42807 return;
42809 break;
42811 case E_V64QImode:
42812 if (TARGET_AVX512BW)
42814 tmp = gen_reg_rtx (V32QImode);
42815 if (elt < 32)
42816 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
42817 else
42818 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
42819 ix86_expand_vector_extract (false, target, tmp, elt & 31);
42820 return;
42822 break;
42824 case E_V16SFmode:
42825 tmp = gen_reg_rtx (V8SFmode);
42826 if (elt < 8)
42827 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
42828 else
42829 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
42830 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42831 return;
42833 case E_V8DFmode:
42834 tmp = gen_reg_rtx (V4DFmode);
42835 if (elt < 4)
42836 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
42837 else
42838 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
42839 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42840 return;
42842 case E_V16SImode:
42843 tmp = gen_reg_rtx (V8SImode);
42844 if (elt < 8)
42845 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
42846 else
42847 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
42848 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42849 return;
42851 case E_V8DImode:
42852 tmp = gen_reg_rtx (V4DImode);
42853 if (elt < 4)
42854 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
42855 else
42856 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
42857 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42858 return;
42860 case E_V8QImode:
42861 /* ??? Could extract the appropriate HImode element and shift. */
42862 default:
42863 break;
42866 if (use_vec_extr)
42868 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
42869 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
42871 /* Let the rtl optimizers know about the zero extension performed. */
42872 if (inner_mode == QImode || inner_mode == HImode)
42874 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
42875 target = gen_lowpart (SImode, target);
42878 emit_insn (gen_rtx_SET (target, tmp));
42880 else
42882 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42884 emit_move_insn (mem, vec);
42886 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42887 emit_move_insn (target, tmp);
42891 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
42892 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
42893 The upper bits of DEST are undefined, though they shouldn't cause
42894 exceptions (some bits from src or all zeros are ok). */
42896 static void
42897 emit_reduc_half (rtx dest, rtx src, int i)
42899 rtx tem, d = dest;
42900 switch (GET_MODE (src))
42902 case E_V4SFmode:
42903 if (i == 128)
42904 tem = gen_sse_movhlps (dest, src, src);
42905 else
42906 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
42907 GEN_INT (1 + 4), GEN_INT (1 + 4));
42908 break;
42909 case E_V2DFmode:
42910 tem = gen_vec_interleave_highv2df (dest, src, src);
42911 break;
42912 case E_V16QImode:
42913 case E_V8HImode:
42914 case E_V4SImode:
42915 case E_V2DImode:
42916 d = gen_reg_rtx (V1TImode);
42917 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
42918 GEN_INT (i / 2));
42919 break;
42920 case E_V8SFmode:
42921 if (i == 256)
42922 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
42923 else
42924 tem = gen_avx_shufps256 (dest, src, src,
42925 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
42926 break;
42927 case E_V4DFmode:
42928 if (i == 256)
42929 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
42930 else
42931 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
42932 break;
42933 case E_V32QImode:
42934 case E_V16HImode:
42935 case E_V8SImode:
42936 case E_V4DImode:
42937 if (i == 256)
42939 if (GET_MODE (dest) != V4DImode)
42940 d = gen_reg_rtx (V4DImode);
42941 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
42942 gen_lowpart (V4DImode, src),
42943 const1_rtx);
42945 else
42947 d = gen_reg_rtx (V2TImode);
42948 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
42949 GEN_INT (i / 2));
42951 break;
42952 case E_V64QImode:
42953 case E_V32HImode:
42954 case E_V16SImode:
42955 case E_V16SFmode:
42956 case E_V8DImode:
42957 case E_V8DFmode:
42958 if (i > 128)
42959 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
42960 gen_lowpart (V16SImode, src),
42961 gen_lowpart (V16SImode, src),
42962 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
42963 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
42964 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
42965 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
42966 GEN_INT (0xC), GEN_INT (0xD),
42967 GEN_INT (0xE), GEN_INT (0xF),
42968 GEN_INT (0x10), GEN_INT (0x11),
42969 GEN_INT (0x12), GEN_INT (0x13),
42970 GEN_INT (0x14), GEN_INT (0x15),
42971 GEN_INT (0x16), GEN_INT (0x17));
42972 else
42973 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
42974 gen_lowpart (V16SImode, src),
42975 GEN_INT (i == 128 ? 0x2 : 0x1),
42976 GEN_INT (0x3),
42977 GEN_INT (0x3),
42978 GEN_INT (0x3),
42979 GEN_INT (i == 128 ? 0x6 : 0x5),
42980 GEN_INT (0x7),
42981 GEN_INT (0x7),
42982 GEN_INT (0x7),
42983 GEN_INT (i == 128 ? 0xA : 0x9),
42984 GEN_INT (0xB),
42985 GEN_INT (0xB),
42986 GEN_INT (0xB),
42987 GEN_INT (i == 128 ? 0xE : 0xD),
42988 GEN_INT (0xF),
42989 GEN_INT (0xF),
42990 GEN_INT (0xF));
42991 break;
42992 default:
42993 gcc_unreachable ();
42995 emit_insn (tem);
42996 if (d != dest)
42997 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
43000 /* Expand a vector reduction. FN is the binary pattern to reduce;
43001 DEST is the destination; IN is the input vector. */
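/* The loop below halves the reduced distance each step, log2(N) steps
in all: e.g. for a V4SF maximum, the first emit_reduc_half moves
elements {c d} of {a b c d} down over {a b} and FN leaves
{max(a,c) max(b,d) . .}; the next step folds those two, so the scalar
result ends up in element 0 of DEST (the other elements are
unspecified).  */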
43003 void
43004 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
43006 rtx half, dst, vec = in;
43007 machine_mode mode = GET_MODE (in);
43008 int i;
43010 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
43011 if (TARGET_SSE4_1
43012 && mode == V8HImode
43013 && fn == gen_uminv8hi3)
43015 emit_insn (gen_sse4_1_phminposuw (dest, in));
43016 return;
43019 for (i = GET_MODE_BITSIZE (mode);
43020 i > GET_MODE_UNIT_BITSIZE (mode);
43021 i >>= 1)
43023 half = gen_reg_rtx (mode);
43024 emit_reduc_half (half, vec, i);
43025 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
43026 dst = dest;
43027 else
43028 dst = gen_reg_rtx (mode);
43029 emit_insn (fn (dst, half, vec));
43030 vec = dst;
43034 /* Target hook for scalar_mode_supported_p. */
43035 static bool
43036 ix86_scalar_mode_supported_p (scalar_mode mode)
43038 if (DECIMAL_FLOAT_MODE_P (mode))
43039 return default_decimal_float_supported_p ();
43040 else if (mode == TFmode)
43041 return true;
43042 else
43043 return default_scalar_mode_supported_p (mode);
43046 /* Implements target hook vector_mode_supported_p. */
43047 static bool
43048 ix86_vector_mode_supported_p (machine_mode mode)
43050 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43051 return true;
43052 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43053 return true;
43054 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43055 return true;
43056 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43057 return true;
43058 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
43059 return true;
43060 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
43061 return true;
43062 return false;
43065 /* Target hook for c_mode_for_suffix. */
43066 static machine_mode
43067 ix86_c_mode_for_suffix (char suffix)
43069 if (suffix == 'q')
43070 return TFmode;
43071 if (suffix == 'w')
43072 return XFmode;
43074 return VOIDmode;
43077 /* Worker function for TARGET_MD_ASM_ADJUST.
43079 We implement asm flag outputs, and maintain source compatibility
43080 with the old cc0-based compiler. */
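/* An asm flag output in user code looks like, for instance:

     int x, y; unsigned char c;
     asm ("addl %2, %0" : "+r" (x), "=@ccc" (c) : "r" (y));

where "=@ccc" asks for the carry flag.  The loop below maps the
condition-code name to a (CC mode, comparison) pair (CCCmode/EQ for
"c"), substitutes the flags register for the output, and emits a
QImode compare against zero that is widened to the user's output mode
when needed.  */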
43082 static rtx_insn *
43083 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
43084 vec<const char *> &constraints,
43085 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
43087 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
43088 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
43090 bool saw_asm_flag = false;
43092 start_sequence ();
43093 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
43095 const char *con = constraints[i];
43096 if (strncmp (con, "=@cc", 4) != 0)
43097 continue;
43098 con += 4;
43099 if (strchr (con, ',') != NULL)
43101 error ("alternatives not allowed in asm flag output");
43102 continue;
43105 bool invert = false;
43106 if (con[0] == 'n')
43107 invert = true, con++;
43109 machine_mode mode = CCmode;
43110 rtx_code code = UNKNOWN;
43112 switch (con[0])
43114 case 'a':
43115 if (con[1] == 0)
43116 mode = CCAmode, code = EQ;
43117 else if (con[1] == 'e' && con[2] == 0)
43118 mode = CCCmode, code = NE;
43119 break;
43120 case 'b':
43121 if (con[1] == 0)
43122 mode = CCCmode, code = EQ;
43123 else if (con[1] == 'e' && con[2] == 0)
43124 mode = CCAmode, code = NE;
43125 break;
43126 case 'c':
43127 if (con[1] == 0)
43128 mode = CCCmode, code = EQ;
43129 break;
43130 case 'e':
43131 if (con[1] == 0)
43132 mode = CCZmode, code = EQ;
43133 break;
43134 case 'g':
43135 if (con[1] == 0)
43136 mode = CCGCmode, code = GT;
43137 else if (con[1] == 'e' && con[2] == 0)
43138 mode = CCGCmode, code = GE;
43139 break;
43140 case 'l':
43141 if (con[1] == 0)
43142 mode = CCGCmode, code = LT;
43143 else if (con[1] == 'e' && con[2] == 0)
43144 mode = CCGCmode, code = LE;
43145 break;
43146 case 'o':
43147 if (con[1] == 0)
43148 mode = CCOmode, code = EQ;
43149 break;
43150 case 'p':
43151 if (con[1] == 0)
43152 mode = CCPmode, code = EQ;
43153 break;
43154 case 's':
43155 if (con[1] == 0)
43156 mode = CCSmode, code = EQ;
43157 break;
43158 case 'z':
43159 if (con[1] == 0)
43160 mode = CCZmode, code = EQ;
43161 break;
43163 if (code == UNKNOWN)
43165 error ("unknown asm flag output %qs", constraints[i]);
43166 continue;
43168 if (invert)
43169 code = reverse_condition (code);
43171 rtx dest = outputs[i];
43172 if (!saw_asm_flag)
43174 /* This is the first asm flag output. Here we put the flags
43175 register in as the real output and adjust the condition to
43176 allow it. */
43177 constraints[i] = "=Bf";
43178 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
43179 saw_asm_flag = true;
43181 else
43183 /* We don't need the flags register as output twice. */
43184 constraints[i] = "=X";
43185 outputs[i] = gen_rtx_SCRATCH (SImode);
43188 rtx x = gen_rtx_REG (mode, FLAGS_REG);
43189 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
43191 machine_mode dest_mode = GET_MODE (dest);
43192 if (!SCALAR_INT_MODE_P (dest_mode))
43194 error ("invalid type for asm flag output");
43195 continue;
43198 if (dest_mode == DImode && !TARGET_64BIT)
43199 dest_mode = SImode;
43201 if (dest_mode != QImode)
43203 rtx destqi = gen_reg_rtx (QImode);
43204 emit_insn (gen_rtx_SET (destqi, x));
43206 if (TARGET_ZERO_EXTEND_WITH_AND
43207 && optimize_function_for_speed_p (cfun))
43209 x = force_reg (dest_mode, const0_rtx);
43211 emit_insn (gen_movstrictqi
43212 (gen_lowpart (QImode, x), destqi));
43214 else
43215 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
43218 if (dest_mode != GET_MODE (dest))
43220 rtx tmp = gen_reg_rtx (SImode);
43222 emit_insn (gen_rtx_SET (tmp, x));
43223 emit_insn (gen_zero_extendsidi2 (dest, tmp));
43225 else
43226 emit_insn (gen_rtx_SET (dest, x));
43228 rtx_insn *seq = get_insns ();
43229 end_sequence ();
43231 if (saw_asm_flag)
43232 return seq;
43233 else
43235 /* If we had no asm flag outputs, clobber the flags. */
43236 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
43237 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
43238 return NULL;
43242 /* Implements target vector targetm.asm.encode_section_info. */
43244 static void ATTRIBUTE_UNUSED
43245 ix86_encode_section_info (tree decl, rtx rtl, int first)
43247 default_encode_section_info (decl, rtl, first);
43249 if (ix86_in_large_data_p (decl))
43250 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
43253 /* Worker function for REVERSE_CONDITION. */
43255 enum rtx_code
43256 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
43258 return (mode == CCFPmode
43259 ? reverse_condition_maybe_unordered (code)
43260 : reverse_condition (code));
43263 /* Output code to perform an x87 FP register move, from OPERANDS[1]
43264 to OPERANDS[0]. */
43266 const char *
43267 output_387_reg_move (rtx_insn *insn, rtx *operands)
43269 if (REG_P (operands[0]))
43271 if (REG_P (operands[1])
43272 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
43274 if (REGNO (operands[0]) == FIRST_STACK_REG)
43275 return output_387_ffreep (operands, 0);
43276 return "fstp\t%y0";
43278 if (STACK_TOP_P (operands[0]))
43279 return "fld%Z1\t%y1";
43280 return "fst\t%y0";
43282 else if (MEM_P (operands[0]))
43284 gcc_assert (REG_P (operands[1]));
43285 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
43286 return "fstp%Z0\t%y0";
43287 else
43289 /* There is no non-popping store to memory for XFmode.
43290 So if we need one, follow the store with a load. */
43291 if (GET_MODE (operands[0]) == XFmode)
43292 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
43293 else
43294 return "fst%Z0\t%y0";
43297 else
43298 gcc_unreachable();
43301 /* Output code to perform a conditional jump to LABEL, if C2 flag in
43302 FP status register is set. */
43304 void
43305 ix86_emit_fp_unordered_jump (rtx label)
43307 rtx reg = gen_reg_rtx (HImode);
43308 rtx temp;
43310 emit_insn (gen_x86_fnstsw_1 (reg));
43312 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
43314 emit_insn (gen_x86_sahf_1 (reg));
43316 temp = gen_rtx_REG (CCmode, FLAGS_REG);
43317 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
43319 else
43321 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
43323 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
43324 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
43327 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
43328 gen_rtx_LABEL_REF (VOIDmode, label),
43329 pc_rtx);
43330 temp = gen_rtx_SET (pc_rtx, temp);
43332 emit_jump_insn (temp);
43333 predict_jump (REG_BR_PROB_BASE * 10 / 100);
43336 /* Output code to perform a log1p XFmode calculation. */
43338 void ix86_emit_i387_log1p (rtx op0, rtx op1)
43340 rtx_code_label *label1 = gen_label_rtx ();
43341 rtx_code_label *label2 = gen_label_rtx ();
43343 rtx tmp = gen_reg_rtx (XFmode);
43344 rtx tmp2 = gen_reg_rtx (XFmode);
43345 rtx test;
43347 emit_insn (gen_absxf2 (tmp, op1));
43348 test = gen_rtx_GE (VOIDmode, tmp,
43349 const_double_from_real_value (
43350 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
43351 XFmode));
43352 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
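/* The threshold above is 1 - sqrt(2)/2 ~= 0.29289...: the x87 fyl2xp1
instruction used on the fall-through path is only specified for |op1|
within that range, so larger magnitudes branch to label1 and use fyl2x
on 1.0 + op1 instead.  Both paths multiply by fldln2 (ln 2) to turn
the log2 result into a natural logarithm.  */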
43354 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
43355 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
43356 emit_jump (label2);
43358 emit_label (label1);
43359 emit_move_insn (tmp, CONST1_RTX (XFmode));
43360 emit_insn (gen_addxf3 (tmp, op1, tmp));
43361 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
43362 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
43364 emit_label (label2);
43367 /* Emit code for round calculation. */
43368 void ix86_emit_i387_round (rtx op0, rtx op1)
43370 machine_mode inmode = GET_MODE (op1);
43371 machine_mode outmode = GET_MODE (op0);
43372 rtx e1, e2, res, tmp, tmp1, half;
43373 rtx scratch = gen_reg_rtx (HImode);
43374 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
43375 rtx_code_label *jump_label = gen_label_rtx ();
43376 rtx insn;
43377 rtx (*gen_abs) (rtx, rtx);
43378 rtx (*gen_neg) (rtx, rtx);
43380 switch (inmode)
43382 case E_SFmode:
43383 gen_abs = gen_abssf2;
43384 break;
43385 case E_DFmode:
43386 gen_abs = gen_absdf2;
43387 break;
43388 case E_XFmode:
43389 gen_abs = gen_absxf2;
43390 break;
43391 default:
43392 gcc_unreachable ();
43395 switch (outmode)
43397 case E_SFmode:
43398 gen_neg = gen_negsf2;
43399 break;
43400 case E_DFmode:
43401 gen_neg = gen_negdf2;
43402 break;
43403 case E_XFmode:
43404 gen_neg = gen_negxf2;
43405 break;
43406 case E_HImode:
43407 gen_neg = gen_neghi2;
43408 break;
43409 case E_SImode:
43410 gen_neg = gen_negsi2;
43411 break;
43412 case E_DImode:
43413 gen_neg = gen_negdi2;
43414 break;
43415 default:
43416 gcc_unreachable ();
43419 e1 = gen_reg_rtx (inmode);
43420 e2 = gen_reg_rtx (inmode);
43421 res = gen_reg_rtx (outmode);
43423 half = const_double_from_real_value (dconsthalf, inmode);
43425 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
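/* E.g. round (-2.3): fabs gives 2.3, adding 0.5 gives 2.8, floor gives
2.0, and the sign test on the fxam result negates it back to -2.0.
Halfway cases round away from zero: round (2.5) becomes
floor (3.0) = 3.0.  */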
43427 /* scratch = fxam(op1) */
43428 emit_insn (gen_rtx_SET (scratch,
43429 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
43430 UNSPEC_FXAM)));
43431 /* e1 = fabs(op1) */
43432 emit_insn (gen_abs (e1, op1));
43434 /* e2 = e1 + 0.5 */
43435 half = force_reg (inmode, half);
43436 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
43438 /* res = floor(e2) */
43439 if (inmode != XFmode)
43441 tmp1 = gen_reg_rtx (XFmode);
43443 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
43445 else
43446 tmp1 = e2;
43448 switch (outmode)
43450 case E_SFmode:
43451 case E_DFmode:
43453 rtx tmp0 = gen_reg_rtx (XFmode);
43455 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
43457 emit_insn (gen_rtx_SET (res,
43458 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
43459 UNSPEC_TRUNC_NOOP)));
43461 break;
43462 case E_XFmode:
43463 emit_insn (gen_frndintxf2_floor (res, tmp1));
43464 break;
43465 case E_HImode:
43466 emit_insn (gen_lfloorxfhi2 (res, tmp1));
43467 break;
43468 case E_SImode:
43469 emit_insn (gen_lfloorxfsi2 (res, tmp1));
43470 break;
43471 case E_DImode:
43472 emit_insn (gen_lfloorxfdi2 (res, tmp1));
43473 break;
43474 default:
43475 gcc_unreachable ();
43478 /* flags = signbit(a) */
43479 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
43481 /* if (flags) then res = -res */
43482 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
43483 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
43484 gen_rtx_LABEL_REF (VOIDmode, jump_label),
43485 pc_rtx);
43486 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43487 predict_jump (REG_BR_PROB_BASE * 50 / 100);
43488 JUMP_LABEL (insn) = jump_label;
43490 emit_insn (gen_neg (res, res));
43492 emit_label (jump_label);
43493 LABEL_NUSES (jump_label) = 1;
43495 emit_move_insn (op0, res);
43498 /* Output code to perform a Newton-Raphson approximation of a single precision
43499 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
43501 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
43503 rtx x0, x1, e0, e1;
43505 x0 = gen_reg_rtx (mode);
43506 e0 = gen_reg_rtx (mode);
43507 e1 = gen_reg_rtx (mode);
43508 x1 = gen_reg_rtx (mode);
43510 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
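/* This is the standard Newton-Raphson refinement of the rcp estimate
x0 ~= 1/b:  x1 = x0 * (2 - b*x0) = (x0 + x0) - (b * x0 * x0), which
roughly doubles the number of correct bits; the code computes it as
e1 - e0 with e1 = x0 + x0 and e0 = (x0 * b) * x0.  */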
43512 b = force_reg (mode, b);
43514 /* x0 = rcp(b) estimate */
43515 if (mode == V16SFmode || mode == V8DFmode)
43517 if (TARGET_AVX512ER)
43519 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43520 UNSPEC_RCP28)));
43521 /* res = a * x0 */
43522 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
43523 return;
43525 else
43526 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43527 UNSPEC_RCP14)));
43529 else
43530 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43531 UNSPEC_RCP)));
43533 /* e0 = x0 * b */
43534 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
43536 /* e0 = x0 * e0 */
43537 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
43539 /* e1 = x0 + x0 */
43540 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
43542 /* x1 = e1 - e0 */
43543 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
43545 /* res = a * x1 */
43546 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
43549 /* Output code to perform a Newton-Raphson approximation of a
43550 single precision floating point [reciprocal] square root. */
43552 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
43554 rtx x0, e0, e1, e2, e3, mthree, mhalf;
43555 REAL_VALUE_TYPE r;
43556 int unspec;
43558 x0 = gen_reg_rtx (mode);
43559 e0 = gen_reg_rtx (mode);
43560 e1 = gen_reg_rtx (mode);
43561 e2 = gen_reg_rtx (mode);
43562 e3 = gen_reg_rtx (mode);
43564 if (TARGET_AVX512ER && mode == V16SFmode)
43566 if (recip)
43567 /* res = rsqrt28(a) estimate */
43568 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43569 UNSPEC_RSQRT28)));
43570 else
43572 /* x0 = rsqrt28(a) estimate */
43573 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43574 UNSPEC_RSQRT28)));
43575 /* res = rcp28(x0) estimate */
43576 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
43577 UNSPEC_RCP28)));
43579 return;
43582 real_from_integer (&r, VOIDmode, -3, SIGNED);
43583 mthree = const_double_from_real_value (r, SFmode);
43585 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
43586 mhalf = const_double_from_real_value (r, SFmode);
43587 unspec = UNSPEC_RSQRT;
43589 if (VECTOR_MODE_P (mode))
43591 mthree = ix86_build_const_vector (mode, true, mthree);
43592 mhalf = ix86_build_const_vector (mode, true, mhalf);
43593 /* There is no 512-bit rsqrt. There is however rsqrt14. */
43594 if (GET_MODE_SIZE (mode) == 64)
43595 unspec = UNSPEC_RSQRT14;
43598 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
43599 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
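/* Both forms are the Newton-Raphson step for x0 ~= 1/sqrt(a):
x1 = x0 * (3 - a*x0*x0) / 2 = -0.5 * x0 * (a*x0*x0 - 3.0);
multiplying once more by a turns the refined reciprocal square root
into sqrt(a), which gives the first line above.  */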
43601 a = force_reg (mode, a);
43603 /* x0 = rsqrt(a) estimate */
43604 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43605 unspec)));
43607 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
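/* rsqrt(0.0) is +Inf, and the e0 = x0 * a step below would then
compute Inf * 0.0 = NaN; masking x0 to zero for the a == 0.0 lanes
makes the final sqrt(0.0) come out as 0.0 instead.  */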
43608 if (!recip)
43610 rtx zero = force_reg (mode, CONST0_RTX(mode));
43611 rtx mask;
43613 /* Handle masked compare. */
43614 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
43616 mask = gen_reg_rtx (HImode);
43617 /* Imm value 0x4 corresponds to not-equal comparison. */
43618 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
43619 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
43621 else
43623 mask = gen_reg_rtx (mode);
43624 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
43625 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
43629 /* e0 = x0 * a */
43630 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
43631 /* e1 = e0 * x0 */
43632 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
43634 /* e2 = e1 - 3. */
43635 mthree = force_reg (mode, mthree);
43636 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
43638 mhalf = force_reg (mode, mhalf);
43639 if (recip)
43640 /* e3 = -.5 * x0 */
43641 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
43642 else
43643 /* e3 = -.5 * e0 */
43644 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
43645 /* ret = e2 * e3 */
43646 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
43649 #ifdef TARGET_SOLARIS
43650 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
43652 static void
43653 i386_solaris_elf_named_section (const char *name, unsigned int flags,
43654 tree decl)
43656 /* With Binutils 2.15, the "@unwind" marker must be specified on
43657 every occurrence of the ".eh_frame" section, not just the first
43658 one. */
43659 if (TARGET_64BIT
43660 && strcmp (name, ".eh_frame") == 0)
43662 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
43663 flags & SECTION_WRITE ? "aw" : "a");
43664 return;
43667 #ifndef USE_GAS
43668 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
43670 solaris_elf_asm_comdat_section (name, flags, decl);
43671 return;
43673 #endif
43675 default_elf_asm_named_section (name, flags, decl);
43677 #endif /* TARGET_SOLARIS */
43679 /* Return the mangling of TYPE if it is an extended fundamental type. */
43681 static const char *
43682 ix86_mangle_type (const_tree type)
43684 type = TYPE_MAIN_VARIANT (type);
43686 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
43687 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
43688 return NULL;
43690 switch (TYPE_MODE (type))
43692 case E_TFmode:
43693 /* __float128 is "g". */
43694 return "g";
43695 case E_XFmode:
43696 /* "long double" or __float80 is "e". */
43697 return "e";
43698 default:
43699 return NULL;
43703 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
43705 static tree
43706 ix86_stack_protect_guard (void)
43708 if (TARGET_SSP_TLS_GUARD)
43710 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
43711 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
43712 tree type = build_qualified_type (type_node, qual);
43713 tree t;
43715 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
43717 t = ix86_tls_stack_chk_guard_decl;
43719 if (t == NULL)
43721 rtx x;
43723 t = build_decl
43724 (UNKNOWN_LOCATION, VAR_DECL,
43725 get_identifier (ix86_stack_protector_guard_symbol_str),
43726 type);
43727 TREE_STATIC (t) = 1;
43728 TREE_PUBLIC (t) = 1;
43729 DECL_EXTERNAL (t) = 1;
43730 TREE_USED (t) = 1;
43731 TREE_THIS_VOLATILE (t) = 1;
43732 DECL_ARTIFICIAL (t) = 1;
43733 DECL_IGNORED_P (t) = 1;
43735 /* Do not share RTL as the declaration is visible outside of
43736 current function. */
43737 x = DECL_RTL (t);
43738 RTX_FLAG (x, used) = 1;
43740 ix86_tls_stack_chk_guard_decl = t;
43743 else
43745 tree asptrtype = build_pointer_type (type);
43747 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
43748 t = build2 (MEM_REF, asptrtype, t,
43749 build_int_cst (asptrtype, 0));
43752 return t;
43755 return default_stack_protect_guard ();
43758 /* For 32-bit code we can save PIC register setup by using
43759 __stack_chk_fail_local hidden function instead of calling
43760 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
43761 register, so it is better to call __stack_chk_fail directly. */
43763 static tree ATTRIBUTE_UNUSED
43764 ix86_stack_protect_fail (void)
43766 return TARGET_64BIT
43767 ? default_external_stack_protect_fail ()
43768 : default_hidden_stack_protect_fail ();
43771 /* Select a format to encode pointers in exception handling data. CODE
43772 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
43773 true if the symbol may be affected by dynamic relocations.
43775 ??? All x86 object file formats are capable of representing this.
43776 After all, the relocation needed is the same as for the call insn.
43777 Whether or not a particular assembler allows us to enter such, I
43778 guess we'll have to see. */
43779 int
43780 asm_preferred_eh_data_format (int code, int global)
43782 if (flag_pic)
43784 int type = DW_EH_PE_sdata8;
43785 if (!TARGET_64BIT
43786 || ix86_cmodel == CM_SMALL_PIC
43787 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
43788 type = DW_EH_PE_sdata4;
43789 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
43791 if (ix86_cmodel == CM_SMALL
43792 || (ix86_cmodel == CM_MEDIUM && code))
43793 return DW_EH_PE_udata4;
43794 return DW_EH_PE_absptr;
43797 /* Expand copysign from SIGN to the positive value ABS_VALUE
43798 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
43799 the sign-bit. */
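/* In other words RESULT = ABS_VALUE | (SIGN & signbit).  When MASK is
supplied it is the ~signbit mask produced by ix86_expand_sse_fabs, so
it is inverted below before being ANDed with SIGN.  */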
43800 static void
43801 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
43803 machine_mode mode = GET_MODE (sign);
43804 rtx sgn = gen_reg_rtx (mode);
43805 if (mask == NULL_RTX)
43807 machine_mode vmode;
43809 if (mode == SFmode)
43810 vmode = V4SFmode;
43811 else if (mode == DFmode)
43812 vmode = V2DFmode;
43813 else
43814 vmode = mode;
43816 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
43817 if (!VECTOR_MODE_P (mode))
43819 /* We need to generate a scalar mode mask in this case. */
43820 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
43821 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
43822 mask = gen_reg_rtx (mode);
43823 emit_insn (gen_rtx_SET (mask, tmp));
43826 else
43827 mask = gen_rtx_NOT (mode, mask);
43828 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
43829 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
43832 /* Expand fabs (OP0) and return a new rtx that holds the result. The
43833 mask for masking out the sign-bit is stored in *SMASK, if that is
43834 non-null. */
43835 static rtx
43836 ix86_expand_sse_fabs (rtx op0, rtx *smask)
43838 machine_mode vmode, mode = GET_MODE (op0);
43839 rtx xa, mask;
43841 xa = gen_reg_rtx (mode);
43842 if (mode == SFmode)
43843 vmode = V4SFmode;
43844 else if (mode == DFmode)
43845 vmode = V2DFmode;
43846 else
43847 vmode = mode;
43848 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
43849 if (!VECTOR_MODE_P (mode))
43851 /* We need to generate a scalar mode mask in this case. */
43852 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
43853 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
43854 mask = gen_reg_rtx (mode);
43855 emit_insn (gen_rtx_SET (mask, tmp));
43857 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
43859 if (smask)
43860 *smask = mask;
43862 return xa;
43865 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
43866 swapping the operands if SWAP_OPERANDS is true. The expanded
43867 code is a forward jump to a newly created label in case the
43868 comparison is true. The generated label rtx is returned. */
43869 static rtx_code_label *
43870 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
43871 bool swap_operands)
43873 bool unordered_compare = ix86_unordered_fp_compare (code);
43874 rtx_code_label *label;
43875 rtx tmp, reg;
43877 if (swap_operands)
43878 std::swap (op0, op1);
43880 label = gen_label_rtx ();
43881 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
43882 if (unordered_compare)
43883 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
43884 reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
43885 emit_insn (gen_rtx_SET (reg, tmp));
43886 tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
43887 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
43888 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
43889 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43890 JUMP_LABEL (tmp) = label;
43892 return label;
43895 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
43896 using comparison code CODE. Operands are swapped for the comparison if
43897 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
43898 static rtx
43899 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
43900 bool swap_operands)
43902 rtx (*insn)(rtx, rtx, rtx, rtx);
43903 machine_mode mode = GET_MODE (op0);
43904 rtx mask = gen_reg_rtx (mode);
43906 if (swap_operands)
43907 std::swap (op0, op1);
43909 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
43911 emit_insn (insn (mask, op0, op1,
43912 gen_rtx_fmt_ee (code, mode, op0, op1)));
43913 return mask;
43916 /* Generate and return a rtx of mode MODE for 2**n where n is the number
43917 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
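/* Adding this constant to a nonnegative value below 2**52 (2**23 for
SFmode) and subtracting it again rounds the value to an integer in the
current rounding mode, because at that magnitude the format has no
fractional bits left; the rounding expanders below rely on this.  */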
43918 static rtx
43919 ix86_gen_TWO52 (machine_mode mode)
43921 REAL_VALUE_TYPE TWO52r;
43922 rtx TWO52;
43924 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
43925 TWO52 = const_double_from_real_value (TWO52r, mode);
43926 TWO52 = force_reg (mode, TWO52);
43928 return TWO52;
43931 /* Expand SSE sequence for computing lround from OP1 storing
43932 into OP0. */
43933 void
43934 ix86_expand_lround (rtx op0, rtx op1)
43936 /* C code for the stuff we're doing below:
43937 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
43938 return (long)tmp;
43940 machine_mode mode = GET_MODE (op1);
43941 const struct real_format *fmt;
43942 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
43943 rtx adj;
43945 /* load nextafter (0.5, 0.0) */
43946 fmt = REAL_MODE_FORMAT (mode);
43947 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43948 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
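/* Using the largest value below 0.5 rather than 0.5 itself avoids
rounding up inputs that are just under half-way: for op1 slightly
below 0.5, op1 + 0.5 could round up to 1.0 and give lround == 1,
whereas op1 + pred_half still truncates to 0.  */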
43950 /* adj = copysign (0.5, op1) */
43951 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
43952 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
43954 /* adj = op1 + adj */
43955 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
43957 /* op0 = (imode)adj */
43958 expand_fix (op0, adj, 0);
43961 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
43962 into OPERAND0. */
43963 void
43964 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
43966 /* C code for the stuff we're doing below (for do_floor):
43967 xi = (long)op1;
43968 xi -= (double)xi > op1 ? 1 : 0;
43969 return xi;
43971 machine_mode fmode = GET_MODE (op1);
43972 machine_mode imode = GET_MODE (op0);
43973 rtx ireg, freg, tmp;
43974 rtx_code_label *label;
43976 /* reg = (long)op1 */
43977 ireg = gen_reg_rtx (imode);
43978 expand_fix (ireg, op1, 0);
43980 /* freg = (double)reg */
43981 freg = gen_reg_rtx (fmode);
43982 expand_float (freg, ireg, 0);
43984 /* ireg = (freg > op1) ? ireg - 1 : ireg */
43985 label = ix86_expand_sse_compare_and_jump (UNLE,
43986 freg, op1, !do_floor);
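/* For floor the jump is taken when !(freg > op1), so the fall-through
decrements ireg exactly when the truncation landed above op1 (a
negative input); for ceil the compare operands are swapped and PLUS is
used, incrementing ireg when the truncation landed below op1.  */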
43987 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
43988 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
43989 emit_move_insn (ireg, tmp);
43991 emit_label (label);
43992 LABEL_NUSES (label) = 1;
43994 emit_move_insn (op0, ireg);
43997 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
43998 result in OPERAND0. */
43999 void
44000 ix86_expand_rint (rtx operand0, rtx operand1)
44002 /* C code for the stuff we're doing below:
44003 xa = fabs (operand1);
44004 if (!isless (xa, 2**52))
44005 return operand1;
44006 xa = xa + 2**52 - 2**52;
44007 return copysign (xa, operand1);
44009 machine_mode mode = GET_MODE (operand0);
44010 rtx res, xa, TWO52, mask;
44011 rtx_code_label *label;
44013 res = gen_reg_rtx (mode);
44014 emit_move_insn (res, operand1);
44016 /* xa = abs (operand1) */
44017 xa = ix86_expand_sse_fabs (res, &mask);
44019 /* if (!isless (xa, TWO52)) goto label; */
44020 TWO52 = ix86_gen_TWO52 (mode);
44021 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44023 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44024 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44026 ix86_sse_copysign_to_positive (res, xa, res, mask);
44028 emit_label (label);
44029 LABEL_NUSES (label) = 1;
44031 emit_move_insn (operand0, res);
44034 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44035 into OPERAND0. */
44036 void
44037 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
44039 /* C code for the stuff we expand below.
44040 double xa = fabs (x), x2;
44041 if (!isless (xa, TWO52))
44042 return x;
44043 xa = xa + TWO52 - TWO52;
44044 x2 = copysign (xa, x);
44045 Compensate. Floor:
44046 if (x2 > x)
44047 x2 -= 1;
44048 Compensate. Ceil:
44049 if (x2 < x)
44050 x2 -= -1;
44051 return x2;
44053 machine_mode mode = GET_MODE (operand0);
44054 rtx xa, TWO52, tmp, one, res, mask;
44055 rtx_code_label *label;
44057 TWO52 = ix86_gen_TWO52 (mode);
44059 /* Temporary for holding the result, initialized to the input
44060 operand to ease control flow. */
44061 res = gen_reg_rtx (mode);
44062 emit_move_insn (res, operand1);
44064 /* xa = abs (operand1) */
44065 xa = ix86_expand_sse_fabs (res, &mask);
44067 /* if (!isless (xa, TWO52)) goto label; */
44068 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44070 /* xa = xa + TWO52 - TWO52; */
44071 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44072 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44074 /* xa = copysign (xa, operand1) */
44075 ix86_sse_copysign_to_positive (xa, xa, res, mask);
44077 /* generate 1.0 or -1.0 */
44078 one = force_reg (mode,
44079 const_double_from_real_value (do_floor
44080 ? dconst1 : dconstm1, mode));
44082 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44083 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44084 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44085 /* We always need to subtract here to preserve signed zero. */
44086 tmp = expand_simple_binop (mode, MINUS,
44087 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44088 emit_move_insn (res, tmp);
44090 emit_label (label);
44091 LABEL_NUSES (label) = 1;
44093 emit_move_insn (operand0, res);
44096 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44097 into OPERAND0. */
44098 void
44099 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
44101 /* C code for the stuff we expand below.
44102 double xa = fabs (x), x2;
44103 if (!isless (xa, TWO52))
44104 return x;
44105 x2 = (double)(long)x;
44106 Compensate. Floor:
44107 if (x2 > x)
44108 x2 -= 1;
44109 Compensate. Ceil:
44110 if (x2 < x)
44111 x2 += 1;
44112 if (HONOR_SIGNED_ZEROS (mode))
44113 return copysign (x2, x);
44114 return x2;
44116 machine_mode mode = GET_MODE (operand0);
44117 rtx xa, xi, TWO52, tmp, one, res, mask;
44118 rtx_code_label *label;
44120 TWO52 = ix86_gen_TWO52 (mode);
44122 /* Temporary for holding the result, initialized to the input
44123 operand to ease control flow. */
44124 res = gen_reg_rtx (mode);
44125 emit_move_insn (res, operand1);
44127 /* xa = abs (operand1) */
44128 xa = ix86_expand_sse_fabs (res, &mask);
44130 /* if (!isless (xa, TWO52)) goto label; */
44131 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44133 /* xa = (double)(long)x */
44134 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44135 expand_fix (xi, res, 0);
44136 expand_float (xa, xi, 0);
44138 /* generate 1.0 */
44139 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44141 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44142 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44143 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44144 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
44145 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44146 emit_move_insn (res, tmp);
44148 if (HONOR_SIGNED_ZEROS (mode))
44149 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44151 emit_label (label);
44152 LABEL_NUSES (label) = 1;
44154 emit_move_insn (operand0, res);
44157 /* Expand SSE sequence for computing round from OPERAND1 storing
44158 into OPERAND0. Sequence that works without relying on DImode truncation
44159 via cvttsd2siq that is only available on 64bit targets. */
44160 void
44161 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
44163 /* C code for the stuff we expand below.
44164 double xa = fabs (x), xa2, x2;
44165 if (!isless (xa, TWO52))
44166 return x;
44167 Using the absolute value and copying back sign makes
44168 -0.0 -> -0.0 correct.
44169 xa2 = xa + TWO52 - TWO52;
44170 Compensate.
44171 dxa = xa2 - xa;
44172 if (dxa <= -0.5)
44173 xa2 += 1;
44174 else if (dxa > 0.5)
44175 xa2 -= 1;
44176 x2 = copysign (xa2, x);
44177 return x2;
44179 machine_mode mode = GET_MODE (operand0);
44180 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
44181 rtx_code_label *label;
44183 TWO52 = ix86_gen_TWO52 (mode);
44185 /* Temporary for holding the result, initialized to the input
44186 operand to ease control flow. */
44187 res = gen_reg_rtx (mode);
44188 emit_move_insn (res, operand1);
44190 /* xa = abs (operand1) */
44191 xa = ix86_expand_sse_fabs (res, &mask);
44193 /* if (!isless (xa, TWO52)) goto label; */
44194 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44196 /* xa2 = xa + TWO52 - TWO52; */
44197 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44198 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
44200 /* dxa = xa2 - xa; */
44201 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
44203 /* generate 0.5, 1.0 and -0.5 */
44204 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
44205 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
44206 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
44207 0, OPTAB_DIRECT);
44209 /* Compensate. */
44210 tmp = gen_reg_rtx (mode);
44211 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
44212 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
44213 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44214 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44215 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
44216 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
44217 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44218 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44220 /* res = copysign (xa2, operand1) */
44221 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
44223 emit_label (label);
44224 LABEL_NUSES (label) = 1;
44226 emit_move_insn (operand0, res);
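/* Editorial note, not part of the original sources: why the sequence above
   implements round () without a 64-bit integer conversion.  For 0 <= xa < 2^52
   the addition in (xa + 0x1p52) - 0x1p52 lands in a binade whose ulp is 1.0,
   so it yields xa rounded to the nearest integer under the default
   round-to-nearest-even mode, and the subtraction is then exact.  round ()
   must round halfway cases away from zero instead, which is what the dxa
   compensation repairs:

     double xa2 = (xa + 0x1p52) - 0x1p52;    nearest-even integer
     double dxa = xa2 - xa;                  always in [-0.5, 0.5]
     if (dxa <= -0.5)
       xa2 += 1.0;    e.g. xa = 2.5 gives xa2 = 2.0, dxa = -0.5, fixed to 3.0
     else if (dxa > 0.5)
       xa2 -= 1.0;    defensive, cannot trigger for the values above

   copysign (xa2, x) finally restores the sign of the input.  */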
44229 /* Expand SSE sequence for computing trunc from OPERAND1 storing
44230 into OPERAND0. */
44231 void
44232 ix86_expand_trunc (rtx operand0, rtx operand1)
44234 /* C code for SSE variant we expand below.
44235 double xa = fabs (x), x2;
44236 if (!isless (xa, TWO52))
44237 return x;
44238 x2 = (double)(long)x;
44239 if (HONOR_SIGNED_ZEROS (mode))
44240 return copysign (x2, x);
44241 return x2;
44243 machine_mode mode = GET_MODE (operand0);
44244 rtx xa, xi, TWO52, res, mask;
44245 rtx_code_label *label;
44247 TWO52 = ix86_gen_TWO52 (mode);
44249 /* Temporary for holding the result, initialized to the input
44250 operand to ease control flow. */
44251 res = gen_reg_rtx (mode);
44252 emit_move_insn (res, operand1);
44254 /* xa = abs (operand1) */
44255 xa = ix86_expand_sse_fabs (res, &mask);
44257 /* if (!isless (xa, TWO52)) goto label; */
44258 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44260 /* x = (double)(long)x */
44261 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44262 expand_fix (xi, res, 0);
44263 expand_float (res, xi, 0);
44265 if (HONOR_SIGNED_ZEROS (mode))
44266 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44268 emit_label (label);
44269 LABEL_NUSES (label) = 1;
44271 emit_move_insn (operand0, res);
44274 /* Expand SSE sequence for computing trunc from OPERAND1 storing
44275 into OPERAND0. */
44276 void
44277 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
44279 machine_mode mode = GET_MODE (operand0);
44280 rtx xa, mask, TWO52, one, res, smask, tmp;
44281 rtx_code_label *label;
44283 /* C code for SSE variant we expand below.
44284 double xa = fabs (x), xa2, x2;
44285 if (!isless (xa, TWO52))
44286 return x;
44287 xa2 = xa + TWO52 - TWO52;
44288 Compensate:
44289 if (xa2 > xa)
44290 xa2 -= 1.0;
44291 x2 = copysign (xa2, x);
44292 return x2;
44295 TWO52 = ix86_gen_TWO52 (mode);
44297 /* Temporary for holding the result, initialized to the input
44298 operand to ease control flow. */
44299 res = gen_reg_rtx (mode);
44300 emit_move_insn (res, operand1);
44302 /* xa = abs (operand1) */
44303 xa = ix86_expand_sse_fabs (res, &smask);
44305 /* if (!isless (xa, TWO52)) goto label; */
44306 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44308 /* res = xa + TWO52 - TWO52; */
44309 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44310 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
44311 emit_move_insn (res, tmp);
44313 /* generate 1.0 */
44314 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44316 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
44317 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
44318 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
44319 tmp = expand_simple_binop (mode, MINUS,
44320 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
44321 emit_move_insn (res, tmp);
44323 /* res = copysign (res, operand1) */
44324 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
44326 emit_label (label);
44327 LABEL_NUSES (label) = 1;
44329 emit_move_insn (operand0, res);
44332 /* Expand SSE sequence for computing round from OPERAND1 storing
44333 into OPERAND0. */
44334 void
44335 ix86_expand_round (rtx operand0, rtx operand1)
44337 /* C code for the stuff we're doing below:
44338 double xa = fabs (x);
44339 if (!isless (xa, TWO52))
44340 return x;
44341 xa = (double)(long)(xa + nextafter (0.5, 0.0));
44342 return copysign (xa, x);
44344 machine_mode mode = GET_MODE (operand0);
44345 rtx res, TWO52, xa, xi, half, mask;
44346 rtx_code_label *label;
44347 const struct real_format *fmt;
44348 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44350 /* Temporary for holding the result, initialized to the input
44351 operand to ease control flow. */
44352 res = gen_reg_rtx (mode);
44353 emit_move_insn (res, operand1);
44355 TWO52 = ix86_gen_TWO52 (mode);
44356 xa = ix86_expand_sse_fabs (res, &mask);
44357 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44359 /* load nextafter (0.5, 0.0) */
44360 fmt = REAL_MODE_FORMAT (mode);
44361 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44362 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
44364 /* xa = xa + 0.5 */
44365 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
44366 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
44368 /* xa = (double)(int64_t)xa */
44369 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44370 expand_fix (xi, xa, 0);
44371 expand_float (xa, xi, 0);
44373 /* res = copysign (xa, operand1) */
44374 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
44376 emit_label (label);
44377 LABEL_NUSES (label) = 1;
44379 emit_move_insn (operand0, res);
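/* Editorial note, not part of the original sources: pred_half above is
   nextafter (0.5, 0.0), i.e. 0.5 - 2^-54 for DFmode (fmt->p == 53), rather
   than 0.5 itself.  Adding exactly 0.5 before truncating would misround the
   largest double below 0.5:

     double x = 0x1.fffffffffffffp-2;          0.5 - 2^-54, round (x) == 0.0
     (double)(long)(x + 0.5)                   == 1.0, the sum ties up to 1.0
     (double)(long)(x + 0x1.fffffffffffffp-2)  == 0.0, the sum stays below 1.0
   */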
44382 /* Expand SSE sequence for computing round
44383 from OP1 storing into OP0 using sse4 round insn. */
44384 void
44385 ix86_expand_round_sse4 (rtx op0, rtx op1)
44387 machine_mode mode = GET_MODE (op0);
44388 rtx e1, e2, res, half;
44389 const struct real_format *fmt;
44390 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44391 rtx (*gen_copysign) (rtx, rtx, rtx);
44392 rtx (*gen_round) (rtx, rtx, rtx);
44394 switch (mode)
44396 case E_SFmode:
44397 gen_copysign = gen_copysignsf3;
44398 gen_round = gen_sse4_1_roundsf2;
44399 break;
44400 case E_DFmode:
44401 gen_copysign = gen_copysigndf3;
44402 gen_round = gen_sse4_1_rounddf2;
44403 break;
44404 default:
44405 gcc_unreachable ();
44408 /* round (a) = trunc (a + copysign (0.5, a)) */
44410 /* load nextafter (0.5, 0.0) */
44411 fmt = REAL_MODE_FORMAT (mode);
44412 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44413 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
44414 half = const_double_from_real_value (pred_half, mode);
44416 /* e1 = copysign (0.5, op1) */
44417 e1 = gen_reg_rtx (mode);
44418 emit_insn (gen_copysign (e1, half, op1));
44420 /* e2 = op1 + e1 */
44421 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
44423 /* res = trunc (e2) */
44424 res = gen_reg_rtx (mode);
44425 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
44427 emit_move_insn (op0, res);
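/* Editorial note, not part of the original sources: the identity used above,
   round (a) == trunc (a + copysign (pred_half, a)), lets a single ROUND_TRUNC
   roundsd/roundss handle both signs, e.g. round (2.5) == 3.0 and
   round (-2.5) == -3.0 after the copysign-adjusted addition, while
   round (2.4) == trunc (2.8999...) == 2.0.  */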
44431 /* Table of valid machine attributes. */
44432 static const struct attribute_spec ix86_attribute_table[] =
44434 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
44435 affects_type_identity } */
44436 /* Stdcall attribute says callee is responsible for popping arguments
44437 if they are not variable. */
44438 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44439 true },
44440 /* Fastcall attribute says callee is responsible for popping arguments
44441 if they are not variable. */
44442 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44443 true },
44444 /* Thiscall attribute says callee is responsible for popping arguments
44445 if they are not variable. */
44446 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44447 true },
44448 /* Cdecl attribute says the callee is a normal C declaration. */
44449 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44450 true },
44451 /* Regparm attribute specifies how many integer arguments are to be
44452 passed in registers. */
44453 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
44454 true },
44455 /* Sseregparm attribute says we are using x86_64 calling conventions
44456 for FP arguments. */
44457 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44458 true },
44459 /* The transactional memory builtins are implicitly regparm or fastcall
44460 depending on the ABI. Override the generic do-nothing attribute that
44461 these builtins were declared with. */
44462 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
44463 true },
44464 /* force_align_arg_pointer says this function realigns the stack at entry. */
44465 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
44466 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
44467 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
44468 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
44469 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
44470 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
44471 false },
44472 #endif
44473 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
44474 false },
44475 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
44476 false },
44477 #ifdef SUBTARGET_ATTRIBUTE_TABLE
44478 SUBTARGET_ATTRIBUTE_TABLE,
44479 #endif
44480 /* ms_abi and sysv_abi calling convention function attributes. */
44481 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
44482 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
44483 { "ms_abi va_list", 0, 0, false, false, false, NULL, false },
44484 { "sysv_abi va_list", 0, 0, false, false, false, NULL, false },
44485 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
44486 false },
44487 { "callee_pop_aggregate_return", 1, 1, false, true, true,
44488 ix86_handle_callee_pop_aggregate_return, true },
44489 { "interrupt", 0, 0, false, true, true,
44490 ix86_handle_interrupt_attribute, false },
44491 { "no_caller_saved_registers", 0, 0, false, true, true,
44492 ix86_handle_no_caller_saved_registers_attribute, false },
44493 { "naked", 0, 0, true, false, false,
44494 ix86_handle_fndecl_attribute, false },
44496 /* End element. */
44497 { NULL, 0, 0, false, false, false, NULL, false }
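/* Editorial example, not part of the original sources: typical user-level
   spellings of a few of the attributes registered above.

     int  __attribute__ ((regparm (3))) add3 (int a, int b, int c);
     int  __attribute__ ((fastcall))    cb (int a, int b);
     void __attribute__ ((ms_abi))      win_entry (void);
     void __attribute__ ((naked))       asm_stub (void);

   The function names are made up for illustration.  */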
44500 /* Implement targetm.vectorize.builtin_vectorization_cost. */
44501 static int
44502 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
44503 tree vectype, int)
44505 bool fp = false;
44506 machine_mode mode = TImode;
44507 int index;
44508 if (vectype != NULL)
44510 fp = FLOAT_TYPE_P (vectype);
44511 mode = TYPE_MODE (vectype);
44514 switch (type_of_cost)
44516 case scalar_stmt:
44517 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
44519 case scalar_load:
44520 /* load/store costs are relative to register move which is 2. Recompute
44521 it to COSTS_N_INSNS so everything has the same base. */
44522 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
44523 : ix86_cost->int_load [2]) / 2;
44525 case scalar_store:
44526 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
44527 : ix86_cost->int_store [2]) / 2;
44529 case vector_stmt:
44530 return ix86_vec_cost (mode,
44531 fp ? ix86_cost->addss : ix86_cost->sse_op,
44532 true);
44534 case vector_load:
44535 index = sse_store_index (mode);
44536 gcc_assert (index >= 0);
44537 return ix86_vec_cost (mode,
44538 COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
44539 true);
44541 case vector_store:
44542 index = sse_store_index (mode);
44543 return ix86_vec_cost (mode,
44544 COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
44545 true);
44547 case vec_to_scalar:
44548 case scalar_to_vec:
44549 return ix86_vec_cost (mode, ix86_cost->sse_op, true);
44551 /* We should have separate costs for unaligned loads and gather/scatter.
44552 Do that incrementally. */
44553 case unaligned_load:
44554 index = sse_store_index (mode);
44555 return ix86_vec_cost (mode,
44556 COSTS_N_INSNS
44557 (ix86_cost->sse_unaligned_load[index]) / 2,
44558 true);
44560 case unaligned_store:
44561 index = sse_store_index (mode);
44562 return ix86_vec_cost (mode,
44563 COSTS_N_INSNS
44564 (ix86_cost->sse_unaligned_store[index]) / 2,
44565 true);
44567 case vector_gather_load:
44568 return ix86_vec_cost (mode,
44569 COSTS_N_INSNS
44570 (ix86_cost->gather_static
44571 + ix86_cost->gather_per_elt
44572 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
44573 true);
44575 case vector_scatter_store:
44576 return ix86_vec_cost (mode,
44577 COSTS_N_INSNS
44578 (ix86_cost->scatter_static
44579 + ix86_cost->scatter_per_elt
44580 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
44581 true);
44583 case cond_branch_taken:
44584 return ix86_cost->cond_taken_branch_cost;
44586 case cond_branch_not_taken:
44587 return ix86_cost->cond_not_taken_branch_cost;
44589 case vec_perm:
44590 case vec_promote_demote:
44591 return ix86_vec_cost (mode,
44592 ix86_cost->sse_op, true);
44594 case vec_construct:
44595 return ix86_vec_cost (mode, ix86_cost->sse_op, false);
44597 default:
44598 gcc_unreachable ();
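/* Editorial note, not part of the original sources: the division by 2 above
   rescales the cost tables, whose load/store entries are relative to a
   register-to-register move of cost 2, onto the COSTS_N_INSNS scale, where
   COSTS_N_INSNS (1) == 4.  For a hypothetical table entry of 4 (twice a
   register move):

     COSTS_N_INSNS (4) / 2 == 8 == COSTS_N_INSNS (2)

   i.e. such a load is reported as costing two instructions.  */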
44602 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
44603 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
44604 insn every time. */
44606 static GTY(()) rtx_insn *vselect_insn;
44608 /* Initialize vselect_insn. */
44610 static void
44611 init_vselect_insn (void)
44613 unsigned i;
44614 rtx x;
44616 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
44617 for (i = 0; i < MAX_VECT_LEN; ++i)
44618 XVECEXP (x, 0, i) = const0_rtx;
44619 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
44620 const0_rtx), x);
44621 x = gen_rtx_SET (const0_rtx, x);
44622 start_sequence ();
44623 vselect_insn = emit_insn (x);
44624 end_sequence ();
44627 /* Construct (set target (vec_select op0 (parallel perm))) and
44628 return true if that's a valid instruction in the active ISA. */
44630 static bool
44631 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
44632 unsigned nelt, bool testing_p)
44634 unsigned int i;
44635 rtx x, save_vconcat;
44636 int icode;
44638 if (vselect_insn == NULL_RTX)
44639 init_vselect_insn ();
44641 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
44642 PUT_NUM_ELEM (XVEC (x, 0), nelt);
44643 for (i = 0; i < nelt; ++i)
44644 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
44645 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44646 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
44647 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
44648 SET_DEST (PATTERN (vselect_insn)) = target;
44649 icode = recog_memoized (vselect_insn);
44651 if (icode >= 0 && !testing_p)
44652 emit_insn (copy_rtx (PATTERN (vselect_insn)));
44654 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
44655 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
44656 INSN_CODE (vselect_insn) = -1;
44658 return icode >= 0;
44661 /* Similar, but generate a vec_concat from op0 and op1 as well. */
44663 static bool
44664 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
44665 const unsigned char *perm, unsigned nelt,
44666 bool testing_p)
44668 machine_mode v2mode;
44669 rtx x;
44670 bool ok;
44672 if (vselect_insn == NULL_RTX)
44673 init_vselect_insn ();
44675 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
44676 return false;
44677 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44678 PUT_MODE (x, v2mode);
44679 XEXP (x, 0) = op0;
44680 XEXP (x, 1) = op1;
44681 ok = expand_vselect (target, x, perm, nelt, testing_p);
44682 XEXP (x, 0) = const0_rtx;
44683 XEXP (x, 1) = const0_rtx;
44684 return ok;
44687 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44688 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
44690 static bool
44691 expand_vec_perm_blend (struct expand_vec_perm_d *d)
44693 machine_mode mmode, vmode = d->vmode;
44694 unsigned i, mask, nelt = d->nelt;
44695 rtx target, op0, op1, maskop, x;
44696 rtx rperm[32], vperm;
44698 if (d->one_operand_p)
44699 return false;
44700 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
44701 && (TARGET_AVX512BW
44702 || GET_MODE_UNIT_SIZE (vmode) >= 4))
44704 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
44706 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
44708 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
44710 else
44711 return false;
44713 /* This is a blend, not a permute. Elements must stay in their
44714 respective lanes. */
44715 for (i = 0; i < nelt; ++i)
44717 unsigned e = d->perm[i];
44718 if (!(e == i || e == i + nelt))
44719 return false;
44722 if (d->testing_p)
44723 return true;
44725 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
44726 decision should be extracted elsewhere, so that we only try that
44727 sequence once all budget==3 options have been tried. */
44728 target = d->target;
44729 op0 = d->op0;
44730 op1 = d->op1;
44731 mask = 0;
44733 switch (vmode)
44735 case E_V8DFmode:
44736 case E_V16SFmode:
44737 case E_V4DFmode:
44738 case E_V8SFmode:
44739 case E_V2DFmode:
44740 case E_V4SFmode:
44741 case E_V8HImode:
44742 case E_V8SImode:
44743 case E_V32HImode:
44744 case E_V64QImode:
44745 case E_V16SImode:
44746 case E_V8DImode:
44747 for (i = 0; i < nelt; ++i)
44748 mask |= (d->perm[i] >= nelt) << i;
44749 break;
44751 case E_V2DImode:
44752 for (i = 0; i < 2; ++i)
44753 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
44754 vmode = V8HImode;
44755 goto do_subreg;
44757 case E_V4SImode:
44758 for (i = 0; i < 4; ++i)
44759 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
44760 vmode = V8HImode;
44761 goto do_subreg;
44763 case E_V16QImode:
44764 /* See if bytes move in pairs so we can use pblendw with
44765 an immediate argument, rather than pblendvb with a vector
44766 argument. */
44767 for (i = 0; i < 16; i += 2)
44768 if (d->perm[i] + 1 != d->perm[i + 1])
44770 use_pblendvb:
44771 for (i = 0; i < nelt; ++i)
44772 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
44774 finish_pblendvb:
44775 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
44776 vperm = force_reg (vmode, vperm);
44778 if (GET_MODE_SIZE (vmode) == 16)
44779 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
44780 else
44781 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
44782 if (target != d->target)
44783 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44784 return true;
44787 for (i = 0; i < 8; ++i)
44788 mask |= (d->perm[i * 2] >= 16) << i;
44789 vmode = V8HImode;
44790 /* FALLTHRU */
44792 do_subreg:
44793 target = gen_reg_rtx (vmode);
44794 op0 = gen_lowpart (vmode, op0);
44795 op1 = gen_lowpart (vmode, op1);
44796 break;
44798 case E_V32QImode:
44799 /* See if bytes move in pairs. If not, vpblendvb must be used. */
44800 for (i = 0; i < 32; i += 2)
44801 if (d->perm[i] + 1 != d->perm[i + 1])
44802 goto use_pblendvb;
44803 /* See if bytes move in quadruplets. If yes, vpblendd
44804 with immediate can be used. */
44805 for (i = 0; i < 32; i += 4)
44806 if (d->perm[i] + 2 != d->perm[i + 2])
44807 break;
44808 if (i < 32)
44810 /* See if bytes move the same in both lanes. If yes,
44811 vpblendw with immediate can be used. */
44812 for (i = 0; i < 16; i += 2)
44813 if (d->perm[i] + 16 != d->perm[i + 16])
44814 goto use_pblendvb;
44816 /* Use vpblendw. */
44817 for (i = 0; i < 16; ++i)
44818 mask |= (d->perm[i * 2] >= 32) << i;
44819 vmode = V16HImode;
44820 goto do_subreg;
44823 /* Use vpblendd. */
44824 for (i = 0; i < 8; ++i)
44825 mask |= (d->perm[i * 4] >= 32) << i;
44826 vmode = V8SImode;
44827 goto do_subreg;
44829 case E_V16HImode:
44830 /* See if words move in pairs. If yes, vpblendd can be used. */
44831 for (i = 0; i < 16; i += 2)
44832 if (d->perm[i] + 1 != d->perm[i + 1])
44833 break;
44834 if (i < 16)
44836 /* See if words move the same in both lanes. If not,
44837 vpblendvb must be used. */
44838 for (i = 0; i < 8; i++)
44839 if (d->perm[i] + 8 != d->perm[i + 8])
44841 /* Use vpblendvb. */
44842 for (i = 0; i < 32; ++i)
44843 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
44845 vmode = V32QImode;
44846 nelt = 32;
44847 target = gen_reg_rtx (vmode);
44848 op0 = gen_lowpart (vmode, op0);
44849 op1 = gen_lowpart (vmode, op1);
44850 goto finish_pblendvb;
44853 /* Use vpblendw. */
44854 for (i = 0; i < 16; ++i)
44855 mask |= (d->perm[i] >= 16) << i;
44856 break;
44859 /* Use vpblendd. */
44860 for (i = 0; i < 8; ++i)
44861 mask |= (d->perm[i * 2] >= 16) << i;
44862 vmode = V8SImode;
44863 goto do_subreg;
44865 case E_V4DImode:
44866 /* Use vpblendd. */
44867 for (i = 0; i < 4; ++i)
44868 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
44869 vmode = V8SImode;
44870 goto do_subreg;
44872 default:
44873 gcc_unreachable ();
44876 switch (vmode)
44878 case E_V8DFmode:
44879 case E_V8DImode:
44880 mmode = QImode;
44881 break;
44882 case E_V16SFmode:
44883 case E_V16SImode:
44884 mmode = HImode;
44885 break;
44886 case E_V32HImode:
44887 mmode = SImode;
44888 break;
44889 case E_V64QImode:
44890 mmode = DImode;
44891 break;
44892 default:
44893 mmode = VOIDmode;
44896 if (mmode != VOIDmode)
44897 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
44898 else
44899 maskop = GEN_INT (mask);
44901 /* This matches five different patterns with the different modes. */
44902 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
44903 x = gen_rtx_SET (target, x);
44904 emit_insn (x);
44905 if (target != d->target)
44906 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44908 return true;
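/* Editorial example, not part of the original sources: for a V8SFmode blend
   with d->perm == { 0, 9, 10, 3, 4, 13, 6, 15 }, elements 1, 2, 5 and 7 are
   taken from op1 (perm[i] >= nelt), so the loop above builds

     mask == (1 << 1) | (1 << 2) | (1 << 5) | (1 << 7) == 0xa6

   which ends up as the immediate operand of a vblendps instruction.  */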
44911 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44912 in terms of the variable form of vpermilps.
44914 Note that we will have already failed the immediate input vpermilps,
44915 which requires that the high and low part shuffle be identical; the
44916 variable form doesn't require that. */
44918 static bool
44919 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
44921 rtx rperm[8], vperm;
44922 unsigned i;
44924 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
44925 return false;
44927 /* We can only permute within the 128-bit lane. */
44928 for (i = 0; i < 8; ++i)
44930 unsigned e = d->perm[i];
44931 if (i < 4 ? e >= 4 : e < 4)
44932 return false;
44935 if (d->testing_p)
44936 return true;
44938 for (i = 0; i < 8; ++i)
44940 unsigned e = d->perm[i];
44942 /* Within each 128-bit lane, the elements of op0 are numbered
44943 from 0 and the elements of op1 are numbered from 4. */
44944 if (e >= 8 + 4)
44945 e -= 8;
44946 else if (e >= 4)
44947 e -= 4;
44949 rperm[i] = GEN_INT (e);
44952 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
44953 vperm = force_reg (V8SImode, vperm);
44954 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
44956 return true;
44959 /* Return true if permutation D can instead be performed as a VMODE
44960 permutation. */
44962 static bool
44963 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
44965 unsigned int i, j, chunk;
44967 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
44968 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
44969 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
44970 return false;
44972 if (GET_MODE_NUNITS (vmode) >= d->nelt)
44973 return true;
44975 chunk = d->nelt / GET_MODE_NUNITS (vmode);
44976 for (i = 0; i < d->nelt; i += chunk)
44977 if (d->perm[i] & (chunk - 1))
44978 return false;
44979 else
44980 for (j = 1; j < chunk; ++j)
44981 if (d->perm[i] + j != d->perm[i + j])
44982 return false;
44984 return true;
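/* Editorial example, not part of the original sources: the V16QImode
   permutation { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 }
   moves bytes in aligned pairs (chunk == 2, each perm[i] is even and
   perm[i + 1] == perm[i] + 1), so it is also valid as the V8HImode
   permutation { 1, 0, 3, 2, 5, 4, 7, 6 }.  */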
44987 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44988 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
44990 static bool
44991 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
44993 unsigned i, nelt, eltsz, mask;
44994 unsigned char perm[64];
44995 machine_mode vmode = V16QImode;
44996 rtx rperm[64], vperm, target, op0, op1;
44998 nelt = d->nelt;
45000 if (!d->one_operand_p)
45002 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
45004 if (TARGET_AVX2
45005 && valid_perm_using_mode_p (V2TImode, d))
45007 if (d->testing_p)
45008 return true;
45010 /* Use vperm2i128 insn. The pattern uses
45011 V4DImode instead of V2TImode. */
45012 target = d->target;
45013 if (d->vmode != V4DImode)
45014 target = gen_reg_rtx (V4DImode);
45015 op0 = gen_lowpart (V4DImode, d->op0);
45016 op1 = gen_lowpart (V4DImode, d->op1);
45017 rperm[0]
45018 = GEN_INT ((d->perm[0] / (nelt / 2))
45019 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
45020 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
45021 if (target != d->target)
45022 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45023 return true;
45025 return false;
45028 else
45030 if (GET_MODE_SIZE (d->vmode) == 16)
45032 if (!TARGET_SSSE3)
45033 return false;
45035 else if (GET_MODE_SIZE (d->vmode) == 32)
45037 if (!TARGET_AVX2)
45038 return false;
45040 /* V4DImode should already be handled through
45041 expand_vselect by the vpermq instruction. */
45042 gcc_assert (d->vmode != V4DImode);
45044 vmode = V32QImode;
45045 if (d->vmode == V8SImode
45046 || d->vmode == V16HImode
45047 || d->vmode == V32QImode)
45049 /* First see if vpermq can be used for
45050 V8SImode/V16HImode/V32QImode. */
45051 if (valid_perm_using_mode_p (V4DImode, d))
45053 for (i = 0; i < 4; i++)
45054 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
45055 if (d->testing_p)
45056 return true;
45057 target = gen_reg_rtx (V4DImode);
45058 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
45059 perm, 4, false))
45061 emit_move_insn (d->target,
45062 gen_lowpart (d->vmode, target));
45063 return true;
45065 return false;
45068 /* Next see if vpermd can be used. */
45069 if (valid_perm_using_mode_p (V8SImode, d))
45070 vmode = V8SImode;
45072 /* Or if vpermps can be used. */
45073 else if (d->vmode == V8SFmode)
45074 vmode = V8SImode;
45076 if (vmode == V32QImode)
45078 /* vpshufb only works within a lane; it is not
45079 possible to shuffle bytes between the lanes. */
45080 for (i = 0; i < nelt; ++i)
45081 if ((d->perm[i] ^ i) & (nelt / 2))
45082 return false;
45085 else if (GET_MODE_SIZE (d->vmode) == 64)
45087 if (!TARGET_AVX512BW)
45088 return false;
45090 /* If vpermq didn't work, vpshufb won't work either. */
45091 if (d->vmode == V8DFmode || d->vmode == V8DImode)
45092 return false;
45094 vmode = V64QImode;
45095 if (d->vmode == V16SImode
45096 || d->vmode == V32HImode
45097 || d->vmode == V64QImode)
45099 /* First see if vpermq can be used for
45100 V16SImode/V32HImode/V64QImode. */
45101 if (valid_perm_using_mode_p (V8DImode, d))
45103 for (i = 0; i < 8; i++)
45104 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
45105 if (d->testing_p)
45106 return true;
45107 target = gen_reg_rtx (V8DImode);
45108 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
45109 perm, 8, false))
45111 emit_move_insn (d->target,
45112 gen_lowpart (d->vmode, target));
45113 return true;
45115 return false;
45118 /* Next see if vpermd can be used. */
45119 if (valid_perm_using_mode_p (V16SImode, d))
45120 vmode = V16SImode;
45122 /* Or if vpermps can be used. */
45123 else if (d->vmode == V16SFmode)
45124 vmode = V16SImode;
45125 if (vmode == V64QImode)
45127 /* vpshufb only works within a lane; it is not
45128 possible to shuffle bytes between the lanes. */
45129 for (i = 0; i < nelt; ++i)
45130 if ((d->perm[i] ^ i) & (nelt / 4))
45131 return false;
45134 else
45135 return false;
45138 if (d->testing_p)
45139 return true;
45141 if (vmode == V8SImode)
45142 for (i = 0; i < 8; ++i)
45143 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
45144 else if (vmode == V16SImode)
45145 for (i = 0; i < 16; ++i)
45146 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
45147 else
45149 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
45150 if (!d->one_operand_p)
45151 mask = 2 * nelt - 1;
45152 else if (vmode == V16QImode)
45153 mask = nelt - 1;
45154 else if (vmode == V64QImode)
45155 mask = nelt / 4 - 1;
45156 else
45157 mask = nelt / 2 - 1;
45159 for (i = 0; i < nelt; ++i)
45161 unsigned j, e = d->perm[i] & mask;
45162 for (j = 0; j < eltsz; ++j)
45163 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
45167 vperm = gen_rtx_CONST_VECTOR (vmode,
45168 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
45169 vperm = force_reg (vmode, vperm);
45171 target = d->target;
45172 if (d->vmode != vmode)
45173 target = gen_reg_rtx (vmode);
45174 op0 = gen_lowpart (vmode, d->op0);
45175 if (d->one_operand_p)
45177 if (vmode == V16QImode)
45178 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
45179 else if (vmode == V32QImode)
45180 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
45181 else if (vmode == V64QImode)
45182 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
45183 else if (vmode == V8SFmode)
45184 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
45185 else if (vmode == V8SImode)
45186 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
45187 else if (vmode == V16SFmode)
45188 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
45189 else if (vmode == V16SImode)
45190 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
45191 else
45192 gcc_unreachable ();
45194 else
45196 op1 = gen_lowpart (vmode, d->op1);
45197 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
45199 if (target != d->target)
45200 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45202 return true;
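/* Editorial example, not part of the original sources, showing only how the
   pshufb control bytes are built (a one-operand V4SImode shuffle would in
   practice already have been handled by pshufd in expand_vec_perm_1): for
   d->perm == { 2, 0, 3, 1 } with eltsz == 4 and mask == nelt - 1, the loop
   above expands each dword index into four consecutive byte indexes:

     { 8, 9, 10, 11,  0, 1, 2, 3,  12, 13, 14, 15,  4, 5, 6, 7 }  */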
45205 /* For V*[QHS]Imode permutations, check whether the same permutation
45206 can be performed in a 2x, 4x or 8x wider inner mode. */
45208 static bool
45209 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
45210 struct expand_vec_perm_d *nd)
45212 int i;
45213 machine_mode mode = VOIDmode;
45215 switch (d->vmode)
45217 case E_V16QImode: mode = V8HImode; break;
45218 case E_V32QImode: mode = V16HImode; break;
45219 case E_V64QImode: mode = V32HImode; break;
45220 case E_V8HImode: mode = V4SImode; break;
45221 case E_V16HImode: mode = V8SImode; break;
45222 case E_V32HImode: mode = V16SImode; break;
45223 case E_V4SImode: mode = V2DImode; break;
45224 case E_V8SImode: mode = V4DImode; break;
45225 case E_V16SImode: mode = V8DImode; break;
45226 default: return false;
45228 for (i = 0; i < d->nelt; i += 2)
45229 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
45230 return false;
45231 nd->vmode = mode;
45232 nd->nelt = d->nelt / 2;
45233 for (i = 0; i < nd->nelt; i++)
45234 nd->perm[i] = d->perm[2 * i] / 2;
45235 if (GET_MODE_INNER (mode) != DImode)
45236 canonicalize_vector_int_perm (nd, nd);
45237 if (nd != d)
45239 nd->one_operand_p = d->one_operand_p;
45240 nd->testing_p = d->testing_p;
45241 if (d->op0 == d->op1)
45242 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
45243 else
45245 nd->op0 = gen_lowpart (nd->vmode, d->op0);
45246 nd->op1 = gen_lowpart (nd->vmode, d->op1);
45248 if (d->testing_p)
45249 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
45250 else
45251 nd->target = gen_reg_rtx (nd->vmode);
45253 return true;
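/* Editorial example, not part of the original sources: the V8HImode
   permutation { 2, 3, 0, 1, 6, 7, 4, 5 } moves elements in aligned pairs, so
   it is rewritten as the V4SImode permutation { 1, 0, 3, 2 }.  The recursive
   call then stops, because { 1, 0, 3, 2 } no longer moves elements in
   pairs.  */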
45256 /* Try to expand one-operand permutation with constant mask. */
45258 static bool
45259 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
45261 machine_mode mode = GET_MODE (d->op0);
45262 machine_mode maskmode = mode;
45263 rtx (*gen) (rtx, rtx, rtx) = NULL;
45264 rtx target, op0, mask;
45265 rtx vec[64];
45267 if (!rtx_equal_p (d->op0, d->op1))
45268 return false;
45270 if (!TARGET_AVX512F)
45271 return false;
45273 switch (mode)
45275 case E_V16SImode:
45276 gen = gen_avx512f_permvarv16si;
45277 break;
45278 case E_V16SFmode:
45279 gen = gen_avx512f_permvarv16sf;
45280 maskmode = V16SImode;
45281 break;
45282 case E_V8DImode:
45283 gen = gen_avx512f_permvarv8di;
45284 break;
45285 case E_V8DFmode:
45286 gen = gen_avx512f_permvarv8df;
45287 maskmode = V8DImode;
45288 break;
45289 default:
45290 return false;
45293 target = d->target;
45294 op0 = d->op0;
45295 for (int i = 0; i < d->nelt; ++i)
45296 vec[i] = GEN_INT (d->perm[i]);
45297 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
45298 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
45299 return true;
45302 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
45303 in a single instruction. */
45305 static bool
45306 expand_vec_perm_1 (struct expand_vec_perm_d *d)
45308 unsigned i, nelt = d->nelt;
45309 struct expand_vec_perm_d nd;
45311 /* Check plain VEC_SELECT first, because AVX has instructions that could
45312 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
45313 input where SEL+CONCAT may not. */
45314 if (d->one_operand_p)
45316 int mask = nelt - 1;
45317 bool identity_perm = true;
45318 bool broadcast_perm = true;
45320 for (i = 0; i < nelt; i++)
45322 nd.perm[i] = d->perm[i] & mask;
45323 if (nd.perm[i] != i)
45324 identity_perm = false;
45325 if (nd.perm[i])
45326 broadcast_perm = false;
45329 if (identity_perm)
45331 if (!d->testing_p)
45332 emit_move_insn (d->target, d->op0);
45333 return true;
45335 else if (broadcast_perm && TARGET_AVX2)
45337 /* Use vpbroadcast{b,w,d}. */
45338 rtx (*gen) (rtx, rtx) = NULL;
45339 switch (d->vmode)
45341 case E_V64QImode:
45342 if (TARGET_AVX512BW)
45343 gen = gen_avx512bw_vec_dupv64qi_1;
45344 break;
45345 case E_V32QImode:
45346 gen = gen_avx2_pbroadcastv32qi_1;
45347 break;
45348 case E_V32HImode:
45349 if (TARGET_AVX512BW)
45350 gen = gen_avx512bw_vec_dupv32hi_1;
45351 break;
45352 case E_V16HImode:
45353 gen = gen_avx2_pbroadcastv16hi_1;
45354 break;
45355 case E_V16SImode:
45356 if (TARGET_AVX512F)
45357 gen = gen_avx512f_vec_dupv16si_1;
45358 break;
45359 case E_V8SImode:
45360 gen = gen_avx2_pbroadcastv8si_1;
45361 break;
45362 case E_V16QImode:
45363 gen = gen_avx2_pbroadcastv16qi;
45364 break;
45365 case E_V8HImode:
45366 gen = gen_avx2_pbroadcastv8hi;
45367 break;
45368 case E_V16SFmode:
45369 if (TARGET_AVX512F)
45370 gen = gen_avx512f_vec_dupv16sf_1;
45371 break;
45372 case E_V8SFmode:
45373 gen = gen_avx2_vec_dupv8sf_1;
45374 break;
45375 case E_V8DFmode:
45376 if (TARGET_AVX512F)
45377 gen = gen_avx512f_vec_dupv8df_1;
45378 break;
45379 case E_V8DImode:
45380 if (TARGET_AVX512F)
45381 gen = gen_avx512f_vec_dupv8di_1;
45382 break;
45383 /* For other modes prefer other shuffles this function creates. */
45384 default: break;
45386 if (gen != NULL)
45388 if (!d->testing_p)
45389 emit_insn (gen (d->target, d->op0));
45390 return true;
45394 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
45395 return true;
45397 /* There are plenty of patterns in sse.md that are written for
45398 SEL+CONCAT and are not replicated for a single op. Perhaps
45399 that should be changed, to avoid the nastiness here. */
45401 /* Recognize interleave style patterns, which means incrementing
45402 every other permutation operand. */
45403 for (i = 0; i < nelt; i += 2)
45405 nd.perm[i] = d->perm[i] & mask;
45406 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
45408 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
45409 d->testing_p))
45410 return true;
45412 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
45413 if (nelt >= 4)
45415 for (i = 0; i < nelt; i += 4)
45417 nd.perm[i + 0] = d->perm[i + 0] & mask;
45418 nd.perm[i + 1] = d->perm[i + 1] & mask;
45419 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
45420 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
45423 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
45424 d->testing_p))
45425 return true;
45429 /* Finally, try the fully general two operand permute. */
45430 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
45431 d->testing_p))
45432 return true;
45434 /* Recognize interleave style patterns with reversed operands. */
45435 if (!d->one_operand_p)
45437 for (i = 0; i < nelt; ++i)
45439 unsigned e = d->perm[i];
45440 if (e >= nelt)
45441 e -= nelt;
45442 else
45443 e += nelt;
45444 nd.perm[i] = e;
45447 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
45448 d->testing_p))
45449 return true;
45452 /* Try the SSE4.1 blend variable merge instructions. */
45453 if (expand_vec_perm_blend (d))
45454 return true;
45456 /* Try one of the AVX vpermil variable permutations. */
45457 if (expand_vec_perm_vpermil (d))
45458 return true;
45460 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
45461 vpshufb, vpermd, vpermps or vpermq variable permutation. */
45462 if (expand_vec_perm_pshufb (d))
45463 return true;
45465 /* Try the AVX2 vpalignr instruction. */
45466 if (expand_vec_perm_palignr (d, true))
45467 return true;
45469 /* Try the AVX512F vperm{s,d} instructions. */
45470 if (ix86_expand_vec_one_operand_perm_avx512 (d))
45471 return true;
45473 /* Try the AVX512F vpermt2/vpermi2 instructions. */
45474 if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
45475 return true;
45477 /* See if we can get the same permutation in different vector integer
45478 mode. */
45479 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
45481 if (!d->testing_p)
45482 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
45483 return true;
45485 return false;
45488 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45489 in terms of a pair of pshuflw + pshufhw instructions. */
45491 static bool
45492 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
45494 unsigned char perm2[MAX_VECT_LEN];
45495 unsigned i;
45496 bool ok;
45498 if (d->vmode != V8HImode || !d->one_operand_p)
45499 return false;
45501 /* The two permutations only operate in 64-bit lanes. */
45502 for (i = 0; i < 4; ++i)
45503 if (d->perm[i] >= 4)
45504 return false;
45505 for (i = 4; i < 8; ++i)
45506 if (d->perm[i] < 4)
45507 return false;
45509 if (d->testing_p)
45510 return true;
45512 /* Emit the pshuflw. */
45513 memcpy (perm2, d->perm, 4);
45514 for (i = 4; i < 8; ++i)
45515 perm2[i] = i;
45516 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
45517 gcc_assert (ok);
45519 /* Emit the pshufhw. */
45520 memcpy (perm2 + 4, d->perm + 4, 4);
45521 for (i = 0; i < 4; ++i)
45522 perm2[i] = i;
45523 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
45524 gcc_assert (ok);
45526 return true;
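/* Editorial example, not part of the original sources: the V8HImode
   permutation { 3, 1, 2, 0, 5, 4, 7, 6 } keeps the low four words in the low
   quadword and the high four in the high quadword, so it is emitted as

     pshuflw with { 3, 1, 2, 0, 4, 5, 6, 7 }   low quadword shuffled
     pshufhw with { 0, 1, 2, 3, 5, 4, 7, 6 }   high quadword shuffled  */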
45529 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45530 the permutation using the SSSE3 palignr instruction. This succeeds
45531 when all of the elements in PERM fit within one vector and we merely
45532 need to shift them down so that a single vector permutation has a
45533 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
45534 the vpalignr instruction itself can perform the requested permutation. */
45536 static bool
45537 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
45539 unsigned i, nelt = d->nelt;
45540 unsigned min, max, minswap, maxswap;
45541 bool in_order, ok, swap = false;
45542 rtx shift, target;
45543 struct expand_vec_perm_d dcopy;
45545 /* Even with AVX, palignr only operates on 128-bit vectors;
45546 with AVX2, palignr operates within each of the two 128-bit lanes. */
45547 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
45548 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
45549 return false;
45551 min = 2 * nelt;
45552 max = 0;
45553 minswap = 2 * nelt;
45554 maxswap = 0;
45555 for (i = 0; i < nelt; ++i)
45557 unsigned e = d->perm[i];
45558 unsigned eswap = d->perm[i] ^ nelt;
45559 if (GET_MODE_SIZE (d->vmode) == 32)
45561 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
45562 eswap = e ^ (nelt / 2);
45564 if (e < min)
45565 min = e;
45566 if (e > max)
45567 max = e;
45568 if (eswap < minswap)
45569 minswap = eswap;
45570 if (eswap > maxswap)
45571 maxswap = eswap;
45573 if (min == 0
45574 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
45576 if (d->one_operand_p
45577 || minswap == 0
45578 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
45579 ? nelt / 2 : nelt))
45580 return false;
45581 swap = true;
45582 min = minswap;
45583 max = maxswap;
45586 /* Given that we have SSSE3, we know we'll be able to implement the
45587 single operand permutation after the palignr with pshufb for
45588 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
45589 first. */
45590 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
45591 return true;
45593 dcopy = *d;
45594 if (swap)
45596 dcopy.op0 = d->op1;
45597 dcopy.op1 = d->op0;
45598 for (i = 0; i < nelt; ++i)
45599 dcopy.perm[i] ^= nelt;
45602 in_order = true;
45603 for (i = 0; i < nelt; ++i)
45605 unsigned e = dcopy.perm[i];
45606 if (GET_MODE_SIZE (d->vmode) == 32
45607 && e >= nelt
45608 && (e & (nelt / 2 - 1)) < min)
45609 e = e - min - (nelt / 2);
45610 else
45611 e = e - min;
45612 if (e != i)
45613 in_order = false;
45614 dcopy.perm[i] = e;
45616 dcopy.one_operand_p = true;
45618 if (single_insn_only_p && !in_order)
45619 return false;
45621 /* For AVX2, test whether we can permute the result in one instruction. */
45622 if (d->testing_p)
45624 if (in_order)
45625 return true;
45626 dcopy.op1 = dcopy.op0;
45627 return expand_vec_perm_1 (&dcopy);
45630 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
45631 if (GET_MODE_SIZE (d->vmode) == 16)
45633 target = gen_reg_rtx (TImode);
45634 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
45635 gen_lowpart (TImode, dcopy.op0), shift));
45637 else
45639 target = gen_reg_rtx (V2TImode);
45640 emit_insn (gen_avx2_palignrv2ti (target,
45641 gen_lowpart (V2TImode, dcopy.op1),
45642 gen_lowpart (V2TImode, dcopy.op0),
45643 shift));
45646 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
45648 /* Test for the degenerate case where the alignment by itself
45649 produces the desired permutation. */
45650 if (in_order)
45652 emit_move_insn (d->target, dcopy.op0);
45653 return true;
45656 ok = expand_vec_perm_1 (&dcopy);
45657 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
45659 return ok;
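/* Editorial example, not part of the original sources: for a two-operand
   V16QImode permutation selecting bytes { 3, 4, ..., 18 }, min == 3 and
   max == 18, so a single palignr by 3 bytes of the op1:op0 concatenation
   already yields the bytes in order (the in_order degenerate case above).
   For a scrambled selection from the same 16-byte window the palignr is
   followed by one pshufb, as noted above.  */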
45662 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
45663 the permutation using the SSE4_1 pblendv instruction. Potentially
45664 reduces a permutation from two pshufbs and an or to one pshufb and a pblendv. */
45666 static bool
45667 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
45669 unsigned i, which, nelt = d->nelt;
45670 struct expand_vec_perm_d dcopy, dcopy1;
45671 machine_mode vmode = d->vmode;
45672 bool ok;
45674 /* Use the same checks as in expand_vec_perm_blend. */
45675 if (d->one_operand_p)
45676 return false;
45677 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
45679 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
45681 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45683 else
45684 return false;
45686 /* Figure out which permutation elements do not stay in their
45687 respective lanes. */
45688 for (i = 0, which = 0; i < nelt; ++i)
45690 unsigned e = d->perm[i];
45691 if (e != i)
45692 which |= (e < nelt ? 1 : 2);
45694 /* We can pblend the part where elements do not stay in their
45695 respective lanes only when these elements all come from the same
45696 operand of the permutation.
45697 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
45698 lanes, but both are >= 8 (i.e. from the second operand).
45699 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
45700 respective lanes, and 8 >= 8 while 2 is not. */
45701 if (which != 1 && which != 2)
45702 return false;
45703 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
45704 return true;
45706 /* First we apply a one-operand permutation to the part where
45707 elements do not stay in their respective lanes. */
45708 dcopy = *d;
45709 if (which == 2)
45710 dcopy.op0 = dcopy.op1 = d->op1;
45711 else
45712 dcopy.op0 = dcopy.op1 = d->op0;
45713 if (!d->testing_p)
45714 dcopy.target = gen_reg_rtx (vmode);
45715 dcopy.one_operand_p = true;
45717 for (i = 0; i < nelt; ++i)
45718 dcopy.perm[i] = d->perm[i] & (nelt - 1);
45720 ok = expand_vec_perm_1 (&dcopy);
45721 if (GET_MODE_SIZE (vmode) != 16 && !ok)
45722 return false;
45723 else
45724 gcc_assert (ok);
45725 if (d->testing_p)
45726 return true;
45728 /* Next we put permuted elements into their positions. */
45729 dcopy1 = *d;
45730 if (which == 2)
45731 dcopy1.op1 = dcopy.target;
45732 else
45733 dcopy1.op0 = dcopy.target;
45735 for (i = 0; i < nelt; ++i)
45736 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
45738 ok = expand_vec_perm_blend (&dcopy1);
45739 gcc_assert (ok);
45741 return true;
45744 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
45746 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45747 a two vector permutation into a single vector permutation by using
45748 an interleave operation to merge the vectors. */
45750 static bool
45751 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
45753 struct expand_vec_perm_d dremap, dfinal;
45754 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
45755 unsigned HOST_WIDE_INT contents;
45756 unsigned char remap[2 * MAX_VECT_LEN];
45757 rtx_insn *seq;
45758 bool ok, same_halves = false;
45760 if (GET_MODE_SIZE (d->vmode) == 16)
45762 if (d->one_operand_p)
45763 return false;
45765 else if (GET_MODE_SIZE (d->vmode) == 32)
45767 if (!TARGET_AVX)
45768 return false;
45769 /* For 32-byte modes allow even d->one_operand_p.
45770 The lack of cross-lane shuffling in some instructions
45771 might prevent a single insn shuffle. */
45772 dfinal = *d;
45773 dfinal.testing_p = true;
45774 /* If expand_vec_perm_interleave3 can expand this into
45775 a 3 insn sequence, give up and let it be expanded as
45776 a 3 insn sequence. While that is one insn longer,
45777 it doesn't need a memory operand, and in the common
45778 case where the interleave low and interleave high permutations
45779 with the same operands are adjacent, it needs only 4 insns
45780 for both after CSE. */
45781 if (expand_vec_perm_interleave3 (&dfinal))
45782 return false;
45784 else
45785 return false;
45787 /* Examine from whence the elements come. */
45788 contents = 0;
45789 for (i = 0; i < nelt; ++i)
45790 contents |= HOST_WIDE_INT_1U << d->perm[i];
45792 memset (remap, 0xff, sizeof (remap));
45793 dremap = *d;
45795 if (GET_MODE_SIZE (d->vmode) == 16)
45797 unsigned HOST_WIDE_INT h1, h2, h3, h4;
45799 /* Split the two input vectors into 4 halves. */
45800 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
45801 h2 = h1 << nelt2;
45802 h3 = h2 << nelt2;
45803 h4 = h3 << nelt2;
45805 /* If the elements come only from the low halves, use interleave low;
45806 similarly for interleave high. If the elements are from mis-matched
45807 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
45808 if ((contents & (h1 | h3)) == contents)
45810 /* punpckl* */
45811 for (i = 0; i < nelt2; ++i)
45813 remap[i] = i * 2;
45814 remap[i + nelt] = i * 2 + 1;
45815 dremap.perm[i * 2] = i;
45816 dremap.perm[i * 2 + 1] = i + nelt;
45818 if (!TARGET_SSE2 && d->vmode == V4SImode)
45819 dremap.vmode = V4SFmode;
45821 else if ((contents & (h2 | h4)) == contents)
45823 /* punpckh* */
45824 for (i = 0; i < nelt2; ++i)
45826 remap[i + nelt2] = i * 2;
45827 remap[i + nelt + nelt2] = i * 2 + 1;
45828 dremap.perm[i * 2] = i + nelt2;
45829 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
45831 if (!TARGET_SSE2 && d->vmode == V4SImode)
45832 dremap.vmode = V4SFmode;
45834 else if ((contents & (h1 | h4)) == contents)
45836 /* shufps */
45837 for (i = 0; i < nelt2; ++i)
45839 remap[i] = i;
45840 remap[i + nelt + nelt2] = i + nelt2;
45841 dremap.perm[i] = i;
45842 dremap.perm[i + nelt2] = i + nelt + nelt2;
45844 if (nelt != 4)
45846 /* shufpd */
45847 dremap.vmode = V2DImode;
45848 dremap.nelt = 2;
45849 dremap.perm[0] = 0;
45850 dremap.perm[1] = 3;
45853 else if ((contents & (h2 | h3)) == contents)
45855 /* shufps */
45856 for (i = 0; i < nelt2; ++i)
45858 remap[i + nelt2] = i;
45859 remap[i + nelt] = i + nelt2;
45860 dremap.perm[i] = i + nelt2;
45861 dremap.perm[i + nelt2] = i + nelt;
45863 if (nelt != 4)
45865 /* shufpd */
45866 dremap.vmode = V2DImode;
45867 dremap.nelt = 2;
45868 dremap.perm[0] = 1;
45869 dremap.perm[1] = 2;
45872 else
45873 return false;
45875 else
45877 unsigned int nelt4 = nelt / 4, nzcnt = 0;
45878 unsigned HOST_WIDE_INT q[8];
45879 unsigned int nonzero_halves[4];
45881 /* Split the two input vectors into 8 quarters. */
45882 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
45883 for (i = 1; i < 8; ++i)
45884 q[i] = q[0] << (nelt4 * i);
45885 for (i = 0; i < 4; ++i)
45886 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
45888 nonzero_halves[nzcnt] = i;
45889 ++nzcnt;
45892 if (nzcnt == 1)
45894 gcc_assert (d->one_operand_p);
45895 nonzero_halves[1] = nonzero_halves[0];
45896 same_halves = true;
45898 else if (d->one_operand_p)
45900 gcc_assert (nonzero_halves[0] == 0);
45901 gcc_assert (nonzero_halves[1] == 1);
45904 if (nzcnt <= 2)
45906 if (d->perm[0] / nelt2 == nonzero_halves[1])
45908 /* Attempt to increase the likelihood that dfinal
45909 shuffle will be intra-lane. */
45910 std::swap (nonzero_halves[0], nonzero_halves[1]);
45913 /* vperm2f128 or vperm2i128. */
45914 for (i = 0; i < nelt2; ++i)
45916 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
45917 remap[i + nonzero_halves[0] * nelt2] = i;
45918 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
45919 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
45922 if (d->vmode != V8SFmode
45923 && d->vmode != V4DFmode
45924 && d->vmode != V8SImode)
45926 dremap.vmode = V8SImode;
45927 dremap.nelt = 8;
45928 for (i = 0; i < 4; ++i)
45930 dremap.perm[i] = i + nonzero_halves[0] * 4;
45931 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
45935 else if (d->one_operand_p)
45936 return false;
45937 else if (TARGET_AVX2
45938 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
45940 /* vpunpckl* */
45941 for (i = 0; i < nelt4; ++i)
45943 remap[i] = i * 2;
45944 remap[i + nelt] = i * 2 + 1;
45945 remap[i + nelt2] = i * 2 + nelt2;
45946 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
45947 dremap.perm[i * 2] = i;
45948 dremap.perm[i * 2 + 1] = i + nelt;
45949 dremap.perm[i * 2 + nelt2] = i + nelt2;
45950 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
45953 else if (TARGET_AVX2
45954 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
45956 /* vpunpckh* */
45957 for (i = 0; i < nelt4; ++i)
45959 remap[i + nelt4] = i * 2;
45960 remap[i + nelt + nelt4] = i * 2 + 1;
45961 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
45962 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
45963 dremap.perm[i * 2] = i + nelt4;
45964 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
45965 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
45966 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
45969 else
45970 return false;
45973 /* Use the remapping array set up above to move the elements from their
45974 swizzled locations into their final destinations. */
45975 dfinal = *d;
45976 for (i = 0; i < nelt; ++i)
45978 unsigned e = remap[d->perm[i]];
45979 gcc_assert (e < nelt);
45980 /* If same_halves is true, both halves of the remapped vector are the
45981 same. Avoid cross-lane accesses if possible. */
45982 if (same_halves && i >= nelt2)
45984 gcc_assert (e < nelt2);
45985 dfinal.perm[i] = e + nelt2;
45987 else
45988 dfinal.perm[i] = e;
45990 if (!d->testing_p)
45992 dremap.target = gen_reg_rtx (dremap.vmode);
45993 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
45995 dfinal.op1 = dfinal.op0;
45996 dfinal.one_operand_p = true;
45998 /* Test if the final remap can be done with a single insn. For V4SFmode or
45999 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
46000 start_sequence ();
46001 ok = expand_vec_perm_1 (&dfinal);
46002 seq = get_insns ();
46003 end_sequence ();
46005 if (!ok)
46006 return false;
46008 if (d->testing_p)
46009 return true;
46011 if (dremap.vmode != dfinal.vmode)
46013 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
46014 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
46017 ok = expand_vec_perm_1 (&dremap);
46018 gcc_assert (ok);
46020 emit_insn (seq);
46021 return true;
46024 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46025 a single vector cross-lane permutation into vpermq followed
46026 by any of the single insn permutations. */
46028 static bool
46029 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
46031 struct expand_vec_perm_d dremap, dfinal;
46032 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
46033 unsigned contents[2];
46034 bool ok;
46036 if (!(TARGET_AVX2
46037 && (d->vmode == V32QImode || d->vmode == V16HImode)
46038 && d->one_operand_p))
46039 return false;
46041 contents[0] = 0;
46042 contents[1] = 0;
46043 for (i = 0; i < nelt2; ++i)
46045 contents[0] |= 1u << (d->perm[i] / nelt4);
46046 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
46049 for (i = 0; i < 2; ++i)
46051 unsigned int cnt = 0;
46052 for (j = 0; j < 4; ++j)
46053 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
46054 return false;
46057 if (d->testing_p)
46058 return true;
46060 dremap = *d;
46061 dremap.vmode = V4DImode;
46062 dremap.nelt = 4;
46063 dremap.target = gen_reg_rtx (V4DImode);
46064 dremap.op0 = gen_lowpart (V4DImode, d->op0);
46065 dremap.op1 = dremap.op0;
46066 dremap.one_operand_p = true;
46067 for (i = 0; i < 2; ++i)
46069 unsigned int cnt = 0;
46070 for (j = 0; j < 4; ++j)
46071 if ((contents[i] & (1u << j)) != 0)
46072 dremap.perm[2 * i + cnt++] = j;
46073 for (; cnt < 2; ++cnt)
46074 dremap.perm[2 * i + cnt] = 0;
46077 dfinal = *d;
46078 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46079 dfinal.op1 = dfinal.op0;
46080 dfinal.one_operand_p = true;
46081 for (i = 0, j = 0; i < nelt; ++i)
46083 if (i == nelt2)
46084 j = 2;
46085 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
46086 if ((d->perm[i] / nelt4) == dremap.perm[j])
46088 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
46089 dfinal.perm[i] |= nelt4;
46090 else
46091 gcc_unreachable ();
46094 ok = expand_vec_perm_1 (&dremap);
46095 gcc_assert (ok);
46097 ok = expand_vec_perm_1 (&dfinal);
46098 gcc_assert (ok);
46100 return true;
46103 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
46104 a vector permutation using two instructions: vperm2f128 (or
46105 vperm2i128) followed by any single in-lane permutation. */
46107 static bool
46108 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
46110 struct expand_vec_perm_d dfirst, dsecond;
46111 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
46112 bool ok;
46114 if (!TARGET_AVX
46115 || GET_MODE_SIZE (d->vmode) != 32
46116 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
46117 return false;
46119 dsecond = *d;
46120 dsecond.one_operand_p = false;
46121 dsecond.testing_p = true;
46123 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
46124 immediate. For perm < 16 the second permutation uses
46125 d->op0 as first operand, for perm >= 16 it uses d->op1
46126 as first operand. The second operand is the result of
46127 vperm2[fi]128. */
46128 for (perm = 0; perm < 32; perm++)
46130 /* Ignore permutations which do not move anything cross-lane. */
46131 if (perm < 16)
46133 /* The second shuffle for e.g. V4DFmode has
46134 0123 and ABCD operands.
46135 Ignore AB23, as 23 is already in the second lane
46136 of the first operand. */
46137 if ((perm & 0xc) == (1 << 2)) continue;
46138 /* And 01CD, as 01 is in the first lane of the first
46139 operand. */
46140 if ((perm & 3) == 0) continue;
46141 /* And 4567, as then the vperm2[fi]128 doesn't change
46142 anything on the original 4567 second operand. */
46143 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
46145 else
46147 /* The second shuffle for e.g. V4DFmode has
46148 4567 and ABCD operands.
46149 Ignore AB67, as 67 is already in the second lane
46150 of the first operand. */
46151 if ((perm & 0xc) == (3 << 2)) continue;
46152 /* And 45CD, as 45 is in the first lane of the first
46153 operand. */
46154 if ((perm & 3) == 2) continue;
46155 /* And 0123, as then the vperm2[fi]128 doesn't change
46156 anything on the original 0123 first operand. */
46157 if ((perm & 0xf) == (1 << 2)) continue;
46160 for (i = 0; i < nelt; i++)
46162 j = d->perm[i] / nelt2;
46163 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
46164 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
46165 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
46166 dsecond.perm[i] = d->perm[i] & (nelt - 1);
46167 else
46168 break;
46171 if (i == nelt)
46173 start_sequence ();
46174 ok = expand_vec_perm_1 (&dsecond);
46175 end_sequence ();
46177 else
46178 ok = false;
46180 if (ok)
46182 if (d->testing_p)
46183 return true;
46185 /* Found a usable second shuffle. dfirst will be
46186 vperm2f128 on d->op0 and d->op1. */
46187 dsecond.testing_p = false;
46188 dfirst = *d;
46189 dfirst.target = gen_reg_rtx (d->vmode);
46190 for (i = 0; i < nelt; i++)
46191 dfirst.perm[i] = (i & (nelt2 - 1))
46192 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
46194 canonicalize_perm (&dfirst);
46195 ok = expand_vec_perm_1 (&dfirst);
46196 gcc_assert (ok);
46198 /* And dsecond is some single insn shuffle, taking
46199 d->op0 and result of vperm2f128 (if perm < 16) or
46200 d->op1 and result of vperm2f128 (otherwise). */
46201 if (perm >= 16)
46202 dsecond.op0 = dsecond.op1;
46203 dsecond.op1 = dfirst.target;
46205 ok = expand_vec_perm_1 (&dsecond);
46206 gcc_assert (ok);
46208 return true;
46211 /* For one operand, the only useful vperm2f128 permutation is 0x01
46212 aka lanes swap. */
46213 if (d->one_operand_p)
46214 return false;
46217 return false;
46220 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46221 a two vector permutation using 2 intra-lane interleave insns
46222 and cross-lane shuffle for 32-byte vectors. */
46224 static bool
46225 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
46227 unsigned i, nelt;
46228 rtx (*gen) (rtx, rtx, rtx);
46230 if (d->one_operand_p)
46231 return false;
46232 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
46234 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
46236 else
46237 return false;
46239 nelt = d->nelt;
46240 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
46241 return false;
46242 for (i = 0; i < nelt; i += 2)
46243 if (d->perm[i] != d->perm[0] + i / 2
46244 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
46245 return false;
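/* E.g. for V8SImode with d->perm[0] == 0 this matches
   { 0 8 1 9 2 10 3 11 }, the low interleave of the two operands;
   with d->perm[0] == nelt / 2 it matches the high interleave.  */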
46247 if (d->testing_p)
46248 return true;
46250 switch (d->vmode)
46252 case E_V32QImode:
46253 if (d->perm[0])
46254 gen = gen_vec_interleave_highv32qi;
46255 else
46256 gen = gen_vec_interleave_lowv32qi;
46257 break;
46258 case E_V16HImode:
46259 if (d->perm[0])
46260 gen = gen_vec_interleave_highv16hi;
46261 else
46262 gen = gen_vec_interleave_lowv16hi;
46263 break;
46264 case E_V8SImode:
46265 if (d->perm[0])
46266 gen = gen_vec_interleave_highv8si;
46267 else
46268 gen = gen_vec_interleave_lowv8si;
46269 break;
46270 case E_V4DImode:
46271 if (d->perm[0])
46272 gen = gen_vec_interleave_highv4di;
46273 else
46274 gen = gen_vec_interleave_lowv4di;
46275 break;
46276 case E_V8SFmode:
46277 if (d->perm[0])
46278 gen = gen_vec_interleave_highv8sf;
46279 else
46280 gen = gen_vec_interleave_lowv8sf;
46281 break;
46282 case E_V4DFmode:
46283 if (d->perm[0])
46284 gen = gen_vec_interleave_highv4df;
46285 else
46286 gen = gen_vec_interleave_lowv4df;
46287 break;
46288 default:
46289 gcc_unreachable ();
46292 emit_insn (gen (d->target, d->op0, d->op1));
46293 return true;
46296 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
46297 a single vector permutation using a single intra-lane vector
46298 permutation, vperm2f128 swapping the lanes and vblend* insn blending
46299 the non-swapped and swapped vectors together. */
46301 static bool
46302 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
46304 struct expand_vec_perm_d dfirst, dsecond;
46305 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
46306 rtx_insn *seq;
46307 bool ok;
46308 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
46310 if (!TARGET_AVX
46311 || TARGET_AVX2
46312 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
46313 || !d->one_operand_p)
46314 return false;
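/* The idea: DFIRST is an in-lane permutation of OP0 that puts every
   requested element either into its final position or into the same
   position of the other 128-bit lane; DSECOND is DFIRST with its two
   lanes swapped.  MSK records which result elements must come from the
   lane-swapped copy and becomes the vblendps/vblendpd immediate.  */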
46316 dfirst = *d;
46317 for (i = 0; i < nelt; i++)
46318 dfirst.perm[i] = 0xff;
46319 for (i = 0, msk = 0; i < nelt; i++)
46321 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
46322 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
46323 return false;
46324 dfirst.perm[j] = d->perm[i];
46325 if (j != i)
46326 msk |= (1 << i);
46328 for (i = 0; i < nelt; i++)
46329 if (dfirst.perm[i] == 0xff)
46330 dfirst.perm[i] = i;
46332 if (!d->testing_p)
46333 dfirst.target = gen_reg_rtx (dfirst.vmode);
46335 start_sequence ();
46336 ok = expand_vec_perm_1 (&dfirst);
46337 seq = get_insns ();
46338 end_sequence ();
46340 if (!ok)
46341 return false;
46343 if (d->testing_p)
46344 return true;
46346 emit_insn (seq);
46348 dsecond = *d;
46349 dsecond.op0 = dfirst.target;
46350 dsecond.op1 = dfirst.target;
46351 dsecond.one_operand_p = true;
46352 dsecond.target = gen_reg_rtx (dsecond.vmode);
46353 for (i = 0; i < nelt; i++)
46354 dsecond.perm[i] = i ^ nelt2;
46356 ok = expand_vec_perm_1 (&dsecond);
46357 gcc_assert (ok);
46359 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
46360 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
46361 return true;
46364 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
46365 permutation using two vperm2f128, followed by a vshufpd insn blending
46366 the two vectors together. */
46368 static bool
46369 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
46371 struct expand_vec_perm_d dfirst, dsecond, dthird;
46372 bool ok;
46374 if (!TARGET_AVX || (d->vmode != V4DFmode))
46375 return false;
46377 if (d->testing_p)
46378 return true;
46380 dfirst = *d;
46381 dsecond = *d;
46382 dthird = *d;
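/* DFIRST selects the source lane containing d->perm[0] into its low
   half and the lane containing d->perm[2] into its high half; DSECOND
   does the same for d->perm[1] and d->perm[3].  DTHIRD is then a
   vshufpd that picks the required even or odd double from each lane
   of the two intermediate results.  */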
46384 dfirst.perm[0] = (d->perm[0] & ~1);
46385 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
46386 dfirst.perm[2] = (d->perm[2] & ~1);
46387 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
46388 dsecond.perm[0] = (d->perm[1] & ~1);
46389 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
46390 dsecond.perm[2] = (d->perm[3] & ~1);
46391 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
46392 dthird.perm[0] = (d->perm[0] % 2);
46393 dthird.perm[1] = (d->perm[1] % 2) + 4;
46394 dthird.perm[2] = (d->perm[2] % 2) + 2;
46395 dthird.perm[3] = (d->perm[3] % 2) + 6;
46397 dfirst.target = gen_reg_rtx (dfirst.vmode);
46398 dsecond.target = gen_reg_rtx (dsecond.vmode);
46399 dthird.op0 = dfirst.target;
46400 dthird.op1 = dsecond.target;
46401 dthird.one_operand_p = false;
46403 canonicalize_perm (&dfirst);
46404 canonicalize_perm (&dsecond);
46406 ok = expand_vec_perm_1 (&dfirst)
46407 && expand_vec_perm_1 (&dsecond)
46408 && expand_vec_perm_1 (&dthird);
46410 gcc_assert (ok);
46412 return true;
46415 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
46416 permutation with two pshufb insns and an ior. We should have already
46417 failed all two instruction sequences. */
46419 static bool
46420 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
46422 rtx rperm[2][16], vperm, l, h, op, m128;
46423 unsigned int i, nelt, eltsz;
46425 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
46426 return false;
46427 gcc_assert (!d->one_operand_p);
46429 if (d->testing_p)
46430 return true;
46432 nelt = d->nelt;
46433 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46435 /* Generate two permutation masks. If the required element is within
46436 the given vector it is shuffled into the proper lane. If the required
46437 element is in the other vector, force a zero into the lane by setting
46438 bit 7 in the permutation mask. */
46439 m128 = GEN_INT (-128);
46440 for (i = 0; i < nelt; ++i)
46442 unsigned j, e = d->perm[i];
46443 unsigned which = (e >= nelt);
46444 if (e >= nelt)
46445 e -= nelt;
46447 for (j = 0; j < eltsz; ++j)
46449 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
46450 rperm[1-which][i*eltsz + j] = m128;
46454 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
46455 vperm = force_reg (V16QImode, vperm);
46457 l = gen_reg_rtx (V16QImode);
46458 op = gen_lowpart (V16QImode, d->op0);
46459 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
46461 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
46462 vperm = force_reg (V16QImode, vperm);
46464 h = gen_reg_rtx (V16QImode);
46465 op = gen_lowpart (V16QImode, d->op1);
46466 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
46468 op = d->target;
46469 if (d->vmode != V16QImode)
46470 op = gen_reg_rtx (V16QImode);
46471 emit_insn (gen_iorv16qi3 (op, l, h));
46472 if (op != d->target)
46473 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46475 return true;
46478 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
46479 with two vpshufb insns, vpermq and vpor.  We should have already failed
46480 all two or three instruction sequences. */
46482 static bool
46483 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
46485 rtx rperm[2][32], vperm, l, h, hp, op, m128;
46486 unsigned int i, nelt, eltsz;
46488 if (!TARGET_AVX2
46489 || !d->one_operand_p
46490 || (d->vmode != V32QImode && d->vmode != V16HImode))
46491 return false;
46493 if (d->testing_p)
46494 return true;
46496 nelt = d->nelt;
46497 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46499 /* Generate two permutation masks.  If the required element is within
46500 the same lane, it is shuffled in.  If the required element is from the
46501 other lane, force a zero by setting bit 7 in the permutation mask.
46502 The other mask has non-negative entries for elements requested from
46503 the other lane, but those entries are also moved to the other lane,
46504 so that the result of vpshufb can have its two V2TImode halves
46505 swapped. */
46506 m128 = GEN_INT (-128);
46507 for (i = 0; i < nelt; ++i)
46509 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46510 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
46512 for (j = 0; j < eltsz; ++j)
46514 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
46515 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
46519 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46520 vperm = force_reg (V32QImode, vperm);
46522 h = gen_reg_rtx (V32QImode);
46523 op = gen_lowpart (V32QImode, d->op0);
46524 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46526 /* Swap the 128-bit lanes of h into hp. */
46527 hp = gen_reg_rtx (V4DImode);
46528 op = gen_lowpart (V4DImode, h);
46529 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
46530 const1_rtx));
46532 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46533 vperm = force_reg (V32QImode, vperm);
46535 l = gen_reg_rtx (V32QImode);
46536 op = gen_lowpart (V32QImode, d->op0);
46537 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46539 op = d->target;
46540 if (d->vmode != V32QImode)
46541 op = gen_reg_rtx (V32QImode);
46542 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
46543 if (op != d->target)
46544 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46546 return true;
46549 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
46550 and extract-odd permutations of two V32QImode or V16HImode operands
46551 with two vpshufb insns, vpor and vpermq.  We should have already
46552 failed all two or three instruction sequences. */
46554 static bool
46555 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
46557 rtx rperm[2][32], vperm, l, h, ior, op, m128;
46558 unsigned int i, nelt, eltsz;
46560 if (!TARGET_AVX2
46561 || d->one_operand_p
46562 || (d->vmode != V32QImode && d->vmode != V16HImode))
46563 return false;
46565 for (i = 0; i < d->nelt; ++i)
46566 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
46567 return false;
46569 if (d->testing_p)
46570 return true;
46572 nelt = d->nelt;
46573 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46575 /* Generate two permutation masks. In the first permutation mask
46576 the first quarter will contain indexes for the first half
46577 of the op0, the second quarter will contain bit 7 set, third quarter
46578 will contain indexes for the second half of the op0 and the
46579 last quarter bit 7 set. In the second permutation mask
46580 the first quarter will contain bit 7 set, the second quarter
46581 indexes for the first half of the op1, the third quarter bit 7 set
46582 and last quarter indexes for the second half of the op1.
46583 I.e. the first mask e.g. for V32QImode extract even will be:
46584 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
46585 (all values masked with 0xf except for -128) and second mask
46586 for extract even will be
46587 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
46588 m128 = GEN_INT (-128);
46589 for (i = 0; i < nelt; ++i)
46591 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46592 unsigned which = d->perm[i] >= nelt;
46593 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
46595 for (j = 0; j < eltsz; ++j)
46597 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
46598 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
46602 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46603 vperm = force_reg (V32QImode, vperm);
46605 l = gen_reg_rtx (V32QImode);
46606 op = gen_lowpart (V32QImode, d->op0);
46607 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46609 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46610 vperm = force_reg (V32QImode, vperm);
46612 h = gen_reg_rtx (V32QImode);
46613 op = gen_lowpart (V32QImode, d->op1);
46614 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46616 ior = gen_reg_rtx (V32QImode);
46617 emit_insn (gen_iorv32qi3 (ior, l, h));
46619 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
46620 op = gen_reg_rtx (V4DImode);
46621 ior = gen_lowpart (V4DImode, ior);
46622 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
46623 const1_rtx, GEN_INT (3)));
46624 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46626 return true;
46629 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46630 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
46631 with two "and" and "pack" or two "shift" and "pack" insns. We should
46632 have already failed all two instruction sequences. */
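/* E.g. for the even elements of two V16QImode operands, AND each
   16-bit word with 0x00ff and let packuswb concatenate the
   (zero-extended, hence non-saturating) even bytes of both operands;
   for the odd elements a logical right shift by 8 brings the odd byte
   into the low byte of each word before the pack.  For the 256-bit
   AVX2 variants the pack operates within 128-bit lanes, so a final
   vpermq restores the quarter order.  */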
46634 static bool
46635 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
46637 rtx op, dop0, dop1, t;
46638 unsigned i, odd, c, s, nelt = d->nelt;
46639 bool end_perm = false;
46640 machine_mode half_mode;
46641 rtx (*gen_and) (rtx, rtx, rtx);
46642 rtx (*gen_pack) (rtx, rtx, rtx);
46643 rtx (*gen_shift) (rtx, rtx, rtx);
46645 if (d->one_operand_p)
46646 return false;
46648 switch (d->vmode)
46650 case E_V8HImode:
46651 /* Required for "pack". */
46652 if (!TARGET_SSE4_1)
46653 return false;
46654 c = 0xffff;
46655 s = 16;
46656 half_mode = V4SImode;
46657 gen_and = gen_andv4si3;
46658 gen_pack = gen_sse4_1_packusdw;
46659 gen_shift = gen_lshrv4si3;
46660 break;
46661 case E_V16QImode:
46662 /* No check as all instructions are SSE2. */
46663 c = 0xff;
46664 s = 8;
46665 half_mode = V8HImode;
46666 gen_and = gen_andv8hi3;
46667 gen_pack = gen_sse2_packuswb;
46668 gen_shift = gen_lshrv8hi3;
46669 break;
46670 case E_V16HImode:
46671 if (!TARGET_AVX2)
46672 return false;
46673 c = 0xffff;
46674 s = 16;
46675 half_mode = V8SImode;
46676 gen_and = gen_andv8si3;
46677 gen_pack = gen_avx2_packusdw;
46678 gen_shift = gen_lshrv8si3;
46679 end_perm = true;
46680 break;
46681 case E_V32QImode:
46682 if (!TARGET_AVX2)
46683 return false;
46684 c = 0xff;
46685 s = 8;
46686 half_mode = V16HImode;
46687 gen_and = gen_andv16hi3;
46688 gen_pack = gen_avx2_packuswb;
46689 gen_shift = gen_lshrv16hi3;
46690 end_perm = true;
46691 break;
46692 default:
46693 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
46694 general shuffles. */
46695 return false;
46698 /* Check that permutation is even or odd. */
46699 odd = d->perm[0];
46700 if (odd > 1)
46701 return false;
46703 for (i = 1; i < nelt; ++i)
46704 if (d->perm[i] != 2 * i + odd)
46705 return false;
46707 if (d->testing_p)
46708 return true;
46710 dop0 = gen_reg_rtx (half_mode);
46711 dop1 = gen_reg_rtx (half_mode);
46712 if (odd == 0)
46714 t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
46715 t = force_reg (half_mode, t);
46716 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
46717 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
46719 else
46721 emit_insn (gen_shift (dop0,
46722 gen_lowpart (half_mode, d->op0),
46723 GEN_INT (s)));
46724 emit_insn (gen_shift (dop1,
46725 gen_lowpart (half_mode, d->op1),
46726 GEN_INT (s)));
46728 /* In the AVX2 256-bit case we need to permute the pack result. */
46729 if (TARGET_AVX2 && end_perm)
46731 op = gen_reg_rtx (d->vmode);
46732 t = gen_reg_rtx (V4DImode);
46733 emit_insn (gen_pack (op, dop0, dop1));
46734 emit_insn (gen_avx2_permv4di_1 (t,
46735 gen_lowpart (V4DImode, op),
46736 const0_rtx,
46737 const2_rtx,
46738 const1_rtx,
46739 GEN_INT (3)));
46740 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
46742 else
46743 emit_insn (gen_pack (d->target, dop0, dop1));
46745 return true;
46748 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46749 and extract-odd permutations of two V64QI operands
46750 with two "shift", two "trunc" and one "concat" insn for "odd"
46751 and two "trunc" and one "concat" insn for "even".
46752 We should have already failed all two instruction sequences. */
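/* For the odd bytes, shifting each 16-bit word right by 8 moves the
   odd byte into the low byte of its word; vpmovwb (the V32HI -> V32QI
   truncation) then keeps the low byte of every word, and the two
   32-byte results are concatenated.  The even bytes need no shift.  */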
46754 static bool
46755 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
46757 rtx t1, t2, t3, t4;
46758 unsigned i, odd, nelt = d->nelt;
46760 if (!TARGET_AVX512BW
46761 || d->one_operand_p
46762 || d->vmode != V64QImode)
46763 return false;
46765 /* Check that permutation is even or odd. */
46766 odd = d->perm[0];
46767 if (odd > 1)
46768 return false;
46770 for (i = 1; i < nelt; ++i)
46771 if (d->perm[i] != 2 * i + odd)
46772 return false;
46774 if (d->testing_p)
46775 return true;
46778 if (odd)
46780 t1 = gen_reg_rtx (V32HImode);
46781 t2 = gen_reg_rtx (V32HImode);
46782 emit_insn (gen_lshrv32hi3 (t1,
46783 gen_lowpart (V32HImode, d->op0),
46784 GEN_INT (8)));
46785 emit_insn (gen_lshrv32hi3 (t2,
46786 gen_lowpart (V32HImode, d->op1),
46787 GEN_INT (8)));
46789 else
46791 t1 = gen_lowpart (V32HImode, d->op0);
46792 t2 = gen_lowpart (V32HImode, d->op1);
46795 t3 = gen_reg_rtx (V32QImode);
46796 t4 = gen_reg_rtx (V32QImode);
46797 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
46798 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
46799 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
46801 return true;
46804 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
46805 and extract-odd permutations. */
46807 static bool
46808 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
46810 rtx t1, t2, t3, t4, t5;
46812 switch (d->vmode)
46814 case E_V4DFmode:
46815 if (d->testing_p)
46816 break;
46817 t1 = gen_reg_rtx (V4DFmode);
46818 t2 = gen_reg_rtx (V4DFmode);
46820 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
46821 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
46822 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
46824 /* Now an unpck[lh]pd will produce the result required. */
46825 if (odd)
46826 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
46827 else
46828 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
46829 emit_insn (t3);
46830 break;
46832 case E_V8SFmode:
46834 int mask = odd ? 0xdd : 0x88;
46836 if (d->testing_p)
46837 break;
46838 t1 = gen_reg_rtx (V8SFmode);
46839 t2 = gen_reg_rtx (V8SFmode);
46840 t3 = gen_reg_rtx (V8SFmode);
46842 /* Shuffle within the 128-bit lanes to produce:
46843 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
46844 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
46845 GEN_INT (mask)));
46847 /* Shuffle the lanes around to produce:
46848 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
46849 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
46850 GEN_INT (0x3)));
46852 /* Shuffle within the 128-bit lanes to produce:
46853 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
46854 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
46856 /* Shuffle within the 128-bit lanes to produce:
46857 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
46858 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
46860 /* Shuffle the lanes around to produce:
46861 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
46862 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
46863 GEN_INT (0x20)));
46865 break;
46867 case E_V2DFmode:
46868 case E_V4SFmode:
46869 case E_V2DImode:
46870 case E_V4SImode:
46871 /* These are always directly implementable by expand_vec_perm_1. */
46872 gcc_unreachable ();
46874 case E_V8HImode:
46875 if (TARGET_SSE4_1)
46876 return expand_vec_perm_even_odd_pack (d);
46877 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
46878 return expand_vec_perm_pshufb2 (d);
46879 else
46881 if (d->testing_p)
46882 break;
46883 /* We need 2*log2(N)-1 operations to achieve odd/even
46884 with interleave. */
46885 t1 = gen_reg_rtx (V8HImode);
46886 t2 = gen_reg_rtx (V8HImode);
46887 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
46888 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
46889 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
46890 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
46891 if (odd)
46892 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
46893 else
46894 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
46895 emit_insn (t3);
46897 break;
46899 case E_V16QImode:
46900 return expand_vec_perm_even_odd_pack (d);
46902 case E_V16HImode:
46903 case E_V32QImode:
46904 return expand_vec_perm_even_odd_pack (d);
46906 case E_V64QImode:
46907 return expand_vec_perm_even_odd_trunc (d);
46909 case E_V4DImode:
46910 if (!TARGET_AVX2)
46912 struct expand_vec_perm_d d_copy = *d;
46913 d_copy.vmode = V4DFmode;
46914 if (d->testing_p)
46915 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
46916 else
46917 d_copy.target = gen_reg_rtx (V4DFmode);
46918 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
46919 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
46920 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
46922 if (!d->testing_p)
46923 emit_move_insn (d->target,
46924 gen_lowpart (V4DImode, d_copy.target));
46925 return true;
46927 return false;
46930 if (d->testing_p)
46931 break;
46933 t1 = gen_reg_rtx (V4DImode);
46934 t2 = gen_reg_rtx (V4DImode);
46936 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
46937 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
46938 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
46940 /* Now a vpunpck[lh]qdq will produce the result required. */
46941 if (odd)
46942 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
46943 else
46944 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
46945 emit_insn (t3);
46946 break;
46948 case E_V8SImode:
46949 if (!TARGET_AVX2)
46951 struct expand_vec_perm_d d_copy = *d;
46952 d_copy.vmode = V8SFmode;
46953 if (d->testing_p)
46954 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
46955 else
46956 d_copy.target = gen_reg_rtx (V8SFmode);
46957 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
46958 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
46959 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
46961 if (!d->testing_p)
46962 emit_move_insn (d->target,
46963 gen_lowpart (V8SImode, d_copy.target));
46964 return true;
46966 return false;
46969 if (d->testing_p)
46970 break;
46972 t1 = gen_reg_rtx (V8SImode);
46973 t2 = gen_reg_rtx (V8SImode);
46974 t3 = gen_reg_rtx (V4DImode);
46975 t4 = gen_reg_rtx (V4DImode);
46976 t5 = gen_reg_rtx (V4DImode);
46978 /* Shuffle the lanes around into
46979 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
46980 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
46981 gen_lowpart (V4DImode, d->op1),
46982 GEN_INT (0x20)));
46983 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
46984 gen_lowpart (V4DImode, d->op1),
46985 GEN_INT (0x31)));
46987 /* Swap the 2nd and 3rd position in each lane into
46988 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
46989 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
46990 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
46991 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
46992 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
46994 /* Now a vpunpck[lh]qdq will produce
46995 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
46996 if (odd)
46997 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
46998 gen_lowpart (V4DImode, t2));
46999 else
47000 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
47001 gen_lowpart (V4DImode, t2));
47002 emit_insn (t3);
47003 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
47004 break;
47006 default:
47007 gcc_unreachable ();
47010 return true;
47013 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47014 extract-even and extract-odd permutations. */
47016 static bool
47017 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
47019 unsigned i, odd, nelt = d->nelt;
47021 odd = d->perm[0];
47022 if (odd != 0 && odd != 1)
47023 return false;
47025 for (i = 1; i < nelt; ++i)
47026 if (d->perm[i] != 2 * i + odd)
47027 return false;
47029 return expand_vec_perm_even_odd_1 (d, odd);
47032 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
47033 permutations. We assume that expand_vec_perm_1 has already failed. */
47035 static bool
47036 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
47038 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
47039 machine_mode vmode = d->vmode;
47040 unsigned char perm2[4];
47041 rtx op0 = d->op0, dest;
47042 bool ok;
47044 switch (vmode)
47046 case E_V4DFmode:
47047 case E_V8SFmode:
47048 /* These are special-cased in sse.md so that we can optionally
47049 use the vbroadcast instruction. They expand to two insns
47050 if the input happens to be in a register. */
47051 gcc_unreachable ();
47053 case E_V2DFmode:
47054 case E_V2DImode:
47055 case E_V4SFmode:
47056 case E_V4SImode:
47057 /* These are always implementable using standard shuffle patterns. */
47058 gcc_unreachable ();
47060 case E_V8HImode:
47061 case E_V16QImode:
47062 /* These can be implemented via interleave. We save one insn by
47063 stopping once we have promoted to V4SImode and then use pshufd. */
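/* E.g. to broadcast element 5 of a V8HImode vector: ELT >= NELT2, so
   the high interleave turns { h0 ... h7 } into { h4 h4 h5 h5 h6 h6 h7 h7 };
   viewed as V4SImode the wanted pair is now element 1, and a final
   pshufd with { 1, 1, 1, 1 } replicates it.  */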
47064 if (d->testing_p)
47065 return true;
47068 rtx dest;
47069 rtx (*gen) (rtx, rtx, rtx)
47070 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
47071 : gen_vec_interleave_lowv8hi;
47073 if (elt >= nelt2)
47075 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
47076 : gen_vec_interleave_highv8hi;
47077 elt -= nelt2;
47079 nelt2 /= 2;
47081 dest = gen_reg_rtx (vmode);
47082 emit_insn (gen (dest, op0, op0));
47083 vmode = get_mode_wider_vector (vmode);
47084 op0 = gen_lowpart (vmode, dest);
47086 while (vmode != V4SImode);
47088 memset (perm2, elt, 4);
47089 dest = gen_reg_rtx (V4SImode);
47090 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
47091 gcc_assert (ok);
47092 if (!d->testing_p)
47093 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
47094 return true;
47096 case E_V64QImode:
47097 case E_V32QImode:
47098 case E_V16HImode:
47099 case E_V8SImode:
47100 case E_V4DImode:
47101 /* For AVX2 broadcasts of the first element vpbroadcast* or
47102 vpermq should be used by expand_vec_perm_1. */
47103 gcc_assert (!TARGET_AVX2 || d->perm[0]);
47104 return false;
47106 default:
47107 gcc_unreachable ();
47111 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47112 broadcast permutations. */
47114 static bool
47115 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
47117 unsigned i, elt, nelt = d->nelt;
47119 if (!d->one_operand_p)
47120 return false;
47122 elt = d->perm[0];
47123 for (i = 1; i < nelt; ++i)
47124 if (d->perm[i] != elt)
47125 return false;
47127 return expand_vec_perm_broadcast_1 (d);
47130 /* Implement arbitrary permutations of two V64QImode operands
47131 with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */
47132 static bool
47133 expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
47135 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
47136 return false;
47138 if (d->testing_p)
47139 return true;
47141 struct expand_vec_perm_d ds[2];
47142 rtx rperm[128], vperm, target0, target1;
47143 unsigned int i, nelt;
47144 machine_mode vmode;
47146 nelt = d->nelt;
47147 vmode = V64QImode;
47149 for (i = 0; i < 2; i++)
47151 ds[i] = *d;
47152 ds[i].vmode = V32HImode;
47153 ds[i].nelt = 32;
47154 ds[i].target = gen_reg_rtx (V32HImode);
47155 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
47156 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
47159 /* Prepare permutations such that the first one (ds[0]) takes care
47160 of putting the even bytes into the right positions or one
47161 position higher, and the second one (ds[1]) takes care of
47162 putting the odd bytes into the right positions or one
47163 position lower. */
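/* ds[0] and ds[1] are V32HImode word permutations: for every even
   destination byte ds[0] moves the source word containing it into the
   destination word, and ds[1] does the same for the odd destination
   bytes.  The vpshufb masks built in rperm[] then pick the byte of the
   right parity within each word, zeroing the other bytes (index -1 has
   bit 7 set), so the final vpor merges the two results.  */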
47165 for (i = 0; i < nelt; i++)
47167 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
47168 if (i & 1)
47170 rperm[i] = constm1_rtx;
47171 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
47173 else
47175 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
47176 rperm[i + 64] = constm1_rtx;
47180 bool ok = expand_vec_perm_1 (&ds[0]);
47181 gcc_assert (ok);
47182 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
47184 ok = expand_vec_perm_1 (&ds[1]);
47185 gcc_assert (ok);
47186 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
47188 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
47189 vperm = force_reg (vmode, vperm);
47190 target0 = gen_reg_rtx (V64QImode);
47191 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
47193 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
47194 vperm = force_reg (vmode, vperm);
47195 target1 = gen_reg_rtx (V64QImode);
47196 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
47198 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
47199 return true;
47202 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
47203 with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
47204 all the shorter instruction sequences. */
47206 static bool
47207 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
47209 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
47210 unsigned int i, nelt, eltsz;
47211 bool used[4];
47213 if (!TARGET_AVX2
47214 || d->one_operand_p
47215 || (d->vmode != V32QImode && d->vmode != V16HImode))
47216 return false;
47218 if (d->testing_p)
47219 return true;
47221 nelt = d->nelt;
47222 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47224 /* Generate 4 permutation masks.  If the required element is within
47225 the same lane, it is shuffled in.  If the required element is from the
47226 other lane, force a zero by setting bit 7 in the permutation mask.
47227 The other mask has non-negative entries for elements requested
47228 from the other lane, but those entries are also moved to the other
47229 lane, so that the result of vpshufb can have the two V2TImode halves
47230 swapped. */
47231 m128 = GEN_INT (-128);
47232 for (i = 0; i < 32; ++i)
47234 rperm[0][i] = m128;
47235 rperm[1][i] = m128;
47236 rperm[2][i] = m128;
47237 rperm[3][i] = m128;
47239 used[0] = false;
47240 used[1] = false;
47241 used[2] = false;
47242 used[3] = false;
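/* Mask 0 covers bytes taken from OP0 that stay within their 128-bit
   lane, mask 1 bytes from OP0 that must cross lanes, and masks 2 and 3
   the same for OP1.  */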
47243 for (i = 0; i < nelt; ++i)
47245 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47246 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
47247 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
47249 for (j = 0; j < eltsz; ++j)
47250 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
47251 used[which] = true;
47254 for (i = 0; i < 2; ++i)
47256 if (!used[2 * i + 1])
47258 h[i] = NULL_RTX;
47259 continue;
47261 vperm = gen_rtx_CONST_VECTOR (V32QImode,
47262 gen_rtvec_v (32, rperm[2 * i + 1]));
47263 vperm = force_reg (V32QImode, vperm);
47264 h[i] = gen_reg_rtx (V32QImode);
47265 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47266 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
47269 /* Swap the 128-bit lanes of h[X]. */
47270 for (i = 0; i < 2; ++i)
47272 if (h[i] == NULL_RTX)
47273 continue;
47274 op = gen_reg_rtx (V4DImode);
47275 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
47276 const2_rtx, GEN_INT (3), const0_rtx,
47277 const1_rtx));
47278 h[i] = gen_lowpart (V32QImode, op);
47281 for (i = 0; i < 2; ++i)
47283 if (!used[2 * i])
47285 l[i] = NULL_RTX;
47286 continue;
47288 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
47289 vperm = force_reg (V32QImode, vperm);
47290 l[i] = gen_reg_rtx (V32QImode);
47291 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47292 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
47295 for (i = 0; i < 2; ++i)
47297 if (h[i] && l[i])
47299 op = gen_reg_rtx (V32QImode);
47300 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
47301 l[i] = op;
47303 else if (h[i])
47304 l[i] = h[i];
47307 gcc_assert (l[0] && l[1]);
47308 op = d->target;
47309 if (d->vmode != V32QImode)
47310 op = gen_reg_rtx (V32QImode);
47311 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
47312 if (op != d->target)
47313 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47314 return true;
47317 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
47318 With all of the interface bits taken care of, perform the expansion
47319 in D and return true on success. */
47321 static bool
47322 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
47324 /* Try a single instruction expansion. */
47325 if (expand_vec_perm_1 (d))
47326 return true;
47328 /* Try sequences of two instructions. */
47330 if (expand_vec_perm_pshuflw_pshufhw (d))
47331 return true;
47333 if (expand_vec_perm_palignr (d, false))
47334 return true;
47336 if (expand_vec_perm_interleave2 (d))
47337 return true;
47339 if (expand_vec_perm_broadcast (d))
47340 return true;
47342 if (expand_vec_perm_vpermq_perm_1 (d))
47343 return true;
47345 if (expand_vec_perm_vperm2f128 (d))
47346 return true;
47348 if (expand_vec_perm_pblendv (d))
47349 return true;
47351 /* Try sequences of three instructions. */
47353 if (expand_vec_perm_even_odd_pack (d))
47354 return true;
47356 if (expand_vec_perm_2vperm2f128_vshuf (d))
47357 return true;
47359 if (expand_vec_perm_pshufb2 (d))
47360 return true;
47362 if (expand_vec_perm_interleave3 (d))
47363 return true;
47365 if (expand_vec_perm_vperm2f128_vblend (d))
47366 return true;
47368 /* Try sequences of four instructions. */
47370 if (expand_vec_perm_even_odd_trunc (d))
47371 return true;
47372 if (expand_vec_perm_vpshufb2_vpermq (d))
47373 return true;
47375 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
47376 return true;
47378 if (expand_vec_perm_vpermt2_vpshub2 (d))
47379 return true;
47381 /* ??? Look for narrow permutations whose element orderings would
47382 allow the promotion to a wider mode. */
47384 /* ??? Look for sequences of interleave or a wider permute that place
47385 the data into the correct lanes for a half-vector shuffle like
47386 pshuf[lh]w or vpermilps. */
47388 /* ??? Look for sequences of interleave that produce the desired results.
47389 The combinatorics of punpck[lh] get pretty ugly... */
47391 if (expand_vec_perm_even_odd (d))
47392 return true;
47394 /* Even longer sequences. */
47395 if (expand_vec_perm_vpshufb4_vpermq2 (d))
47396 return true;
47398 /* See if we can get the same permutation in different vector integer
47399 mode. */
47400 struct expand_vec_perm_d nd;
47401 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
47403 if (!d->testing_p)
47404 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
47405 return true;
47408 return false;
47411 /* If a permutation only uses one operand, make it clear. Returns true
47412 if the permutation references both operands. */
47414 static bool
47415 canonicalize_perm (struct expand_vec_perm_d *d)
47417 int i, which, nelt = d->nelt;
47419 for (i = which = 0; i < nelt; ++i)
47420 which |= (d->perm[i] < nelt ? 1 : 2);
47422 d->one_operand_p = true;
47423 switch (which)
47425 default:
47426 gcc_unreachable ();
47428 case 3:
47429 if (!rtx_equal_p (d->op0, d->op1))
47431 d->one_operand_p = false;
47432 break;
47434 /* The elements of PERM do not suggest that only the first operand
47435 is used, but both operands are identical. Allow easier matching
47436 of the permutation by folding the permutation into the single
47437 input vector. */
47438 /* FALLTHRU */
47440 case 2:
47441 for (i = 0; i < nelt; ++i)
47442 d->perm[i] &= nelt - 1;
47443 d->op0 = d->op1;
47444 break;
47446 case 1:
47447 d->op1 = d->op0;
47448 break;
47451 return (which == 3);
47454 bool
47455 ix86_expand_vec_perm_const (rtx operands[4])
47457 struct expand_vec_perm_d d;
47458 unsigned char perm[MAX_VECT_LEN];
47459 int i, nelt;
47460 bool two_args;
47461 rtx sel;
47463 d.target = operands[0];
47464 d.op0 = operands[1];
47465 d.op1 = operands[2];
47466 sel = operands[3];
47468 d.vmode = GET_MODE (d.target);
47469 gcc_assert (VECTOR_MODE_P (d.vmode));
47470 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47471 d.testing_p = false;
47473 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
47474 gcc_assert (XVECLEN (sel, 0) == nelt);
47475 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
47477 for (i = 0; i < nelt; ++i)
47479 rtx e = XVECEXP (sel, 0, i);
47480 int ei = INTVAL (e) & (2 * nelt - 1);
47481 d.perm[i] = ei;
47482 perm[i] = ei;
47485 two_args = canonicalize_perm (&d);
47487 if (ix86_expand_vec_perm_const_1 (&d))
47488 return true;
47490 /* If the selector says both arguments are needed, but the operands are the
47491 same, the above tried to expand with one_operand_p and flattened selector.
47492 If that didn't work, retry without one_operand_p; we succeeded with that
47493 during testing. */
47494 if (two_args && d.one_operand_p)
47496 d.one_operand_p = false;
47497 memcpy (d.perm, perm, sizeof (perm));
47498 return ix86_expand_vec_perm_const_1 (&d);
47501 return false;
47504 /* Implement targetm.vectorize.vec_perm_const_ok. */
47506 static bool
47507 ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
47509 struct expand_vec_perm_d d;
47510 unsigned int i, nelt, which;
47511 bool ret;
47513 d.vmode = vmode;
47514 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47515 d.testing_p = true;
47517 /* Given sufficient ISA support we can just return true here
47518 for selected vector modes. */
47519 switch (d.vmode)
47521 case E_V16SFmode:
47522 case E_V16SImode:
47523 case E_V8DImode:
47524 case E_V8DFmode:
47525 if (TARGET_AVX512F)
47526 /* All implementable with a single vperm[it]2 insn. */
47527 return true;
47528 break;
47529 case E_V32HImode:
47530 if (TARGET_AVX512BW)
47531 /* All implementable with a single vperm[it]2 insn. */
47532 return true;
47533 break;
47534 case E_V64QImode:
47535 if (TARGET_AVX512BW)
47536 /* Implementable with 2 vperm[it]2, 2 vpshufb and one vpor insn. */
47537 return true;
47538 break;
47539 case E_V8SImode:
47540 case E_V8SFmode:
47541 case E_V4DFmode:
47542 case E_V4DImode:
47543 if (TARGET_AVX512VL)
47544 /* All implementable with a single vperm[it]2 insn. */
47545 return true;
47546 break;
47547 case E_V16HImode:
47548 if (TARGET_AVX2)
47549 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47550 return true;
47551 break;
47552 case E_V32QImode:
47553 if (TARGET_AVX2)
47554 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47555 return true;
47556 break;
47557 case E_V4SImode:
47558 case E_V4SFmode:
47559 case E_V8HImode:
47560 case E_V16QImode:
47561 /* All implementable with a single vpperm insn. */
47562 if (TARGET_XOP)
47563 return true;
47564 /* All implementable with 2 pshufb + 1 ior. */
47565 if (TARGET_SSSE3)
47566 return true;
47567 break;
47568 case E_V2DImode:
47569 case E_V2DFmode:
47570 /* All implementable with shufpd or unpck[lh]pd. */
47571 return true;
47572 default:
47573 return false;
47576 /* Extract the values from the vector CST into the permutation
47577 array in D. */
47578 for (i = which = 0; i < nelt; ++i)
47580 unsigned char e = sel[i];
47581 gcc_assert (e < 2 * nelt);
47582 d.perm[i] = e;
47583 which |= (e < nelt ? 1 : 2);
47586 /* For all elements from second vector, fold the elements to first. */
47587 if (which == 2)
47588 for (i = 0; i < nelt; ++i)
47589 d.perm[i] -= nelt;
47591 /* Check whether the mask can be applied to the vector type. */
47592 d.one_operand_p = (which != 3);
47594 /* Implementable with shufps or pshufd. */
47595 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
47596 return true;
47598 /* Otherwise we have to go through the motions and see if we can
47599 figure out how to generate the requested permutation. */
47600 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
47601 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
47602 if (!d.one_operand_p)
47603 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
47605 start_sequence ();
47606 ret = ix86_expand_vec_perm_const_1 (&d);
47607 end_sequence ();
47609 return ret;
47612 void
47613 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
47615 struct expand_vec_perm_d d;
47616 unsigned i, nelt;
47618 d.target = targ;
47619 d.op0 = op0;
47620 d.op1 = op1;
47621 d.vmode = GET_MODE (targ);
47622 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47623 d.one_operand_p = false;
47624 d.testing_p = false;
47626 for (i = 0; i < nelt; ++i)
47627 d.perm[i] = i * 2 + odd;
47629 /* We'll either be able to implement the permutation directly... */
47630 if (expand_vec_perm_1 (&d))
47631 return;
47633 /* ... or we use the special-case patterns. */
47634 expand_vec_perm_even_odd_1 (&d, odd);
47637 static void
47638 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
47640 struct expand_vec_perm_d d;
47641 unsigned i, nelt, base;
47642 bool ok;
47644 d.target = targ;
47645 d.op0 = op0;
47646 d.op1 = op1;
47647 d.vmode = GET_MODE (targ);
47648 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47649 d.one_operand_p = false;
47650 d.testing_p = false;
47652 base = high_p ? nelt / 2 : 0;
47653 for (i = 0; i < nelt / 2; ++i)
47655 d.perm[i * 2] = i + base;
47656 d.perm[i * 2 + 1] = i + base + nelt;
47659 /* Note that for AVX this isn't one instruction. */
47660 ok = ix86_expand_vec_perm_const_1 (&d);
47661 gcc_assert (ok);
47665 /* Expand a vector operation CODE for a V*QImode in terms of the
47666 same operation on V*HImode. */
47668 void
47669 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
47671 machine_mode qimode = GET_MODE (dest);
47672 machine_mode himode;
47673 rtx (*gen_il) (rtx, rtx, rtx);
47674 rtx (*gen_ih) (rtx, rtx, rtx);
47675 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
47676 struct expand_vec_perm_d d;
47677 bool ok, full_interleave;
47678 bool uns_p = false;
47679 int i;
47681 switch (qimode)
47683 case E_V16QImode:
47684 himode = V8HImode;
47685 gen_il = gen_vec_interleave_lowv16qi;
47686 gen_ih = gen_vec_interleave_highv16qi;
47687 break;
47688 case E_V32QImode:
47689 himode = V16HImode;
47690 gen_il = gen_avx2_interleave_lowv32qi;
47691 gen_ih = gen_avx2_interleave_highv32qi;
47692 break;
47693 case E_V64QImode:
47694 himode = V32HImode;
47695 gen_il = gen_avx512bw_interleave_lowv64qi;
47696 gen_ih = gen_avx512bw_interleave_highv64qi;
47697 break;
47698 default:
47699 gcc_unreachable ();
47702 op2_l = op2_h = op2;
47703 switch (code)
47705 case MULT:
47706 /* Unpack data such that we've got a source byte in each low byte of
47707 each word. We don't care what goes into the high byte of each word.
47708 Rather than trying to get zero in there, most convenient is to let
47709 it be a copy of the low byte. */
47710 op2_l = gen_reg_rtx (qimode);
47711 op2_h = gen_reg_rtx (qimode);
47712 emit_insn (gen_il (op2_l, op2, op2));
47713 emit_insn (gen_ih (op2_h, op2, op2));
47715 op1_l = gen_reg_rtx (qimode);
47716 op1_h = gen_reg_rtx (qimode);
47717 emit_insn (gen_il (op1_l, op1, op1));
47718 emit_insn (gen_ih (op1_h, op1, op1));
47719 full_interleave = qimode == V16QImode;
47720 break;
47722 case ASHIFT:
47723 case LSHIFTRT:
47724 uns_p = true;
47725 /* FALLTHRU */
47726 case ASHIFTRT:
47727 op1_l = gen_reg_rtx (himode);
47728 op1_h = gen_reg_rtx (himode);
47729 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
47730 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
47731 full_interleave = true;
47732 break;
47733 default:
47734 gcc_unreachable ();
47737 /* Perform the operation. */
47738 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
47739 1, OPTAB_DIRECT);
47740 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
47741 1, OPTAB_DIRECT);
47742 gcc_assert (res_l && res_h);
47744 /* Merge the data back into the right place. */
47745 d.target = dest;
47746 d.op0 = gen_lowpart (qimode, res_l);
47747 d.op1 = gen_lowpart (qimode, res_h);
47748 d.vmode = qimode;
47749 d.nelt = GET_MODE_NUNITS (qimode);
47750 d.one_operand_p = false;
47751 d.testing_p = false;
47753 if (full_interleave)
47755 /* For SSE2, we used a full interleave, so the desired
47756 results are in the even elements. */
47757 for (i = 0; i < d.nelt; ++i)
47758 d.perm[i] = i * 2;
47760 else
47762 /* For AVX, the interleave used above was not cross-lane. So the
47763 extraction is evens but with the second and third quarters swapped.
47764 Happily, that is even one insn shorter than even extraction.
47765 For AVX512BW we have 4 lanes. We extract evens from within a lane,
47766 always first from the first and then from the second source operand,
47767 the index bits above the low 4 bits remain the same.
47768 Thus, for d.nelt == 32 we want permutation
47769 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
47770 and for d.nelt == 64 we want permutation
47771 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
47772 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
47773 for (i = 0; i < d.nelt; ++i)
47774 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
47777 ok = ix86_expand_vec_perm_const_1 (&d);
47778 gcc_assert (ok);
47780 set_unique_reg_note (get_last_insn (), REG_EQUAL,
47781 gen_rtx_fmt_ee (code, qimode, op1, op2));
47784 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
47785 if op is CONST_VECTOR with all odd elements equal to their
47786 preceding element. */
47788 static bool
47789 const_vector_equal_evenodd_p (rtx op)
47791 machine_mode mode = GET_MODE (op);
47792 int i, nunits = GET_MODE_NUNITS (mode);
47793 if (GET_CODE (op) != CONST_VECTOR
47794 || nunits != CONST_VECTOR_NUNITS (op))
47795 return false;
47796 for (i = 0; i < nunits; i += 2)
47797 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
47798 return false;
47799 return true;
47802 void
47803 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
47804 bool uns_p, bool odd_p)
47806 machine_mode mode = GET_MODE (op1);
47807 machine_mode wmode = GET_MODE (dest);
47808 rtx x;
47809 rtx orig_op1 = op1, orig_op2 = op2;
47811 if (!nonimmediate_operand (op1, mode))
47812 op1 = force_reg (mode, op1);
47813 if (!nonimmediate_operand (op2, mode))
47814 op2 = force_reg (mode, op2);
47816 /* We only play even/odd games with vectors of SImode. */
47817 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
47819 /* If we're looking for the odd results, shift those members down to
47820 the even slots. For some cpus this is faster than a PSHUFD. */
47821 if (odd_p)
47823 /* For XOP use vpmacsdqh, but only for smult, as it is only
47824 signed. */
47825 if (TARGET_XOP && mode == V4SImode && !uns_p)
47827 x = force_reg (wmode, CONST0_RTX (wmode));
47828 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
47829 return;
47832 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
47833 if (!const_vector_equal_evenodd_p (orig_op1))
47834 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
47835 x, NULL, 1, OPTAB_DIRECT);
47836 if (!const_vector_equal_evenodd_p (orig_op2))
47837 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
47838 x, NULL, 1, OPTAB_DIRECT);
47839 op1 = gen_lowpart (mode, op1);
47840 op2 = gen_lowpart (mode, op2);
47843 if (mode == V16SImode)
47845 if (uns_p)
47846 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
47847 else
47848 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
47850 else if (mode == V8SImode)
47852 if (uns_p)
47853 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
47854 else
47855 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
47857 else if (uns_p)
47858 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
47859 else if (TARGET_SSE4_1)
47860 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
47861 else
47863 rtx s1, s2, t0, t1, t2;
47865 /* The easiest way to implement this without PMULDQ is to go through
47866 the motions as if we are performing a full 64-bit multiply, with
47867 the exception that we need to do less shuffling of the elements. */
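/* That is, compute the low 64 bits of the signed product as
   LO(A)*LO(B) + ((HI(A)*LO(B) + LO(A)*HI(B)) << 32), where HI(X) is
   the 32-bit sign extension of X (zero or all-ones).  */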
47869 /* Compute the sign-extension, aka highparts, of the two operands. */
47870 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
47871 op1, pc_rtx, pc_rtx);
47872 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
47873 op2, pc_rtx, pc_rtx);
47875 /* Multiply LO(A) * HI(B), and vice-versa. */
47876 t1 = gen_reg_rtx (wmode);
47877 t2 = gen_reg_rtx (wmode);
47878 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
47879 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
47881 /* Multiply LO(A) * LO(B). */
47882 t0 = gen_reg_rtx (wmode);
47883 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
47885 /* Combine and shift the highparts into place. */
47886 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
47887 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
47888 1, OPTAB_DIRECT);
47890 /* Combine high and low parts. */
47891 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
47892 return;
47894 emit_insn (x);
47897 void
47898 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
47899 bool uns_p, bool high_p)
47901 machine_mode wmode = GET_MODE (dest);
47902 machine_mode mode = GET_MODE (op1);
47903 rtx t1, t2, t3, t4, mask;
47905 switch (mode)
47907 case E_V4SImode:
47908 t1 = gen_reg_rtx (mode);
47909 t2 = gen_reg_rtx (mode);
47910 if (TARGET_XOP && !uns_p)
47912 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
47913 shuffle the elements once so that all elements are in the right
47914 place for immediate use: { A C B D }. */
47915 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
47916 const1_rtx, GEN_INT (3)));
47917 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
47918 const1_rtx, GEN_INT (3)));
47920 else
47922 /* Put the elements into place for the multiply. */
47923 ix86_expand_vec_interleave (t1, op1, op1, high_p);
47924 ix86_expand_vec_interleave (t2, op2, op2, high_p);
47925 high_p = false;
47927 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
47928 break;
47930 case E_V8SImode:
47931 /* Shuffle the elements between the lanes. After this we
47932 have { A B E F | C D G H } for each operand. */
47933 t1 = gen_reg_rtx (V4DImode);
47934 t2 = gen_reg_rtx (V4DImode);
47935 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
47936 const0_rtx, const2_rtx,
47937 const1_rtx, GEN_INT (3)));
47938 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
47939 const0_rtx, const2_rtx,
47940 const1_rtx, GEN_INT (3)));
47942 /* Shuffle the elements within the lanes. After this we
47943 have { A A B B | C C D D } or { E E F F | G G H H }. */
47944 t3 = gen_reg_rtx (V8SImode);
47945 t4 = gen_reg_rtx (V8SImode);
47946 mask = GEN_INT (high_p
47947 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
47948 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
47949 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
47950 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
47952 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
47953 break;
47955 case E_V8HImode:
47956 case E_V16HImode:
47957 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
47958 uns_p, OPTAB_DIRECT);
47959 t2 = expand_binop (mode,
47960 uns_p ? umul_highpart_optab : smul_highpart_optab,
47961 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
47962 gcc_assert (t1 && t2);
47964 t3 = gen_reg_rtx (mode);
47965 ix86_expand_vec_interleave (t3, t1, t2, high_p);
47966 emit_move_insn (dest, gen_lowpart (wmode, t3));
47967 break;
47969 case E_V16QImode:
47970 case E_V32QImode:
47971 case E_V32HImode:
47972 case E_V16SImode:
47973 case E_V64QImode:
47974 t1 = gen_reg_rtx (wmode);
47975 t2 = gen_reg_rtx (wmode);
47976 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
47977 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
47979 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
47980 break;
47982 default:
47983 gcc_unreachable ();
47987 void
47988 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
47990 rtx res_1, res_2, res_3, res_4;
47992 res_1 = gen_reg_rtx (V4SImode);
47993 res_2 = gen_reg_rtx (V4SImode);
47994 res_3 = gen_reg_rtx (V2DImode);
47995 res_4 = gen_reg_rtx (V2DImode);
47996 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
47997 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
47999 /* Move the results in element 2 down to element 1; we don't care
48000 what goes in elements 2 and 3. Then we can merge the parts
48001 back together with an interleave.
48003 Note that two other sequences were tried:
48004 (1) Use interleaves at the start instead of psrldq, which allows
48005 us to use a single shufps to merge things back at the end.
48006 (2) Use shufps here to combine the two vectors, then pshufd to
48007 put the elements in the correct order.
48008 In both cases the cost of the reformatting stall was too high
48009 and the overall sequence slower. */
48011 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
48012 const0_rtx, const2_rtx,
48013 const0_rtx, const0_rtx));
48014 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
48015 const0_rtx, const2_rtx,
48016 const0_rtx, const0_rtx));
48017 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
48019 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
48022 void
48023 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
48025 machine_mode mode = GET_MODE (op0);
48026 rtx t1, t2, t3, t4, t5, t6;
48028 if (TARGET_AVX512DQ && mode == V8DImode)
48029 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
48030 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
48031 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
48032 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
48033 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
48034 else if (TARGET_XOP && mode == V2DImode)
48036 /* op1: A,B,C,D, op2: E,F,G,H */
48037 op1 = gen_lowpart (V4SImode, op1);
48038 op2 = gen_lowpart (V4SImode, op2);
48040 t1 = gen_reg_rtx (V4SImode);
48041 t2 = gen_reg_rtx (V4SImode);
48042 t3 = gen_reg_rtx (V2DImode);
48043 t4 = gen_reg_rtx (V2DImode);
48045 /* t1: B,A,D,C */
48046 emit_insn (gen_sse2_pshufd_1 (t1, op1,
48047 GEN_INT (1),
48048 GEN_INT (0),
48049 GEN_INT (3),
48050 GEN_INT (2)));
48052 /* t2: (B*E),(A*F),(D*G),(C*H) */
48053 emit_insn (gen_mulv4si3 (t2, t1, op2));
48055 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
48056 emit_insn (gen_xop_phadddq (t3, t2));
48058 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
48059 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
48061 /* Multiply lower parts and add all */
48062 t5 = gen_reg_rtx (V2DImode);
48063 emit_insn (gen_vec_widen_umult_even_v4si (t5,
48064 gen_lowpart (V4SImode, op1),
48065 gen_lowpart (V4SImode, op2)));
48066 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
48069 else
48071 machine_mode nmode;
48072 rtx (*umul) (rtx, rtx, rtx);
48074 if (mode == V2DImode)
48076 umul = gen_vec_widen_umult_even_v4si;
48077 nmode = V4SImode;
48079 else if (mode == V4DImode)
48081 umul = gen_vec_widen_umult_even_v8si;
48082 nmode = V8SImode;
48084 else if (mode == V8DImode)
48086 umul = gen_vec_widen_umult_even_v16si;
48087 nmode = V16SImode;
48089 else
48090 gcc_unreachable ();
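/* Compute each 64-bit product as LO(op1)*LO(op2)
   + ((HI(op1)*LO(op2) + HI(op2)*LO(op1)) << 32); the HI*HI term only
   affects bits above bit 63 and can be dropped.  */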
48093 /* Multiply low parts. */
48094 t1 = gen_reg_rtx (mode);
48095 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
48097 /* Shift input vectors right 32 bits so we can multiply high parts. */
48098 t6 = GEN_INT (32);
48099 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
48100 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
48102 /* Multiply high parts by low parts. */
48103 t4 = gen_reg_rtx (mode);
48104 t5 = gen_reg_rtx (mode);
48105 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
48106 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
48108 /* Combine and shift the highparts back. */
48109 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
48110 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
48112 /* Combine high and low parts. */
48113 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
48116 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48117 gen_rtx_MULT (mode, op1, op2));
48120 /* Return 1 if control transfer instruction INSN
48121 should be encoded with bnd prefix.
48122 If insn is NULL then return 1 when control
48123 transfer instructions should be prefixed with
48124 bnd by default for the current function. */
48126 bool
48127 ix86_bnd_prefixed_insn_p (rtx insn)
48129 /* For call insns check special flag. */
48130 if (insn && CALL_P (insn))
48132 rtx call = get_call_rtx_from (insn);
48133 if (call)
48134 return CALL_EXPR_WITH_BOUNDS_P (call);
48137 /* All other insns are prefixed only if function is instrumented. */
48138 return chkp_function_instrumented_p (current_function_decl);
48141 /* Return 1 if control transfer instruction INSN
48142 should be encoded with notrack prefix. */
48144 static bool
48145 ix86_notrack_prefixed_insn_p (rtx insn)
48147 if (!insn || !((flag_cf_protection & CF_BRANCH) && TARGET_IBT))
48148 return false;
48150 if (CALL_P (insn))
48152 rtx call = get_call_rtx_from (insn);
48153 gcc_assert (call != NULL_RTX);
48154 rtx addr = XEXP (call, 0);
48156 /* Do not emit 'notrack' if it's not an indirect call. */
48157 if (MEM_P (addr)
48158 && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
48159 return false;
48160 else
48161 return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
48164 if (JUMP_P (insn) && !flag_cet_switch)
48166 rtx target = JUMP_LABEL (insn);
48167 if (target == NULL_RTX || ANY_RETURN_P (target))
48168 return false;
48170 /* Check the jump is a switch table. */
48171 rtx_insn *label = as_a<rtx_insn *> (target);
48172 rtx_insn *table = next_insn (label);
48173 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
48174 return false;
48175 else
48176 return true;
48178 return false;
48181 /* Calculate integer abs() using only SSE2 instructions. */
48183 void
48184 ix86_expand_sse2_abs (rtx target, rtx input)
48186 machine_mode mode = GET_MODE (target);
48187 rtx tmp0, tmp1, x;
48189 switch (mode)
48191 /* For 32-bit signed integer X, the best way to calculate the absolute
48192 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
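/* As a worked example of the identity above, take W = 32 and X = -5:
   the arithmetic shift gives S = -1, X ^ S = 4, and 4 - (-1) = 5.
   For non-negative X the shift gives S = 0 and X is returned unchanged.  */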
48193 case E_V4SImode:
48194 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
48195 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
48196 NULL, 0, OPTAB_DIRECT);
48197 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
48198 NULL, 0, OPTAB_DIRECT);
48199 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
48200 target, 0, OPTAB_DIRECT);
48201 break;
48203 /* For 16-bit signed integer X, the best way to calculate the absolute
48204 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
48205 case E_V8HImode:
48206 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48208 x = expand_simple_binop (mode, SMAX, tmp0, input,
48209 target, 0, OPTAB_DIRECT);
48210 break;
48212 /* For 8-bit signed integer X, the best way to calculate the absolute
48213 value of X is min ((unsigned char) X, (unsigned char) (-X)),
48214 as SSE2 provides the PMINUB insn. */
48215 case E_V16QImode:
48216 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48218 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
48219 target, 0, OPTAB_DIRECT);
48220 break;
48222 default:
48223 gcc_unreachable ();
48226 if (x != target)
48227 emit_move_insn (target, x);
48230 /* Expand an extract from a vector register through pextr insn.
48231 Return true if successful. */
48233 bool
48234 ix86_expand_pextr (rtx *operands)
48236 rtx dst = operands[0];
48237 rtx src = operands[1];
48239 unsigned int size = INTVAL (operands[2]);
48240 unsigned int pos = INTVAL (operands[3]);
48242 if (SUBREG_P (dst))
48244 /* Reject non-lowpart subregs. */
48245 if (SUBREG_BYTE (dst) > 0)
48246 return false;
48247 dst = SUBREG_REG (dst);
48250 if (SUBREG_P (src))
48252 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
48253 src = SUBREG_REG (src);
48256 switch (GET_MODE (src))
48258 case E_V16QImode:
48259 case E_V8HImode:
48260 case E_V4SImode:
48261 case E_V2DImode:
48262 case E_V1TImode:
48263 case E_TImode:
48265 machine_mode srcmode, dstmode;
48266 rtx d, pat;
48268 if (!int_mode_for_size (size, 0).exists (&dstmode))
48269 return false;
48271 switch (dstmode)
48273 case E_QImode:
48274 if (!TARGET_SSE4_1)
48275 return false;
48276 srcmode = V16QImode;
48277 break;
48279 case E_HImode:
48280 if (!TARGET_SSE2)
48281 return false;
48282 srcmode = V8HImode;
48283 break;
48285 case E_SImode:
48286 if (!TARGET_SSE4_1)
48287 return false;
48288 srcmode = V4SImode;
48289 break;
48291 case E_DImode:
48292 gcc_assert (TARGET_64BIT);
48293 if (!TARGET_SSE4_1)
48294 return false;
48295 srcmode = V2DImode;
48296 break;
48298 default:
48299 return false;
48302 /* Reject extractions from misaligned positions. */
48303 if (pos & (size-1))
48304 return false;
48306 if (GET_MODE (dst) == dstmode)
48307 d = dst;
48308 else
48309 d = gen_reg_rtx (dstmode);
48311 /* Construct insn pattern. */
48312 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
48313 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
48315 /* Let the rtl optimizers know about the zero extension performed. */
48316 if (dstmode == QImode || dstmode == HImode)
48318 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
48319 d = gen_lowpart (SImode, d);
48322 emit_insn (gen_rtx_SET (d, pat));
48324 if (d != dst)
48325 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48326 return true;
48329 default:
48330 return false;
48334 /* Expand an insert into a vector register through pinsr insn.
48335 Return true if successful. */
48337 bool
48338 ix86_expand_pinsr (rtx *operands)
48340 rtx dst = operands[0];
48341 rtx src = operands[3];
48343 unsigned int size = INTVAL (operands[1]);
48344 unsigned int pos = INTVAL (operands[2]);
48346 if (SUBREG_P (dst))
48348 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
48349 dst = SUBREG_REG (dst);
48352 switch (GET_MODE (dst))
48354 case E_V16QImode:
48355 case E_V8HImode:
48356 case E_V4SImode:
48357 case E_V2DImode:
48358 case E_V1TImode:
48359 case E_TImode:
48361 machine_mode srcmode, dstmode;
48362 rtx (*pinsr)(rtx, rtx, rtx, rtx);
48363 rtx d;
48365 if (!int_mode_for_size (size, 0).exists (&srcmode))
48366 return false;
48368 switch (srcmode)
48370 case E_QImode:
48371 if (!TARGET_SSE4_1)
48372 return false;
48373 dstmode = V16QImode;
48374 pinsr = gen_sse4_1_pinsrb;
48375 break;
48377 case E_HImode:
48378 if (!TARGET_SSE2)
48379 return false;
48380 dstmode = V8HImode;
48381 pinsr = gen_sse2_pinsrw;
48382 break;
48384 case E_SImode:
48385 if (!TARGET_SSE4_1)
48386 return false;
48387 dstmode = V4SImode;
48388 pinsr = gen_sse4_1_pinsrd;
48389 break;
48391 case E_DImode:
48392 gcc_assert (TARGET_64BIT);
48393 if (!TARGET_SSE4_1)
48394 return false;
48395 dstmode = V2DImode;
48396 pinsr = gen_sse4_1_pinsrq;
48397 break;
48399 default:
48400 return false;
48403 /* Reject insertions to misaligned positions. */
48404 if (pos & (size-1))
48405 return false;
48407 if (SUBREG_P (src))
48409 unsigned int srcpos = SUBREG_BYTE (src);
48411 if (srcpos > 0)
48413 rtx extr_ops[4];
48415 extr_ops[0] = gen_reg_rtx (srcmode);
48416 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
48417 extr_ops[2] = GEN_INT (size);
48418 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
48420 if (!ix86_expand_pextr (extr_ops))
48421 return false;
48423 src = extr_ops[0];
48425 else
48426 src = gen_lowpart (srcmode, SUBREG_REG (src));
48429 if (GET_MODE (dst) == dstmode)
48430 d = dst;
48431 else
48432 d = gen_reg_rtx (dstmode);
48434 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
48435 gen_lowpart (srcmode, src),
48436 GEN_INT (1 << (pos / size))));
48437 if (d != dst)
48438 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48439 return true;
48442 default:
48443 return false;
48447 /* This function returns the calling-ABI-specific va_list type node.
48448 It returns the va_list type specific to FNDECL. */
48450 static tree
48451 ix86_fn_abi_va_list (tree fndecl)
48453 if (!TARGET_64BIT)
48454 return va_list_type_node;
48455 gcc_assert (fndecl != NULL_TREE);
48457 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
48458 return ms_va_list_type_node;
48459 else
48460 return sysv_va_list_type_node;
48463 /* Returns the canonical va_list type specified by TYPE. If there
48464 is no valid TYPE provided, it returns NULL_TREE. */
48466 static tree
48467 ix86_canonical_va_list_type (tree type)
48469 if (TARGET_64BIT)
48471 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
48472 return ms_va_list_type_node;
48474 if ((TREE_CODE (type) == ARRAY_TYPE
48475 && integer_zerop (array_type_nelts (type)))
48476 || POINTER_TYPE_P (type))
48478 tree elem_type = TREE_TYPE (type);
48479 if (TREE_CODE (elem_type) == RECORD_TYPE
48480 && lookup_attribute ("sysv_abi va_list",
48481 TYPE_ATTRIBUTES (elem_type)))
48482 return sysv_va_list_type_node;
48485 return NULL_TREE;
48488 return std_canonical_va_list_type (type);
48491 /* Iterate through the target-specific builtin types for va_list.
48492 IDX denotes the iterator, *PTREE is set to the result type of
48493 the va_list builtin, and *PNAME to its internal name.
48494 Returns zero if there is no element for this index, otherwise
48495 IDX should be increased upon the next call.
48496 Note, do not iterate a base builtin's name like __builtin_va_list.
48497 Used from c_common_nodes_and_builtins. */
48499 static int
48500 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
48502 if (TARGET_64BIT)
48504 switch (idx)
48506 default:
48507 break;
48509 case 0:
48510 *ptree = ms_va_list_type_node;
48511 *pname = "__builtin_ms_va_list";
48512 return 1;
48514 case 1:
48515 *ptree = sysv_va_list_type_node;
48516 *pname = "__builtin_sysv_va_list";
48517 return 1;
48521 return 0;
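/* A usage sketch of the builtins enumerated above (assuming the
   __builtin_ms_va_* builtins documented for 64-bit x86; the function name
   is illustrative):

     int __attribute__ ((ms_abi))
     msabi_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/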
48524 #undef TARGET_SCHED_DISPATCH
48525 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
48526 #undef TARGET_SCHED_DISPATCH_DO
48527 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
48528 #undef TARGET_SCHED_REASSOCIATION_WIDTH
48529 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
48530 #undef TARGET_SCHED_REORDER
48531 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
48532 #undef TARGET_SCHED_ADJUST_PRIORITY
48533 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
48534 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
48535 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
48536 ix86_dependencies_evaluation_hook
48539 /* Implementation of reassociation_width target hook used by
48540 reassoc phase to identify parallelism level in reassociated
48541 tree. The statement's tree_code is passed in OP. The type of the
48542 arguments is passed in MODE. */
48544 static int
48545 ix86_reassociation_width (unsigned int op, machine_mode mode)
48547 int width = 1;
48548 /* Vector part. */
48549 if (VECTOR_MODE_P (mode))
48551 int div = 1;
48552 if (INTEGRAL_MODE_P (mode))
48553 width = ix86_cost->reassoc_vec_int;
48554 else if (FLOAT_MODE_P (mode))
48555 width = ix86_cost->reassoc_vec_fp;
48557 if (width == 1)
48558 return 1;
48560 /* Integer vector instructions execute in the FP unit
48561 and can execute 3 additions and one multiplication per cycle. */
48562 if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
48563 && op != PLUS && op != MINUS)
48564 return 1;
48566 /* Account for targets that split wide vectors into multiple parts. */
48567 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
48568 div = GET_MODE_BITSIZE (mode) / 128;
48569 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
48570 div = GET_MODE_BITSIZE (mode) / 64;
48571 width = (width + div - 1) / div;
48573 /* Scalar part. */
48574 else if (INTEGRAL_MODE_P (mode))
48575 width = ix86_cost->reassoc_int;
48576 else if (FLOAT_MODE_P (mode))
48577 width = ix86_cost->reassoc_fp;
48579 /* Avoid using too many registers in 32bit mode. */
48580 if (!TARGET_64BIT && width > 2)
48581 width = 2;
48582 return width;
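/* For example, with a cost table whose reassoc_vec_fp is 4 and
   TARGET_AVX128_OPTIMAL set, a 256-bit FP vector mode gets div = 256 / 128
   = 2 and therefore a reported width of (4 + 2 - 1) / 2 = 2 (the values are
   illustrative, not taken from a particular tuning).  */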
48585 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
48586 place emms and femms instructions. */
48588 static machine_mode
48589 ix86_preferred_simd_mode (scalar_mode mode)
48591 if (!TARGET_SSE)
48592 return word_mode;
48594 switch (mode)
48596 case E_QImode:
48597 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48598 return V64QImode;
48599 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48600 return V32QImode;
48601 else
48602 return V16QImode;
48604 case E_HImode:
48605 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48606 return V32HImode;
48607 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48608 return V16HImode;
48609 else
48610 return V8HImode;
48612 case E_SImode:
48613 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48614 return V16SImode;
48615 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48616 return V8SImode;
48617 else
48618 return V4SImode;
48620 case E_DImode:
48621 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48622 return V8DImode;
48623 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48624 return V4DImode;
48625 else
48626 return V2DImode;
48628 case E_SFmode:
48629 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48630 return V16SFmode;
48631 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48632 return V8SFmode;
48633 else
48634 return V4SFmode;
48636 case E_DFmode:
48637 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48638 return V8DFmode;
48639 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48640 return V4DFmode;
48641 else if (TARGET_SSE2)
48642 return V2DFmode;
48643 /* FALLTHRU */
48645 default:
48646 return word_mode;
48650 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
48651 vectors. If AVX512F is enabled then try vectorizing with 512bit,
48652 256bit and 128bit vectors. */
48654 static unsigned int
48655 ix86_autovectorize_vector_sizes (void)
48657 unsigned int bytesizes = 0;
48659 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48660 bytesizes |= (64 | 32 | 16);
48661 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48662 bytesizes |= (32 | 16);
48664 return bytesizes;
48667 /* Implementation of targetm.vectorize.get_mask_mode. */
48669 static opt_machine_mode
48670 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
48672 unsigned elem_size = vector_size / nunits;
48674 /* Scalar mask case. */
48675 if ((TARGET_AVX512F && vector_size == 64)
48676 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
48678 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
48679 return smallest_int_mode_for_size (nunits);
48682 scalar_int_mode elem_mode
48683 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
48685 gcc_assert (elem_size * nunits == vector_size);
48687 return mode_for_vector (elem_mode, nunits);
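/* For example, a 64-byte V16SImode vector on AVX512F takes the scalar-mask
   path and gets the smallest integer mode holding 16 bits, i.e. HImode,
   while a 16-byte V4SFmode vector without AVX512VL falls through to the
   vector-mask path and gets V4SImode.  */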
48692 /* Return class of registers which could be used for pseudo of MODE
48693 and of class RCLASS for spilling instead of memory. Return NO_REGS
48694 if it is not possible or non-profitable. */
48696 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
48698 static reg_class_t
48699 ix86_spill_class (reg_class_t rclass, machine_mode mode)
48701 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
48702 && TARGET_SSE2
48703 && TARGET_INTER_UNIT_MOVES_TO_VEC
48704 && TARGET_INTER_UNIT_MOVES_FROM_VEC
48705 && (mode == SImode || (TARGET_64BIT && mode == DImode))
48706 && INTEGER_CLASS_P (rclass))
48707 return ALL_SSE_REGS;
48708 return NO_REGS;
48711 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
48712 but returns a lower bound. */
48714 static unsigned int
48715 ix86_max_noce_ifcvt_seq_cost (edge e)
48717 bool predictable_p = predictable_edge_p (e);
48719 enum compiler_param param
48720 = (predictable_p
48721 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
48722 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
48724 /* If we have a parameter set, use that, otherwise take a guess using
48725 BRANCH_COST. */
48726 if (global_options_set.x_param_values[param])
48727 return PARAM_VALUE (param);
48728 else
48729 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
48732 /* Return true if SEQ is a good candidate as a replacement for the
48733 if-convertible sequence described in IF_INFO. */
48735 static bool
48736 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
48738 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
48740 int cmov_cnt = 0;
48741 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
48742 Maybe we should allow even more conditional moves as long as they
48743 are used far enough not to stall the CPU, or also consider
48744 IF_INFO->TEST_BB succ edge probabilities. */
48745 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
48747 rtx set = single_set (insn);
48748 if (!set)
48749 continue;
48750 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
48751 continue;
48752 rtx src = SET_SRC (set);
48753 machine_mode mode = GET_MODE (src);
48754 if (GET_MODE_CLASS (mode) != MODE_INT
48755 && GET_MODE_CLASS (mode) != MODE_FLOAT)
48756 continue;
48757 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
48758 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
48759 continue;
48760 /* insn is CMOV or FCMOV. */
48761 if (++cmov_cnt > 1)
48762 return false;
48765 return default_noce_conversion_profitable_p (seq, if_info);
48768 /* Implement targetm.vectorize.init_cost. */
48770 static void *
48771 ix86_init_cost (struct loop *)
48773 unsigned *cost = XNEWVEC (unsigned, 3);
48774 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
48775 return cost;
48778 /* Implement targetm.vectorize.add_stmt_cost. */
48780 static unsigned
48781 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
48782 struct _stmt_vec_info *stmt_info, int misalign,
48783 enum vect_cost_model_location where)
48785 unsigned *cost = (unsigned *) data;
48786 unsigned retval = 0;
48788 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
48789 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
48791 /* Penalize DFmode vector operations for Bonnell. */
48792 if (TARGET_BONNELL && kind == vector_stmt
48793 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
48794 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
48796 /* Statements in an inner loop relative to the loop being
48797 vectorized are weighted more heavily. The value here is
48798 arbitrary and could potentially be improved with analysis. */
48799 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
48800 count *= 50; /* FIXME. */
48802 retval = (unsigned) (count * stmt_cost);
48804 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
48805 for Silvermont, as it has an out-of-order integer pipeline and can execute
48806 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
48807 if ((TARGET_SILVERMONT || TARGET_INTEL)
48808 && stmt_info && stmt_info->stmt)
48810 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
48811 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
48812 retval = (retval * 17) / 10;
48815 cost[where] += retval;
48817 return retval;
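/* As an illustration of how the factors above compose: a DFmode vector_stmt
   inside an inner loop on Bonnell starts from its
   ix86_builtin_vectorization_cost value, has its cost multiplied by 5 for
   the DFmode penalty and its count by 50 for the inner-loop weighting, so a
   single such statement can dominate the body cost.  */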
48820 /* Implement targetm.vectorize.finish_cost. */
48822 static void
48823 ix86_finish_cost (void *data, unsigned *prologue_cost,
48824 unsigned *body_cost, unsigned *epilogue_cost)
48826 unsigned *cost = (unsigned *) data;
48827 *prologue_cost = cost[vect_prologue];
48828 *body_cost = cost[vect_body];
48829 *epilogue_cost = cost[vect_epilogue];
48832 /* Implement targetm.vectorize.destroy_cost_data. */
48834 static void
48835 ix86_destroy_cost_data (void *data)
48837 free (data);
48840 /* Validate target specific memory model bits in VAL. */
48842 static unsigned HOST_WIDE_INT
48843 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
48845 enum memmodel model = memmodel_from_int (val);
48846 bool strong;
48848 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
48849 |MEMMODEL_MASK)
48850 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
48852 warning (OPT_Winvalid_memory_model,
48853 "Unknown architecture specific memory model");
48854 return MEMMODEL_SEQ_CST;
48856 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
48857 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
48859 warning (OPT_Winvalid_memory_model,
48860 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
48861 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
48863 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
48865 warning (OPT_Winvalid_memory_model,
48866 "HLE_RELEASE not used with RELEASE or stronger memory model");
48867 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
48869 return val;
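/* A usage sketch for the HLE bits checked above (assuming the
   __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros GCC predefines on x86):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
*/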
48872 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
48873 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
48874 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
48875 or number of vecsize_mangle variants that should be emitted. */
48877 static int
48878 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
48879 struct cgraph_simd_clone *clonei,
48880 tree base_type, int num)
48882 int ret = 1;
48884 if (clonei->simdlen
48885 && (clonei->simdlen < 2
48886 || clonei->simdlen > 1024
48887 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
48889 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48890 "unsupported simdlen %d", clonei->simdlen);
48891 return 0;
48894 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
48895 if (TREE_CODE (ret_type) != VOID_TYPE)
48896 switch (TYPE_MODE (ret_type))
48898 case E_QImode:
48899 case E_HImode:
48900 case E_SImode:
48901 case E_DImode:
48902 case E_SFmode:
48903 case E_DFmode:
48904 /* case E_SCmode: */
48905 /* case E_DCmode: */
48906 break;
48907 default:
48908 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48909 "unsupported return type %qT for simd\n", ret_type);
48910 return 0;
48913 tree t;
48914 int i;
48916 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
48917 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
48918 switch (TYPE_MODE (TREE_TYPE (t)))
48920 case E_QImode:
48921 case E_HImode:
48922 case E_SImode:
48923 case E_DImode:
48924 case E_SFmode:
48925 case E_DFmode:
48926 /* case E_SCmode: */
48927 /* case E_DCmode: */
48928 break;
48929 default:
48930 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48931 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
48932 return 0;
48935 if (clonei->cilk_elemental)
48937 /* Parse the processor clause here. If it is not present, default to 'b'. */
48938 clonei->vecsize_mangle = 'b';
48940 else if (!TREE_PUBLIC (node->decl))
48942 /* If the function isn't exported, we can pick up just one ISA
48943 for the clones. */
48944 if (TARGET_AVX512F)
48945 clonei->vecsize_mangle = 'e';
48946 else if (TARGET_AVX2)
48947 clonei->vecsize_mangle = 'd';
48948 else if (TARGET_AVX)
48949 clonei->vecsize_mangle = 'c';
48950 else
48951 clonei->vecsize_mangle = 'b';
48952 ret = 1;
48954 else
48956 clonei->vecsize_mangle = "bcde"[num];
48957 ret = 4;
48959 clonei->mask_mode = VOIDmode;
48960 switch (clonei->vecsize_mangle)
48962 case 'b':
48963 clonei->vecsize_int = 128;
48964 clonei->vecsize_float = 128;
48965 break;
48966 case 'c':
48967 clonei->vecsize_int = 128;
48968 clonei->vecsize_float = 256;
48969 break;
48970 case 'd':
48971 clonei->vecsize_int = 256;
48972 clonei->vecsize_float = 256;
48973 break;
48974 case 'e':
48975 clonei->vecsize_int = 512;
48976 clonei->vecsize_float = 512;
48977 if (TYPE_MODE (base_type) == QImode)
48978 clonei->mask_mode = DImode;
48979 else
48980 clonei->mask_mode = SImode;
48981 break;
48983 if (clonei->simdlen == 0)
48985 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
48986 clonei->simdlen = clonei->vecsize_int;
48987 else
48988 clonei->simdlen = clonei->vecsize_float;
48989 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
48991 else if (clonei->simdlen > 16)
48993 /* For compatibility with ICC, use the same upper bounds
48994 for simdlen. In particular, for CTYPE below, use the return type,
48995 unless the function returns void, in which case use the characteristic
48996 type. If it is possible for the given SIMDLEN to pass a CTYPE value
48997 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
48998 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
48999 emit the corresponding clone. */
49000 tree ctype = ret_type;
49001 if (TREE_CODE (ret_type) == VOID_TYPE)
49002 ctype = base_type;
49003 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
49004 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
49005 cnt /= clonei->vecsize_int;
49006 else
49007 cnt /= clonei->vecsize_float;
49008 if (cnt > (TARGET_64BIT ? 16 : 8))
49010 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49011 "unsupported simdlen %d", clonei->simdlen);
49012 return 0;
49015 return ret;
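/* For example, a non-exported clone of a function whose characteristic type
   is double, compiled for AVX2: vecsize_mangle is 'd', vecsize_float is 256,
   and with simdlen initially 0 the code above derives
   simdlen = 256 / 64 = 4.  */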
49018 /* Add target attribute to SIMD clone NODE if needed. */
49020 static void
49021 ix86_simd_clone_adjust (struct cgraph_node *node)
49023 const char *str = NULL;
49024 gcc_assert (node->decl == cfun->decl);
49025 switch (node->simdclone->vecsize_mangle)
49027 case 'b':
49028 if (!TARGET_SSE2)
49029 str = "sse2";
49030 break;
49031 case 'c':
49032 if (!TARGET_AVX)
49033 str = "avx";
49034 break;
49035 case 'd':
49036 if (!TARGET_AVX2)
49037 str = "avx2";
49038 break;
49039 case 'e':
49040 if (!TARGET_AVX512F)
49041 str = "avx512f";
49042 break;
49043 default:
49044 gcc_unreachable ();
49046 if (str == NULL)
49047 return;
49048 push_cfun (NULL);
49049 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
49050 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
49051 gcc_assert (ok);
49052 pop_cfun ();
49053 ix86_reset_previous_fndecl ();
49054 ix86_set_current_function (node->decl);
49057 /* If SIMD clone NODE can't be used in a vectorized loop
49058 in current function, return -1, otherwise return a badness of using it
49059 (0 if it is most desirable from vecsize_mangle point of view, 1
49060 slightly less desirable, etc.). */
49062 static int
49063 ix86_simd_clone_usable (struct cgraph_node *node)
49065 switch (node->simdclone->vecsize_mangle)
49067 case 'b':
49068 if (!TARGET_SSE2)
49069 return -1;
49070 if (!TARGET_AVX)
49071 return 0;
49072 return TARGET_AVX2 ? 2 : 1;
49073 case 'c':
49074 if (!TARGET_AVX)
49075 return -1;
49076 return TARGET_AVX2 ? 1 : 0;
49077 case 'd':
49078 if (!TARGET_AVX2)
49079 return -1;
49080 return 0;
49081 case 'e':
49082 if (!TARGET_AVX512F)
49083 return -1;
49084 return 0;
49085 default:
49086 gcc_unreachable ();
49090 /* This function adjusts the unroll factor based on
49091 the hardware capabilities. For example, bdver3 has
49092 a loop buffer which makes unrolling of smaller
49093 loops less important. This function decides the
49094 unroll factor using the number of memory references
49095 (a value of 32 is used) as a heuristic. */
49097 static unsigned
49098 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
49100 basic_block *bbs;
49101 rtx_insn *insn;
49102 unsigned i;
49103 unsigned mem_count = 0;
49105 if (!TARGET_ADJUST_UNROLL)
49106 return nunroll;
49108 /* Count the number of memory references within the loop body.
49109 This value determines the unrolling factor for bdver3 and bdver4
49110 architectures. */
49111 subrtx_iterator::array_type array;
49112 bbs = get_loop_body (loop);
49113 for (i = 0; i < loop->num_nodes; i++)
49114 FOR_BB_INSNS (bbs[i], insn)
49115 if (NONDEBUG_INSN_P (insn))
49116 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
49117 if (const_rtx x = *iter)
49118 if (MEM_P (x))
49120 machine_mode mode = GET_MODE (x);
49121 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
49122 if (n_words > 4)
49123 mem_count += 2;
49124 else
49125 mem_count += 1;
49127 free (bbs);
49129 if (mem_count && mem_count <= 32)
49130 return 32 / mem_count;
49132 return nunroll;
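/* For example, a loop body with 8 counted memory references yields an
   adjusted factor of 32 / 8 = 4 on bdver3/bdver4, while a body with more
   than 32 references leaves the requested NUNROLL unchanged.  */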
49136 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
49138 static bool
49139 ix86_float_exceptions_rounding_supported_p (void)
49141 /* For x87 floating point with standard excess precision handling,
49142 there is no adddf3 pattern (since x87 floating point only has
49143 XFmode operations) so the default hook implementation gets this
49144 wrong. */
49145 return TARGET_80387 || TARGET_SSE_MATH;
49148 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
49150 static void
49151 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
49153 if (!TARGET_80387 && !TARGET_SSE_MATH)
49154 return;
49155 tree exceptions_var = create_tmp_var_raw (integer_type_node);
49156 if (TARGET_80387)
49158 tree fenv_index_type = build_index_type (size_int (6));
49159 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
49160 tree fenv_var = create_tmp_var_raw (fenv_type);
49161 TREE_ADDRESSABLE (fenv_var) = 1;
49162 tree fenv_ptr = build_pointer_type (fenv_type);
49163 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
49164 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
49165 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
49166 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
49167 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
49168 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
49169 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
49170 tree hold_fnclex = build_call_expr (fnclex, 0);
49171 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
49172 NULL_TREE, NULL_TREE);
49173 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
49174 hold_fnclex);
49175 *clear = build_call_expr (fnclex, 0);
49176 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
49177 tree fnstsw_call = build_call_expr (fnstsw, 0);
49178 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
49179 sw_var, fnstsw_call);
49180 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
49181 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
49182 exceptions_var, exceptions_x87);
49183 *update = build2 (COMPOUND_EXPR, integer_type_node,
49184 sw_mod, update_mod);
49185 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
49186 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
49188 if (TARGET_SSE_MATH)
49190 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
49191 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
49192 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
49193 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
49194 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
49195 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
49196 mxcsr_orig_var, stmxcsr_hold_call);
49197 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
49198 mxcsr_orig_var,
49199 build_int_cst (unsigned_type_node, 0x1f80));
49200 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
49201 build_int_cst (unsigned_type_node, 0xffffffc0));
49202 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
49203 mxcsr_mod_var, hold_mod_val);
49204 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49205 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
49206 hold_assign_orig, hold_assign_mod);
49207 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
49208 ldmxcsr_hold_call);
49209 if (*hold)
49210 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
49211 else
49212 *hold = hold_all;
49213 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49214 if (*clear)
49215 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
49216 ldmxcsr_clear_call);
49217 else
49218 *clear = ldmxcsr_clear_call;
49219 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
49220 tree exceptions_sse = fold_convert (integer_type_node,
49221 stxmcsr_update_call);
49222 if (*update)
49224 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
49225 exceptions_var, exceptions_sse);
49226 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
49227 exceptions_var, exceptions_mod);
49228 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
49229 exceptions_assign);
49231 else
49232 *update = build2 (MODIFY_EXPR, integer_type_node,
49233 exceptions_var, exceptions_sse);
49234 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
49235 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49236 ldmxcsr_update_call);
49238 tree atomic_feraiseexcept
49239 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
49240 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
49241 1, exceptions_var);
49242 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49243 atomic_feraiseexcept_call);
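/* Roughly, for the combined 80387 + SSE case the trees built above mean:
   HOLD saves the x87 environment and the original MXCSR, clears pending x87
   exceptions and loads an MXCSR copy with exceptions masked (| 0x1f80) and
   status flags cleared (& 0xffffffc0); CLEAR re-clears both units; UPDATE
   collects the x87 status word and MXCSR flags into one integer, restores
   both environments, and raises the result via __atomic_feraiseexcept.  */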
49246 /* Return mode to be used for bounds or VOIDmode
49247 if bounds are not supported. */
49249 static machine_mode
49250 ix86_mpx_bound_mode ()
49252 /* Do not support pointer checker if MPX
49253 is not enabled. */
49254 if (!TARGET_MPX)
49256 if (flag_check_pointer_bounds)
49257 warning (0, "Pointer Checker requires MPX support on this target."
49258 " Use -mmpx options to enable MPX.");
49259 return VOIDmode;
49262 return BNDmode;
49265 /* Return constant used to statically initialize constant bounds.
49267 This function is used to create special bound values. For now
49268 only INIT bounds and NONE bounds are expected. More special
49269 values may be added later. */
49271 static tree
49272 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
49274 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
49275 : build_zero_cst (pointer_sized_int_node);
49276 tree high = ub ? build_zero_cst (pointer_sized_int_node)
49277 : build_minus_one_cst (pointer_sized_int_node);
49279 /* This function is supposed to be used to create INIT and
49280 NONE bounds only. */
49281 gcc_assert ((lb == 0 && ub == -1)
49282 || (lb == -1 && ub == 0));
49284 return build_complex (NULL, low, high);
49287 /* Generate a list of statements STMTS to initialize pointer bounds
49288 variable VAR with bounds LB and UB. Return the number of generated
49289 statements. */
49291 static int
49292 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
49294 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
49295 tree lhs, modify, var_p;
49297 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
49298 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
49300 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
49301 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
49302 append_to_statement_list (modify, stmts);
49304 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
49305 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
49306 TYPE_SIZE_UNIT (pointer_sized_int_node)));
49307 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
49308 append_to_statement_list (modify, stmts);
49310 return 2;
49313 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
49314 /* For i386, a common symbol is local only for non-PIE binaries. For
49315 x86-64, a common symbol is local only for non-PIE binaries or when the
49316 linker supports copy relocs in PIE binaries. */
49318 static bool
49319 ix86_binds_local_p (const_tree exp)
49321 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
49322 (!flag_pic
49323 || (TARGET_64BIT
49324 && HAVE_LD_PIE_COPYRELOC != 0)));
49326 #endif
49328 /* If MEM is in the form of [base+offset], extract the two parts
49329 of the address into BASE and OFFSET; otherwise return false. */
49331 static bool
49332 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
49334 rtx addr;
49336 gcc_assert (MEM_P (mem));
49338 addr = XEXP (mem, 0);
49340 if (GET_CODE (addr) == CONST)
49341 addr = XEXP (addr, 0);
49343 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
49345 *base = addr;
49346 *offset = const0_rtx;
49347 return true;
49350 if (GET_CODE (addr) == PLUS
49351 && (REG_P (XEXP (addr, 0))
49352 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49353 && CONST_INT_P (XEXP (addr, 1)))
49355 *base = XEXP (addr, 0);
49356 *offset = XEXP (addr, 1);
49357 return true;
49360 return false;
49363 /* Given OPERANDS of consecutive load/store, check if we can merge
49364 them into a move-multiple. LOAD is true if they are load instructions.
49365 MODE is the mode of the memory operands. */
49367 bool
49368 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
49369 machine_mode mode)
49371 HOST_WIDE_INT offval_1, offval_2, msize;
49372 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
49374 if (load)
49376 mem_1 = operands[1];
49377 mem_2 = operands[3];
49378 reg_1 = operands[0];
49379 reg_2 = operands[2];
49381 else
49383 mem_1 = operands[0];
49384 mem_2 = operands[2];
49385 reg_1 = operands[1];
49386 reg_2 = operands[3];
49389 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
49391 if (REGNO (reg_1) != REGNO (reg_2))
49392 return false;
49394 /* Check if the addresses are in the form of [base+offset]. */
49395 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
49396 return false;
49397 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
49398 return false;
49400 /* Check if the bases are the same. */
49401 if (!rtx_equal_p (base_1, base_2))
49402 return false;
49404 offval_1 = INTVAL (offset_1);
49405 offval_2 = INTVAL (offset_2);
49406 msize = GET_MODE_SIZE (mode);
49407 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
49408 if (offval_1 + msize != offval_2)
49409 return false;
49411 return true;
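/* For example, with DImode operands (msize == 8), a pair loading reg:DI 0
   from [base + 8] and from [base + 16] qualifies: same destination
   register, same base, and offset_1 + msize == offset_2, so mem_1 is
   adjacent to and below mem_2.  */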
49414 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
49416 static bool
49417 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
49418 optimization_type opt_type)
49420 switch (op)
49422 case asin_optab:
49423 case acos_optab:
49424 case log1p_optab:
49425 case exp_optab:
49426 case exp10_optab:
49427 case exp2_optab:
49428 case expm1_optab:
49429 case ldexp_optab:
49430 case scalb_optab:
49431 case round_optab:
49432 return opt_type == OPTIMIZE_FOR_SPEED;
49434 case rint_optab:
49435 if (SSE_FLOAT_MODE_P (mode1)
49436 && TARGET_SSE_MATH
49437 && !flag_trapping_math
49438 && !TARGET_SSE4_1)
49439 return opt_type == OPTIMIZE_FOR_SPEED;
49440 return true;
49442 case floor_optab:
49443 case ceil_optab:
49444 case btrunc_optab:
49445 if (SSE_FLOAT_MODE_P (mode1)
49446 && TARGET_SSE_MATH
49447 && !flag_trapping_math
49448 && TARGET_SSE4_1)
49449 return true;
49450 return opt_type == OPTIMIZE_FOR_SPEED;
49452 case rsqrt_optab:
49453 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
49455 default:
49456 return true;
49460 /* Address space support.
49462 This is not "far pointers" in the 16-bit sense, but an easy way
49463 to use %fs and %gs segment prefixes. Therefore:
49465 (a) All address spaces have the same modes,
49466 (b) All address spaces have the same address forms,
49467 (c) While %fs and %gs are technically subsets of the generic
49468 address space, they are probably not subsets of each other.
49469 (d) Since we have no access to the segment base register values
49470 without resorting to a system call, we cannot convert a
49471 non-default address space to a default address space.
49472 Therefore we do not claim %fs or %gs are subsets of generic.
49474 Therefore we can (mostly) use the default hooks. */
49476 /* All use of segmentation is assumed to make address 0 valid. */
49478 static bool
49479 ix86_addr_space_zero_address_valid (addr_space_t as)
49481 return as != ADDR_SPACE_GENERIC;
49484 static void
49485 ix86_init_libfuncs (void)
49487 if (TARGET_64BIT)
49489 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
49490 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
49492 else
49494 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
49495 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
49498 #if TARGET_MACHO
49499 darwin_rename_builtins ();
49500 #endif
49503 /* Generate call to __divmoddi4. */
49505 static void
49506 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
49507 rtx op0, rtx op1,
49508 rtx *quot_p, rtx *rem_p)
49510 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
49512 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
49513 mode,
49514 op0, GET_MODE (op0),
49515 op1, GET_MODE (op1),
49516 XEXP (rem, 0), Pmode);
49517 *quot_p = quot;
49518 *rem_p = rem;
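/* The emitted call corresponds to the libgcc helper; for DImode this is

     long long __divmoddi4 (long long a, long long b, long long *rem);

   which returns the quotient and stores the remainder through the third
   argument, i.e. the values bound to *QUOT_P and *REM_P above.  */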
49521 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
49522 FPU, assume that the fpcw is set to extended precision; when using
49523 only SSE, rounding is correct; when using both SSE and the FPU,
49524 the rounding precision is indeterminate, since either may be chosen
49525 apparently at random. */
49527 static enum flt_eval_method
49528 ix86_excess_precision (enum excess_precision_type type)
49530 switch (type)
49532 case EXCESS_PRECISION_TYPE_FAST:
49533 /* The fastest type to promote to will always be the native type,
49534 whether that occurs with implicit excess precision or
49535 otherwise. */
49536 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49537 case EXCESS_PRECISION_TYPE_STANDARD:
49538 case EXCESS_PRECISION_TYPE_IMPLICIT:
49539 /* Otherwise, the excess precision we want when we are
49540 in a standards compliant mode, and the implicit precision we
49541 provide would be identical were it not for the unpredictable
49542 cases. */
49543 if (!TARGET_80387)
49544 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49545 else if (!TARGET_MIX_SSE_I387)
49547 if (!TARGET_SSE_MATH)
49548 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
49549 else if (TARGET_SSE2)
49550 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49553 /* If we are in standards compliant mode, but we know we will
49554 calculate in unpredictable precision, return
49555 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
49556 excess precision if the target can't guarantee it will honor
49557 it. */
49558 return (type == EXCESS_PRECISION_TYPE_STANDARD
49559 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
49560 : FLT_EVAL_METHOD_UNPREDICTABLE);
49561 default:
49562 gcc_unreachable ();
49565 return FLT_EVAL_METHOD_UNPREDICTABLE;
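/* For example, x87-only math (-mfpmath=387) in a standards-compliant mode
   yields FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE, SSE-only math with SSE2
   yields FLT_EVAL_METHOD_PROMOTE_TO_FLOAT, and mixed -mfpmath=both falls
   through to the standard/unpredictable handling at the end of the
   switch.  */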
49568 /* Target-specific selftests. */
49570 #if CHECKING_P
49572 namespace selftest {
49574 /* Verify that hard regs are dumped as expected (in compact mode). */
49576 static void
49577 ix86_test_dumping_hard_regs ()
49579 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
49580 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
49583 /* Test dumping an insn with repeated references to the same SCRATCH,
49584 to verify the rtx_reuse code. */
49586 static void
49587 ix86_test_dumping_memory_blockage ()
49589 set_new_first_and_last_insn (NULL, NULL);
49591 rtx pat = gen_memory_blockage ();
49592 rtx_reuse_manager r;
49593 r.preprocess (pat);
49595 /* Verify that the repeated references to the SCRATCH show use of
49596 reuse IDs. The first should be prefixed with a reuse ID,
49597 and the second should be dumped as a "reuse_rtx" of that ID.
49598 The expected string assumes Pmode == DImode. */
49599 if (Pmode == DImode)
49600 ASSERT_RTL_DUMP_EQ_WITH_REUSE
49601 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
49602 " (unspec:BLK [\n"
49603 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
49604 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
49607 /* Verify loading an RTL dump; specifically a dump of copying
49608 a param on x86_64 from a hard reg into the frame.
49609 This test is target-specific since the dump contains target-specific
49610 hard reg names. */
49612 static void
49613 ix86_test_loading_dump_fragment_1 ()
49615 rtl_dump_test t (SELFTEST_LOCATION,
49616 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
49618 rtx_insn *insn = get_insn_by_uid (1);
49620 /* The block structure and indentation here is purely for
49621 readability; it mirrors the structure of the rtx. */
49622 tree mem_expr;
49624 rtx pat = PATTERN (insn);
49625 ASSERT_EQ (SET, GET_CODE (pat));
49627 rtx dest = SET_DEST (pat);
49628 ASSERT_EQ (MEM, GET_CODE (dest));
49629 /* Verify the "/c" was parsed. */
49630 ASSERT_TRUE (RTX_FLAG (dest, call));
49631 ASSERT_EQ (SImode, GET_MODE (dest));
49633 rtx addr = XEXP (dest, 0);
49634 ASSERT_EQ (PLUS, GET_CODE (addr));
49635 ASSERT_EQ (DImode, GET_MODE (addr));
49637 rtx lhs = XEXP (addr, 0);
49638 /* Verify that the "frame" REG was consolidated. */
49639 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
49642 rtx rhs = XEXP (addr, 1);
49643 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
49644 ASSERT_EQ (-4, INTVAL (rhs));
49647 /* Verify the "[1 i+0 S4 A32]" was parsed. */
49648 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
49649 /* "i" should have been handled by synthesizing a global int
49650 variable named "i". */
49651 mem_expr = MEM_EXPR (dest);
49652 ASSERT_NE (mem_expr, NULL);
49653 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
49654 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
49655 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
49656 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
49657 /* "+0". */
49658 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
49659 ASSERT_EQ (0, MEM_OFFSET (dest));
49660 /* "S4". */
49661 ASSERT_EQ (4, MEM_SIZE (dest));
49662 /* "A32. */
49663 ASSERT_EQ (32, MEM_ALIGN (dest));
49666 rtx src = SET_SRC (pat);
49667 ASSERT_EQ (REG, GET_CODE (src));
49668 ASSERT_EQ (SImode, GET_MODE (src));
49669 ASSERT_EQ (5, REGNO (src));
49670 tree reg_expr = REG_EXPR (src);
49671 /* "i" here should point to the same var as for the MEM_EXPR. */
49672 ASSERT_EQ (reg_expr, mem_expr);
49677 /* Verify that the RTL loader copes with a call_insn dump.
49678 This test is target-specific since the dump contains a target-specific
49679 hard reg name. */
49681 static void
49682 ix86_test_loading_call_insn ()
49684 /* The test dump includes register "xmm0", which requires TARGET_SSE
49685 to exist. */
49686 if (!TARGET_SSE)
49687 return;
49689 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
49691 rtx_insn *insn = get_insns ();
49692 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
49694 /* "/j". */
49695 ASSERT_TRUE (RTX_FLAG (insn, jump));
49697 rtx pat = PATTERN (insn);
49698 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
49700 /* Verify REG_NOTES. */
49702 /* "(expr_list:REG_CALL_DECL". */
49703 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
49704 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
49705 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
49707 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
49708 rtx_expr_list *note1 = note0->next ();
49709 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
49711 ASSERT_EQ (NULL, note1->next ());
49714 /* Verify CALL_INSN_FUNCTION_USAGE. */
49716 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
49717 rtx_expr_list *usage
49718 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
49719 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
49720 ASSERT_EQ (DFmode, GET_MODE (usage));
49721 ASSERT_EQ (USE, GET_CODE (usage->element ()));
49722 ASSERT_EQ (NULL, usage->next ());
49726 /* Verify that the RTL loader copes a dump from print_rtx_function.
49727 This test is target-specific since the dump contains target-specific
49728 hard reg names. */
49730 static void
49731 ix86_test_loading_full_dump ()
49733 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
49735 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
49737 rtx_insn *insn_1 = get_insn_by_uid (1);
49738 ASSERT_EQ (NOTE, GET_CODE (insn_1));
49740 rtx_insn *insn_7 = get_insn_by_uid (7);
49741 ASSERT_EQ (INSN, GET_CODE (insn_7));
49742 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
49744 rtx_insn *insn_15 = get_insn_by_uid (15);
49745 ASSERT_EQ (INSN, GET_CODE (insn_15));
49746 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
49748 /* Verify crtl->return_rtx. */
49749 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
49750 ASSERT_EQ (0, REGNO (crtl->return_rtx));
49751 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
49754 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
49755 In particular, verify that it correctly loads the 2nd operand.
49756 This test is target-specific since these are machine-specific
49757 operands (and enums). */
49759 static void
49760 ix86_test_loading_unspec ()
49762 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
49764 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
49766 ASSERT_TRUE (cfun);
49768 /* Test of an UNSPEC. */
49769 rtx_insn *insn = get_insns ();
49770 ASSERT_EQ (INSN, GET_CODE (insn));
49771 rtx set = single_set (insn);
49772 ASSERT_NE (NULL, set);
49773 rtx dst = SET_DEST (set);
49774 ASSERT_EQ (MEM, GET_CODE (dst));
49775 rtx src = SET_SRC (set);
49776 ASSERT_EQ (UNSPEC, GET_CODE (src));
49777 ASSERT_EQ (BLKmode, GET_MODE (src));
49778 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
49780 rtx v0 = XVECEXP (src, 0, 0);
49782 /* Verify that the two uses of the first SCRATCH have pointer
49783 equality. */
49784 rtx scratch_a = XEXP (dst, 0);
49785 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
49787 rtx scratch_b = XEXP (v0, 0);
49788 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
49790 ASSERT_EQ (scratch_a, scratch_b);
49792 /* Verify that the two mems are thus treated as equal. */
49793 ASSERT_TRUE (rtx_equal_p (dst, v0));
49795 /* Verify that the insn is recognized. */
49796 ASSERT_NE (-1, recog_memoized (insn));
49798 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
49799 insn = NEXT_INSN (insn);
49800 ASSERT_EQ (INSN, GET_CODE (insn));
49802 set = single_set (insn);
49803 ASSERT_NE (NULL, set);
49805 src = SET_SRC (set);
49806 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
49807 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
49810 /* Run all target-specific selftests. */
49812 static void
49813 ix86_run_selftests (void)
49815 ix86_test_dumping_hard_regs ();
49816 ix86_test_dumping_memory_blockage ();
49818 /* Various tests of loading RTL dumps, here because they contain
49819 ix86-isms (e.g. names of hard regs). */
49820 ix86_test_loading_dump_fragment_1 ();
49821 ix86_test_loading_call_insn ();
49822 ix86_test_loading_full_dump ();
49823 ix86_test_loading_unspec ();
49826 } // namespace selftest
49828 #endif /* CHECKING_P */
49830 /* Initialize the GCC target structure. */
49831 #undef TARGET_RETURN_IN_MEMORY
49832 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
49834 #undef TARGET_LEGITIMIZE_ADDRESS
49835 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
49837 #undef TARGET_ATTRIBUTE_TABLE
49838 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
49839 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
49840 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
49841 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49842 # undef TARGET_MERGE_DECL_ATTRIBUTES
49843 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
49844 #endif
49846 #undef TARGET_COMP_TYPE_ATTRIBUTES
49847 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
49849 #undef TARGET_INIT_BUILTINS
49850 #define TARGET_INIT_BUILTINS ix86_init_builtins
49851 #undef TARGET_BUILTIN_DECL
49852 #define TARGET_BUILTIN_DECL ix86_builtin_decl
49853 #undef TARGET_EXPAND_BUILTIN
49854 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
49856 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
49857 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
49858 ix86_builtin_vectorized_function
49860 #undef TARGET_VECTORIZE_BUILTIN_GATHER
49861 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
49863 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
49864 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
49866 #undef TARGET_BUILTIN_RECIPROCAL
49867 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
49869 #undef TARGET_ASM_FUNCTION_EPILOGUE
49870 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
49872 #undef TARGET_ENCODE_SECTION_INFO
49873 #ifndef SUBTARGET_ENCODE_SECTION_INFO
49874 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
49875 #else
49876 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
49877 #endif
49879 #undef TARGET_ASM_OPEN_PAREN
49880 #define TARGET_ASM_OPEN_PAREN ""
49881 #undef TARGET_ASM_CLOSE_PAREN
49882 #define TARGET_ASM_CLOSE_PAREN ""
49884 #undef TARGET_ASM_BYTE_OP
49885 #define TARGET_ASM_BYTE_OP ASM_BYTE
49887 #undef TARGET_ASM_ALIGNED_HI_OP
49888 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
49889 #undef TARGET_ASM_ALIGNED_SI_OP
49890 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
49891 #ifdef ASM_QUAD
49892 #undef TARGET_ASM_ALIGNED_DI_OP
49893 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
49894 #endif
49896 #undef TARGET_PROFILE_BEFORE_PROLOGUE
49897 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
49899 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
49900 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
49902 #undef TARGET_ASM_UNALIGNED_HI_OP
49903 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
49904 #undef TARGET_ASM_UNALIGNED_SI_OP
49905 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
49906 #undef TARGET_ASM_UNALIGNED_DI_OP
49907 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
49909 #undef TARGET_PRINT_OPERAND
49910 #define TARGET_PRINT_OPERAND ix86_print_operand
49911 #undef TARGET_PRINT_OPERAND_ADDRESS
49912 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
49913 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
49914 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
49915 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
49916 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
49918 #undef TARGET_SCHED_INIT_GLOBAL
49919 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
49920 #undef TARGET_SCHED_ADJUST_COST
49921 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
49922 #undef TARGET_SCHED_ISSUE_RATE
49923 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
49924 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
49925 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
49926 ia32_multipass_dfa_lookahead
49927 #undef TARGET_SCHED_MACRO_FUSION_P
49928 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
49929 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
49930 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
49932 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
49933 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
49935 #undef TARGET_MEMMODEL_CHECK
49936 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
49938 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
49939 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
49941 #ifdef HAVE_AS_TLS
49942 #undef TARGET_HAVE_TLS
49943 #define TARGET_HAVE_TLS true
49944 #endif
49945 #undef TARGET_CANNOT_FORCE_CONST_MEM
49946 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
49947 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
49948 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
49950 #undef TARGET_DELEGITIMIZE_ADDRESS
49951 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
49953 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
49954 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
49956 #undef TARGET_MS_BITFIELD_LAYOUT_P
49957 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
49959 #if TARGET_MACHO
49960 #undef TARGET_BINDS_LOCAL_P
49961 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
49962 #else
49963 #undef TARGET_BINDS_LOCAL_P
49964 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
49965 #endif
49966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49967 #undef TARGET_BINDS_LOCAL_P
49968 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
49969 #endif
49971 #undef TARGET_ASM_OUTPUT_MI_THUNK
49972 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
49973 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
49974 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
49976 #undef TARGET_ASM_FILE_START
49977 #define TARGET_ASM_FILE_START x86_file_start
49979 #undef TARGET_OPTION_OVERRIDE
49980 #define TARGET_OPTION_OVERRIDE ix86_option_override
49982 #undef TARGET_REGISTER_MOVE_COST
49983 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
49984 #undef TARGET_MEMORY_MOVE_COST
49985 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
49986 #undef TARGET_RTX_COSTS
49987 #define TARGET_RTX_COSTS ix86_rtx_costs
49988 #undef TARGET_ADDRESS_COST
49989 #define TARGET_ADDRESS_COST ix86_address_cost
49991 #undef TARGET_FLAGS_REGNUM
49992 #define TARGET_FLAGS_REGNUM FLAGS_REG
49993 #undef TARGET_FIXED_CONDITION_CODE_REGS
49994 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
49995 #undef TARGET_CC_MODES_COMPATIBLE
49996 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
49998 #undef TARGET_MACHINE_DEPENDENT_REORG
49999 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
50001 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
50002 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
50004 #undef TARGET_BUILD_BUILTIN_VA_LIST
50005 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
50007 #undef TARGET_FOLD_BUILTIN
50008 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
50010 #undef TARGET_GIMPLE_FOLD_BUILTIN
50011 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
50013 #undef TARGET_COMPARE_VERSION_PRIORITY
50014 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
50016 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
50017 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
50018 ix86_generate_version_dispatcher_body
50020 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
50021 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
50022 ix86_get_function_versions_dispatcher
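/* The three hooks above implement function multiversioning: ranking
   versions declared with __attribute__ ((target ("..."))) and building
   the dispatcher (an IFUNC resolver where supported) that picks a
   version at run time.  */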
50024 #undef TARGET_ENUM_VA_LIST_P
50025 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
50027 #undef TARGET_FN_ABI_VA_LIST
50028 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
50030 #undef TARGET_CANONICAL_VA_LIST_TYPE
50031 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
50033 #undef TARGET_EXPAND_BUILTIN_VA_START
50034 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
50036 #undef TARGET_MD_ASM_ADJUST
50037 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
50039 #undef TARGET_C_EXCESS_PRECISION
50040 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
50041 #undef TARGET_PROMOTE_PROTOTYPES
50042 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
50043 #undef TARGET_SETUP_INCOMING_VARARGS
50044 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
50045 #undef TARGET_MUST_PASS_IN_STACK
50046 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
50047 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
50048 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
50049 #undef TARGET_FUNCTION_ARG_ADVANCE
50050 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
50051 #undef TARGET_FUNCTION_ARG
50052 #define TARGET_FUNCTION_ARG ix86_function_arg
50053 #undef TARGET_INIT_PIC_REG
50054 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
50055 #undef TARGET_USE_PSEUDO_PIC_REG
50056 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
50057 #undef TARGET_FUNCTION_ARG_BOUNDARY
50058 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
50059 #undef TARGET_PASS_BY_REFERENCE
50060 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
50061 #undef TARGET_INTERNAL_ARG_POINTER
50062 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
50063 #undef TARGET_UPDATE_STACK_BOUNDARY
50064 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
50065 #undef TARGET_GET_DRAP_RTX
50066 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
50067 #undef TARGET_STRICT_ARGUMENT_NAMING
50068 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
50069 #undef TARGET_STATIC_CHAIN
50070 #define TARGET_STATIC_CHAIN ix86_static_chain
50071 #undef TARGET_TRAMPOLINE_INIT
50072 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
50073 #undef TARGET_RETURN_POPS_ARGS
50074 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
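/* Everything from TARGET_PROMOTE_PROTOTYPES down to
   TARGET_RETURN_POPS_ARGS wires up the calling convention and PIC
   register handling: argument promotion and passing, varargs setup,
   stack/DRAP alignment, static chains, trampolines and callee-popped
   argument bytes.  */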
50076 #undef TARGET_WARN_FUNC_RETURN
50077 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
50079 #undef TARGET_LEGITIMATE_COMBINED_INSN
50080 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
50082 #undef TARGET_ASAN_SHADOW_OFFSET
50083 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
50085 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
50086 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
50088 #undef TARGET_SCALAR_MODE_SUPPORTED_P
50089 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
50091 #undef TARGET_VECTOR_MODE_SUPPORTED_P
50092 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
50094 #undef TARGET_C_MODE_FOR_SUFFIX
50095 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
50097 #ifdef HAVE_AS_TLS
50098 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
50099 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
50100 #endif
50102 #ifdef SUBTARGET_INSERT_ATTRIBUTES
50103 #undef TARGET_INSERT_ATTRIBUTES
50104 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
50105 #endif
50107 #undef TARGET_MANGLE_TYPE
50108 #define TARGET_MANGLE_TYPE ix86_mangle_type
50110 #undef TARGET_STACK_PROTECT_GUARD
50111 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
50113 #if !TARGET_MACHO
50114 #undef TARGET_STACK_PROTECT_FAIL
50115 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
50116 #endif
50118 #undef TARGET_FUNCTION_VALUE
50119 #define TARGET_FUNCTION_VALUE ix86_function_value
50121 #undef TARGET_FUNCTION_VALUE_REGNO_P
50122 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
50124 #undef TARGET_PROMOTE_FUNCTION_MODE
50125 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
50127 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
50128 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
50130 #undef TARGET_MEMBER_TYPE_FORCES_BLK
50131 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
50133 #undef TARGET_INSTANTIATE_DECLS
50134 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
50136 #undef TARGET_SECONDARY_RELOAD
50137 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
50138 #undef TARGET_SECONDARY_MEMORY_NEEDED
50139 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
50140 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
50141 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
50143 #undef TARGET_CLASS_MAX_NREGS
50144 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
50146 #undef TARGET_PREFERRED_RELOAD_CLASS
50147 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
50148 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
50149 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
50150 #undef TARGET_CLASS_LIKELY_SPILLED_P
50151 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
50153 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
50154 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
50155 ix86_builtin_vectorization_cost
50156 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
50157 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
50158 ix86_vectorize_vec_perm_const_ok
50159 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
50160 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
50161 ix86_preferred_simd_mode
50162 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
50163 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
50164 ix86_autovectorize_vector_sizes
50165 #undef TARGET_VECTORIZE_GET_MASK_MODE
50166 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
50167 #undef TARGET_VECTORIZE_INIT_COST
50168 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
50169 #undef TARGET_VECTORIZE_ADD_STMT_COST
50170 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
50171 #undef TARGET_VECTORIZE_FINISH_COST
50172 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
50173 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
50174 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
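/* The vectorizer consults the hooks above for the preferred SIMD modes
   and for its target cost model.  Roughly (argument lists abridged),
   generic code drives the cost hooks as:

     void *data = targetm.vectorize.init_cost (loop);
     targetm.vectorize.add_stmt_cost (data, count, kind, stmt_info, ...);
     targetm.vectorize.finish_cost (data, &prologue, &body, &epilogue);
     targetm.vectorize.destroy_cost_data (data);  */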
50176 #undef TARGET_SET_CURRENT_FUNCTION
50177 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
50179 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
50180 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
50182 #undef TARGET_OPTION_SAVE
50183 #define TARGET_OPTION_SAVE ix86_function_specific_save
50185 #undef TARGET_OPTION_RESTORE
50186 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
50188 #undef TARGET_OPTION_POST_STREAM_IN
50189 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
50191 #undef TARGET_OPTION_PRINT
50192 #define TARGET_OPTION_PRINT ix86_function_specific_print
50194 #undef TARGET_OPTION_FUNCTION_VERSIONS
50195 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
50197 #undef TARGET_CAN_INLINE_P
50198 #define TARGET_CAN_INLINE_P ix86_can_inline_p
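/* These hooks back __attribute__ ((target (...))) and #pragma GCC
   target: validating the attribute, saving/restoring and printing the
   per-function ISA and option state, streaming it for LTO, and letting
   TARGET_CAN_INLINE_P reject inlining across incompatible option
   sets.  */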
50200 #undef TARGET_LEGITIMATE_ADDRESS_P
50201 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
50203 #undef TARGET_REGISTER_PRIORITY
50204 #define TARGET_REGISTER_PRIORITY ix86_register_priority
50206 #undef TARGET_REGISTER_USAGE_LEVELING_P
50207 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
50209 #undef TARGET_LEGITIMATE_CONSTANT_P
50210 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
50212 #undef TARGET_COMPUTE_FRAME_LAYOUT
50213 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
50215 #undef TARGET_FRAME_POINTER_REQUIRED
50216 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
50218 #undef TARGET_CAN_ELIMINATE
50219 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
50221 #undef TARGET_EXTRA_LIVE_ON_ENTRY
50222 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
50224 #undef TARGET_ASM_CODE_END
50225 #define TARGET_ASM_CODE_END ix86_code_end
50227 #undef TARGET_CONDITIONAL_REGISTER_USAGE
50228 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
50230 #undef TARGET_CANONICALIZE_COMPARISON
50231 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
50233 #undef TARGET_LOOP_UNROLL_ADJUST
50234 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
50236 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
50237 #undef TARGET_SPILL_CLASS
50238 #define TARGET_SPILL_CLASS ix86_spill_class
50240 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
50241 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
50242 ix86_simd_clone_compute_vecsize_and_simdlen
50244 #undef TARGET_SIMD_CLONE_ADJUST
50245 #define TARGET_SIMD_CLONE_ADJUST \
50246 ix86_simd_clone_adjust
50248 #undef TARGET_SIMD_CLONE_USABLE
50249 #define TARGET_SIMD_CLONE_USABLE \
50250 ix86_simd_clone_usable
50252 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
50253 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
50254 ix86_float_exceptions_rounding_supported_p
50256 #undef TARGET_MODE_EMIT
50257 #define TARGET_MODE_EMIT ix86_emit_mode_set
50259 #undef TARGET_MODE_NEEDED
50260 #define TARGET_MODE_NEEDED ix86_mode_needed
50262 #undef TARGET_MODE_AFTER
50263 #define TARGET_MODE_AFTER ix86_mode_after
50265 #undef TARGET_MODE_ENTRY
50266 #define TARGET_MODE_ENTRY ix86_mode_entry
50268 #undef TARGET_MODE_EXIT
50269 #define TARGET_MODE_EXIT ix86_mode_exit
50271 #undef TARGET_MODE_PRIORITY
50272 #define TARGET_MODE_PRIORITY ix86_mode_priority
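/* The TARGET_MODE_* hooks plug into the generic mode-switching pass
   (mode-switching.c).  On x86 the tracked entities include the x87
   control word and the dirty/clean state of the upper halves of the
   AVX registers: the pass asks ix86_mode_needed which state an insn
   requires, and ix86_emit_mode_set emits the transition (e.g. a
   vzeroupper) wherever the required state changes.  */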
50274 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
50275 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
50277 #undef TARGET_LOAD_BOUNDS_FOR_ARG
50278 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
50280 #undef TARGET_STORE_BOUNDS_FOR_ARG
50281 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
50283 #undef TARGET_LOAD_RETURNED_BOUNDS
50284 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
50286 #undef TARGET_STORE_RETURNED_BOUNDS
50287 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
50289 #undef TARGET_CHKP_BOUND_MODE
50290 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
50292 #undef TARGET_BUILTIN_CHKP_FUNCTION
50293 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
50295 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
50296 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
50298 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
50299 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
50301 #undef TARGET_CHKP_INITIALIZE_BOUNDS
50302 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
50304 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
50305 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
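/* The bounds and CHKP hooks above implement the Pointer Bounds Checker
   (-fcheck-pointer-bounds, Intel MPX): how argument and return-value
   bounds are loaded, stored and materialized as constants.  */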
50307 #undef TARGET_OFFLOAD_OPTIONS
50308 #define TARGET_OFFLOAD_OPTIONS \
50309 ix86_offload_options
50311 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
50312 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
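/* 512 bits, i.e. 64 bytes: the widest useful alignment here, matching
   a ZMM register with AVX-512.  */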
50314 #undef TARGET_OPTAB_SUPPORTED_P
50315 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
50317 #undef TARGET_HARD_REGNO_SCRATCH_OK
50318 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
50320 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
50321 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
50323 #undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
50324 #define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
50326 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
50327 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
50329 #undef TARGET_INIT_LIBFUNCS
50330 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
50332 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
50333 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
50335 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
50336 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
50338 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
50339 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
50341 #undef TARGET_HARD_REGNO_NREGS
50342 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
50343 #undef TARGET_HARD_REGNO_MODE_OK
50344 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
50346 #undef TARGET_MODES_TIEABLE_P
50347 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
50349 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
50350 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
50351 ix86_hard_regno_call_part_clobbered
50353 #undef TARGET_CAN_CHANGE_MODE_CLASS
50354 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
50356 #undef TARGET_STATIC_RTX_ALIGNMENT
50357 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
50358 #undef TARGET_CONSTANT_ALIGNMENT
50359 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
50361 #if CHECKING_P
50362 #undef TARGET_RUN_TARGET_SELFTESTS
50363 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
50364 #endif /* #if CHECKING_P */
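/* TARGET_INITIALIZER, from target-def.h, expands to an aggregate
   initializer that plugs every TARGET_* macro defined above into the
   corresponding field of targetm and fills the rest with the defaults
   from target.def.  The middle end reaches the i386 implementations
   only through this vector; e.g. targetm.calls.function_arg resolves
   to ix86_function_arg via the define above.  */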
50366 struct gcc_target targetm = TARGET_INITIALIZER;
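/* gt-i386.h is generated by gengtype and provides the garbage
   collector root tables for the GTY-marked data in this file.  */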
50368 #include "gt-i386.h"