[official-gcc.git] / gcc / config / i386 / i386.c
blob 7f9d694d21753e90f6b863e3fc34267b644f3641
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "memmodel.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "cfgloop.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "expmed.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic.h"
40 #include "cfgbuild.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "attribs.h"
44 #include "calls.h"
45 #include "stor-layout.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "reload.h"
57 #include "gimplify.h"
58 #include "dwarf2.h"
59 #include "tm-constrs.h"
60 #include "params.h"
61 #include "cselib.h"
62 #include "sched-int.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
71 #include "builtins.h"
72 #include "rtl-iter.h"
73 #include "tree-iterator.h"
74 #include "tree-chkp.h"
75 #include "rtl-chkp.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "regrename.h"
79 #include "dojump.h"
80 #include "fold-const-call.h"
81 #include "tree-vrp.h"
82 #include "tree-ssanames.h"
83 #include "selftest.h"
84 #include "selftest-rtl.h"
85 #include "print-rtl.h"
86 #include "intl.h"
87 #include "ifcvt.h"
88 #include "symbol-summary.h"
89 #include "ipa-prop.h"
90 #include "ipa-fnsummary.h"
92 /* This file should be included last. */
93 #include "target-def.h"
95 #include "x86-tune-costs.h"
97 static rtx legitimize_dllimport_symbol (rtx, bool);
98 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
99 static rtx legitimize_pe_coff_symbol (rtx, bool);
100 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
101 static bool ix86_save_reg (unsigned int, bool, bool);
102 static bool ix86_function_naked (const_tree);
104 #ifndef CHECK_STACK_LIMIT
105 #define CHECK_STACK_LIMIT (-1)
106 #endif
108 /* Return index of given mode in mult and division cost tables. */
109 #define MODE_INDEX(mode) \
110 ((mode) == QImode ? 0 \
111 : (mode) == HImode ? 1 \
112 : (mode) == SImode ? 2 \
113 : (mode) == DImode ? 3 \
114 : 4)
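/* A sketch of how MODE_INDEX is typically consumed (the actual cost code
   appears later in this file; the array fields named here follow struct
   processor_costs and are assumptions of this example):
     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- SImode selects slot 2
     ix86_cost->divide[MODE_INDEX (DImode)]      -- DImode selects slot 3
   Any other mode falls into the catch-all slot 4.  */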
117 /* Set by -mtune. */
118 const struct processor_costs *ix86_tune_cost = NULL;
120 /* Set by -mtune or -Os. */
121 const struct processor_costs *ix86_cost = NULL;
123 /* Processor feature/optimization bitmasks. */
124 #define m_386 (1U<<PROCESSOR_I386)
125 #define m_486 (1U<<PROCESSOR_I486)
126 #define m_PENT (1U<<PROCESSOR_PENTIUM)
127 #define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
128 #define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
129 #define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
130 #define m_NOCONA (1U<<PROCESSOR_NOCONA)
131 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
132 #define m_CORE2 (1U<<PROCESSOR_CORE2)
133 #define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
134 #define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
135 #define m_HASWELL (1U<<PROCESSOR_HASWELL)
136 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
137 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
138 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
139 #define m_KNL (1U<<PROCESSOR_KNL)
140 #define m_KNM (1U<<PROCESSOR_KNM)
141 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
142 #define m_INTEL (1U<<PROCESSOR_INTEL)
144 #define m_GEODE (1U<<PROCESSOR_GEODE)
145 #define m_K6 (1U<<PROCESSOR_K6)
146 #define m_K6_GEODE (m_K6 | m_GEODE)
147 #define m_K8 (1U<<PROCESSOR_K8)
148 #define m_ATHLON (1U<<PROCESSOR_ATHLON)
149 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
150 #define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
151 #define m_BDVER1 (1U<<PROCESSOR_BDVER1)
152 #define m_BDVER2 (1U<<PROCESSOR_BDVER2)
153 #define m_BDVER3 (1U<<PROCESSOR_BDVER3)
154 #define m_BDVER4 (1U<<PROCESSOR_BDVER4)
155 #define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
156 #define m_BTVER1 (1U<<PROCESSOR_BTVER1)
157 #define m_BTVER2 (1U<<PROCESSOR_BTVER2)
158 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
159 #define m_BTVER (m_BTVER1 | m_BTVER2)
160 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
161 | m_ZNVER1)
163 #define m_GENERIC (1U<<PROCESSOR_GENERIC)
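/* These per-processor masks are consumed by the selector field of the
   DEF_TUNE entries in x86-tune.def, included just below.  A representative
   entry, with illustrative mask values, looks like:
     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)
   After option processing, testing a tuning reduces to reading the
   corresponding ix86_tune_features[] slot.  */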
165 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
166 #undef DEF_TUNE
167 #define DEF_TUNE(tune, name, selector) name,
168 #include "x86-tune.def"
169 #undef DEF_TUNE
172 /* Feature tests against the various tunings. */
173 unsigned char ix86_tune_features[X86_TUNE_LAST];
175 /* Feature tests against the various tunings used to create ix86_tune_features
176 based on the processor mask. */
177 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
178 #undef DEF_TUNE
179 #define DEF_TUNE(tune, name, selector) selector,
180 #include "x86-tune.def"
181 #undef DEF_TUNE
184 /* Feature tests against the various architecture variations. */
185 unsigned char ix86_arch_features[X86_ARCH_LAST];
187 /* Feature tests against the various architecture variations, used to create
188 ix86_arch_features based on the processor mask. */
189 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
190 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
191 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
193 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
194 ~m_386,
196 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
197 ~(m_386 | m_486),
199 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
200 ~m_386,
202 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
203 ~m_386,
 206 /* In case the average insn count for a single function invocation is
207 lower than this constant, emit fast (but longer) prologue and
208 epilogue code. */
209 #define FAST_PROLOGUE_INSN_COUNT 20
 211 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
212 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
213 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
214 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
216 /* Array of the smallest class containing reg number REGNO, indexed by
217 REGNO. Used by REGNO_REG_CLASS in i386.h. */
219 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
221 /* ax, dx, cx, bx */
222 AREG, DREG, CREG, BREG,
223 /* si, di, bp, sp */
224 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
225 /* FP registers */
226 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
227 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
228 /* arg pointer */
229 NON_Q_REGS,
230 /* flags, fpsr, fpcr, frame */
231 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
232 /* SSE registers */
233 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
234 SSE_REGS, SSE_REGS,
235 /* MMX registers */
236 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
237 MMX_REGS, MMX_REGS,
238 /* REX registers */
239 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
240 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
241 /* SSE REX registers */
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
243 SSE_REGS, SSE_REGS,
244 /* AVX-512 SSE registers */
245 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
246 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
247 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
248 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
249 /* Mask registers. */
250 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
251 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
252 /* MPX bound registers */
253 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
256 /* The "default" register map used in 32bit mode. */
258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
268 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
269 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
270 101, 102, 103, 104, /* bound registers */
273 /* The "default" register map used in 64bit mode. */
275 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
277 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
278 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
279 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
280 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
281 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
282 8,9,10,11,12,13,14,15, /* extended integer registers */
283 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
284 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
285 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
286 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
287 126, 127, 128, 129, /* bound registers */
290 /* Define the register numbers to be used in Dwarf debugging information.
291 The SVR4 reference port C compiler uses the following register numbers
292 in its Dwarf output code:
293 0 for %eax (gcc regno = 0)
294 1 for %ecx (gcc regno = 2)
295 2 for %edx (gcc regno = 1)
296 3 for %ebx (gcc regno = 3)
297 4 for %esp (gcc regno = 7)
298 5 for %ebp (gcc regno = 6)
299 6 for %esi (gcc regno = 4)
300 7 for %edi (gcc regno = 5)
301 The following three DWARF register numbers are never generated by
302 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
303 believes these numbers have these meanings.
304 8 for %eip (no gcc equivalent)
305 9 for %eflags (gcc regno = 17)
306 10 for %trapno (no gcc equivalent)
307 It is not at all clear how we should number the FP stack registers
308 for the x86 architecture. If the version of SDB on x86/svr4 were
309 a bit less brain dead with respect to floating-point then we would
310 have a precedent to follow with respect to DWARF register numbers
311 for x86 FP registers, but the SDB on x86/svr4 is so completely
312 broken with respect to FP registers that it is hardly worth thinking
313 of it as something to strive for compatibility with.
314 The version of x86/svr4 SDB I have at the moment does (partially)
315 seem to believe that DWARF register number 11 is associated with
316 the x86 register %st(0), but that's about all. Higher DWARF
317 register numbers don't seem to be associated with anything in
318 particular, and even for DWARF regno 11, SDB only seems to under-
319 stand that it should say that a variable lives in %st(0) (when
320 asked via an `=' command) if we said it was in DWARF regno 11,
321 but SDB still prints garbage when asked for the value of the
322 variable in question (via a `/' command).
323 (Also note that the labels SDB prints for various FP stack regs
324 when doing an `x' command are all wrong.)
325 Note that these problems generally don't affect the native SVR4
326 C compiler because it doesn't allow the use of -O with -g and
327 because when it is *not* optimizing, it allocates a memory
328 location for each floating-point variable, and the memory
329 location is what gets described in the DWARF AT_location
330 attribute for the variable in question.
331 Regardless of the severe mental illness of the x86/svr4 SDB, we
332 do something sensible here and we use the following DWARF
333 register numbers. Note that these are all stack-top-relative
334 numbers.
335 11 for %st(0) (gcc regno = 8)
336 12 for %st(1) (gcc regno = 9)
337 13 for %st(2) (gcc regno = 10)
338 14 for %st(3) (gcc regno = 11)
339 15 for %st(4) (gcc regno = 12)
340 16 for %st(5) (gcc regno = 13)
341 17 for %st(6) (gcc regno = 14)
342 18 for %st(7) (gcc regno = 15)
344 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
346 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
347 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
348 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
349 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
350 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
351 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
352 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
353 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
354 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
355 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
356 101, 102, 103, 104, /* bound registers */
359 /* Define parameter passing and return registers. */
361 static int const x86_64_int_parameter_registers[6] =
363 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
366 static int const x86_64_ms_abi_int_parameter_registers[4] =
368 CX_REG, DX_REG, R8_REG, R9_REG
371 static int const x86_64_int_return_registers[4] =
373 AX_REG, DX_REG, DI_REG, SI_REG
376 /* Additional registers that are clobbered by SYSV calls. */
378 #define NUM_X86_64_MS_CLOBBERED_REGS 12
379 static int const x86_64_ms_sysv_extra_clobbered_registers
380 [NUM_X86_64_MS_CLOBBERED_REGS] =
382 SI_REG, DI_REG,
383 XMM6_REG, XMM7_REG,
384 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
385 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
388 enum xlogue_stub {
389 XLOGUE_STUB_SAVE,
390 XLOGUE_STUB_RESTORE,
391 XLOGUE_STUB_RESTORE_TAIL,
392 XLOGUE_STUB_SAVE_HFP,
393 XLOGUE_STUB_RESTORE_HFP,
394 XLOGUE_STUB_RESTORE_HFP_TAIL,
396 XLOGUE_STUB_COUNT
399 enum xlogue_stub_sets {
400 XLOGUE_SET_ALIGNED,
401 XLOGUE_SET_ALIGNED_PLUS_8,
402 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
403 XLOGUE_SET_HFP_ALIGNED_PLUS_8,
405 XLOGUE_SET_COUNT
408 /* Register save/restore layout used by out-of-line stubs. */
409 class xlogue_layout {
410 public:
411 struct reginfo
413 unsigned regno;
414 HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or
415 rsi) to where each register is stored. */
418 unsigned get_nregs () const {return m_nregs;}
419 HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}
421 const reginfo &get_reginfo (unsigned reg) const
423 gcc_assert (reg < m_nregs);
424 return m_regs[reg];
427 static const char *get_stub_name (enum xlogue_stub stub,
428 unsigned n_extra_args);
430 /* Returns an rtx for the stub's symbol based upon
431 1.) the specified stub (save, restore or restore_ret) and
432 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
 433 3.) whether or not stack alignment is being performed. */
434 static rtx get_stub_rtx (enum xlogue_stub stub);
436 /* Returns the amount of stack space (including padding) that the stub
437 needs to store registers based upon data in the machine_function. */
438 HOST_WIDE_INT get_stack_space_used () const
440 const struct machine_function *m = cfun->machine;
441 unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
443 gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
444 return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
447 /* Returns the offset for the base pointer used by the stub. */
448 HOST_WIDE_INT get_stub_ptr_offset () const
450 return STUB_INDEX_OFFSET + m_stack_align_off_in;
453 static const struct xlogue_layout &get_instance ();
454 static unsigned count_stub_managed_regs ();
455 static bool is_stub_managed_reg (unsigned regno, unsigned count);
457 static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
458 static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
459 static const unsigned MAX_REGS = 18;
460 static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
461 static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
462 static const unsigned STUB_NAME_MAX_LEN = 20;
463 static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
464 static const unsigned REG_ORDER[MAX_REGS];
465 static const unsigned REG_ORDER_REALIGN[MAX_REGS];
467 private:
468 xlogue_layout ();
469 xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
470 xlogue_layout (const xlogue_layout &);
472 /* True if hard frame pointer is used. */
473 bool m_hfp;
 475 /* Maximum number of registers this layout manages. */
476 unsigned m_nregs;
478 /* Incoming offset from 16-byte alignment. */
479 HOST_WIDE_INT m_stack_align_off_in;
481 /* Register order and offsets. */
482 struct reginfo m_regs[MAX_REGS];
484 /* Lazy-inited cache of symbol names for stubs. */
485 static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
486 [STUB_NAME_MAX_LEN];
488 static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
491 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
492 "savms64",
493 "resms64",
494 "resms64x",
495 "savms64f",
496 "resms64f",
497 "resms64fx"
500 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
501 /* The below offset values are where each register is stored for the layout
 502 relative to the incoming stack pointer. The value of each m_regs[].offset will
503 be relative to the incoming base pointer (rax or rsi) used by the stub.
505 s_instances: 0 1 2 3
506 Offset: realigned or aligned + 8
507 Register aligned aligned + 8 aligned w/HFP w/HFP */
508 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
509 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
510 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
511 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
512 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
513 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
514 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
515 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
516 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
517 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
518 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
519 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
520 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
521 BP_REG, /* 0xc0 0xc8 N/A N/A */
522 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
523 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
524 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
525 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
528 /* Instantiate static const values. */
529 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
530 const unsigned xlogue_layout::MIN_REGS;
531 const unsigned xlogue_layout::MAX_REGS;
532 const unsigned xlogue_layout::MAX_EXTRA_REGS;
533 const unsigned xlogue_layout::VARIANT_COUNT;
534 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
536 /* Initialize xlogue_layout::s_stub_names to zero. */
537 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
538 [STUB_NAME_MAX_LEN];
540 /* Instantiates all xlogue_layout instances. */
541 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
542 xlogue_layout (0, false),
543 xlogue_layout (8, false),
544 xlogue_layout (0, true),
545 xlogue_layout (8, true)
548 /* Return an appropriate const instance of xlogue_layout based upon values
549 in cfun->machine and crtl. */
550 const struct xlogue_layout &
551 xlogue_layout::get_instance ()
553 enum xlogue_stub_sets stub_set;
554 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
556 if (stack_realign_fp)
557 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
558 else if (frame_pointer_needed)
559 stub_set = aligned_plus_8
560 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
561 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
562 else
563 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
565 return s_instances[stub_set];
568 /* Determine how many clobbered registers can be saved by the stub.
569 Returns the count of registers the stub will save and restore. */
570 unsigned
571 xlogue_layout::count_stub_managed_regs ()
573 bool hfp = frame_pointer_needed || stack_realign_fp;
574 unsigned i, count;
575 unsigned regno;
577 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
579 regno = REG_ORDER[i];
580 if (regno == BP_REG && hfp)
581 continue;
582 if (!ix86_save_reg (regno, false, false))
583 break;
584 ++count;
586 return count;
589 /* Determine if register REGNO is a stub managed register given the
590 total COUNT of stub managed registers. */
591 bool
592 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
594 bool hfp = frame_pointer_needed || stack_realign_fp;
595 unsigned i;
597 for (i = 0; i < count; ++i)
599 gcc_assert (i < MAX_REGS);
600 if (REG_ORDER[i] == BP_REG && hfp)
601 ++count;
602 else if (REG_ORDER[i] == regno)
603 return true;
605 return false;
608 /* Constructor for xlogue_layout. */
609 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
610 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
611 m_stack_align_off_in (stack_align_off_in)
613 HOST_WIDE_INT offset = stack_align_off_in;
614 unsigned i, j;
616 for (i = j = 0; i < MAX_REGS; ++i)
618 unsigned regno = REG_ORDER[i];
620 if (regno == BP_REG && hfp)
621 continue;
622 if (SSE_REGNO_P (regno))
624 offset += 16;
625 /* Verify that SSE regs are always aligned. */
626 gcc_assert (!((stack_align_off_in + offset) & 15));
628 else
629 offset += 8;
631 m_regs[j].regno = regno;
632 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
634 gcc_assert (j == m_nregs);
637 const char *
638 xlogue_layout::get_stub_name (enum xlogue_stub stub,
639 unsigned n_extra_regs)
641 const int have_avx = TARGET_AVX;
642 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
644 /* Lazy init */
645 if (!*name)
647 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
648 (have_avx ? "avx" : "sse"),
649 STUB_BASE_NAMES[stub],
650 MIN_REGS + n_extra_regs);
651 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
654 return name;
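/* Example names produced by the format string above (derived from
   MIN_REGS == 12 and STUB_BASE_NAMES): get_stub_name (XLOGUE_STUB_SAVE, 0)
   yields "__sse_savms64_12" without AVX and "__avx_savms64_12" with it,
   while requesting 6 extra registers gives "__avx_savms64_18".  */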
657 /* Return rtx of a symbol ref for the entry point (based upon
658 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
660 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
662 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
663 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
664 gcc_assert (stub < XLOGUE_STUB_COUNT);
665 gcc_assert (crtl->stack_realign_finalized);
667 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
670 /* Define the structure for the machine field in struct function. */
672 struct GTY(()) stack_local_entry {
673 unsigned short mode;
674 unsigned short n;
675 rtx rtl;
676 struct stack_local_entry *next;
679 /* Which cpu are we scheduling for. */
680 enum attr_cpu ix86_schedule;
682 /* Which cpu are we optimizing for. */
683 enum processor_type ix86_tune;
685 /* Which instruction set architecture to use. */
686 enum processor_type ix86_arch;
688 /* True if processor has SSE prefetch instruction. */
689 unsigned char x86_prefetch_sse;
691 /* -mstackrealign option */
692 static const char ix86_force_align_arg_pointer_string[]
693 = "force_align_arg_pointer";
695 static rtx (*ix86_gen_leave) (void);
696 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
697 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
698 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
699 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
700 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
701 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
702 static rtx (*ix86_gen_clzero) (rtx);
703 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
704 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
705 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
706 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
707 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
708 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
710 /* Preferred alignment for stack boundary in bits. */
711 unsigned int ix86_preferred_stack_boundary;
713 /* Alignment for incoming stack boundary in bits specified at
714 command line. */
715 static unsigned int ix86_user_incoming_stack_boundary;
717 /* Default alignment for incoming stack boundary in bits. */
718 static unsigned int ix86_default_incoming_stack_boundary;
720 /* Alignment for incoming stack boundary in bits. */
721 unsigned int ix86_incoming_stack_boundary;
723 /* Calling abi specific va_list type nodes. */
724 static GTY(()) tree sysv_va_list_type_node;
725 static GTY(()) tree ms_va_list_type_node;
727 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
728 char internal_label_prefix[16];
729 int internal_label_prefix_len;
731 /* Fence to use after loop using movnt. */
732 tree x86_mfence;
 734 /* Register class used for passing a given 64-bit part of an argument.
 735 These represent classes as documented by the psABI, with the exception
 736 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
 737 uses SFmode or DFmode moves instead of DImode moves to avoid reformatting penalties.
 739 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 740 whenever possible (the upper half does contain padding). */
741 enum x86_64_reg_class
743 X86_64_NO_CLASS,
744 X86_64_INTEGER_CLASS,
745 X86_64_INTEGERSI_CLASS,
746 X86_64_SSE_CLASS,
747 X86_64_SSESF_CLASS,
748 X86_64_SSEDF_CLASS,
749 X86_64_SSEUP_CLASS,
750 X86_64_X87_CLASS,
751 X86_64_X87UP_CLASS,
752 X86_64_COMPLEX_X87_CLASS,
753 X86_64_MEMORY_CLASS
756 #define MAX_CLASSES 8
758 /* Table of constants used by fldpi, fldln2, etc.... */
759 static REAL_VALUE_TYPE ext_80387_constants_table [5];
760 static bool ext_80387_constants_init;
763 static struct machine_function * ix86_init_machine_status (void);
764 static rtx ix86_function_value (const_tree, const_tree, bool);
765 static bool ix86_function_value_regno_p (const unsigned int);
766 static unsigned int ix86_function_arg_boundary (machine_mode,
767 const_tree);
768 static rtx ix86_static_chain (const_tree, bool);
769 static int ix86_function_regparm (const_tree, const_tree);
770 static void ix86_compute_frame_layout (void);
771 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
772 rtx, rtx, int);
773 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
774 static tree ix86_canonical_va_list_type (tree);
775 static void predict_jump (int);
776 static unsigned int split_stack_prologue_scratch_regno (void);
777 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
779 enum ix86_function_specific_strings
781 IX86_FUNCTION_SPECIFIC_ARCH,
782 IX86_FUNCTION_SPECIFIC_TUNE,
783 IX86_FUNCTION_SPECIFIC_MAX
786 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
787 const char *, const char *, enum fpmath_unit,
788 bool);
789 static void ix86_function_specific_save (struct cl_target_option *,
790 struct gcc_options *opts);
791 static void ix86_function_specific_restore (struct gcc_options *opts,
792 struct cl_target_option *);
793 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
794 static void ix86_function_specific_print (FILE *, int,
795 struct cl_target_option *);
796 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
797 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
798 struct gcc_options *,
799 struct gcc_options *,
800 struct gcc_options *);
801 static bool ix86_can_inline_p (tree, tree);
802 static void ix86_set_current_function (tree);
803 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
805 static enum calling_abi ix86_function_abi (const_tree);
808 #ifndef SUBTARGET32_DEFAULT_CPU
809 #define SUBTARGET32_DEFAULT_CPU "i386"
810 #endif
812 /* Whether -mtune= or -march= were specified */
813 static int ix86_tune_defaulted;
814 static int ix86_arch_specified;
816 /* Vectorization library interface and handlers. */
817 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
819 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
820 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
822 /* Processor target table, indexed by processor number */
823 struct ptt
825 const char *const name; /* processor name */
826 const struct processor_costs *cost; /* Processor costs */
827 const int align_loop; /* Default alignments. */
828 const int align_loop_max_skip;
829 const int align_jump;
830 const int align_jump_max_skip;
831 const int align_func;
834 /* This table must be in sync with enum processor_type in i386.h. */
835 static const struct ptt processor_target_table[PROCESSOR_max] =
837 {"generic", &generic_cost, 16, 10, 16, 10, 16},
838 {"i386", &i386_cost, 4, 3, 4, 3, 4},
839 {"i486", &i486_cost, 16, 15, 16, 15, 16},
840 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
841 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
842 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
843 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
844 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
845 {"core2", &core_cost, 16, 10, 16, 10, 16},
846 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
847 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
848 {"haswell", &core_cost, 16, 10, 16, 10, 16},
849 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
850 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
851 {"knl", &slm_cost, 16, 15, 16, 7, 16},
852 {"knm", &slm_cost, 16, 15, 16, 7, 16},
853 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
854 {"intel", &intel_cost, 16, 15, 16, 7, 16},
855 {"geode", &geode_cost, 0, 0, 0, 0, 0},
856 {"k6", &k6_cost, 32, 7, 32, 7, 32},
857 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
858 {"k8", &k8_cost, 16, 7, 16, 7, 16},
859 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
860 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
861 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
862 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
863 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
864 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
865 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
866 {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
869 static unsigned int
870 rest_of_handle_insert_vzeroupper (void)
872 int i;
874 /* vzeroupper instructions are inserted immediately after reload to
875 account for possible spills from 256bit registers. The pass
876 reuses mode switching infrastructure by re-running mode insertion
877 pass, so disable entities that have already been processed. */
878 for (i = 0; i < MAX_386_ENTITIES; i++)
879 ix86_optimize_mode_switching[i] = 0;
881 ix86_optimize_mode_switching[AVX_U128] = 1;
883 /* Call optimize_mode_switching. */
884 g->get_passes ()->execute_pass_mode_switching ();
885 return 0;
888 /* Return 1 if INSN uses or defines a hard register.
889 Hard register uses in a memory address are ignored.
890 Clobbers and flags definitions are ignored. */
892 static bool
893 has_non_address_hard_reg (rtx_insn *insn)
895 df_ref ref;
896 FOR_EACH_INSN_DEF (ref, insn)
897 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
898 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
899 && DF_REF_REGNO (ref) != FLAGS_REG)
900 return true;
902 FOR_EACH_INSN_USE (ref, insn)
903 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
904 return true;
906 return false;
909 /* Check if comparison INSN may be transformed
910 into vector comparison. Currently we transform
911 zero checks only which look like:
913 (set (reg:CCZ 17 flags)
914 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
915 (subreg:SI (reg:DI x) 0))
916 (const_int 0 [0]))) */
918 static bool
919 convertible_comparison_p (rtx_insn *insn)
921 if (!TARGET_SSE4_1)
922 return false;
924 rtx def_set = single_set (insn);
926 gcc_assert (def_set);
928 rtx src = SET_SRC (def_set);
929 rtx dst = SET_DEST (def_set);
931 gcc_assert (GET_CODE (src) == COMPARE);
933 if (GET_CODE (dst) != REG
934 || REGNO (dst) != FLAGS_REG
935 || GET_MODE (dst) != CCZmode)
936 return false;
938 rtx op1 = XEXP (src, 0);
939 rtx op2 = XEXP (src, 1);
941 if (op2 != CONST0_RTX (GET_MODE (op2)))
942 return false;
944 if (GET_CODE (op1) != IOR)
945 return false;
947 op2 = XEXP (op1, 1);
948 op1 = XEXP (op1, 0);
950 if (!SUBREG_P (op1)
951 || !SUBREG_P (op2)
952 || GET_MODE (op1) != SImode
953 || GET_MODE (op2) != SImode
954 || ((SUBREG_BYTE (op1) != 0
955 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
956 && (SUBREG_BYTE (op2) != 0
957 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
958 return false;
960 op1 = SUBREG_REG (op1);
961 op2 = SUBREG_REG (op2);
963 if (op1 != op2
964 || !REG_P (op1)
965 || GET_MODE (op1) != DImode)
966 return false;
968 return true;
971 /* The DImode version of scalar_to_vector_candidate_p. */
973 static bool
974 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
976 rtx def_set = single_set (insn);
978 if (!def_set)
979 return false;
981 if (has_non_address_hard_reg (insn))
982 return false;
984 rtx src = SET_SRC (def_set);
985 rtx dst = SET_DEST (def_set);
987 if (GET_CODE (src) == COMPARE)
988 return convertible_comparison_p (insn);
990 /* We are interested in DImode promotion only. */
991 if ((GET_MODE (src) != DImode
992 && !CONST_INT_P (src))
993 || GET_MODE (dst) != DImode)
994 return false;
996 if (!REG_P (dst) && !MEM_P (dst))
997 return false;
999 switch (GET_CODE (src))
1001 case ASHIFTRT:
1002 if (!TARGET_AVX512VL)
1003 return false;
1004 /* FALLTHRU */
1006 case ASHIFT:
1007 case LSHIFTRT:
1008 if (!REG_P (XEXP (src, 1))
1009 && (!SUBREG_P (XEXP (src, 1))
1010 || SUBREG_BYTE (XEXP (src, 1)) != 0
1011 || !REG_P (SUBREG_REG (XEXP (src, 1))))
1012 && (!CONST_INT_P (XEXP (src, 1))
1013 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1014 return false;
1016 if (GET_MODE (XEXP (src, 1)) != QImode
1017 && !CONST_INT_P (XEXP (src, 1)))
1018 return false;
1019 break;
1021 case PLUS:
1022 case MINUS:
1023 case IOR:
1024 case XOR:
1025 case AND:
1026 if (!REG_P (XEXP (src, 1))
1027 && !MEM_P (XEXP (src, 1))
1028 && !CONST_INT_P (XEXP (src, 1)))
1029 return false;
1031 if (GET_MODE (XEXP (src, 1)) != DImode
1032 && !CONST_INT_P (XEXP (src, 1)))
1033 return false;
1034 break;
1036 case NEG:
1037 case NOT:
1038 break;
1040 case REG:
1041 return true;
1043 case MEM:
1044 case CONST_INT:
1045 return REG_P (dst);
1047 default:
1048 return false;
1051 if (!REG_P (XEXP (src, 0))
1052 && !MEM_P (XEXP (src, 0))
1053 && !CONST_INT_P (XEXP (src, 0))
1054 /* Check for andnot case. */
1055 && (GET_CODE (src) != AND
1056 || GET_CODE (XEXP (src, 0)) != NOT
1057 || !REG_P (XEXP (XEXP (src, 0), 0))))
1058 return false;
1060 if (GET_MODE (XEXP (src, 0)) != DImode
1061 && !CONST_INT_P (XEXP (src, 0)))
1062 return false;
1064 return true;
1067 /* The TImode version of scalar_to_vector_candidate_p. */
1069 static bool
1070 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1072 rtx def_set = single_set (insn);
1074 if (!def_set)
1075 return false;
1077 if (has_non_address_hard_reg (insn))
1078 return false;
1080 rtx src = SET_SRC (def_set);
1081 rtx dst = SET_DEST (def_set);
1083 /* Only TImode load and store are allowed. */
1084 if (GET_MODE (dst) != TImode)
1085 return false;
1087 if (MEM_P (dst))
1089 /* Check for a store. The memory must be aligned, or an unaligned store
1090 must be optimal. Only support stores from a register, a standard SSE
1091 constant, or a CONST_WIDE_INT generated from a piecewise store.
1093 ??? Verify performance impact before enabling CONST_INT for
1094 __int128 store. */
1095 if (misaligned_operand (dst, TImode)
1096 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1097 return false;
1099 switch (GET_CODE (src))
1101 default:
1102 return false;
1104 case REG:
1105 case CONST_WIDE_INT:
1106 return true;
1108 case CONST_INT:
1109 return standard_sse_constant_p (src, TImode);
1112 else if (MEM_P (src))
1114 /* Check for a load. The memory must be aligned, or an unaligned load
1115 must be optimal. */
1116 return (REG_P (dst)
1117 && (!misaligned_operand (src, TImode)
1118 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1121 return false;
1124 /* Return 1 if INSN may be converted into a vector
1125 instruction. */
1127 static bool
1128 scalar_to_vector_candidate_p (rtx_insn *insn)
1130 if (TARGET_64BIT)
1131 return timode_scalar_to_vector_candidate_p (insn);
1132 else
1133 return dimode_scalar_to_vector_candidate_p (insn);
1136 /* The DImode version of remove_non_convertible_regs. */
1138 static void
1139 dimode_remove_non_convertible_regs (bitmap candidates)
1141 bitmap_iterator bi;
1142 unsigned id;
1143 bitmap regs = BITMAP_ALLOC (NULL);
1145 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1147 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1148 rtx reg = SET_DEST (def_set);
1150 if (!REG_P (reg)
1151 || bitmap_bit_p (regs, REGNO (reg))
1152 || HARD_REGISTER_P (reg))
1153 continue;
1155 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1156 def;
1157 def = DF_REF_NEXT_REG (def))
1159 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1161 if (dump_file)
1162 fprintf (dump_file,
1163 "r%d has non convertible definition in insn %d\n",
1164 REGNO (reg), DF_REF_INSN_UID (def));
1166 bitmap_set_bit (regs, REGNO (reg));
1167 break;
1172 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1174 for (df_ref def = DF_REG_DEF_CHAIN (id);
1175 def;
1176 def = DF_REF_NEXT_REG (def))
1177 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1179 if (dump_file)
1180 fprintf (dump_file, "Removing insn %d from candidates list\n",
1181 DF_REF_INSN_UID (def));
1183 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1187 BITMAP_FREE (regs);
1190 /* For a register REGNO, scan instructions for its defs and uses.
1191 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1193 static void
1194 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1195 unsigned int regno)
1197 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1198 def;
1199 def = DF_REF_NEXT_REG (def))
1201 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1203 if (dump_file)
1204 fprintf (dump_file,
1205 "r%d has non convertible def in insn %d\n",
1206 regno, DF_REF_INSN_UID (def));
1208 bitmap_set_bit (regs, regno);
1209 break;
1213 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1214 ref;
1215 ref = DF_REF_NEXT_REG (ref))
1217 /* Debug instructions are skipped. */
1218 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1219 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1221 if (dump_file)
1222 fprintf (dump_file,
1223 "r%d has non convertible use in insn %d\n",
1224 regno, DF_REF_INSN_UID (ref));
1226 bitmap_set_bit (regs, regno);
1227 break;
1232 /* The TImode version of remove_non_convertible_regs. */
1234 static void
1235 timode_remove_non_convertible_regs (bitmap candidates)
1237 bitmap_iterator bi;
1238 unsigned id;
1239 bitmap regs = BITMAP_ALLOC (NULL);
1241 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1243 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1244 rtx dest = SET_DEST (def_set);
1245 rtx src = SET_SRC (def_set);
1247 if ((!REG_P (dest)
1248 || bitmap_bit_p (regs, REGNO (dest))
1249 || HARD_REGISTER_P (dest))
1250 && (!REG_P (src)
1251 || bitmap_bit_p (regs, REGNO (src))
1252 || HARD_REGISTER_P (src)))
1253 continue;
1255 if (REG_P (dest))
1256 timode_check_non_convertible_regs (candidates, regs,
1257 REGNO (dest));
1259 if (REG_P (src))
1260 timode_check_non_convertible_regs (candidates, regs,
1261 REGNO (src));
1264 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1266 for (df_ref def = DF_REG_DEF_CHAIN (id);
1267 def;
1268 def = DF_REF_NEXT_REG (def))
1269 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1271 if (dump_file)
1272 fprintf (dump_file, "Removing insn %d from candidates list\n",
1273 DF_REF_INSN_UID (def));
1275 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1278 for (df_ref ref = DF_REG_USE_CHAIN (id);
1279 ref;
1280 ref = DF_REF_NEXT_REG (ref))
1281 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1283 if (dump_file)
1284 fprintf (dump_file, "Removing insn %d from candidates list\n",
1285 DF_REF_INSN_UID (ref));
1287 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1291 BITMAP_FREE (regs);
1294 /* For a given bitmap of insn UIDs, scan all instructions and
1295 remove an insn from CANDIDATES if it has both convertible
1296 and non-convertible definitions.
1298 All insns in the bitmap are conversion candidates according to
1299 scalar_to_vector_candidate_p. Currently this implies all insns
1300 are single_set.
1302 static void
1303 remove_non_convertible_regs (bitmap candidates)
1305 if (TARGET_64BIT)
1306 timode_remove_non_convertible_regs (candidates);
1307 else
1308 dimode_remove_non_convertible_regs (candidates);
1311 class scalar_chain
1313 public:
1314 scalar_chain ();
1315 virtual ~scalar_chain ();
1317 static unsigned max_id;
1319 /* ID of a chain. */
1320 unsigned int chain_id;
1321 /* A queue of instructions to be included into a chain. */
1322 bitmap queue;
1323 /* Instructions included into a chain. */
1324 bitmap insns;
1325 /* All registers defined by a chain. */
1326 bitmap defs;
1327 /* Registers used in both vector and scalar modes. */
1328 bitmap defs_conv;
1330 void build (bitmap candidates, unsigned insn_uid);
1331 virtual int compute_convert_gain () = 0;
1332 int convert ();
1334 protected:
1335 void add_to_queue (unsigned insn_uid);
1336 void emit_conversion_insns (rtx insns, rtx_insn *pos);
1338 private:
1339 void add_insn (bitmap candidates, unsigned insn_uid);
1340 void analyze_register_chain (bitmap candidates, df_ref ref);
1341 virtual void mark_dual_mode_def (df_ref def) = 0;
1342 virtual void convert_insn (rtx_insn *insn) = 0;
1343 virtual void convert_registers () = 0;
1346 class dimode_scalar_chain : public scalar_chain
1348 public:
1349 int compute_convert_gain ();
1350 private:
1351 void mark_dual_mode_def (df_ref def);
1352 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1353 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1354 void convert_insn (rtx_insn *insn);
1355 void convert_op (rtx *op, rtx_insn *insn);
1356 void convert_reg (unsigned regno);
1357 void make_vector_copies (unsigned regno);
1358 void convert_registers ();
1359 int vector_const_cost (rtx exp);
1362 class timode_scalar_chain : public scalar_chain
1364 public:
1365 /* Converting from TImode to V1TImode is always faster. */
1366 int compute_convert_gain () { return 1; }
1368 private:
1369 void mark_dual_mode_def (df_ref def);
1370 void fix_debug_reg_uses (rtx reg);
1371 void convert_insn (rtx_insn *insn);
1372 /* We don't convert registers to a different size. */
1373 void convert_registers () {}
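/* A rough sketch of how the conversion pass drives these chain classes
   (the actual driver appears later in this file; this is only an outline
   of the interface declared above):
     scalar_chain *chain;
     if (TARGET_64BIT)
       chain = new timode_scalar_chain;
     else
       chain = new dimode_scalar_chain;
     chain->build (candidates, uid);
     if (chain->compute_convert_gain () > 0)
       converted_insns += chain->convert ();
     delete chain;  */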
1376 unsigned scalar_chain::max_id = 0;
1378 /* Initialize new chain. */
1380 scalar_chain::scalar_chain ()
1382 chain_id = ++max_id;
1384 if (dump_file)
1385 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1387 bitmap_obstack_initialize (NULL);
1388 insns = BITMAP_ALLOC (NULL);
1389 defs = BITMAP_ALLOC (NULL);
1390 defs_conv = BITMAP_ALLOC (NULL);
1391 queue = NULL;
1394 /* Free chain's data. */
1396 scalar_chain::~scalar_chain ()
1398 BITMAP_FREE (insns);
1399 BITMAP_FREE (defs);
1400 BITMAP_FREE (defs_conv);
1401 bitmap_obstack_release (NULL);
1404 /* Add an instruction into the chain's queue. */
1406 void
1407 scalar_chain::add_to_queue (unsigned insn_uid)
1409 if (bitmap_bit_p (insns, insn_uid)
1410 || bitmap_bit_p (queue, insn_uid))
1411 return;
1413 if (dump_file)
1414 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
1415 insn_uid, chain_id);
1416 bitmap_set_bit (queue, insn_uid);
1419 /* For DImode conversion, mark register defined by DEF as requiring
1420 conversion. */
1422 void
1423 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1425 gcc_assert (DF_REF_REG_DEF_P (def));
1427 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1428 return;
1430 if (dump_file)
1431 fprintf (dump_file,
1432 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1433 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1435 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1438 /* For TImode conversion, it is unused. */
1440 void
1441 timode_scalar_chain::mark_dual_mode_def (df_ref)
1443 gcc_unreachable ();
1446 /* Check REF's chain to add new insns into a queue
1447 and find registers requiring conversion. */
1449 void
1450 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1452 df_link *chain;
1454 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1455 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1456 add_to_queue (DF_REF_INSN_UID (ref));
1458 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1460 unsigned uid = DF_REF_INSN_UID (chain->ref);
1462 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1463 continue;
1465 if (!DF_REF_REG_MEM_P (chain->ref))
1467 if (bitmap_bit_p (insns, uid))
1468 continue;
1470 if (bitmap_bit_p (candidates, uid))
1472 add_to_queue (uid);
1473 continue;
1477 if (DF_REF_REG_DEF_P (chain->ref))
1479 if (dump_file)
1480 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
1481 DF_REF_REGNO (chain->ref), uid);
1482 mark_dual_mode_def (chain->ref);
1484 else
1486 if (dump_file)
1487 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
1488 DF_REF_REGNO (chain->ref), uid);
1489 mark_dual_mode_def (ref);
1494 /* Add instruction into a chain. */
1496 void
1497 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1499 if (bitmap_bit_p (insns, insn_uid))
1500 return;
1502 if (dump_file)
1503 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
1505 bitmap_set_bit (insns, insn_uid);
1507 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1508 rtx def_set = single_set (insn);
1509 if (def_set && REG_P (SET_DEST (def_set))
1510 && !HARD_REGISTER_P (SET_DEST (def_set)))
1511 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1513 df_ref ref;
1514 df_ref def;
1515 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1516 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1517 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1518 def;
1519 def = DF_REF_NEXT_REG (def))
1520 analyze_register_chain (candidates, def);
1521 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1522 if (!DF_REF_REG_MEM_P (ref))
1523 analyze_register_chain (candidates, ref);
1526 /* Build new chain starting from insn INSN_UID recursively
1527 adding all dependent uses and definitions. */
1529 void
1530 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1532 queue = BITMAP_ALLOC (NULL);
1533 bitmap_set_bit (queue, insn_uid);
1535 if (dump_file)
1536 fprintf (dump_file, "Building chain #%d...\n", chain_id);
1538 while (!bitmap_empty_p (queue))
1540 insn_uid = bitmap_first_set_bit (queue);
1541 bitmap_clear_bit (queue, insn_uid);
1542 bitmap_clear_bit (candidates, insn_uid);
1543 add_insn (candidates, insn_uid);
1546 if (dump_file)
1548 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1549 fprintf (dump_file, " insns: ");
1550 dump_bitmap (dump_file, insns);
1551 if (!bitmap_empty_p (defs_conv))
1553 bitmap_iterator bi;
1554 unsigned id;
1555 const char *comma = "";
1556 fprintf (dump_file, " defs to convert: ");
1557 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1559 fprintf (dump_file, "%sr%d", comma, id);
1560 comma = ", ";
1562 fprintf (dump_file, "\n");
1566 BITMAP_FREE (queue);
1569 /* Return the cost of building a vector constant
1570 instead of using a scalar one. */
1573 dimode_scalar_chain::vector_const_cost (rtx exp)
1575 gcc_assert (CONST_INT_P (exp));
1577 if (standard_sse_constant_p (exp, V2DImode))
1578 return COSTS_N_INSNS (1);
1579 return ix86_cost->sse_load[1];
1582 /* Compute a gain for chain conversion. */
1585 dimode_scalar_chain::compute_convert_gain ()
1587 bitmap_iterator bi;
1588 unsigned insn_uid;
1589 int gain = 0;
1590 int cost = 0;
1592 if (dump_file)
1593 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1595 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1597 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1598 rtx def_set = single_set (insn);
1599 rtx src = SET_SRC (def_set);
1600 rtx dst = SET_DEST (def_set);
1602 if (REG_P (src) && REG_P (dst))
1603 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
1604 else if (REG_P (src) && MEM_P (dst))
1605 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1606 else if (MEM_P (src) && REG_P (dst))
1607 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1608 else if (GET_CODE (src) == ASHIFT
1609 || GET_CODE (src) == ASHIFTRT
1610 || GET_CODE (src) == LSHIFTRT)
1612 if (CONST_INT_P (XEXP (src, 0)))
1613 gain -= vector_const_cost (XEXP (src, 0));
1614 if (CONST_INT_P (XEXP (src, 1)))
1616 gain += ix86_cost->shift_const;
1617 if (INTVAL (XEXP (src, 1)) >= 32)
1618 gain -= COSTS_N_INSNS (1);
1620 else
1621 /* Additional gain for omitting two CMOVs. */
1622 gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1624 else if (GET_CODE (src) == PLUS
1625 || GET_CODE (src) == MINUS
1626 || GET_CODE (src) == IOR
1627 || GET_CODE (src) == XOR
1628 || GET_CODE (src) == AND)
1630 gain += ix86_cost->add;
1631 /* Additional gain for andnot for targets without BMI. */
1632 if (GET_CODE (XEXP (src, 0)) == NOT
1633 && !TARGET_BMI)
1634 gain += 2 * ix86_cost->add;
1636 if (CONST_INT_P (XEXP (src, 0)))
1637 gain -= vector_const_cost (XEXP (src, 0));
1638 if (CONST_INT_P (XEXP (src, 1)))
1639 gain -= vector_const_cost (XEXP (src, 1));
1641 else if (GET_CODE (src) == NEG
1642 || GET_CODE (src) == NOT)
1643 gain += ix86_cost->add - COSTS_N_INSNS (1);
1644 else if (GET_CODE (src) == COMPARE)
1646 /* Assume comparison cost is the same. */
1648 else if (CONST_INT_P (src))
1650 if (REG_P (dst))
1651 gain += COSTS_N_INSNS (2);
1652 else if (MEM_P (dst))
1653 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1654 gain -= vector_const_cost (src);
1656 else
1657 gcc_unreachable ();
1660 if (dump_file)
1661 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
1663 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1664 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1666 if (dump_file)
1667 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
1669 gain -= cost;
1671 if (dump_file)
1672 fprintf (dump_file, " Total gain: %d\n", gain);
1674 return gain;
1677 /* Replace REG in X with a V2DI subreg of NEW_REG. */
1680 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1682 if (x == reg)
1683 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1685 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1686 int i, j;
1687 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1689 if (fmt[i] == 'e')
1690 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1691 else if (fmt[i] == 'E')
1692 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1693 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1694 reg, new_reg);
1697 return x;
1700 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
1702 void
1703 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1704 rtx reg, rtx new_reg)
1706 replace_with_subreg (single_set (insn), reg, new_reg);
1709 /* Insert generated conversion instruction sequence INSNS
1710 after instruction AFTER. A new BB may be required if the
1711 instruction has an EH region attached. */
1713 void
1714 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1716 if (!control_flow_insn_p (after))
1718 emit_insn_after (insns, after);
1719 return;
1722 basic_block bb = BLOCK_FOR_INSN (after);
1723 edge e = find_fallthru_edge (bb->succs);
1724 gcc_assert (e);
1726 basic_block new_bb = split_edge (e);
1727 emit_insn_after (insns, BB_HEAD (new_bb));
1730 /* Make vector copies for all definitions of register REGNO
1731 and replace its uses within the chain. */
1733 void
1734 dimode_scalar_chain::make_vector_copies (unsigned regno)
1736 rtx reg = regno_reg_rtx[regno];
1737 rtx vreg = gen_reg_rtx (DImode);
1738 bool count_reg = false;
1739 df_ref ref;
1741 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1742 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1744 df_ref use;
1746 /* Detect the count register of a shift instruction. */
1747 for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1748 if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1750 rtx_insn *insn = DF_REF_INSN (use);
1751 rtx def_set = single_set (insn);
1753 gcc_assert (def_set);
1755 rtx src = SET_SRC (def_set);
1757 if ((GET_CODE (src) == ASHIFT
1758 || GET_CODE (src) == ASHIFTRT
1759 || GET_CODE (src) == LSHIFTRT)
1760 && !CONST_INT_P (XEXP (src, 1))
1761 && reg_or_subregno (XEXP (src, 1)) == regno)
1762 count_reg = true;
1765 start_sequence ();
1766 if (count_reg)
1768 rtx qreg = gen_lowpart (QImode, reg);
1769 rtx tmp = gen_reg_rtx (SImode);
1771 if (TARGET_ZERO_EXTEND_WITH_AND
1772 && optimize_function_for_speed_p (cfun))
1774 emit_move_insn (tmp, const0_rtx);
1775 emit_insn (gen_movstrictqi
1776 (gen_lowpart (QImode, tmp), qreg));
1778 else
1779 emit_insn (gen_rtx_SET
1780 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1782 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1784 rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1785 emit_move_insn (slot, tmp);
1786 tmp = copy_rtx (slot);
1789 emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1791 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1793 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1794 emit_move_insn (adjust_address (tmp, SImode, 0),
1795 gen_rtx_SUBREG (SImode, reg, 0));
1796 emit_move_insn (adjust_address (tmp, SImode, 4),
1797 gen_rtx_SUBREG (SImode, reg, 4));
1798 emit_move_insn (vreg, tmp);
1800 else if (TARGET_SSE4_1)
1802 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1803 CONST0_RTX (V4SImode),
1804 gen_rtx_SUBREG (SImode, reg, 0)));
1805 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1806 gen_rtx_SUBREG (V4SImode, vreg, 0),
1807 gen_rtx_SUBREG (SImode, reg, 4),
1808 GEN_INT (2)));
1810 else
1812 rtx tmp = gen_reg_rtx (DImode);
1813 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1814 CONST0_RTX (V4SImode),
1815 gen_rtx_SUBREG (SImode, reg, 0)));
1816 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1817 CONST0_RTX (V4SImode),
1818 gen_rtx_SUBREG (SImode, reg, 4)));
1819 emit_insn (gen_vec_interleave_lowv4si
1820 (gen_rtx_SUBREG (V4SImode, vreg, 0),
1821 gen_rtx_SUBREG (V4SImode, vreg, 0),
1822 gen_rtx_SUBREG (V4SImode, tmp, 0)));
1824 rtx_insn *seq = get_insns ();
1825 end_sequence ();
1826 rtx_insn *insn = DF_REF_INSN (ref);
1827 emit_conversion_insns (seq, insn);
1829 if (dump_file)
1830 fprintf (dump_file,
1831 " Copied r%d to a vector register r%d for insn %d\n",
1832 regno, REGNO (vreg), INSN_UID (insn));
1835 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1836 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1838 rtx_insn *insn = DF_REF_INSN (ref);
1839 if (count_reg)
1841 rtx def_set = single_set (insn);
1842 gcc_assert (def_set);
1844 rtx src = SET_SRC (def_set);
1846 if ((GET_CODE (src) == ASHIFT
1847 || GET_CODE (src) == ASHIFTRT
1848 || GET_CODE (src) == LSHIFTRT)
1849 && !CONST_INT_P (XEXP (src, 1))
1850 && reg_or_subregno (XEXP (src, 1)) == regno)
1851 XEXP (src, 1) = vreg;
1853 else
1854 replace_with_subreg_in_insn (insn, reg, vreg);
1856 if (dump_file)
1857 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
1858 regno, REGNO (vreg), INSN_UID (insn));
1862 /* Convert all definitions of register REGNO
1863 and fix its uses. Scalar copies may be created
1864 if the register is used in a non-convertible insn. */
1866 void
1867 dimode_scalar_chain::convert_reg (unsigned regno)
1869 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1870 rtx reg = regno_reg_rtx[regno];
1871 rtx scopy = NULL_RTX;
1872 df_ref ref;
1873 bitmap conv;
1875 conv = BITMAP_ALLOC (NULL);
1876 bitmap_copy (conv, insns);
1878 if (scalar_copy)
1879 scopy = gen_reg_rtx (DImode);
1881 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1883 rtx_insn *insn = DF_REF_INSN (ref);
1884 rtx def_set = single_set (insn);
1885 rtx src = SET_SRC (def_set);
1886 rtx reg = DF_REF_REG (ref);
1888 if (!MEM_P (src))
1890 replace_with_subreg_in_insn (insn, reg, reg);
1891 bitmap_clear_bit (conv, INSN_UID (insn));
1894 if (scalar_copy)
1896 start_sequence ();
1897 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1899 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1900 emit_move_insn (tmp, reg);
1901 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1902 adjust_address (tmp, SImode, 0));
1903 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1904 adjust_address (tmp, SImode, 4));
1906 else if (TARGET_SSE4_1)
1908 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1909 emit_insn
1910 (gen_rtx_SET
1911 (gen_rtx_SUBREG (SImode, scopy, 0),
1912 gen_rtx_VEC_SELECT (SImode,
1913 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1915 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1916 emit_insn
1917 (gen_rtx_SET
1918 (gen_rtx_SUBREG (SImode, scopy, 4),
1919 gen_rtx_VEC_SELECT (SImode,
1920 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1922 else
1924 rtx vcopy = gen_reg_rtx (V2DImode);
1925 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1926 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1927 gen_rtx_SUBREG (SImode, vcopy, 0));
1928 emit_move_insn (vcopy,
1929 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1930 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1931 gen_rtx_SUBREG (SImode, vcopy, 0));
1933 rtx_insn *seq = get_insns ();
1934 end_sequence ();
1935 emit_conversion_insns (seq, insn);
1937 if (dump_file)
1938 fprintf (dump_file,
1939 " Copied r%d to a scalar register r%d for insn %d\n",
1940 regno, REGNO (scopy), INSN_UID (insn));
1944 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1945 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1947 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1949 rtx_insn *insn = DF_REF_INSN (ref);
1951 rtx def_set = single_set (insn);
1952 gcc_assert (def_set);
1954 rtx src = SET_SRC (def_set);
1955 rtx dst = SET_DEST (def_set);
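/* When REGNO is used as a variable shift count only its low byte is
   needed: zero-extend it into a fresh vector register (pmovzxbq with
   SSE4.1, otherwise an AND with {0xff, 0}) and use that as the count
   operand.  */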
1957 if ((GET_CODE (src) == ASHIFT
1958 || GET_CODE (src) == ASHIFTRT
1959 || GET_CODE (src) == LSHIFTRT)
1960 && !CONST_INT_P (XEXP (src, 1))
1961 && reg_or_subregno (XEXP (src, 1)) == regno)
1963 rtx tmp2 = gen_reg_rtx (V2DImode);
1965 start_sequence ();
1967 if (TARGET_SSE4_1)
1968 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1969 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1970 else
1972 rtx vec_cst
1973 = gen_rtx_CONST_VECTOR (V2DImode,
1974 gen_rtvec (2, GEN_INT (0xff),
1975 const0_rtx));
1976 vec_cst
1977 = validize_mem (force_const_mem (V2DImode, vec_cst));
1979 emit_insn (gen_rtx_SET
1980 (tmp2,
1981 gen_rtx_AND (V2DImode,
1982 gen_rtx_SUBREG (V2DImode, reg, 0),
1983 vec_cst)));
1985 rtx_insn *seq = get_insns ();
1986 end_sequence ();
1988 emit_insn_before (seq, insn);
1990 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
1992 else if (!MEM_P (dst) || !REG_P (src))
1993 replace_with_subreg_in_insn (insn, reg, reg);
1995 bitmap_clear_bit (conv, INSN_UID (insn));
1998 /* Skip debug insns and uninitialized uses. */
1999 else if (DF_REF_CHAIN (ref)
2000 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2002 gcc_assert (scopy);
2003 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2004 df_insn_rescan (DF_REF_INSN (ref));
2007 BITMAP_FREE (conv);
2010 /* Convert operand OP in INSN. We should handle
2011 memory operands and uninitialized registers.
2012 All other register uses are converted during
2013 register conversion. */
2015 void
2016 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2018 *op = copy_rtx_if_shared (*op);
2020 if (GET_CODE (*op) == NOT)
2022 convert_op (&XEXP (*op, 0), insn);
2023 PUT_MODE (*op, V2DImode);
2025 else if (MEM_P (*op))
2027 rtx tmp = gen_reg_rtx (DImode);
2029 emit_insn_before (gen_move_insn (tmp, *op), insn);
2030 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2032 if (dump_file)
2033 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2034 INSN_UID (insn), REGNO (tmp));
2036 else if (REG_P (*op))
2038 /* We may not have converted this register use in case
2039 the register has no definition. Otherwise it
2040 should have been converted in convert_reg. */
2041 df_ref ref;
2042 FOR_EACH_INSN_USE (ref, insn)
2043 if (DF_REF_REGNO (ref) == REGNO (*op))
2045 gcc_assert (!DF_REF_CHAIN (ref));
2046 break;
2048 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2050 else if (CONST_INT_P (*op))
2052 rtx vec_cst;
2053 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2055 /* Prefer all ones vector in case of -1. */
2056 if (constm1_operand (*op, GET_MODE (*op)))
2057 vec_cst = CONSTM1_RTX (V2DImode);
2058 else
2059 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2060 gen_rtvec (2, *op, const0_rtx));
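/* Constants that are not standard SSE constants (all zeros or all
   ones) must be loaded from the constant pool.  */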
2062 if (!standard_sse_constant_p (vec_cst, V2DImode))
2064 start_sequence ();
2065 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2066 rtx_insn *seq = get_insns ();
2067 end_sequence ();
2068 emit_insn_before (seq, insn);
2071 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2072 *op = tmp;
2074 else
2076 gcc_assert (SUBREG_P (*op));
2077 gcc_assert (GET_MODE (*op) == V2DImode);
2081 /* Convert INSN to vector mode. */
2083 void
2084 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2086 rtx def_set = single_set (insn);
2087 rtx src = SET_SRC (def_set);
2088 rtx dst = SET_DEST (def_set);
2089 rtx subreg;
2091 if (MEM_P (dst) && !REG_P (src))
2093 /* The converted vector operation cannot store its result
2094 directly to memory, so a temporary register is required. */
2095 rtx tmp = gen_reg_rtx (DImode);
2096 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2097 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2100 switch (GET_CODE (src))
2102 case ASHIFT:
2103 case ASHIFTRT:
2104 case LSHIFTRT:
2105 convert_op (&XEXP (src, 0), insn);
2106 PUT_MODE (src, V2DImode);
2107 break;
2109 case PLUS:
2110 case MINUS:
2111 case IOR:
2112 case XOR:
2113 case AND:
2114 convert_op (&XEXP (src, 0), insn);
2115 convert_op (&XEXP (src, 1), insn);
2116 PUT_MODE (src, V2DImode);
2117 break;
2119 case NEG:
2120 src = XEXP (src, 0);
2121 convert_op (&src, insn);
2122 subreg = gen_reg_rtx (V2DImode);
2123 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2124 src = gen_rtx_MINUS (V2DImode, subreg, src);
2125 break;
2127 case NOT:
2128 src = XEXP (src, 0);
2129 convert_op (&src, insn);
2130 subreg = gen_reg_rtx (V2DImode);
2131 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2132 src = gen_rtx_XOR (V2DImode, src, subreg);
2133 break;
2135 case MEM:
2136 if (!REG_P (dst))
2137 convert_op (&src, insn);
2138 break;
2140 case REG:
2141 if (!MEM_P (dst))
2142 convert_op (&src, insn);
2143 break;
2145 case SUBREG:
2146 gcc_assert (GET_MODE (src) == V2DImode);
2147 break;
2149 case COMPARE:
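/* The comparison is implemented by duplicating the low quadword of
   the value (punpcklqdq) and testing the value against itself with
   PTEST.  */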
2150 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2152 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2153 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2155 if (REG_P (src))
2156 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2157 else
2158 subreg = copy_rtx_if_shared (src);
2159 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2160 copy_rtx_if_shared (subreg),
2161 copy_rtx_if_shared (subreg)),
2162 insn);
2163 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2164 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2165 copy_rtx_if_shared (src)),
2166 UNSPEC_PTEST);
2167 break;
2169 case CONST_INT:
2170 convert_op (&src, insn);
2171 break;
2173 default:
2174 gcc_unreachable ();
2177 SET_SRC (def_set) = src;
2178 SET_DEST (def_set) = dst;
2180 /* Drop possible dead definitions. */
2181 PATTERN (insn) = def_set;
2183 INSN_CODE (insn) = -1;
2184 recog_memoized (insn);
2185 df_insn_rescan (insn);
2188 /* Fix uses of converted REG in debug insns. */
2190 void
2191 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2193 if (!flag_var_tracking)
2194 return;
2196 df_ref ref, next;
2197 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2199 rtx_insn *insn = DF_REF_INSN (ref);
2200 /* Make sure the next ref is for a different instruction,
2201 so that we're not affected by the rescan. */
2202 next = DF_REF_NEXT_REG (ref);
2203 while (next && DF_REF_INSN (next) == insn)
2204 next = DF_REF_NEXT_REG (next);
2206 if (DEBUG_INSN_P (insn))
2208 /* It may be a debug insn with a TImode variable in
2209 a register. */
2210 bool changed = false;
2211 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2213 rtx *loc = DF_REF_LOC (ref);
2214 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2216 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2217 changed = true;
2220 if (changed)
2221 df_insn_rescan (insn);
2226 /* Convert INSN from TImode to V1TImode. */
2228 void
2229 timode_scalar_chain::convert_insn (rtx_insn *insn)
2231 rtx def_set = single_set (insn);
2232 rtx src = SET_SRC (def_set);
2233 rtx dst = SET_DEST (def_set);
2235 switch (GET_CODE (dst))
2237 case REG:
2239 rtx tmp = find_reg_equal_equiv_note (insn);
2240 if (tmp)
2241 PUT_MODE (XEXP (tmp, 0), V1TImode);
2242 PUT_MODE (dst, V1TImode);
2243 fix_debug_reg_uses (dst);
2245 break;
2246 case MEM:
2247 PUT_MODE (dst, V1TImode);
2248 break;
2250 default:
2251 gcc_unreachable ();
2254 switch (GET_CODE (src))
2256 case REG:
2257 PUT_MODE (src, V1TImode);
2258 /* Call fix_debug_reg_uses only if SRC is never defined. */
2259 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2260 fix_debug_reg_uses (src);
2261 break;
2263 case MEM:
2264 PUT_MODE (src, V1TImode);
2265 break;
2267 case CONST_WIDE_INT:
2268 if (NONDEBUG_INSN_P (insn))
2270 /* Since there is no instruction to store a 128-bit constant,
2271 a temporary register is required. */
2272 rtx tmp = gen_reg_rtx (V1TImode);
2273 start_sequence ();
2274 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2275 src = validize_mem (force_const_mem (V1TImode, src));
2276 rtx_insn *seq = get_insns ();
2277 end_sequence ();
2278 if (seq)
2279 emit_insn_before (seq, insn);
2280 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2281 dst = tmp;
2283 break;
2285 case CONST_INT:
2286 switch (standard_sse_constant_p (src, TImode))
2288 case 1:
2289 src = CONST0_RTX (GET_MODE (dst));
2290 break;
2291 case 2:
2292 src = CONSTM1_RTX (GET_MODE (dst));
2293 break;
2294 default:
2295 gcc_unreachable ();
2297 if (NONDEBUG_INSN_P (insn))
2299 rtx tmp = gen_reg_rtx (V1TImode);
2300 /* Since there is no instruction to store a standard SSE
2301 constant, a temporary register is required. */
2302 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2303 dst = tmp;
2305 break;
2307 default:
2308 gcc_unreachable ();
2311 SET_SRC (def_set) = src;
2312 SET_DEST (def_set) = dst;
2314 /* Drop possible dead definitions. */
2315 PATTERN (insn) = def_set;
2317 INSN_CODE (insn) = -1;
2318 recog_memoized (insn);
2319 df_insn_rescan (insn);
2322 void
2323 dimode_scalar_chain::convert_registers ()
2325 bitmap_iterator bi;
2326 unsigned id;
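/* Convert the registers defined inside the chain first, then create
   vector copies for registers that are used by the chain but defined
   outside of it.  */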
2328 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2329 convert_reg (id);
2331 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2332 make_vector_copies (id);
2335 /* Convert the whole chain, creating the required register
2336 conversions and copies. */
2339 scalar_chain::convert ()
2341 bitmap_iterator bi;
2342 unsigned id;
2343 int converted_insns = 0;
2345 if (!dbg_cnt (stv_conversion))
2346 return 0;
2348 if (dump_file)
2349 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2351 convert_registers ();
2353 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2355 convert_insn (DF_INSN_UID_GET (id)->insn);
2356 converted_insns++;
2359 return converted_insns;
2362 /* Main STV pass function. Find and convert scalar
2363 instructions into vector mode when profitable. */
2365 static unsigned int
2366 convert_scalars_to_vector ()
2368 basic_block bb;
2369 bitmap candidates;
2370 int converted_insns = 0;
2372 bitmap_obstack_initialize (NULL);
2373 candidates = BITMAP_ALLOC (NULL);
2375 calculate_dominance_info (CDI_DOMINATORS);
2376 df_set_flags (DF_DEFER_INSN_RESCAN);
2377 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2378 df_md_add_problem ();
2379 df_analyze ();
2381 /* Find all instructions we want to convert into vector mode. */
2382 if (dump_file)
2383 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2385 FOR_EACH_BB_FN (bb, cfun)
2387 rtx_insn *insn;
2388 FOR_BB_INSNS (bb, insn)
2389 if (scalar_to_vector_candidate_p (insn))
2391 if (dump_file)
2392 fprintf (dump_file, " insn %d is marked as a candidate\n",
2393 INSN_UID (insn));
2395 bitmap_set_bit (candidates, INSN_UID (insn));
2399 remove_non_convertible_regs (candidates);
2401 if (bitmap_empty_p (candidates))
2402 if (dump_file)
2403 fprintf (dump_file, "There are no candidates for optimization.\n");
2405 while (!bitmap_empty_p (candidates))
2407 unsigned uid = bitmap_first_set_bit (candidates);
2408 scalar_chain *chain;
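/* In 64-bit mode the pass converts TImode chains; in 32-bit mode it
   converts DImode chains.  */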
2410 if (TARGET_64BIT)
2411 chain = new timode_scalar_chain;
2412 else
2413 chain = new dimode_scalar_chain;
2415 /* Find the instruction chain we want to convert to vector mode.
2416 Check all uses and definitions to estimate all required
2417 conversions. */
2418 chain->build (candidates, uid);
2420 if (chain->compute_convert_gain () > 0)
2421 converted_insns += chain->convert ();
2422 else
2423 if (dump_file)
2424 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2425 chain->chain_id);
2427 delete chain;
2430 if (dump_file)
2431 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2433 BITMAP_FREE (candidates);
2434 bitmap_obstack_release (NULL);
2435 df_process_deferred_rescans ();
2437 /* Conversion means we may have 128-bit register spills/fills
2438 which require an aligned stack. */
2439 if (converted_insns)
2441 if (crtl->stack_alignment_needed < 128)
2442 crtl->stack_alignment_needed = 128;
2443 if (crtl->stack_alignment_estimated < 128)
2444 crtl->stack_alignment_estimated = 128;
2445 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2446 if (TARGET_64BIT)
2447 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2448 parm; parm = DECL_CHAIN (parm))
2450 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2451 continue;
2452 if (DECL_RTL_SET_P (parm)
2453 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2455 rtx r = DECL_RTL (parm);
2456 if (REG_P (r))
2457 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2459 if (DECL_INCOMING_RTL (parm)
2460 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2462 rtx r = DECL_INCOMING_RTL (parm);
2463 if (REG_P (r))
2464 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2469 return 0;
2472 namespace {
2474 const pass_data pass_data_insert_vzeroupper =
2476 RTL_PASS, /* type */
2477 "vzeroupper", /* name */
2478 OPTGROUP_NONE, /* optinfo_flags */
2479 TV_MACH_DEP, /* tv_id */
2480 0, /* properties_required */
2481 0, /* properties_provided */
2482 0, /* properties_destroyed */
2483 0, /* todo_flags_start */
2484 TODO_df_finish, /* todo_flags_finish */
2487 class pass_insert_vzeroupper : public rtl_opt_pass
2489 public:
2490 pass_insert_vzeroupper(gcc::context *ctxt)
2491 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2494 /* opt_pass methods: */
2495 virtual bool gate (function *)
2497 return TARGET_AVX && !TARGET_AVX512F
2498 && TARGET_VZEROUPPER && flag_expensive_optimizations
2499 && !optimize_size;
2502 virtual unsigned int execute (function *)
2504 return rest_of_handle_insert_vzeroupper ();
2507 }; // class pass_insert_vzeroupper
2509 const pass_data pass_data_stv =
2511 RTL_PASS, /* type */
2512 "stv", /* name */
2513 OPTGROUP_NONE, /* optinfo_flags */
2514 TV_MACH_DEP, /* tv_id */
2515 0, /* properties_required */
2516 0, /* properties_provided */
2517 0, /* properties_destroyed */
2518 0, /* todo_flags_start */
2519 TODO_df_finish, /* todo_flags_finish */
2522 class pass_stv : public rtl_opt_pass
2524 public:
2525 pass_stv (gcc::context *ctxt)
2526 : rtl_opt_pass (pass_data_stv, ctxt),
2527 timode_p (false)
2530 /* opt_pass methods: */
2531 virtual bool gate (function *)
2533 return (timode_p == !!TARGET_64BIT
2534 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2537 virtual unsigned int execute (function *)
2539 return convert_scalars_to_vector ();
2542 opt_pass *clone ()
2544 return new pass_stv (m_ctxt);
2547 void set_pass_param (unsigned int n, bool param)
2549 gcc_assert (n == 0);
2550 timode_p = param;
2553 private:
2554 bool timode_p;
2555 }; // class pass_stv
2557 } // anon namespace
2559 rtl_opt_pass *
2560 make_pass_insert_vzeroupper (gcc::context *ctxt)
2562 return new pass_insert_vzeroupper (ctxt);
2565 rtl_opt_pass *
2566 make_pass_stv (gcc::context *ctxt)
2568 return new pass_stv (ctxt);
2571 /* Return true if a red-zone is in use. */
2573 bool
2574 ix86_using_red_zone (void)
2576 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2579 /* Return a string that documents the current -m options. The caller is
2580 responsible for freeing the string. */
2582 static char *
2583 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2584 int flags, int flags2,
2585 const char *arch, const char *tune,
2586 enum fpmath_unit fpmath, bool add_nl_p)
2588 struct ix86_target_opts
2590 const char *option; /* option string */
2591 HOST_WIDE_INT mask; /* isa mask options */
2594 /* This table is ordered so that options like -msse4.2 that imply other
2595 ISAs come first. The target string will be displayed in the same order. */
2596 static struct ix86_target_opts isa2_opts[] =
2598 { "-mgfni", OPTION_MASK_ISA_GFNI },
2599 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2600 { "-msgx", OPTION_MASK_ISA_SGX },
2601 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2602 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2603 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ }
2605 static struct ix86_target_opts isa_opts[] =
2607 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2608 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2609 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2610 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2611 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2612 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2613 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2614 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2615 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2616 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2617 { "-mfma", OPTION_MASK_ISA_FMA },
2618 { "-mxop", OPTION_MASK_ISA_XOP },
2619 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2620 { "-mf16c", OPTION_MASK_ISA_F16C },
2621 { "-mavx", OPTION_MASK_ISA_AVX },
2622 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2623 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2624 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2625 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2626 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2627 { "-msse3", OPTION_MASK_ISA_SSE3 },
2628 { "-maes", OPTION_MASK_ISA_AES },
2629 { "-msha", OPTION_MASK_ISA_SHA },
2630 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2631 { "-msse2", OPTION_MASK_ISA_SSE2 },
2632 { "-msse", OPTION_MASK_ISA_SSE },
2633 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2634 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2635 { "-mmmx", OPTION_MASK_ISA_MMX },
2636 { "-mrtm", OPTION_MASK_ISA_RTM },
2637 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2638 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2639 { "-madx", OPTION_MASK_ISA_ADX },
2640 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2641 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2642 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2643 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2644 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2645 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2646 { "-mabm", OPTION_MASK_ISA_ABM },
2647 { "-mbmi", OPTION_MASK_ISA_BMI },
2648 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2649 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2650 { "-mtbm", OPTION_MASK_ISA_TBM },
2651 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2652 { "-mcx16", OPTION_MASK_ISA_CX16 },
2653 { "-msahf", OPTION_MASK_ISA_SAHF },
2654 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2655 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2656 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2657 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2658 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2659 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2660 { "-mpku", OPTION_MASK_ISA_PKU },
2661 { "-mlwp", OPTION_MASK_ISA_LWP },
2662 { "-mhle", OPTION_MASK_ISA_HLE },
2663 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2664 { "-mmpx", OPTION_MASK_ISA_MPX },
2665 { "-mclwb", OPTION_MASK_ISA_CLWB }
2668 /* Flag options. */
2669 static struct ix86_target_opts flag_opts[] =
2671 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2672 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2673 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2674 { "-m80387", MASK_80387 },
2675 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2676 { "-malign-double", MASK_ALIGN_DOUBLE },
2677 { "-mcld", MASK_CLD },
2678 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2679 { "-mieee-fp", MASK_IEEE_FP },
2680 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2681 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2682 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2683 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2684 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2685 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2686 { "-mno-red-zone", MASK_NO_RED_ZONE },
2687 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2688 { "-mrecip", MASK_RECIP },
2689 { "-mrtd", MASK_RTD },
2690 { "-msseregparm", MASK_SSEREGPARM },
2691 { "-mstack-arg-probe", MASK_STACK_PROBE },
2692 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2693 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2694 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2695 { "-mvzeroupper", MASK_VZEROUPPER },
2696 { "-mstv", MASK_STV },
2697 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2698 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2699 { "-mprefer-avx128", MASK_PREFER_AVX128 },
2700 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2703 /* Additional flag options. */
2704 static struct ix86_target_opts flag2_opts[] =
2706 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY },
2707 { "-mprefer-avx256", OPTION_MASK_PREFER_AVX256 },
2710 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2711 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2713 char isa_other[40];
2714 char isa2_other[40];
2715 char flags_other[40];
2716 char flags2_other[40];
2717 unsigned num = 0;
2718 unsigned i, j;
2719 char *ret;
2720 char *ptr;
2721 size_t len;
2722 size_t line_len;
2723 size_t sep_len;
2724 const char *abi;
2726 memset (opts, '\0', sizeof (opts));
2728 /* Add -march= option. */
2729 if (arch)
2731 opts[num][0] = "-march=";
2732 opts[num++][1] = arch;
2735 /* Add -mtune= option. */
2736 if (tune)
2738 opts[num][0] = "-mtune=";
2739 opts[num++][1] = tune;
2742 /* Add -m32/-m64/-mx32. */
2743 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2745 if ((isa & OPTION_MASK_ABI_64) != 0)
2746 abi = "-m64";
2747 else
2748 abi = "-mx32";
2749 isa &= ~ (OPTION_MASK_ISA_64BIT
2750 | OPTION_MASK_ABI_64
2751 | OPTION_MASK_ABI_X32);
2753 else
2754 abi = "-m32";
2755 opts[num++][0] = abi;
2757 /* Pick out the options set in isa2. */
2758 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2760 if ((isa2 & isa2_opts[i].mask) != 0)
2762 opts[num++][0] = isa2_opts[i].option;
2763 isa2 &= ~ isa2_opts[i].mask;
2767 if (isa2 && add_nl_p)
2769 opts[num++][0] = isa2_other;
2770 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2774 /* Pick out the options set in isa. */
2774 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2776 if ((isa & isa_opts[i].mask) != 0)
2778 opts[num++][0] = isa_opts[i].option;
2779 isa &= ~ isa_opts[i].mask;
2783 if (isa && add_nl_p)
2785 opts[num++][0] = isa_other;
2786 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2789 /* Add flag options. */
2790 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2792 if ((flags & flag_opts[i].mask) != 0)
2794 opts[num++][0] = flag_opts[i].option;
2795 flags &= ~ flag_opts[i].mask;
2799 if (flags && add_nl_p)
2801 opts[num++][0] = flags_other;
2802 sprintf (flags_other, "(other flags: %#x)", flags);
2805 /* Add additional flag options. */
2806 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2808 if ((flags2 & flag2_opts[i].mask) != 0)
2810 opts[num++][0] = flag2_opts[i].option;
2811 flags2 &= ~ flag2_opts[i].mask;
2815 if (flags2 && add_nl_p)
2817 opts[num++][0] = flags2_other;
2818 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2821 /* Add -fpmath= option. */
2822 if (fpmath)
2824 opts[num][0] = "-mfpmath=";
2825 switch ((int) fpmath)
2827 case FPMATH_387:
2828 opts[num++][1] = "387";
2829 break;
2831 case FPMATH_SSE:
2832 opts[num++][1] = "sse";
2833 break;
2835 case FPMATH_387 | FPMATH_SSE:
2836 opts[num++][1] = "sse+387";
2837 break;
2839 default:
2840 gcc_unreachable ();
2844 /* Any options? */
2845 if (num == 0)
2846 return NULL;
2848 gcc_assert (num < ARRAY_SIZE (opts));
2850 /* Size the string. */
2851 len = 0;
2852 sep_len = (add_nl_p) ? 3 : 1;
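/* SEP_LEN accounts for the separators: one space per option, plus a
   backslash-newline pair when line wrapping is enabled.  */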
2853 for (i = 0; i < num; i++)
2855 len += sep_len;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2858 len += strlen (opts[i][j]);
2861 /* Build the string. */
2862 ret = ptr = (char *) xmalloc (len);
2863 line_len = 0;
2865 for (i = 0; i < num; i++)
2867 size_t len2[2];
2869 for (j = 0; j < 2; j++)
2870 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2872 if (i != 0)
2874 *ptr++ = ' ';
2875 line_len++;
2877 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2879 *ptr++ = '\\';
2880 *ptr++ = '\n';
2881 line_len = 0;
2885 for (j = 0; j < 2; j++)
2886 if (opts[i][j])
2888 memcpy (ptr, opts[i][j], len2[j]);
2889 ptr += len2[j];
2890 line_len += len2[j];
2894 *ptr = '\0';
2895 gcc_assert (ret + len >= ptr);
2897 return ret;
2900 /* Return true if profiling code should be emitted before
2901 the prologue, and false otherwise.
2902 Note: for x86 the "hotfix" case is handled with sorry (). */
2903 static bool
2904 ix86_profile_before_prologue (void)
2906 return flag_fentry != 0;
2909 /* Function that is callable from the debugger to print the current
2910 options. */
2911 void ATTRIBUTE_UNUSED
2912 ix86_debug_options (void)
2914 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
2915 target_flags, ix86_target_flags,
2916 ix86_arch_string, ix86_tune_string,
2917 ix86_fpmath, true);
2919 if (opts)
2921 fprintf (stderr, "%s\n\n", opts);
2922 free (opts);
2924 else
2925 fputs ("<no options>\n\n", stderr);
2927 return;
2930 /* Return true if T is one of the bytes we should avoid with
2931 -fmitigate-rop. */
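/* 0xc3 and 0xc2 are the near return opcodes (ret, ret imm16);
   0xcb and 0xca are the far return opcodes (retf, retf imm16).  */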
2933 static bool
2934 ix86_rop_should_change_byte_p (int t)
2936 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
2939 static const char *stringop_alg_names[] = {
2940 #define DEF_ENUM
2941 #define DEF_ALG(alg, name) #name,
2942 #include "stringop.def"
2943 #undef DEF_ENUM
2944 #undef DEF_ALG
2947 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2948 The string is of the following form (or a comma-separated list of such entries):
2950 strategy_alg:max_size:[align|noalign]
2952 where the full size range for the strategy is either [0, max_size] or
2953 [min_size, max_size], in which min_size is the max_size + 1 of the
2954 preceding range. The last size range must have max_size == -1.
2956 Examples:
2959 -mmemcpy-strategy=libcall:-1:noalign
2961 This is equivalent to -mstringop-strategy=libcall (for known-size memcpy).
2965 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2967 This tells the compiler to use the following strategy for memset:
2968 1) when the expected size is between [1, 16], use the rep_8byte strategy;
2969 2) when the size is between [17, 2048], use vector_loop;
2970 3) when the size is > 2048, use libcall. */
2972 struct stringop_size_range
2974 int max;
2975 stringop_alg alg;
2976 bool noalign;
2979 static void
2980 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2982 const struct stringop_algs *default_algs;
2983 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2984 char *curr_range_str, *next_range_str;
2985 const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
2986 int i = 0, n = 0;
2988 if (is_memset)
2989 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2990 else
2991 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2993 curr_range_str = strategy_str;
2997 int maxs;
2998 char alg_name[128];
2999 char align[16];
3000 next_range_str = strchr (curr_range_str, ',');
3001 if (next_range_str)
3002 *next_range_str++ = '\0';
3004 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3005 alg_name, &maxs, align))
3007 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3008 return;
3011 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3013 error ("size ranges of option %qs should be increasing", opt);
3014 return;
3017 for (i = 0; i < last_alg; i++)
3018 if (!strcmp (alg_name, stringop_alg_names[i]))
3019 break;
3021 if (i == last_alg)
3023 error ("wrong strategy name %qs specified for option %qs",
3024 alg_name, opt);
3026 auto_vec <const char *> candidates;
3027 for (i = 0; i < last_alg; i++)
3028 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3029 candidates.safe_push (stringop_alg_names[i]);
3031 char *s;
3032 const char *hint
3033 = candidates_list_and_hint (alg_name, s, candidates);
3034 if (hint)
3035 inform (input_location,
3036 "valid arguments to %qs are: %s; did you mean %qs?",
3037 opt, s, hint);
3038 else
3039 inform (input_location, "valid arguments to %qs are: %s",
3040 opt, s);
3041 XDELETEVEC (s);
3042 return;
3045 if ((stringop_alg) i == rep_prefix_8_byte
3046 && !TARGET_64BIT)
3048 /* rep; movq isn't available in 32-bit code. */
3049 error ("strategy name %qs specified for option %qs "
3050 "not supported for 32-bit code", alg_name, opt);
3051 return;
3054 input_ranges[n].max = maxs;
3055 input_ranges[n].alg = (stringop_alg) i;
3056 if (!strcmp (align, "align"))
3057 input_ranges[n].noalign = false;
3058 else if (!strcmp (align, "noalign"))
3059 input_ranges[n].noalign = true;
3060 else
3062 error ("unknown alignment %qs specified for option %qs", align, opt);
3063 return;
3065 n++;
3066 curr_range_str = next_range_str;
3068 while (curr_range_str);
3070 if (input_ranges[n - 1].max != -1)
3072 error ("the max value for the last size range should be -1"
3073 " for option %qs", opt);
3074 return;
3077 if (n > MAX_STRINGOP_ALGS)
3079 error ("too many size ranges specified in option %qs", opt);
3080 return;
3083 /* Now override the default algs array. */
3084 for (i = 0; i < n; i++)
3086 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3087 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3088 = input_ranges[i].alg;
3089 *const_cast<int *>(&default_algs->size[i].noalign)
3090 = input_ranges[i].noalign;
3095 /* Parse the -mtune-ctrl= option. When DUMP is true,
3096 print the features that are explicitly set. */
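/* For example (feature names here are illustrative; the valid set is
   ix86_tune_feature_names), -mtune-ctrl=use_incdec,^partial_reg_stall
   sets the first feature and clears the second.  */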
3098 static void
3099 parse_mtune_ctrl_str (bool dump)
3101 if (!ix86_tune_ctrl_string)
3102 return;
3104 char *next_feature_string = NULL;
3105 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3106 char *orig = curr_feature_string;
3107 int i;
3110 bool clear = false;
3112 next_feature_string = strchr (curr_feature_string, ',');
3113 if (next_feature_string)
3114 *next_feature_string++ = '\0';
3115 if (*curr_feature_string == '^')
3117 curr_feature_string++;
3118 clear = true;
3120 for (i = 0; i < X86_TUNE_LAST; i++)
3122 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3124 ix86_tune_features[i] = !clear;
3125 if (dump)
3126 fprintf (stderr, "Explicitly %s feature %s\n",
3127 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3128 break;
3131 if (i == X86_TUNE_LAST)
3132 error ("Unknown parameter to option -mtune-ctrl: %s",
3133 clear ? curr_feature_string - 1 : curr_feature_string);
3134 curr_feature_string = next_feature_string;
3136 while (curr_feature_string);
3137 free (orig);
3140 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3141 processor type. */
3143 static void
3144 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3146 unsigned int ix86_tune_mask = 1u << ix86_tune;
3147 int i;
3149 for (i = 0; i < X86_TUNE_LAST; ++i)
3151 if (ix86_tune_no_default)
3152 ix86_tune_features[i] = 0;
3153 else
3154 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3157 if (dump)
3159 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3160 for (i = 0; i < X86_TUNE_LAST; i++)
3161 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3162 ix86_tune_features[i] ? "on" : "off");
3165 parse_mtune_ctrl_str (dump);
3169 /* Default align_* from the processor table. */
3171 static void
3172 ix86_default_align (struct gcc_options *opts)
3174 if (opts->x_align_loops == 0)
3176 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3177 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3179 if (opts->x_align_jumps == 0)
3181 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3182 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3184 if (opts->x_align_functions == 0)
3186 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3190 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3192 static void
3193 ix86_override_options_after_change (void)
3195 ix86_default_align (&global_options);
3198 /* Override various settings based on options. If MAIN_ARGS_P, the
3199 options are from the command line, otherwise they are from
3200 attributes. Return false if there's an error related to the -march
3201 option. */
3203 static bool
3204 ix86_option_override_internal (bool main_args_p,
3205 struct gcc_options *opts,
3206 struct gcc_options *opts_set)
3208 int i;
3209 unsigned int ix86_arch_mask;
3210 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3212 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3213 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3214 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3215 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3216 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3217 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3218 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3219 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3220 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3221 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3222 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3223 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3224 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3225 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3226 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3227 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3228 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3229 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3230 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3231 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3232 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3233 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3234 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3235 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3236 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3237 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3238 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3239 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3240 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3241 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3242 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3243 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3244 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3245 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3246 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3247 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3248 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3249 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3250 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3251 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3252 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3253 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3254 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3255 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3256 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3257 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3258 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3259 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3260 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3261 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3262 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3263 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3264 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3265 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3266 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3267 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3268 #define PTA_MWAITX (HOST_WIDE_INT_1 << 56)
3269 #define PTA_CLZERO (HOST_WIDE_INT_1 << 57)
3270 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 58)
3271 #define PTA_PKU (HOST_WIDE_INT_1 << 59)
3272 #define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60)
3273 #define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61)
3274 #define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62)
3275 #define PTA_SGX (HOST_WIDE_INT_1 << 63)
3277 #define PTA_CORE2 \
3278 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3279 | PTA_CX16 | PTA_FXSR)
3280 #define PTA_NEHALEM \
3281 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3282 #define PTA_WESTMERE \
3283 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3284 #define PTA_SANDYBRIDGE \
3285 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3286 #define PTA_IVYBRIDGE \
3287 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3288 #define PTA_HASWELL \
3289 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3290 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3291 #define PTA_BROADWELL \
3292 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3293 #define PTA_SKYLAKE \
3294 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3295 #define PTA_SKYLAKE_AVX512 \
3296 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
3297 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
3298 #define PTA_KNL \
3299 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3300 #define PTA_BONNELL \
3301 (PTA_CORE2 | PTA_MOVBE)
3302 #define PTA_SILVERMONT \
3303 (PTA_WESTMERE | PTA_MOVBE)
3304 #define PTA_KNM \
3305 (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
3307 /* If this reaches 64, we need to widen the struct pta flags below. */
3309 static struct pta
3311 const char *const name; /* processor name or nickname. */
3312 const enum processor_type processor;
3313 const enum attr_cpu schedule;
3314 const unsigned HOST_WIDE_INT flags;
3316 const processor_alias_table[] =
3318 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3319 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3320 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3321 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3322 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3323 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3324 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3325 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3326 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3327 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3328 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3329 PTA_MMX | PTA_SSE | PTA_FXSR},
3330 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3331 PTA_MMX | PTA_SSE | PTA_FXSR},
3332 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3333 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3334 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3335 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3336 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3337 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3338 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3339 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3340 PTA_MMX | PTA_SSE | PTA_FXSR},
3341 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3342 PTA_MMX | PTA_SSE | PTA_FXSR},
3343 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3344 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3345 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3346 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3347 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3348 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3349 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3350 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3351 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3352 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3353 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3354 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3355 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3356 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3357 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3358 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3359 PTA_SANDYBRIDGE},
3360 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3361 PTA_SANDYBRIDGE},
3362 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3363 PTA_IVYBRIDGE},
3364 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3365 PTA_IVYBRIDGE},
3366 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3367 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3368 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3369 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3370 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
3371 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3372 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3373 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3374 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3375 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3376 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3377 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3378 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3379 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3380 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3381 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3382 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3383 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3384 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3385 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3386 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3387 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3388 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3389 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3390 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3391 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3392 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3393 {"x86-64", PROCESSOR_K8, CPU_K8,
3394 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3395 {"eden-x2", PROCESSOR_K8, CPU_K8,
3396 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3397 {"nano", PROCESSOR_K8, CPU_K8,
3398 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3399 | PTA_SSSE3 | PTA_FXSR},
3400 {"nano-1000", PROCESSOR_K8, CPU_K8,
3401 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3402 | PTA_SSSE3 | PTA_FXSR},
3403 {"nano-2000", PROCESSOR_K8, CPU_K8,
3404 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3405 | PTA_SSSE3 | PTA_FXSR},
3406 {"nano-3000", PROCESSOR_K8, CPU_K8,
3407 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3408 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3409 {"nano-x2", PROCESSOR_K8, CPU_K8,
3410 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3411 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3412 {"eden-x4", PROCESSOR_K8, CPU_K8,
3413 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3414 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3415 {"nano-x4", PROCESSOR_K8, CPU_K8,
3416 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3417 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3418 {"k8", PROCESSOR_K8, CPU_K8,
3419 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3420 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3421 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3422 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3423 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3424 {"opteron", PROCESSOR_K8, CPU_K8,
3425 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3426 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3427 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3428 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3429 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3430 {"athlon64", PROCESSOR_K8, CPU_K8,
3431 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3432 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3433 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3434 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3435 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3436 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3437 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3438 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3439 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3441 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3442 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3444 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3445 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3446 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3447 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3448 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3449 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3450 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3451 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3452 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3453 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3454 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3455 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3456 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3457 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3458 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3459 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3460 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3461 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3462 | PTA_XSAVEOPT | PTA_FSGSBASE},
3463 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3464 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3465 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3466 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3467 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3468 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3469 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3470 | PTA_MOVBE | PTA_MWAITX},
3471 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3472 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3473 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3474 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3475 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3476 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3477 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3478 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3479 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3480 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3481 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3482 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3483 | PTA_FXSR | PTA_XSAVE},
3484 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3485 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3486 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3487 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3488 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3489 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3491 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3492 PTA_64BIT
3493 | PTA_HLE /* flags are only used for -march switch. */ },
3496 /* -mrecip options. */
3497 static struct
3499 const char *string; /* option name */
3500 unsigned int mask; /* mask bits to set */
3502 const recip_options[] =
3504 { "all", RECIP_MASK_ALL },
3505 { "none", RECIP_MASK_NONE },
3506 { "div", RECIP_MASK_DIV },
3507 { "sqrt", RECIP_MASK_SQRT },
3508 { "vec-div", RECIP_MASK_VEC_DIV },
3509 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3512 int const pta_size = ARRAY_SIZE (processor_alias_table);
3514 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3515 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3516 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3517 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3518 #ifdef TARGET_BI_ARCH
3519 else
3521 #if TARGET_BI_ARCH == 1
3522 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3523 is on and OPTION_MASK_ABI_X32 is off. We turn off
3524 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3525 -mx32. */
3526 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3527 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3528 #else
3529 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3530 on and OPTION_MASK_ABI_64 is off. We turn off
3531 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3532 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3533 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3534 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3535 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3536 #endif
3537 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3538 && TARGET_IAMCU_P (opts->x_target_flags))
3539 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3540 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3542 #endif
3544 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3546 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3547 OPTION_MASK_ABI_64 for TARGET_X32. */
3548 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3549 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3551 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3552 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3553 | OPTION_MASK_ABI_X32
3554 | OPTION_MASK_ABI_64);
3555 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3557 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3558 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3560 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3563 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3564 SUBTARGET_OVERRIDE_OPTIONS;
3565 #endif
3567 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3568 SUBSUBTARGET_OVERRIDE_OPTIONS;
3569 #endif
3571 /* -fPIC is the default for x86_64. */
3572 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3573 opts->x_flag_pic = 2;
3575 /* Need to check -mtune=generic first. */
3576 if (opts->x_ix86_tune_string)
3578 /* As special support for cross compilers we read -mtune=native
3579 as -mtune=generic. With native compilers we won't see the
3580 -mtune=native, as it was changed by the driver. */
3581 if (!strcmp (opts->x_ix86_tune_string, "native"))
3583 opts->x_ix86_tune_string = "generic";
3585 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3586 warning (OPT_Wdeprecated,
3587 main_args_p
3588 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3589 "or %<-mtune=generic%> instead as appropriate")
3590 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3591 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3592 " instead as appropriate"));
3594 else
3596 if (opts->x_ix86_arch_string)
3597 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3598 if (!opts->x_ix86_tune_string)
3600 opts->x_ix86_tune_string
3601 = processor_target_table[TARGET_CPU_DEFAULT].name;
3602 ix86_tune_defaulted = 1;
3605 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3606 or defaulted. We need to use a sensible tune option. */
3607 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3609 opts->x_ix86_tune_string = "generic";
3613 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3614 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3616 /* rep; movq isn't available in 32-bit code. */
3617 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3618 opts->x_ix86_stringop_alg = no_stringop;
3621 if (!opts->x_ix86_arch_string)
3622 opts->x_ix86_arch_string
3623 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3624 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3625 else
3626 ix86_arch_specified = 1;
3628 if (opts_set->x_ix86_pmode)
3630 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3631 && opts->x_ix86_pmode == PMODE_SI)
3632 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3633 && opts->x_ix86_pmode == PMODE_DI))
3634 error ("address mode %qs not supported in the %s bit mode",
3635 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3636 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3638 else
3639 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3640 ? PMODE_DI : PMODE_SI;
3642 if (!opts_set->x_ix86_abi)
3643 opts->x_ix86_abi = DEFAULT_ABI;
3645 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3646 error ("-mabi=ms not supported with X32 ABI");
3647 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3649 /* For targets using the MS ABI, enable ms-extensions if not
3650 explicitly turned off. For non-MS ABI targets we turn this
3651 option off. */
3652 if (!opts_set->x_flag_ms_extensions)
3653 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3655 if (opts_set->x_ix86_cmodel)
3657 switch (opts->x_ix86_cmodel)
3659 case CM_SMALL:
3660 case CM_SMALL_PIC:
3661 if (opts->x_flag_pic)
3662 opts->x_ix86_cmodel = CM_SMALL_PIC;
3663 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3664 error ("code model %qs not supported in the %s bit mode",
3665 "small", "32");
3666 break;
3668 case CM_MEDIUM:
3669 case CM_MEDIUM_PIC:
3670 if (opts->x_flag_pic)
3671 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3672 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3673 error ("code model %qs not supported in the %s bit mode",
3674 "medium", "32");
3675 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3676 error ("code model %qs not supported in x32 mode",
3677 "medium");
3678 break;
3680 case CM_LARGE:
3681 case CM_LARGE_PIC:
3682 if (opts->x_flag_pic)
3683 opts->x_ix86_cmodel = CM_LARGE_PIC;
3684 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3685 error ("code model %qs not supported in the %s bit mode",
3686 "large", "32");
3687 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3688 error ("code model %qs not supported in x32 mode",
3689 "large");
3690 break;
3692 case CM_32:
3693 if (opts->x_flag_pic)
3694 error ("code model %s does not support PIC mode", "32");
3695 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3696 error ("code model %qs not supported in the %s bit mode",
3697 "32", "64");
3698 break;
3700 case CM_KERNEL:
3701 if (opts->x_flag_pic)
3703 error ("code model %s does not support PIC mode", "kernel");
3704 opts->x_ix86_cmodel = CM_32;
3706 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3707 error ("code model %qs not supported in the %s bit mode",
3708 "kernel", "32");
3709 break;
3711 default:
3712 gcc_unreachable ();
3715 else
3717 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3718 use of rip-relative addressing. This eliminates fixups that
3719 would otherwise be needed if this object is to be placed in a
3720 DLL, and is essentially just as efficient as direct addressing. */
3721 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3722 && (TARGET_RDOS || TARGET_PECOFF))
3723 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3724 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3725 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3726 else
3727 opts->x_ix86_cmodel = CM_32;
3729 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3731 error ("-masm=intel not supported in this configuration");
3732 opts->x_ix86_asm_dialect = ASM_ATT;
3734 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3735 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3736 sorry ("%i-bit mode not compiled in",
3737 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3742 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3744 error (main_args_p
3745 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3746 "switch")
3747 : G_("%<generic%> CPU can be used only for "
3748 "%<target(\"tune=\")%> attribute"));
3749 return false;
3751 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3753 error (main_args_p
3754 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3755 "switch")
3756 : G_("%<intel%> CPU can be used only for "
3757 "%<target(\"tune=\")%> attribute"));
3758 return false;
3761 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3762 && !(processor_alias_table[i].flags & PTA_64BIT))
3764 error ("CPU you selected does not support x86-64 "
3765 "instruction set");
3766 return false;
3769 ix86_schedule = processor_alias_table[i].schedule;
3770 ix86_arch = processor_alias_table[i].processor;
3771 /* Default cpu tuning to the architecture. */
3772 ix86_tune = ix86_arch;
3774 if (processor_alias_table[i].flags & PTA_MMX
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3777 if (processor_alias_table[i].flags & PTA_3DNOW
3778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3780 if (processor_alias_table[i].flags & PTA_3DNOW_A
3781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3783 if (processor_alias_table[i].flags & PTA_SSE
3784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3786 if (processor_alias_table[i].flags & PTA_SSE2
3787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3789 if (processor_alias_table[i].flags & PTA_SSE3
3790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3792 if (processor_alias_table[i].flags & PTA_SSSE3
3793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3795 if (processor_alias_table[i].flags & PTA_SSE4_1
3796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3798 if (processor_alias_table[i].flags & PTA_SSE4_2
3799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3801 if (processor_alias_table[i].flags & PTA_AVX
3802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3804 if (processor_alias_table[i].flags & PTA_AVX2
3805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3807 if (processor_alias_table[i].flags & PTA_FMA
3808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3810 if (processor_alias_table[i].flags & PTA_SSE4A
3811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3813 if (processor_alias_table[i].flags & PTA_FMA4
3814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3816 if (processor_alias_table[i].flags & PTA_XOP
3817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3819 if (processor_alias_table[i].flags & PTA_LWP
3820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3822 if (processor_alias_table[i].flags & PTA_ABM
3823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3825 if (processor_alias_table[i].flags & PTA_BMI
3826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3828 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3831 if (processor_alias_table[i].flags & PTA_TBM
3832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3834 if (processor_alias_table[i].flags & PTA_BMI2
3835 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3836 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3837 if (processor_alias_table[i].flags & PTA_CX16
3838 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3839 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3840 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3841 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3842 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3843 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3844 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3845 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3846 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
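/* Note on the condition above: SAHF ends up enabled unless we target
   64-bit and the selected CPU carries PTA_NO_SAHF; early x86-64
   processors do not support LAHF/SAHF in long mode, so the instruction
   cannot be assumed there. */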
3847 if (processor_alias_table[i].flags & PTA_MOVBE
3848 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3849 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3850 if (processor_alias_table[i].flags & PTA_AES
3851 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3852 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3853 if (processor_alias_table[i].flags & PTA_SHA
3854 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3855 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3856 if (processor_alias_table[i].flags & PTA_PCLMUL
3857 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3859 if (processor_alias_table[i].flags & PTA_FSGSBASE
3860 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3861 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3862 if (processor_alias_table[i].flags & PTA_RDRND
3863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3865 if (processor_alias_table[i].flags & PTA_F16C
3866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3868 if (processor_alias_table[i].flags & PTA_RTM
3869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3871 if (processor_alias_table[i].flags & PTA_HLE
3872 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3874 if (processor_alias_table[i].flags & PTA_PRFCHW
3875 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3876 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3877 if (processor_alias_table[i].flags & PTA_RDSEED
3878 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3879 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3880 if (processor_alias_table[i].flags & PTA_ADX
3881 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3882 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3883 if (processor_alias_table[i].flags & PTA_FXSR
3884 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3885 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3886 if (processor_alias_table[i].flags & PTA_XSAVE
3887 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3888 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3889 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3890 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3891 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3892 if (processor_alias_table[i].flags & PTA_AVX512F
3893 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3894 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3895 if (processor_alias_table[i].flags & PTA_AVX512ER
3896 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3898 if (processor_alias_table[i].flags & PTA_AVX512PF
3899 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3900 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3901 if (processor_alias_table[i].flags & PTA_AVX512CD
3902 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3903 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3904 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3905 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3906 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3907 if (processor_alias_table[i].flags & PTA_CLWB
3908 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3910 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3911 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3912 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3913 if (processor_alias_table[i].flags & PTA_CLZERO
3914 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
3915 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
3916 if (processor_alias_table[i].flags & PTA_XSAVEC
3917 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3918 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3919 if (processor_alias_table[i].flags & PTA_XSAVES
3920 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3921 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3922 if (processor_alias_table[i].flags & PTA_AVX512DQ
3923 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3924 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3925 if (processor_alias_table[i].flags & PTA_AVX512BW
3926 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3927 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3928 if (processor_alias_table[i].flags & PTA_AVX512VL
3929 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3931 if (processor_alias_table[i].flags & PTA_MPX
3932 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3933 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3934 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3935 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3936 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3937 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3938 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3939 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3941 if (processor_alias_table[i].flags & PTA_AVX5124VNNIW
3942 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124VNNIW))
3943 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
3944 if (processor_alias_table[i].flags & PTA_AVX5124FMAPS
3945 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124FMAPS))
3946 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
3947 if (processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ
3948 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
3949 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
3950 if (processor_alias_table[i].flags & PTA_SGX
3951 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
3952 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
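/* The newer extensions just above (AVX5124VNNIW, AVX5124FMAPS,
   AVX512VPOPCNTDQ, SGX) are tracked in a second ISA word,
   x_ix86_isa_flags2, with its own x_ix86_isa_flags2_explicit mask,
   while the older extensions live in x_ix86_isa_flags. */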
3954 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3955 x86_prefetch_sse = true;
3956 if (processor_alias_table[i].flags & PTA_MWAITX
3957 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3958 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3959 if (processor_alias_table[i].flags & PTA_PKU
3960 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
3961 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
3963 /* Don't enable x87 instructions if only
3964 general registers are allowed. */
3965 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
3966 && !(opts_set->x_target_flags & MASK_80387))
3968 if (processor_alias_table[i].flags & PTA_NO_80387)
3969 opts->x_target_flags &= ~MASK_80387;
3970 else
3971 opts->x_target_flags |= MASK_80387;
3973 break;
3976 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3977 error ("Intel MPX does not support x32");
3979 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3980 error ("Intel MPX does not support x32");
3982 if (i == pta_size)
3984 error (main_args_p
3985 ? G_("bad value (%qs) for %<-march=%> switch")
3986 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
3987 opts->x_ix86_arch_string);
3989 auto_vec <const char *> candidates;
3990 for (i = 0; i < pta_size; i++)
3991 if (strcmp (processor_alias_table[i].name, "generic")
3992 && strcmp (processor_alias_table[i].name, "intel")
3993 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3994 || (processor_alias_table[i].flags & PTA_64BIT)))
3995 candidates.safe_push (processor_alias_table[i].name);
3997 char *s;
3998 const char *hint
3999 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4000 if (hint)
4001 inform (input_location,
4002 main_args_p
4003 ? G_("valid arguments to %<-march=%> switch are: "
4004 "%s; did you mean %qs?")
4005 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4006 "%s; did you mean %qs?"), s, hint);
4007 else
4008 inform (input_location,
4009 main_args_p
4010 ? G_("valid arguments to %<-march=%> switch are: %s")
4011 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4012 "are: %s"), s);
4013 XDELETEVEC (s);
4016 ix86_arch_mask = 1u << ix86_arch;
4017 for (i = 0; i < X86_ARCH_LAST; ++i)
4018 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
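/* ix86_arch_mask is a one-hot bit for the selected architecture; each
   X86_ARCH_* feature is enabled exactly when its entry in
   initial_ix86_arch_features has that architecture's bit set. */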
4020 for (i = 0; i < pta_size; i++)
4021 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4023 ix86_schedule = processor_alias_table[i].schedule;
4024 ix86_tune = processor_alias_table[i].processor;
4025 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4027 if (!(processor_alias_table[i].flags & PTA_64BIT))
4029 if (ix86_tune_defaulted)
4031 opts->x_ix86_tune_string = "x86-64";
4032 for (i = 0; i < pta_size; i++)
4033 if (! strcmp (opts->x_ix86_tune_string,
4034 processor_alias_table[i].name))
4035 break;
4036 ix86_schedule = processor_alias_table[i].schedule;
4037 ix86_tune = processor_alias_table[i].processor;
4039 else
4040 error ("CPU you selected does not support x86-64 "
4041 "instruction set");
4044 /* Intel CPUs have always interpreted SSE prefetch instructions as
4045 NOPs; so, we can enable SSE prefetch instructions even when
4046 -mtune (rather than -march) points us to a processor that has them.
4047 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4048 higher processors. */
4049 if (TARGET_CMOV
4050 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
4051 x86_prefetch_sse = true;
4052 break;
4055 if (ix86_tune_specified && i == pta_size)
4057 error (main_args_p
4058 ? G_("bad value (%qs) for %<-mtune=%> switch")
4059 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4060 opts->x_ix86_tune_string);
4062 auto_vec <const char *> candidates;
4063 for (i = 0; i < pta_size; i++)
4064 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4065 || (processor_alias_table[i].flags & PTA_64BIT))
4066 candidates.safe_push (processor_alias_table[i].name);
4068 char *s;
4069 const char *hint
4070 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4071 if (hint)
4072 inform (input_location,
4073 main_args_p
4074 ? G_("valid arguments to %<-mtune=%> switch are: "
4075 "%s; did you mean %qs?")
4076 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4077 "%s; did you mean %qs?"), s, hint);
4078 else
4079 inform (input_location,
4080 main_args_p
4081 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4082 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4083 "are: %s"), s);
4084 XDELETEVEC (s);
4087 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4089 #ifndef USE_IX86_FRAME_POINTER
4090 #define USE_IX86_FRAME_POINTER 0
4091 #endif
4093 #ifndef USE_X86_64_FRAME_POINTER
4094 #define USE_X86_64_FRAME_POINTER 0
4095 #endif
4097 /* Set the default values for switches whose default depends on TARGET_64BIT
4098 in case they weren't overwritten by command line options. */
4099 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4101 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4102 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4103 if (opts->x_flag_asynchronous_unwind_tables
4104 && !opts_set->x_flag_unwind_tables
4105 && TARGET_64BIT_MS_ABI)
4106 opts->x_flag_unwind_tables = 1;
4107 if (opts->x_flag_asynchronous_unwind_tables == 2)
4108 opts->x_flag_unwind_tables
4109 = opts->x_flag_asynchronous_unwind_tables = 1;
4110 if (opts->x_flag_pcc_struct_return == 2)
4111 opts->x_flag_pcc_struct_return = 0;
4113 else
4115 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4116 opts->x_flag_omit_frame_pointer
4117 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4118 if (opts->x_flag_asynchronous_unwind_tables == 2)
4119 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4120 if (opts->x_flag_pcc_struct_return == 2)
4122 /* Intel MCU psABI specifies that -freg-struct-return should
4123 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4124 we check -miamcu so that -freg-struct-return is always
4125 turned on if -miamcu is used. */
4126 if (TARGET_IAMCU_P (opts->x_target_flags))
4127 opts->x_flag_pcc_struct_return = 0;
4128 else
4129 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4133 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4134 /* TODO: ix86_cost should be chosen at instruction or function granularity
4135 so for cold code we use size_cost even in !optimize_size compilation. */
4136 if (opts->x_optimize_size)
4137 ix86_cost = &ix86_size_cost;
4138 else
4139 ix86_cost = ix86_tune_cost;
4141 /* Arrange to set up i386_stack_locals for all functions. */
4142 init_machine_status = ix86_init_machine_status;
4144 /* Validate -mregparm= value. */
4145 if (opts_set->x_ix86_regparm)
4147 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4148 warning (0, "-mregparm is ignored in 64-bit mode");
4149 else if (TARGET_IAMCU_P (opts->x_target_flags))
4150 warning (0, "-mregparm is ignored for Intel MCU psABI");
4151 if (opts->x_ix86_regparm > REGPARM_MAX)
4153 error ("-mregparm=%d is not between 0 and %d",
4154 opts->x_ix86_regparm, REGPARM_MAX);
4155 opts->x_ix86_regparm = 0;
4158 if (TARGET_IAMCU_P (opts->x_target_flags)
4159 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4160 opts->x_ix86_regparm = REGPARM_MAX;
4162 /* Default align_* from the processor table. */
4163 ix86_default_align (opts);
4165 /* Provide default for -mbranch-cost= value. */
4166 if (!opts_set->x_ix86_branch_cost)
4167 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4169 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4171 opts->x_target_flags
4172 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4174 /* Enable by default the SSE and MMX builtins. Do allow the user to
4175 explicitly disable any of these. In particular, disabling SSE and
4176 MMX for kernel code is extremely useful. */
4177 if (!ix86_arch_specified)
4178 opts->x_ix86_isa_flags
4179 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4180 | TARGET_SUBTARGET64_ISA_DEFAULT)
4181 & ~opts->x_ix86_isa_flags_explicit);
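/* The "default mask & ~explicit" idiom used here (and repeatedly below)
   applies a default only to bits the user did not set explicitly, so an
   explicit -mno-sse, -mno-mmx etc. is still respected. */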
4183 if (TARGET_RTD_P (opts->x_target_flags))
4184 warning (0,
4185 main_args_p
4186 ? G_("%<-mrtd%> is ignored in 64bit mode")
4187 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4189 else
4191 opts->x_target_flags
4192 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4194 if (!ix86_arch_specified)
4195 opts->x_ix86_isa_flags
4196 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4198 /* The i386 ABI does not specify a red zone. It still makes sense to use one
4199 when the programmer takes care to keep the stack from being destroyed. */
4200 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4201 opts->x_target_flags |= MASK_NO_RED_ZONE;
4204 /* Keep nonleaf frame pointers. */
4205 if (opts->x_flag_omit_frame_pointer)
4206 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4207 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4208 opts->x_flag_omit_frame_pointer = 1;
4210 /* If we're doing fast math, we don't care about comparison order
4211 wrt NaNs. This lets us use a shorter comparison sequence. */
4212 if (opts->x_flag_finite_math_only)
4213 opts->x_target_flags &= ~MASK_IEEE_FP;
4215 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4216 since the insns won't need emulation. */
4217 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4218 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4220 /* Likewise, if the target doesn't have a 387, or we've specified
4221 software floating point, don't use 387 inline intrinsics. */
4222 if (!TARGET_80387_P (opts->x_target_flags))
4223 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4225 /* Turn on MMX builtins for -msse. */
4226 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4227 opts->x_ix86_isa_flags
4228 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4230 /* Enable SSE prefetch. */
4231 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4232 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4233 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4234 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4235 x86_prefetch_sse = true;
4237 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4238 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4239 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4240 opts->x_ix86_isa_flags
4241 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4243 /* Enable lzcnt instruction for -mabm. */
4244 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4245 opts->x_ix86_isa_flags
4246 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4248 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4249 if (TARGET_16BIT_P(opts->x_ix86_isa_flags))
4250 opts->x_ix86_isa_flags
4251 &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4252 & ~opts->x_ix86_isa_flags_explicit);
4254 /* Validate -mpreferred-stack-boundary= value or default it to
4255 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4256 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4257 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4259 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2;
4260 int max = TARGET_SEH ? 4 : 12;
4262 if (opts->x_ix86_preferred_stack_boundary_arg < min
4263 || opts->x_ix86_preferred_stack_boundary_arg > max)
4265 if (min == max)
4266 error ("-mpreferred-stack-boundary is not supported "
4267 "for this target");
4268 else
4269 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4270 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4272 else
4273 ix86_preferred_stack_boundary
4274 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
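/* Worked example (BITS_PER_UNIT is 8 on x86): -mpreferred-stack-boundary=4
   yields (1 << 4) * 8 = 128 bits, i.e. 16-byte stack alignment. */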
4277 /* Set the default value for -mstackrealign. */
4278 if (!opts_set->x_ix86_force_align_arg_pointer)
4279 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4281 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4283 /* Validate -mincoming-stack-boundary= value or default it to
4284 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4285 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4286 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4288 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4290 if (opts->x_ix86_incoming_stack_boundary_arg < min
4291 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4292 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4293 opts->x_ix86_incoming_stack_boundary_arg, min);
4294 else
4296 ix86_user_incoming_stack_boundary
4297 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4298 ix86_incoming_stack_boundary
4299 = ix86_user_incoming_stack_boundary;
4303 #ifndef NO_PROFILE_COUNTERS
4304 if (flag_nop_mcount)
4305 error ("-mnop-mcount is not compatible with this target");
4306 #endif
4307 if (flag_nop_mcount && flag_pic)
4308 error ("-mnop-mcount is not implemented for -fPIC");
4310 /* Accept -msseregparm only if at least SSE support is enabled. */
4311 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4312 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4313 error (main_args_p
4314 ? G_("%<-msseregparm%> used without SSE enabled")
4315 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4317 if (opts_set->x_ix86_fpmath)
4319 if (opts->x_ix86_fpmath & FPMATH_SSE)
4321 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4323 if (TARGET_80387_P (opts->x_target_flags))
4325 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4326 opts->x_ix86_fpmath = FPMATH_387;
4329 else if ((opts->x_ix86_fpmath & FPMATH_387)
4330 && !TARGET_80387_P (opts->x_target_flags))
4332 warning (0, "387 instruction set disabled, using SSE arithmetics");
4333 opts->x_ix86_fpmath = FPMATH_SSE;
4337 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4338 fpmath=387. The latter is however the default on many targets, since the
4339 extra 80-bit precision of temporaries is considered to be part of the ABI.
4340 Overwrite the default at least for -ffast-math.
4341 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4342 smaller binaries. It is however not clear if register allocation is
4343 ready for this setting.
4344 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4345 codegen. We may switch to 387 with -ffast-math for size-optimized
4346 functions. */
4347 else if (fast_math_flags_set_p (&global_options)
4348 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4349 opts->x_ix86_fpmath = FPMATH_SSE;
4350 else
4351 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4353 /* Use external vectorized library in vectorizing intrinsics. */
4354 if (opts_set->x_ix86_veclibabi_type)
4355 switch (opts->x_ix86_veclibabi_type)
4357 case ix86_veclibabi_type_svml:
4358 ix86_veclib_handler = ix86_veclibabi_svml;
4359 break;
4361 case ix86_veclibabi_type_acml:
4362 ix86_veclib_handler = ix86_veclibabi_acml;
4363 break;
4365 default:
4366 gcc_unreachable ();
4369 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4370 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4371 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4373 /* If stack probes are required, the space used for large function
4374 arguments on the stack must also be probed, so enable
4375 -maccumulate-outgoing-args so this happens in the prologue. */
4376 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4377 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4379 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4380 warning (0,
4381 main_args_p
4382 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4383 "for correctness")
4384 : G_("stack probing requires "
4385 "%<target(\"accumulate-outgoing-args\")%> for "
4386 "correctness"));
4387 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4390 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4391 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4392 if (fixed_regs[BP_REG]
4393 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4395 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4396 warning (0,
4397 main_args_p
4398 ? G_("fixed ebp register requires "
4399 "%<-maccumulate-outgoing-args%>")
4400 : G_("fixed ebp register requires "
4401 "%<target(\"accumulate-outgoing-args\")%>"));
4402 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4405 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4407 char *p;
4408 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4409 p = strchr (internal_label_prefix, 'X');
4410 internal_label_prefix_len = p - internal_label_prefix;
4411 *p = '\0';
4414 /* When no scheduling description is available, disable the scheduler pass
4415 so it won't slow down the compilation and make x87 code slower. */
4416 if (!TARGET_SCHEDULE)
4417 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4419 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4420 ix86_tune_cost->simultaneous_prefetches,
4421 opts->x_param_values,
4422 opts_set->x_param_values);
4423 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4424 ix86_tune_cost->prefetch_block,
4425 opts->x_param_values,
4426 opts_set->x_param_values);
4427 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4428 ix86_tune_cost->l1_cache_size,
4429 opts->x_param_values,
4430 opts_set->x_param_values);
4431 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4432 ix86_tune_cost->l2_cache_size,
4433 opts->x_param_values,
4434 opts_set->x_param_values);
4436 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4437 if (opts->x_flag_prefetch_loop_arrays < 0
4438 && HAVE_prefetch
4439 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4440 && !opts->x_optimize_size
4441 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4442 opts->x_flag_prefetch_loop_arrays = 1;
4444 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4445 can be optimized to ap = __builtin_next_arg (0). */
4446 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4447 targetm.expand_builtin_va_start = NULL;
4449 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4451 ix86_gen_leave = gen_leave_rex64;
4452 if (Pmode == DImode)
4454 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4455 ix86_gen_tls_local_dynamic_base_64
4456 = gen_tls_local_dynamic_base_64_di;
4458 else
4460 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4461 ix86_gen_tls_local_dynamic_base_64
4462 = gen_tls_local_dynamic_base_64_si;
4465 else
4466 ix86_gen_leave = gen_leave;
4468 if (Pmode == DImode)
4470 ix86_gen_add3 = gen_adddi3;
4471 ix86_gen_sub3 = gen_subdi3;
4472 ix86_gen_sub3_carry = gen_subdi3_carry;
4473 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4474 ix86_gen_andsp = gen_anddi3;
4475 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4476 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4477 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4478 ix86_gen_monitor = gen_sse3_monitor_di;
4479 ix86_gen_monitorx = gen_monitorx_di;
4480 ix86_gen_clzero = gen_clzero_di;
4482 else
4484 ix86_gen_add3 = gen_addsi3;
4485 ix86_gen_sub3 = gen_subsi3;
4486 ix86_gen_sub3_carry = gen_subsi3_carry;
4487 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4488 ix86_gen_andsp = gen_andsi3;
4489 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4490 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4491 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4492 ix86_gen_monitor = gen_sse3_monitor_si;
4493 ix86_gen_monitorx = gen_monitorx_si;
4494 ix86_gen_clzero = gen_clzero_si;
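/* The ix86_gen_* pointers above select the DImode or SImode variant of
   each insn generator once, based on Pmode, so later code can emit
   pointer-sized operations without re-checking the target word size. */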
4497 #ifdef USE_IX86_CLD
4498 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4499 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4500 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4501 #endif
4503 /* Set the default value for -mfentry. */
4504 if (!opts_set->x_flag_fentry)
4505 opts->x_flag_fentry = TARGET_SEH;
4506 else
4508 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4509 && opts->x_flag_fentry)
4510 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4511 "with -fpic");
4512 else if (TARGET_SEH && !opts->x_flag_fentry)
4513 sorry ("-mno-fentry isn%'t compatible with SEH");
4516 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4517 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4519 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4520 opts->x_target_flags |= MASK_VZEROUPPER;
4521 if (!(opts_set->x_target_flags & MASK_STV))
4522 opts->x_target_flags |= MASK_STV;
4523 /* Disable STV if -mpreferred-stack-boundary={2,3} or
4524 -mincoming-stack-boundary={2,3} or -mstackrealign is used - the needed
4525 stack realignment is an extra cost the pass does not take into
4526 account, and the pass cannot realign the stack. */
4527 if (ix86_preferred_stack_boundary < 128
4528 || ix86_incoming_stack_boundary < 128
4529 || opts->x_ix86_force_align_arg_pointer)
4530 opts->x_target_flags &= ~MASK_STV;
4531 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4532 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4533 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4534 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4535 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4536 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4537 /* Enable 128-bit AVX instruction generation
4538 for the auto-vectorizer. */
4539 if (TARGET_AVX128_OPTIMAL
4540 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4541 opts->x_target_flags |= MASK_PREFER_AVX128;
4543 if (opts->x_ix86_recip_name)
4545 char *p = ASTRDUP (opts->x_ix86_recip_name);
4546 char *q;
4547 unsigned int mask, i;
4548 bool invert;
4550 while ((q = strtok (p, ",")) != NULL)
4552 p = NULL;
4553 if (*q == '!')
4555 invert = true;
4556 q++;
4558 else
4559 invert = false;
4561 if (!strcmp (q, "default"))
4562 mask = RECIP_MASK_ALL;
4563 else
4565 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4566 if (!strcmp (q, recip_options[i].string))
4568 mask = recip_options[i].mask;
4569 break;
4572 if (i == ARRAY_SIZE (recip_options))
4574 error ("unknown option for -mrecip=%s", q);
4575 invert = false;
4576 mask = RECIP_MASK_NONE;
4580 opts->x_recip_mask_explicit |= mask;
4581 if (invert)
4582 opts->x_recip_mask &= ~mask;
4583 else
4584 opts->x_recip_mask |= mask;
4588 if (TARGET_RECIP_P (opts->x_target_flags))
4589 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4590 else if (opts_set->x_target_flags & MASK_RECIP)
4591 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
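/* Summary of the -mrecip= handling above: entries are comma separated,
   "default" stands for RECIP_MASK_ALL, a leading '!' clears rather than
   sets the corresponding bits, unknown entries are diagnosed, and
   -mrecip / -mno-recip then only affect bits that were not named
   explicitly. */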
4593 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4594 for 64-bit Bionic. Also default long double to 64-bit for Intel
4595 MCU psABI. */
4596 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4597 && !(opts_set->x_target_flags
4598 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4599 opts->x_target_flags |= (TARGET_64BIT
4600 ? MASK_LONG_DOUBLE_128
4601 : MASK_LONG_DOUBLE_64);
4603 /* Only one of them can be active. */
4604 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4605 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4607 /* Handle stack protector */
4608 if (!opts_set->x_ix86_stack_protector_guard)
4609 opts->x_ix86_stack_protector_guard
4610 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4612 #ifdef TARGET_THREAD_SSP_OFFSET
4613 ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4614 #endif
4616 if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4618 char *endp;
4619 const char *str = ix86_stack_protector_guard_offset_str;
4621 errno = 0;
4622 int64_t offset;
4624 #if defined(INT64_T_IS_LONG)
4625 offset = strtol (str, &endp, 0);
4626 #else
4627 offset = strtoll (str, &endp, 0);
4628 #endif
4630 if (!*str || *endp || errno)
4631 error ("%qs is not a valid number "
4632 "in -mstack-protector-guard-offset=", str);
4634 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4635 HOST_WIDE_INT_C (0x7fffffff)))
4636 error ("%qs is not a valid offset "
4637 "in -mstack-protector-guard-offset=", str);
4639 ix86_stack_protector_guard_offset = offset;
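/* The IN_RANGE check above restricts the guard offset to a signed 32-bit
   value, i.e. something that fits in an x86 addressing-mode displacement.
   A hypothetical example: -mstack-protector-guard-offset=0x28 parses to 40
   and is accepted. */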
4642 ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4644 /* The kernel uses a different segment register for performance
4645 reasons; a system call would not have to trash the userspace
4646 segment register, which would be expensive. */
4647 if (ix86_cmodel == CM_KERNEL)
4648 ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4650 if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4652 const char *str = ix86_stack_protector_guard_reg_str;
4653 addr_space_t seg = ADDR_SPACE_GENERIC;
4655 /* Discard optional register prefix. */
4656 if (str[0] == '%')
4657 str++;
4659 if (strlen (str) == 2 && str[1] == 's')
4661 if (str[0] == 'f')
4662 seg = ADDR_SPACE_SEG_FS;
4663 else if (str[0] == 'g')
4664 seg = ADDR_SPACE_SEG_GS;
4667 if (seg == ADDR_SPACE_GENERIC)
4668 error ("%qs is not a valid base register "
4669 "in -mstack-protector-guard-reg=",
4670 ix86_stack_protector_guard_reg_str);
4672 ix86_stack_protector_guard_reg = seg;
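/* Per the parsing above, the accepted spellings are "fs", "gs", "%fs" and
   "%gs"; anything else, including other segment registers, is rejected. */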
4675 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4676 if (opts->x_ix86_tune_memcpy_strategy)
4678 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4679 ix86_parse_stringop_strategy_string (str, false);
4680 free (str);
4683 if (opts->x_ix86_tune_memset_strategy)
4685 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4686 ix86_parse_stringop_strategy_string (str, true);
4687 free (str);
4690 /* Save the initial options in case the user uses function-specific
4691 options. */
4692 if (main_args_p)
4693 target_option_default_node = target_option_current_node
4694 = build_target_option_node (opts);
4696 return true;
4699 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4701 static void
4702 ix86_option_override (void)
4704 ix86_option_override_internal (true, &global_options, &global_options_set);
4707 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4708 static char *
4709 ix86_offload_options (void)
4711 if (TARGET_LP64)
4712 return xstrdup ("-foffload-abi=lp64");
4713 return xstrdup ("-foffload-abi=ilp32");
4716 /* Update register usage after having seen the compiler flags. */
4718 static void
4719 ix86_conditional_register_usage (void)
4721 int i, c_mask;
4723 /* If there are no caller-saved registers, preserve all registers,
4724 except fixed_regs and registers used for the function return value,
4725 since aggregate_value_p checks call_used_regs[regno] on the return
4726 value. */
4727 if (cfun && cfun->machine->no_caller_saved_registers)
4728 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4729 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4730 call_used_regs[i] = 0;
4732 /* For 32-bit targets, squash the REX registers. */
4733 if (! TARGET_64BIT)
4735 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4736 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4737 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4738 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4739 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4740 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
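/* The registers squashed here (r8-r15, xmm8-xmm15 and the extended
   xmm16-xmm31 range) can only be encoded with REX/EVEX prefixes, which
   are not available outside 64-bit mode. */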
4743 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4744 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
4746 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4748 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4750 /* Set/reset conditionally defined registers from
4751 CALL_USED_REGISTERS initializer. */
4752 if (call_used_regs[i] > 1)
4753 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4755 /* Calculate registers of CLOBBERED_REGS register set
4756 as call used registers from GENERAL_REGS register set. */
4757 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4758 && call_used_regs[i])
4759 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4762 /* If MMX is disabled, squash the registers. */
4763 if (! TARGET_MMX)
4764 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4765 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4766 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4768 /* If SSE is disabled, squash the registers. */
4769 if (! TARGET_SSE)
4770 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4771 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4772 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4774 /* If the FPU is disabled, squash the registers. */
4775 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4776 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4777 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4778 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4780 /* If AVX512F is disabled, squash the registers. */
4781 if (! TARGET_AVX512F)
4783 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4784 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4786 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4787 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4790 /* If MPX is disabled, squash the registers. */
4791 if (! TARGET_MPX)
4792 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4793 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4796 /* Canonicalize a comparison from one we don't have to one we do have. */
4798 static void
4799 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4800 bool op0_preserve_value)
4802 /* The order of operands in the x87 ficom compare is forced by combine in
4803 the simplify_comparison () function. The FLOAT operator is treated as RTX_OBJ
4804 with precedence over other operators and is always put in the first
4805 place. Swap the condition and operands to match the ficom instruction. */
4806 if (!op0_preserve_value
4807 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
4809 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
4811 /* We are called only for compares that are split to SAHF instruction.
4812 Ensure that we have setcc/jcc insn for the swapped condition. */
4813 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
4815 std::swap (*op0, *op1);
4816 *code = (int) scode;
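/* Illustration: a compare of (float (mem)) against (reg) with code GT is
   rewritten as (reg) against (float (mem)) with the swapped code LT, so
   that it matches the operand order of the ficom pattern. */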
4821 /* Save the current options */
4823 static void
4824 ix86_function_specific_save (struct cl_target_option *ptr,
4825 struct gcc_options *opts)
4827 ptr->arch = ix86_arch;
4828 ptr->schedule = ix86_schedule;
4829 ptr->prefetch_sse = x86_prefetch_sse;
4830 ptr->tune = ix86_tune;
4831 ptr->branch_cost = ix86_branch_cost;
4832 ptr->tune_defaulted = ix86_tune_defaulted;
4833 ptr->arch_specified = ix86_arch_specified;
4834 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4835 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
4836 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4837 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4838 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4839 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4840 ptr->x_ix86_abi = opts->x_ix86_abi;
4841 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4842 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4843 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4844 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4845 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4846 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4847 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4848 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4849 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4850 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4851 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4852 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4853 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4854 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4855 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4856 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4857 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4858 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4859 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4860 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4862 /* The fields are char but the variables are not; make sure the
4863 values fit in the fields. */
4864 gcc_assert (ptr->arch == ix86_arch);
4865 gcc_assert (ptr->schedule == ix86_schedule);
4866 gcc_assert (ptr->tune == ix86_tune);
4867 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4870 /* Restore the current options */
4872 static void
4873 ix86_function_specific_restore (struct gcc_options *opts,
4874 struct cl_target_option *ptr)
4876 enum processor_type old_tune = ix86_tune;
4877 enum processor_type old_arch = ix86_arch;
4878 unsigned int ix86_arch_mask;
4879 int i;
4881 /* We don't change -fPIC. */
4882 opts->x_flag_pic = flag_pic;
4884 ix86_arch = (enum processor_type) ptr->arch;
4885 ix86_schedule = (enum attr_cpu) ptr->schedule;
4886 ix86_tune = (enum processor_type) ptr->tune;
4887 x86_prefetch_sse = ptr->prefetch_sse;
4888 opts->x_ix86_branch_cost = ptr->branch_cost;
4889 ix86_tune_defaulted = ptr->tune_defaulted;
4890 ix86_arch_specified = ptr->arch_specified;
4891 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4892 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
4893 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4894 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4895 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4896 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4897 opts->x_ix86_abi = ptr->x_ix86_abi;
4898 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4899 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4900 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4901 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4902 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4903 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4904 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4905 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4906 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4907 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4908 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4909 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4910 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4911 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4912 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4913 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4914 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4915 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4916 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4917 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4918 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4919 /* TODO: ix86_cost should be chosen at instruction or function granularity
4920 so for cold code we use size_cost even in !optimize_size compilation. */
4921 if (opts->x_optimize_size)
4922 ix86_cost = &ix86_size_cost;
4923 else
4924 ix86_cost = ix86_tune_cost;
4926 /* Recreate the arch feature tests if the arch changed */
4927 if (old_arch != ix86_arch)
4929 ix86_arch_mask = 1u << ix86_arch;
4930 for (i = 0; i < X86_ARCH_LAST; ++i)
4931 ix86_arch_features[i]
4932 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4935 /* Recreate the tune optimization tests */
4936 if (old_tune != ix86_tune)
4937 set_ix86_tune_features (ix86_tune, false);
4940 /* Adjust target options after streaming them in. This is mainly about
4941 reconciling them with global options. */
4943 static void
4944 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4946 /* flag_pic is a global option, but ix86_cmodel is target saved option
4947 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4948 for PIC, or error out. */
4949 if (flag_pic)
4950 switch (ptr->x_ix86_cmodel)
4952 case CM_SMALL:
4953 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4954 break;
4956 case CM_MEDIUM:
4957 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4958 break;
4960 case CM_LARGE:
4961 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4962 break;
4964 case CM_KERNEL:
4965 error ("code model %s does not support PIC mode", "kernel");
4966 break;
4968 default:
4969 break;
4971 else
4972 switch (ptr->x_ix86_cmodel)
4974 case CM_SMALL_PIC:
4975 ptr->x_ix86_cmodel = CM_SMALL;
4976 break;
4978 case CM_MEDIUM_PIC:
4979 ptr->x_ix86_cmodel = CM_MEDIUM;
4980 break;
4982 case CM_LARGE_PIC:
4983 ptr->x_ix86_cmodel = CM_LARGE;
4984 break;
4986 default:
4987 break;
4991 /* Print the current options */
4993 static void
4994 ix86_function_specific_print (FILE *file, int indent,
4995 struct cl_target_option *ptr)
4997 char *target_string
4998 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
4999 ptr->x_target_flags, ptr->x_ix86_target_flags,
5000 NULL, NULL, ptr->x_ix86_fpmath, false);
5002 gcc_assert (ptr->arch < PROCESSOR_max);
5003 fprintf (file, "%*sarch = %d (%s)\n",
5004 indent, "",
5005 ptr->arch, processor_target_table[ptr->arch].name);
5007 gcc_assert (ptr->tune < PROCESSOR_max);
5008 fprintf (file, "%*stune = %d (%s)\n",
5009 indent, "",
5010 ptr->tune, processor_target_table[ptr->tune].name);
5012 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5014 if (target_string)
5016 fprintf (file, "%*s%s\n", indent, "", target_string);
5017 free (target_string);
5022 /* Inner function to process the attribute((target(...))); it takes an argument
5023 and sets the current options from that argument. If we have a list, recursively
5024 go over the list. */
5026 static bool
5027 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5028 struct gcc_options *opts,
5029 struct gcc_options *opts_set,
5030 struct gcc_options *enum_opts_set)
5032 char *next_optstr;
5033 bool ret = true;
5035 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5036 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5037 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5038 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5039 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5041 enum ix86_opt_type
5043 ix86_opt_unknown,
5044 ix86_opt_yes,
5045 ix86_opt_no,
5046 ix86_opt_str,
5047 ix86_opt_enum,
5048 ix86_opt_isa
5051 static const struct
5053 const char *string;
5054 size_t len;
5055 enum ix86_opt_type type;
5056 int opt;
5057 int mask;
5058 } attrs[] = {
5059 /* isa options */
5060 IX86_ATTR_ISA ("sgx", OPT_msgx),
5061 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5062 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5063 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5065 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5066 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5067 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5068 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5069 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5070 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5071 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5072 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5073 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5074 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5075 IX86_ATTR_ISA ("fma", OPT_mfma),
5076 IX86_ATTR_ISA ("xop", OPT_mxop),
5077 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5078 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5079 IX86_ATTR_ISA ("avx", OPT_mavx),
5080 IX86_ATTR_ISA ("sse4", OPT_msse4),
5081 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5082 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5083 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5084 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5085 IX86_ATTR_ISA ("sse3", OPT_msse3),
5086 IX86_ATTR_ISA ("aes", OPT_maes),
5087 IX86_ATTR_ISA ("sha", OPT_msha),
5088 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5089 IX86_ATTR_ISA ("sse2", OPT_msse2),
5090 IX86_ATTR_ISA ("sse", OPT_msse),
5091 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5092 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5093 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5094 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5095 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5096 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5097 IX86_ATTR_ISA ("adx", OPT_madx),
5098 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5099 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5100 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5101 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5102 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5103 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5104 IX86_ATTR_ISA ("abm", OPT_mabm),
5105 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5106 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5107 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5108 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5109 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5110 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5111 IX86_ATTR_ISA ("sahf", OPT_msahf),
5112 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5113 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5114 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5115 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5116 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5117 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5118 IX86_ATTR_ISA ("pku", OPT_mpku),
5119 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5120 IX86_ATTR_ISA ("hle", OPT_mhle),
5121 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5122 IX86_ATTR_ISA ("mpx", OPT_mmpx),
5123 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5124 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5125 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5127 /* enum options */
5128 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5130 /* string options */
5131 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5132 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5134 /* flag options */
5135 IX86_ATTR_YES ("cld",
5136 OPT_mcld,
5137 MASK_CLD),
5139 IX86_ATTR_NO ("fancy-math-387",
5140 OPT_mfancy_math_387,
5141 MASK_NO_FANCY_MATH_387),
5143 IX86_ATTR_YES ("ieee-fp",
5144 OPT_mieee_fp,
5145 MASK_IEEE_FP),
5147 IX86_ATTR_YES ("inline-all-stringops",
5148 OPT_minline_all_stringops,
5149 MASK_INLINE_ALL_STRINGOPS),
5151 IX86_ATTR_YES ("inline-stringops-dynamically",
5152 OPT_minline_stringops_dynamically,
5153 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5155 IX86_ATTR_NO ("align-stringops",
5156 OPT_mno_align_stringops,
5157 MASK_NO_ALIGN_STRINGOPS),
5159 IX86_ATTR_YES ("recip",
5160 OPT_mrecip,
5161 MASK_RECIP),
5165 /* If this is a list, recurse to get the options. */
5166 if (TREE_CODE (args) == TREE_LIST)
5168 bool ret = true;
5170 for (; args; args = TREE_CHAIN (args))
5171 if (TREE_VALUE (args)
5172 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5173 p_strings, opts, opts_set,
5174 enum_opts_set))
5175 ret = false;
5177 return ret;
5180 else if (TREE_CODE (args) != STRING_CST)
5182 error ("attribute %<target%> argument not a string");
5183 return false;
5186 /* Handle multiple arguments separated by commas. */
5187 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5189 while (next_optstr && *next_optstr != '\0')
5191 char *p = next_optstr;
5192 char *orig_p = p;
5193 char *comma = strchr (next_optstr, ',');
5194 const char *opt_string;
5195 size_t len, opt_len;
5196 int opt;
5197 bool opt_set_p;
5198 char ch;
5199 unsigned i;
5200 enum ix86_opt_type type = ix86_opt_unknown;
5201 int mask = 0;
5203 if (comma)
5205 *comma = '\0';
5206 len = comma - next_optstr;
5207 next_optstr = comma + 1;
5209 else
5211 len = strlen (p);
5212 next_optstr = NULL;
5215 /* Recognize no-xxx. */
5216 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5218 opt_set_p = false;
5219 p += 3;
5220 len -= 3;
5222 else
5223 opt_set_p = true;
5225 /* Find the option. */
5226 ch = *p;
5227 opt = N_OPTS;
5228 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5230 type = attrs[i].type;
5231 opt_len = attrs[i].len;
5232 if (ch == attrs[i].string[0]
5233 && ((type != ix86_opt_str && type != ix86_opt_enum)
5234 ? len == opt_len
5235 : len > opt_len)
5236 && memcmp (p, attrs[i].string, opt_len) == 0)
5238 opt = attrs[i].opt;
5239 mask = attrs[i].mask;
5240 opt_string = attrs[i].string;
5241 break;
5245 /* Process the option. */
5246 if (opt == N_OPTS)
5248 error ("attribute(target(\"%s\")) is unknown", orig_p);
5249 ret = false;
5252 else if (type == ix86_opt_isa)
5254 struct cl_decoded_option decoded;
5256 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5257 ix86_handle_option (opts, opts_set,
5258 &decoded, input_location);
5261 else if (type == ix86_opt_yes || type == ix86_opt_no)
5263 if (type == ix86_opt_no)
5264 opt_set_p = !opt_set_p;
5266 if (opt_set_p)
5267 opts->x_target_flags |= mask;
5268 else
5269 opts->x_target_flags &= ~mask;
5272 else if (type == ix86_opt_str)
5274 if (p_strings[opt])
5276 error ("option(\"%s\") was already specified", opt_string);
5277 ret = false;
5279 else
5280 p_strings[opt] = xstrdup (p + opt_len);
5283 else if (type == ix86_opt_enum)
5285 bool arg_ok;
5286 int value;
5288 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5289 if (arg_ok)
5290 set_option (opts, enum_opts_set, opt, value,
5291 p + opt_len, DK_UNSPECIFIED, input_location,
5292 global_dc);
5293 else
5295 error ("attribute(target(\"%s\")) is unknown", orig_p);
5296 ret = false;
5300 else
5301 gcc_unreachable ();
5304 return ret;
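/* A hypothetical attribute string handled by the loop above:
   __attribute__((target("avx2,no-fma4,arch=haswell,fpmath=sse")))
   enables AVX2, disables FMA4 via the "no-" prefix, records "haswell"
   through the arch= string option and sets fpmath through the enum
   option. */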
5307 /* Release allocated strings. */
5308 static void
5309 release_options_strings (char **option_strings)
5311 /* Free up memory allocated to hold the strings */
5312 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5313 free (option_strings[i]);
5316 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5318 tree
5319 ix86_valid_target_attribute_tree (tree args,
5320 struct gcc_options *opts,
5321 struct gcc_options *opts_set)
5323 const char *orig_arch_string = opts->x_ix86_arch_string;
5324 const char *orig_tune_string = opts->x_ix86_tune_string;
5325 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5326 int orig_tune_defaulted = ix86_tune_defaulted;
5327 int orig_arch_specified = ix86_arch_specified;
5328 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5329 tree t = NULL_TREE;
5330 struct cl_target_option *def
5331 = TREE_TARGET_OPTION (target_option_default_node);
5332 struct gcc_options enum_opts_set;
5334 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5336 /* Process each of the options on the chain. */
5337 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5338 opts_set, &enum_opts_set))
5339 return error_mark_node;
5341 /* If the changed options are different from the default, rerun
5342 ix86_option_override_internal, and then save the options away.
5343 The string options are attribute options, and will be undone
5344 when we copy the save structure. */
5345 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5346 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5347 || opts->x_target_flags != def->x_target_flags
5348 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5349 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5350 || enum_opts_set.x_ix86_fpmath)
5352 /* If we are using the default tune= or arch=, undo the string assigned,
5353 and use the default. */
5354 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5356 opts->x_ix86_arch_string
5357 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5359 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5360 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5361 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5362 | OPTION_MASK_ABI_64
5363 | OPTION_MASK_ABI_X32
5364 | OPTION_MASK_CODE16);
5365 opts->x_ix86_isa_flags2 = 0;
5367 else if (!orig_arch_specified)
5368 opts->x_ix86_arch_string = NULL;
5370 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5371 opts->x_ix86_tune_string
5372 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5373 else if (orig_tune_defaulted)
5374 opts->x_ix86_tune_string = NULL;
5376 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5377 if (enum_opts_set.x_ix86_fpmath)
5378 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5380 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5381 bool r = ix86_option_override_internal (false, opts, opts_set);
5382 if (!r)
5384 release_options_strings (option_strings);
5385 return error_mark_node;
5388 /* Add any builtin functions with the new isa if any. */
5389 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5391 /* Save the current options unless we are validating options for
5392 #pragma. */
5393 t = build_target_option_node (opts);
5395 opts->x_ix86_arch_string = orig_arch_string;
5396 opts->x_ix86_tune_string = orig_tune_string;
5397 opts_set->x_ix86_fpmath = orig_fpmath_set;
5399 release_options_strings (option_strings);
5402 return t;
5405 /* Hook to validate attribute((target("string"))). */
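/* For instance (illustrative declaration), something like
     void f (void) __attribute__ ((target ("avx2,no-avx512f")));
   ends up here; each comma-separated option in the string is parsed by
   ix86_valid_target_attribute_inner_p and folded into a fresh set of
   target options for the function. */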
5407 static bool
5408 ix86_valid_target_attribute_p (tree fndecl,
5409 tree ARG_UNUSED (name),
5410 tree args,
5411 int ARG_UNUSED (flags))
5413 struct gcc_options func_options;
5414 tree new_target, new_optimize;
5415 bool ret = true;
5417 /* attribute((target("default"))) does nothing, beyond
5418 affecting multi-versioning. */
5419 if (TREE_VALUE (args)
5420 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5421 && TREE_CHAIN (args) == NULL_TREE
5422 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5423 return true;
5425 tree old_optimize = build_optimization_node (&global_options);
5427 /* Get the optimization options of the current function. */
5428 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5430 if (!func_optimize)
5431 func_optimize = old_optimize;
5433 /* Init func_options. */
5434 memset (&func_options, 0, sizeof (func_options));
5435 init_options_struct (&func_options, NULL);
5436 lang_hooks.init_options_struct (&func_options);
5438 cl_optimization_restore (&func_options,
5439 TREE_OPTIMIZATION (func_optimize));
5441 /* Initialize func_options to the default before its target options can
5442 be set. */
5443 cl_target_option_restore (&func_options,
5444 TREE_TARGET_OPTION (target_option_default_node));
5446 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5447 &global_options_set);
5449 new_optimize = build_optimization_node (&func_options);
5451 if (new_target == error_mark_node)
5452 ret = false;
5454 else if (fndecl && new_target)
5456 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5458 if (old_optimize != new_optimize)
5459 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5462 finalize_options_struct (&func_options);
5464 return ret;
5468 /* Hook to determine if one function can safely inline another. */
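/* For instance, a caller built for AVX2 (via -mavx2 or
   __attribute__ ((target ("avx2")))) may inline a plain SSE2 callee,
   but not the other way around: the callee's ISA flags must be a
   subset of the caller's, as checked below. */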
5470 static bool
5471 ix86_can_inline_p (tree caller, tree callee)
5473 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5474 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5475 if (!callee_tree)
5476 callee_tree = target_option_default_node;
5477 if (!caller_tree)
5478 caller_tree = target_option_default_node;
5479 if (callee_tree == caller_tree)
5480 return true;
5482 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5483 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5484 bool ret = false;
5486 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5487 function can inline an SSE2 function, but an SSE2 function can't inline
5488 an SSE4 function. */
5489 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5490 != callee_opts->x_ix86_isa_flags)
5491 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5492 != callee_opts->x_ix86_isa_flags2))
5493 ret = false;
5495 /* See if we have the same non-isa options. */
5496 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5497 ret = false;
5499 /* See if arch, tune, etc. are the same. */
5500 else if (caller_opts->arch != callee_opts->arch)
5501 ret = false;
5503 else if (caller_opts->tune != callee_opts->tune)
5504 ret = false;
5506 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5507 /* If the callee doesn't use FP expressions, differences in
5508 ix86_fpmath can be ignored. We are called from FEs
5509 for multi-versioning call optimization, so beware that
5510 ipa_fn_summaries may not be available. */
5511 && (! ipa_fn_summaries
5512 || ipa_fn_summaries->get
5513 (cgraph_node::get (callee))->fp_expressions))
5514 ret = false;
5516 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5517 ret = false;
5519 else
5520 ret = true;
5522 return ret;
5526 /* Remember the last target of ix86_set_current_function. */
5527 static GTY(()) tree ix86_previous_fndecl;
5529 /* Set targets globals to the default (or current #pragma GCC target
5530 if active). Invalidate ix86_previous_fndecl cache. */
5532 void
5533 ix86_reset_previous_fndecl (void)
5535 tree new_tree = target_option_current_node;
5536 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5537 if (TREE_TARGET_GLOBALS (new_tree))
5538 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5539 else if (new_tree == target_option_default_node)
5540 restore_target_globals (&default_target_globals);
5541 else
5542 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5543 ix86_previous_fndecl = NULL_TREE;
5546 /* Set the func_type field from the function FNDECL. */
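/* For example (illustrative declaration), a handler written as
     void __attribute__ ((interrupt)) isr (void *frame);
   is classified as TYPE_INTERRUPT below, while a two-argument handler
   (frame pointer plus error code) becomes TYPE_EXCEPTION; both imply
   no_caller_saved_registers. */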
5548 static void
5549 ix86_set_func_type (tree fndecl)
5551 if (cfun->machine->func_type == TYPE_UNKNOWN)
5553 if (lookup_attribute ("interrupt",
5554 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5556 if (ix86_function_naked (fndecl))
5557 error_at (DECL_SOURCE_LOCATION (fndecl),
5558 "interrupt and naked attributes are not compatible");
5560 int nargs = 0;
5561 for (tree arg = DECL_ARGUMENTS (fndecl);
5562 arg;
5563 arg = TREE_CHAIN (arg))
5564 nargs++;
5565 cfun->machine->no_caller_saved_registers = true;
5566 cfun->machine->func_type
5567 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5569 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5571 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
5572 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5573 sorry ("only DWARF debug format is supported for interrupt "
5574 "service routines");
5576 else
5578 cfun->machine->func_type = TYPE_NORMAL;
5579 if (lookup_attribute ("no_caller_saved_registers",
5580 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5581 cfun->machine->no_caller_saved_registers = true;
5586 /* Establish appropriate back-end context for processing the function
5587 FNDECL. The argument might be NULL to indicate processing at top
5588 level, outside of any function scope. */
5589 static void
5590 ix86_set_current_function (tree fndecl)
5592 /* Only change the context if the function changes. This hook is called
5593 several times in the course of compiling a function, and we don't want to
5594 slow things down too much or call target_reinit when it isn't safe. */
5595 if (fndecl == ix86_previous_fndecl)
5597 /* There may be 2 function bodies for the same function FNDECL,
5598 one is extern inline and one isn't. Call ix86_set_func_type
5599 to set the func_type field. */
5600 if (fndecl != NULL_TREE)
5601 ix86_set_func_type (fndecl);
5602 return;
5605 tree old_tree;
5606 if (ix86_previous_fndecl == NULL_TREE)
5607 old_tree = target_option_current_node;
5608 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5609 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5610 else
5611 old_tree = target_option_default_node;
5613 if (fndecl == NULL_TREE)
5615 if (old_tree != target_option_current_node)
5616 ix86_reset_previous_fndecl ();
5617 return;
5620 ix86_set_func_type (fndecl);
5622 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5623 if (new_tree == NULL_TREE)
5624 new_tree = target_option_default_node;
5626 if (old_tree != new_tree)
5628 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5629 if (TREE_TARGET_GLOBALS (new_tree))
5630 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5631 else if (new_tree == target_option_default_node)
5632 restore_target_globals (&default_target_globals);
5633 else
5634 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5636 ix86_previous_fndecl = fndecl;
5638 static bool prev_no_caller_saved_registers;
5640 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5641 Avoid expensive re-initialization of init_regs each time we switch
5642 function context. */
5643 if (TARGET_64BIT
5644 && (call_used_regs[SI_REG]
5645 == (cfun->machine->call_abi == MS_ABI)))
5646 reinit_regs ();
5647 /* Need to re-initialize init_regs if caller-saved registers are
5648 changed. */
5649 else if (prev_no_caller_saved_registers
5650 != cfun->machine->no_caller_saved_registers)
5651 reinit_regs ();
5653 if (cfun->machine->func_type != TYPE_NORMAL
5654 || cfun->machine->no_caller_saved_registers)
5656 /* Don't allow MPX, SSE, MMX nor x87 instructions since they
5657 may change processor state. */
5658 const char *isa;
5659 if (TARGET_MPX)
5660 isa = "MPX";
5661 else if (TARGET_SSE)
5662 isa = "SSE";
5663 else if (TARGET_MMX)
5664 isa = "MMX/3Dnow";
5665 else if (TARGET_80387)
5666 isa = "80387";
5667 else
5668 isa = NULL;
5669 if (isa != NULL)
5671 if (cfun->machine->func_type != TYPE_NORMAL)
5672 sorry ("%s instructions aren't allowed in %s service routine",
5673 isa, (cfun->machine->func_type == TYPE_EXCEPTION
5674 ? "exception" : "interrupt"));
5675 else
5676 sorry ("%s instructions aren't allowed in function with "
5677 "no_caller_saved_registers attribute", isa);
5678 /* Don't issue the same error twice. */
5679 cfun->machine->func_type = TYPE_NORMAL;
5680 cfun->machine->no_caller_saved_registers = false;
5684 prev_no_caller_saved_registers
5685 = cfun->machine->no_caller_saved_registers;
5689 /* Return true if this goes in large data/bss. */
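/* E.g. with -mcmodel=medium, a global object larger than the
   -mlarge-data-threshold= limit (ix86_section_threshold) is placed in
   .ldata/.lbss instead of the ordinary .data/.bss sections. */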
5691 static bool
5692 ix86_in_large_data_p (tree exp)
5694 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5695 return false;
5697 if (exp == NULL_TREE)
5698 return false;
5700 /* Functions are never large data. */
5701 if (TREE_CODE (exp) == FUNCTION_DECL)
5702 return false;
5704 /* Automatic variables are never large data. */
5705 if (VAR_P (exp) && !is_global_var (exp))
5706 return false;
5708 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
5710 const char *section = DECL_SECTION_NAME (exp);
5711 if (strcmp (section, ".ldata") == 0
5712 || strcmp (section, ".lbss") == 0)
5713 return true;
5714 return false;
5716 else
5718 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5720 /* If this is an incomplete type with size 0, then we can't put it
5721 in data because it might be too big when completed. Also,
5722 int_size_in_bytes returns -1 if the size can vary or is larger than
5723 an integer, in which case it is also safer to assume that it goes in
5724 large data. */
5725 if (size <= 0 || size > ix86_section_threshold)
5726 return true;
5729 return false;
5732 /* i386-specific section flag to mark large sections. */
5733 #define SECTION_LARGE SECTION_MACH_DEP
5735 /* Switch to the appropriate section for output of DECL.
5736 DECL is either a `VAR_DECL' node or a constant of some sort.
5737 RELOC indicates whether forming the initial value of DECL requires
5738 link-time relocations. */
5740 ATTRIBUTE_UNUSED static section *
5741 x86_64_elf_select_section (tree decl, int reloc,
5742 unsigned HOST_WIDE_INT align)
5744 if (ix86_in_large_data_p (decl))
5746 const char *sname = NULL;
5747 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
5748 switch (categorize_decl_for_section (decl, reloc))
5750 case SECCAT_DATA:
5751 sname = ".ldata";
5752 break;
5753 case SECCAT_DATA_REL:
5754 sname = ".ldata.rel";
5755 break;
5756 case SECCAT_DATA_REL_LOCAL:
5757 sname = ".ldata.rel.local";
5758 break;
5759 case SECCAT_DATA_REL_RO:
5760 sname = ".ldata.rel.ro";
5761 break;
5762 case SECCAT_DATA_REL_RO_LOCAL:
5763 sname = ".ldata.rel.ro.local";
5764 break;
5765 case SECCAT_BSS:
5766 sname = ".lbss";
5767 flags |= SECTION_BSS;
5768 break;
5769 case SECCAT_RODATA:
5770 case SECCAT_RODATA_MERGE_STR:
5771 case SECCAT_RODATA_MERGE_STR_INIT:
5772 case SECCAT_RODATA_MERGE_CONST:
5773 sname = ".lrodata";
5774 flags &= ~SECTION_WRITE;
5775 break;
5776 case SECCAT_SRODATA:
5777 case SECCAT_SDATA:
5778 case SECCAT_SBSS:
5779 gcc_unreachable ();
5780 case SECCAT_TEXT:
5781 case SECCAT_TDATA:
5782 case SECCAT_TBSS:
5783 /* We don't split these for the medium model. Place them into
5784 default sections and hope for the best. */
5785 break;
5787 if (sname)
5789 /* We might get called with string constants, but get_named_section
5790 doesn't like them as they are not DECLs. Also, we need to set
5791 flags in that case. */
5792 if (!DECL_P (decl))
5793 return get_section (sname, flags, NULL);
5794 return get_named_section (decl, sname, reloc);
5797 return default_elf_select_section (decl, reloc, align);
5800 /* Select a set of attributes for section NAME based on the properties
5801 of DECL and whether or not RELOC indicates that DECL's initializer
5802 might contain runtime relocations. */
5804 static unsigned int ATTRIBUTE_UNUSED
5805 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5807 unsigned int flags = default_section_type_flags (decl, name, reloc);
5809 if (ix86_in_large_data_p (decl))
5810 flags |= SECTION_LARGE;
5812 if (decl == NULL_TREE
5813 && (strcmp (name, ".ldata.rel.ro") == 0
5814 || strcmp (name, ".ldata.rel.ro.local") == 0))
5815 flags |= SECTION_RELRO;
5817 if (strcmp (name, ".lbss") == 0
5818 || strncmp (name, ".lbss.", 5) == 0
5819 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5820 flags |= SECTION_BSS;
5822 return flags;
5825 /* Build up a unique section name, expressed as a
5826 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5827 RELOC indicates whether the initial value of EXP requires
5828 link-time relocations. */
5830 static void ATTRIBUTE_UNUSED
5831 x86_64_elf_unique_section (tree decl, int reloc)
5833 if (ix86_in_large_data_p (decl))
5835 const char *prefix = NULL;
5836 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5837 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5839 switch (categorize_decl_for_section (decl, reloc))
5841 case SECCAT_DATA:
5842 case SECCAT_DATA_REL:
5843 case SECCAT_DATA_REL_LOCAL:
5844 case SECCAT_DATA_REL_RO:
5845 case SECCAT_DATA_REL_RO_LOCAL:
5846 prefix = one_only ? ".ld" : ".ldata";
5847 break;
5848 case SECCAT_BSS:
5849 prefix = one_only ? ".lb" : ".lbss";
5850 break;
5851 case SECCAT_RODATA:
5852 case SECCAT_RODATA_MERGE_STR:
5853 case SECCAT_RODATA_MERGE_STR_INIT:
5854 case SECCAT_RODATA_MERGE_CONST:
5855 prefix = one_only ? ".lr" : ".lrodata";
5856 break;
5857 case SECCAT_SRODATA:
5858 case SECCAT_SDATA:
5859 case SECCAT_SBSS:
5860 gcc_unreachable ();
5861 case SECCAT_TEXT:
5862 case SECCAT_TDATA:
5863 case SECCAT_TBSS:
5864 /* We don't split these for the medium model. Place them into
5865 default sections and hope for the best. */
5866 break;
5868 if (prefix)
5870 const char *name, *linkonce;
5871 char *string;
5873 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5874 name = targetm.strip_name_encoding (name);
5876 /* If we're using one_only, then there needs to be a .gnu.linkonce
5877 prefix to the section name. */
5878 linkonce = one_only ? ".gnu.linkonce" : "";
5880 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5882 set_decl_section_name (decl, string);
5883 return;
5886 default_unique_section (decl, reloc);
5889 #ifdef COMMON_ASM_OP
5891 #ifndef LARGECOMM_SECTION_ASM_OP
5892 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
5893 #endif
5895 /* This says how to output assembler code to declare an
5896 uninitialized external linkage data object.
5898 For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
5899 directive for large objects. */
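/* E.g. for a medium-model object above ix86_section_threshold this
   emits roughly
       .largecomm  name,size,align
   while smaller objects keep the usual  .comm  directive. */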
5900 void
5901 x86_elf_aligned_decl_common (FILE *file, tree decl,
5902 const char *name, unsigned HOST_WIDE_INT size,
5903 int align)
5905 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5906 && size > (unsigned int)ix86_section_threshold)
5908 switch_to_section (get_named_section (decl, ".lbss", 0));
5909 fputs (LARGECOMM_SECTION_ASM_OP, file);
5911 else
5912 fputs (COMMON_ASM_OP, file);
5913 assemble_name (file, name);
5914 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5915 size, align / BITS_PER_UNIT);
5917 #endif
5919 /* Utility function for targets to use in implementing
5920 ASM_OUTPUT_ALIGNED_BSS. */
5922 void
5923 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5924 unsigned HOST_WIDE_INT size, int align)
5926 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5927 && size > (unsigned int)ix86_section_threshold)
5928 switch_to_section (get_named_section (decl, ".lbss", 0));
5929 else
5930 switch_to_section (bss_section);
5931 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5932 #ifdef ASM_DECLARE_OBJECT_NAME
5933 last_assemble_variable_decl = decl;
5934 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5935 #else
5936 /* The standard thing is just to output a label for the object. */
5937 ASM_OUTPUT_LABEL (file, name);
5938 #endif /* ASM_DECLARE_OBJECT_NAME */
5939 ASM_OUTPUT_SKIP (file, size ? size : 1);
5942 /* Decide whether we must probe the stack before any space allocation
5943 on this target. It's essentially TARGET_STACK_PROBE except when
5944 -fstack-check causes the stack to be already probed differently. */
5946 bool
5947 ix86_target_stack_probe (void)
5949 /* Do not probe the stack twice if static stack checking is enabled. */
5950 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5951 return false;
5953 return TARGET_STACK_PROBE;
5956 /* Decide whether we can make a sibling call to a function. DECL is the
5957 declaration of the function being targeted by the call and EXP is the
5958 CALL_EXPR representing the call. */
5960 static bool
5961 ix86_function_ok_for_sibcall (tree decl, tree exp)
5963 tree type, decl_or_type;
5964 rtx a, b;
5965 bool bind_global = decl && !targetm.binds_local_p (decl);
5967 if (ix86_function_naked (current_function_decl))
5968 return false;
5970 /* A sibling call isn't OK if there are no caller-saved registers,
5971 since all registers must be preserved before return. */
5972 if (cfun->machine->no_caller_saved_registers)
5973 return false;
5975 /* If we are generating position-independent code, we cannot sibcall
5976 optimize direct calls to global functions, as the PLT requires
5977 %ebx be live. (Darwin does not have a PLT.) */
5978 if (!TARGET_MACHO
5979 && !TARGET_64BIT
5980 && flag_pic
5981 && flag_plt
5982 && bind_global)
5983 return false;
5985 /* If we need to align the outgoing stack, then sibcalling would
5986 unalign the stack, which may break the called function. */
5987 if (ix86_minimum_incoming_stack_boundary (true)
5988 < PREFERRED_STACK_BOUNDARY)
5989 return false;
5991 if (decl)
5993 decl_or_type = decl;
5994 type = TREE_TYPE (decl);
5996 else
5998 /* We're looking at the CALL_EXPR, we need the type of the function. */
5999 type = CALL_EXPR_FN (exp); /* pointer expression */
6000 type = TREE_TYPE (type); /* pointer type */
6001 type = TREE_TYPE (type); /* function type */
6002 decl_or_type = type;
6005 /* Check that the return value locations are the same. Like
6006 if we are returning floats on the 80387 register stack, we cannot
6007 make a sibcall from a function that doesn't return a float to a
6008 function that does or, conversely, from a function that does return
6009 a float to a function that doesn't; the necessary stack adjustment
6010 would not be executed. This is also the place we notice
6011 differences in the return value ABI. Note that it is ok for one
6012 of the functions to have void return type as long as the return
6013 value of the other is passed in a register. */
6014 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6015 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6016 cfun->decl, false);
6017 if (STACK_REG_P (a) || STACK_REG_P (b))
6019 if (!rtx_equal_p (a, b))
6020 return false;
6022 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6024 else if (!rtx_equal_p (a, b))
6025 return false;
6027 if (TARGET_64BIT)
6029 /* The SYSV ABI has more call-clobbered registers;
6030 disallow sibcalls from MS to SYSV. */
6031 if (cfun->machine->call_abi == MS_ABI
6032 && ix86_function_type_abi (type) == SYSV_ABI)
6033 return false;
6035 else
6037 /* If this call is indirect, we'll need to be able to use a
6038 call-clobbered register for the address of the target function.
6039 Make sure that all such registers are not used for passing
6040 parameters. Note that DLLIMPORT functions and calls to global
6041 functions via a GOT slot are indirect. */
6042 if (!decl
6043 || (bind_global && flag_pic && !flag_plt)
6044 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6046 /* Check if regparm >= 3 since arg_reg_available is set to
6047 false if regparm == 0. If regparm is 1 or 2, there is
6048 always a call-clobbered register available.
6050 ??? The symbol indirect call doesn't need a call-clobbered
6051 register. But we don't know if this is a symbol indirect
6052 call or not here. */
6053 if (ix86_function_regparm (type, NULL) >= 3
6054 && !cfun->machine->arg_reg_available)
6055 return false;
6059 /* Otherwise okay. That also includes certain types of indirect calls. */
6060 return true;
6063 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6064 and "sseregparm" calling convention attributes;
6065 arguments as in struct attribute_spec.handler. */
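/* For instance (illustrative prototypes),
     int __attribute__ ((regparm (3))) add (int a, int b, int c);
   asks for A, B and C in %eax, %edx and %ecx on ia32, while
     int __attribute__ ((fastcall)) f (int a, int b);
   uses %ecx and %edx; incompatible combinations such as regparm with
   fastcall are diagnosed below. */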
6067 static tree
6068 ix86_handle_cconv_attribute (tree *node, tree name,
6069 tree args,
6070 int,
6071 bool *no_add_attrs)
6073 if (TREE_CODE (*node) != FUNCTION_TYPE
6074 && TREE_CODE (*node) != METHOD_TYPE
6075 && TREE_CODE (*node) != FIELD_DECL
6076 && TREE_CODE (*node) != TYPE_DECL)
6078 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6079 name);
6080 *no_add_attrs = true;
6081 return NULL_TREE;
6084 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6085 if (is_attribute_p ("regparm", name))
6087 tree cst;
6089 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6091 error ("fastcall and regparm attributes are not compatible");
6094 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6096 error ("regparm and thiscall attributes are not compatible");
6099 cst = TREE_VALUE (args);
6100 if (TREE_CODE (cst) != INTEGER_CST)
6102 warning (OPT_Wattributes,
6103 "%qE attribute requires an integer constant argument",
6104 name);
6105 *no_add_attrs = true;
6107 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6109 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6110 name, REGPARM_MAX);
6111 *no_add_attrs = true;
6114 return NULL_TREE;
6117 if (TARGET_64BIT)
6119 /* Do not warn when emulating the MS ABI. */
6120 if ((TREE_CODE (*node) != FUNCTION_TYPE
6121 && TREE_CODE (*node) != METHOD_TYPE)
6122 || ix86_function_type_abi (*node) != MS_ABI)
6123 warning (OPT_Wattributes, "%qE attribute ignored",
6124 name);
6125 *no_add_attrs = true;
6126 return NULL_TREE;
6129 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6130 if (is_attribute_p ("fastcall", name))
6132 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6134 error ("fastcall and cdecl attributes are not compatible");
6136 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6138 error ("fastcall and stdcall attributes are not compatible");
6140 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6142 error ("fastcall and regparm attributes are not compatible");
6144 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6146 error ("fastcall and thiscall attributes are not compatible");
6150 /* Can combine stdcall with fastcall (redundant), regparm and
6151 sseregparm. */
6152 else if (is_attribute_p ("stdcall", name))
6154 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6156 error ("stdcall and cdecl attributes are not compatible");
6158 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6160 error ("stdcall and fastcall attributes are not compatible");
6162 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6164 error ("stdcall and thiscall attributes are not compatible");
6168 /* Can combine cdecl with regparm and sseregparm. */
6169 else if (is_attribute_p ("cdecl", name))
6171 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6173 error ("stdcall and cdecl attributes are not compatible");
6175 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6177 error ("fastcall and cdecl attributes are not compatible");
6179 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6181 error ("cdecl and thiscall attributes are not compatible");
6184 else if (is_attribute_p ("thiscall", name))
6186 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6187 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6188 name);
6189 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6191 error ("stdcall and thiscall attributes are not compatible");
6193 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6195 error ("fastcall and thiscall attributes are not compatible");
6197 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6199 error ("cdecl and thiscall attributes are not compatible");
6203 /* Can combine sseregparm with all attributes. */
6205 return NULL_TREE;
6208 /* The transactional memory builtins are implicitly regparm or fastcall
6209 depending on the ABI. Override the generic do-nothing attribute that
6210 these builtins were declared with, and replace it with one of the two
6211 attributes that we expect elsewhere. */
6213 static tree
6214 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6215 int flags, bool *no_add_attrs)
6217 tree alt;
6219 /* In no case do we want to add the placeholder attribute. */
6220 *no_add_attrs = true;
6222 /* The 64-bit ABI is unchanged for transactional memory. */
6223 if (TARGET_64BIT)
6224 return NULL_TREE;
6226 /* ??? Is there a better way to validate 32-bit windows? We have
6227 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6228 if (CHECK_STACK_LIMIT > 0)
6229 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6230 else
6232 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6233 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6235 decl_attributes (node, alt, flags);
6237 return NULL_TREE;
6240 /* This function determines from TYPE the calling-convention. */
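/* For example, a 32-bit prototype carrying the fastcall attribute maps
   to IX86_CALLCVT_FASTCALL here, an unadorned prototyped function maps
   to IX86_CALLCVT_CDECL (or IX86_CALLCVT_STDCALL when -mrtd is in
   effect and the function is not stdarg), and everything is
   IX86_CALLCVT_CDECL in 64-bit mode. */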
6242 unsigned int
6243 ix86_get_callcvt (const_tree type)
6245 unsigned int ret = 0;
6246 bool is_stdarg;
6247 tree attrs;
6249 if (TARGET_64BIT)
6250 return IX86_CALLCVT_CDECL;
6252 attrs = TYPE_ATTRIBUTES (type);
6253 if (attrs != NULL_TREE)
6255 if (lookup_attribute ("cdecl", attrs))
6256 ret |= IX86_CALLCVT_CDECL;
6257 else if (lookup_attribute ("stdcall", attrs))
6258 ret |= IX86_CALLCVT_STDCALL;
6259 else if (lookup_attribute ("fastcall", attrs))
6260 ret |= IX86_CALLCVT_FASTCALL;
6261 else if (lookup_attribute ("thiscall", attrs))
6262 ret |= IX86_CALLCVT_THISCALL;
6264 /* Regparm isn't allowed for thiscall and fastcall. */
6265 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6267 if (lookup_attribute ("regparm", attrs))
6268 ret |= IX86_CALLCVT_REGPARM;
6269 if (lookup_attribute ("sseregparm", attrs))
6270 ret |= IX86_CALLCVT_SSEREGPARM;
6273 if (IX86_BASE_CALLCVT(ret) != 0)
6274 return ret;
6277 is_stdarg = stdarg_p (type);
6278 if (TARGET_RTD && !is_stdarg)
6279 return IX86_CALLCVT_STDCALL | ret;
6281 if (ret != 0
6282 || is_stdarg
6283 || TREE_CODE (type) != METHOD_TYPE
6284 || ix86_function_type_abi (type) != MS_ABI)
6285 return IX86_CALLCVT_CDECL | ret;
6287 return IX86_CALLCVT_THISCALL;
6290 /* Return 0 if the attributes for two types are incompatible, 1 if they
6291 are compatible, and 2 if they are nearly compatible (which causes a
6292 warning to be generated). */
6294 static int
6295 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6297 unsigned int ccvt1, ccvt2;
6299 if (TREE_CODE (type1) != FUNCTION_TYPE
6300 && TREE_CODE (type1) != METHOD_TYPE)
6301 return 1;
6303 ccvt1 = ix86_get_callcvt (type1);
6304 ccvt2 = ix86_get_callcvt (type2);
6305 if (ccvt1 != ccvt2)
6306 return 0;
6307 if (ix86_function_regparm (type1, NULL)
6308 != ix86_function_regparm (type2, NULL))
6309 return 0;
6311 return 1;
6314 /* Return the regparm value for a function with the indicated TYPE and DECL.
6315 DECL may be NULL when calling function indirectly
6316 or considering a libcall. */
6318 static int
6319 ix86_function_regparm (const_tree type, const_tree decl)
6321 tree attr;
6322 int regparm;
6323 unsigned int ccvt;
6325 if (TARGET_64BIT)
6326 return (ix86_function_type_abi (type) == SYSV_ABI
6327 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6328 ccvt = ix86_get_callcvt (type);
6329 regparm = ix86_regparm;
6331 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6333 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6334 if (attr)
6336 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6337 return regparm;
6340 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6341 return 2;
6342 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6343 return 1;
6345 /* Use register calling convention for local functions when possible. */
6346 if (decl
6347 && TREE_CODE (decl) == FUNCTION_DECL)
6349 cgraph_node *target = cgraph_node::get (decl);
6350 if (target)
6351 target = target->function_symbol ();
6353 /* Caller and callee must agree on the calling convention, so
6354 checking just `optimize' here would mean that with
6355 __attribute__((optimize (...))) the caller could use the regparm convention
6356 and the callee not, or vice versa. Instead look at whether the callee
6357 is optimized or not. */
6358 if (target && opt_for_fn (target->decl, optimize)
6359 && !(profile_flag && !flag_fentry))
6361 cgraph_local_info *i = &target->local;
6362 if (i && i->local && i->can_change_signature)
6364 int local_regparm, globals = 0, regno;
6366 /* Make sure no regparm register is taken by a
6367 fixed register variable. */
6368 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6369 local_regparm++)
6370 if (fixed_regs[local_regparm])
6371 break;
6373 /* We don't want to use regparm(3) for nested functions as
6374 these use a static chain pointer in the third argument. */
6375 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6376 local_regparm = 2;
6378 /* Save a register for the split stack. */
6379 if (flag_split_stack)
6381 if (local_regparm == 3)
6382 local_regparm = 2;
6383 else if (local_regparm == 2
6384 && DECL_STATIC_CHAIN (target->decl))
6385 local_regparm = 1;
6388 /* Each fixed register usage increases register pressure,
6389 so fewer registers should be used for argument passing.
6390 This functionality can be overridden by an explicit
6391 regparm value. */
6392 for (regno = AX_REG; regno <= DI_REG; regno++)
6393 if (fixed_regs[regno])
6394 globals++;
6396 local_regparm
6397 = globals < local_regparm ? local_regparm - globals : 0;
6399 if (local_regparm > regparm)
6400 regparm = local_regparm;
6405 return regparm;
6408 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6409 DFmode (2) arguments in SSE registers for a function with the
6410 indicated TYPE and DECL. DECL may be NULL when calling the function
6411 indirectly or considering a libcall. Return -1 if any FP parameter
6412 should be rejected by error. This is used in situations where we imply the
6413 SSE calling convention but the function is called from another function
6414 with SSE disabled. Otherwise return 0. */
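/* For instance (illustrative prototype), on ia32
     float __attribute__ ((sseregparm)) f (float a, float b);
   passes A and B in SSE registers provided SSE is enabled; without
   the attribute (and outside the local-function optimization below)
   SFmode/DFmode arguments travel on the stack. */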
6416 static int
6417 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6419 gcc_assert (!TARGET_64BIT);
6421 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6422 by the sseregparm attribute. */
6423 if (TARGET_SSEREGPARM
6424 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6426 if (!TARGET_SSE)
6428 if (warn)
6430 if (decl)
6431 error ("calling %qD with attribute sseregparm without "
6432 "SSE/SSE2 enabled", decl);
6433 else
6434 error ("calling %qT with attribute sseregparm without "
6435 "SSE/SSE2 enabled", type);
6437 return 0;
6440 return 2;
6443 if (!decl)
6444 return 0;
6446 cgraph_node *target = cgraph_node::get (decl);
6447 if (target)
6448 target = target->function_symbol ();
6450 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6451 (and DFmode for SSE2) arguments in SSE registers. */
6452 if (target
6453 /* TARGET_SSE_MATH */
6454 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6455 && opt_for_fn (target->decl, optimize)
6456 && !(profile_flag && !flag_fentry))
6458 cgraph_local_info *i = &target->local;
6459 if (i && i->local && i->can_change_signature)
6461 /* Refuse to produce wrong code when local function with SSE enabled
6462 is called from SSE disabled function.
6463 FIXME: We need a way to detect these cases cross-ltrans partition
6464 and avoid using SSE calling conventions on local functions called
6465 from function with SSE disabled. For now at least delay the
6466 warning until we know we are going to produce wrong code.
6467 See PR66047 */
6468 if (!TARGET_SSE && warn)
6469 return -1;
6470 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6471 ->x_ix86_isa_flags) ? 2 : 1;
6475 return 0;
6478 /* Return true if EAX is live at the start of the function. Used by
6479 ix86_expand_prologue to determine if we need special help before
6480 calling allocate_stack_worker. */
6482 static bool
6483 ix86_eax_live_at_start_p (void)
6485 /* Cheat. Don't bother working forward from ix86_function_regparm
6486 to the function type to whether an actual argument is located in
6487 eax. Instead just look at cfg info, which is still close enough
6488 to correct at this point. This gives false positives for broken
6489 functions that might use uninitialized data that happens to be
6490 allocated in eax, but who cares? */
6491 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6494 static bool
6495 ix86_keep_aggregate_return_pointer (tree fntype)
6497 tree attr;
6499 if (!TARGET_64BIT)
6501 attr = lookup_attribute ("callee_pop_aggregate_return",
6502 TYPE_ATTRIBUTES (fntype));
6503 if (attr)
6504 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6506 /* For 32-bit MS-ABI the default is to keep aggregate
6507 return pointer. */
6508 if (ix86_function_type_abi (fntype) == MS_ABI)
6509 return true;
6511 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6514 /* Value is the number of bytes of arguments automatically
6515 popped when returning from a subroutine call.
6516 FUNDECL is the declaration node of the function (as a tree),
6517 FUNTYPE is the data type of the function (as a tree),
6518 or for a library call it is an identifier node for the subroutine name.
6519 SIZE is the number of bytes of arguments passed on the stack.
6521 On the 80386, the RTD insn may be used to pop them if the number
6522 of args is fixed, but if the number is variable then the caller
6523 must pop them all. RTD can't be used for library calls now
6524 because the library is compiled with the Unix compiler.
6525 Use of RTD is a selectable option, since it is incompatible with
6526 standard Unix calling sequences. If the option is not selected,
6527 the caller must always pop the args.
6529 The attribute stdcall is equivalent to RTD on a per module basis. */
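/* For example, a 32-bit function declared
     void __attribute__ ((stdcall)) f (int, int);
   returns with "ret $8", so this hook reports 8; cdecl and stdarg
   functions report 0 and the caller pops the arguments. */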
6531 static int
6532 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6534 unsigned int ccvt;
6536 /* None of the 64-bit ABIs pop arguments. */
6537 if (TARGET_64BIT)
6538 return 0;
6540 ccvt = ix86_get_callcvt (funtype);
6542 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6543 | IX86_CALLCVT_THISCALL)) != 0
6544 && ! stdarg_p (funtype))
6545 return size;
6547 /* Lose any fake structure return argument if it is passed on the stack. */
6548 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6549 && !ix86_keep_aggregate_return_pointer (funtype))
6551 int nregs = ix86_function_regparm (funtype, fundecl);
6552 if (nregs == 0)
6553 return GET_MODE_SIZE (Pmode);
6556 return 0;
6559 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6561 static bool
6562 ix86_legitimate_combined_insn (rtx_insn *insn)
6564 int i;
6566 /* Check operand constraints in case hard registers were propagated
6567 into insn pattern. This check prevents combine pass from
6568 generating insn patterns with invalid hard register operands.
6569 These invalid insns can eventually confuse reload to error out
6570 with a spill failure. See also PRs 46829 and 46843. */
6572 gcc_assert (INSN_CODE (insn) >= 0);
6574 extract_insn (insn);
6575 preprocess_constraints (insn);
6577 int n_operands = recog_data.n_operands;
6578 int n_alternatives = recog_data.n_alternatives;
6579 for (i = 0; i < n_operands; i++)
6581 rtx op = recog_data.operand[i];
6582 machine_mode mode = GET_MODE (op);
6583 const operand_alternative *op_alt;
6584 int offset = 0;
6585 bool win;
6586 int j;
6588 /* A unary operator may be accepted by the predicate, but it
6589 is irrelevant for matching constraints. */
6590 if (UNARY_P (op))
6591 op = XEXP (op, 0);
6593 if (SUBREG_P (op))
6595 if (REG_P (SUBREG_REG (op))
6596 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6597 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6598 GET_MODE (SUBREG_REG (op)),
6599 SUBREG_BYTE (op),
6600 GET_MODE (op));
6601 op = SUBREG_REG (op);
6604 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6605 continue;
6607 op_alt = recog_op_alt;
6609 /* Operand has no constraints, anything is OK. */
6610 win = !n_alternatives;
6612 alternative_mask preferred = get_preferred_alternatives (insn);
6613 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6615 if (!TEST_BIT (preferred, j))
6616 continue;
6617 if (op_alt[i].anything_ok
6618 || (op_alt[i].matches != -1
6619 && operands_match_p
6620 (recog_data.operand[i],
6621 recog_data.operand[op_alt[i].matches]))
6622 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6624 win = true;
6625 break;
6629 if (!win)
6630 return false;
6633 return true;
6636 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
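/* AddressSanitizer maps an address to its shadow byte roughly as
     shadow = (addr >> 3) + offset;
   the values below are the per-ABI offsets, e.g. 0x7fff8000 for
   Linux LP64. */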
6638 static unsigned HOST_WIDE_INT
6639 ix86_asan_shadow_offset (void)
6641 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6642 : HOST_WIDE_INT_C (0x7fff8000))
6643 : (HOST_WIDE_INT_1 << 29);
6646 /* Argument support functions. */
6648 /* Return true when register may be used to pass function parameters. */
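/* E.g. for the 64-bit SysV ABI this accepts RDI, RSI, RDX, RCX, R8 and
   R9, the SSE argument registers, and RAX (used as hidden argument to
   va_arg functions); the MS ABI instead uses RCX, RDX, R8 and R9. */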
6649 bool
6650 ix86_function_arg_regno_p (int regno)
6652 int i;
6653 enum calling_abi call_abi;
6654 const int *parm_regs;
6656 if (TARGET_MPX && BND_REGNO_P (regno))
6657 return true;
6659 if (!TARGET_64BIT)
6661 if (TARGET_MACHO)
6662 return (regno < REGPARM_MAX
6663 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6664 else
6665 return (regno < REGPARM_MAX
6666 || (TARGET_MMX && MMX_REGNO_P (regno)
6667 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6668 || (TARGET_SSE && SSE_REGNO_P (regno)
6669 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6672 if (TARGET_SSE && SSE_REGNO_P (regno)
6673 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6674 return true;
6676 /* TODO: The function should depend on current function ABI but
6677 builtins.c would need updating then. Therefore we use the
6678 default ABI. */
6679 call_abi = ix86_cfun_abi ();
6681 /* RAX is used as hidden argument to va_arg functions. */
6682 if (call_abi == SYSV_ABI && regno == AX_REG)
6683 return true;
6685 if (call_abi == MS_ABI)
6686 parm_regs = x86_64_ms_abi_int_parameter_registers;
6687 else
6688 parm_regs = x86_64_int_parameter_registers;
6690 for (i = 0; i < (call_abi == MS_ABI
6691 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6692 if (regno == parm_regs[i])
6693 return true;
6694 return false;
6697 /* Return if we do not know how to pass TYPE solely in registers. */
6699 static bool
6700 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6702 if (must_pass_in_stack_var_size_or_pad (mode, type))
6703 return true;
6705 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6706 The layout_type routine is crafty and tries to trick us into passing
6707 currently unsupported vector types on the stack by using TImode. */
6708 return (!TARGET_64BIT && mode == TImode
6709 && type && TREE_CODE (type) != VECTOR_TYPE);
6712 /* Return the size, in bytes, of the area reserved for arguments passed
6713 in registers for the function represented by FNDECL, depending on the
6714 ABI format used. */
6716 ix86_reg_parm_stack_space (const_tree fndecl)
6718 enum calling_abi call_abi = SYSV_ABI;
6719 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6720 call_abi = ix86_function_abi (fndecl);
6721 else
6722 call_abi = ix86_function_type_abi (fndecl);
6723 if (TARGET_64BIT && call_abi == MS_ABI)
6724 return 32;
6725 return 0;
6728 /* We add this as a workaround in order to use libc_has_function
6729 hook in i386.md. */
6730 bool
6731 ix86_libc_has_function (enum function_class fn_class)
6733 return targetm.libc_has_function (fn_class);
6736 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6737 specifying the call ABI used. */
6738 enum calling_abi
6739 ix86_function_type_abi (const_tree fntype)
6741 enum calling_abi abi = ix86_abi;
6743 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6744 return abi;
6746 if (abi == SYSV_ABI
6747 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6749 static int warned;
6750 if (TARGET_X32 && !warned)
6752 error ("X32 does not support ms_abi attribute");
6753 warned = 1;
6756 abi = MS_ABI;
6758 else if (abi == MS_ABI
6759 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6760 abi = SYSV_ABI;
6762 return abi;
6765 static enum calling_abi
6766 ix86_function_abi (const_tree fndecl)
6768 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6771 /* Return SYSV_ABI or MS_ABI, depending on cfun,
6772 specifying the call ABI used. */
6773 enum calling_abi
6774 ix86_cfun_abi (void)
6776 return cfun ? cfun->machine->call_abi : ix86_abi;
6779 static bool
6780 ix86_function_ms_hook_prologue (const_tree fn)
6782 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6784 if (decl_function_context (fn) != NULL_TREE)
6785 error_at (DECL_SOURCE_LOCATION (fn),
6786 "ms_hook_prologue is not compatible with nested function");
6787 else
6788 return true;
6790 return false;
6793 static bool
6794 ix86_function_naked (const_tree fn)
6796 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
6797 return true;
6799 return false;
6802 /* Write the extra assembler code needed to declare a function properly. */
6804 void
6805 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6806 tree decl)
6808 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6810 if (is_ms_hook)
6812 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6813 unsigned int filler_cc = 0xcccccccc;
6815 for (i = 0; i < filler_count; i += 4)
6816 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6819 #ifdef SUBTARGET_ASM_UNWIND_INIT
6820 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6821 #endif
6823 ASM_OUTPUT_LABEL (asm_out_file, fname);
6825 /* Output magic byte marker, if hot-patch attribute is set. */
6826 if (is_ms_hook)
6828 if (TARGET_64BIT)
6830 /* leaq [%rsp + 0], %rsp */
6831 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
6832 asm_out_file);
6834 else
6836 /* movl.s %edi, %edi
6837 push %ebp
6838 movl.s %esp, %ebp */
6839 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
6844 /* Implementation of call abi switching target hook. Specific to FNDECL
6845 the specific call register sets are set. See also
6846 ix86_conditional_register_usage for more details. */
6847 void
6848 ix86_call_abi_override (const_tree fndecl)
6850 cfun->machine->call_abi = ix86_function_abi (fndecl);
6853 /* Return true if a pseudo register should be created and used to hold
6854 the GOT address for PIC code. */
6855 bool
6856 ix86_use_pseudo_pic_reg (void)
6858 if ((TARGET_64BIT
6859 && (ix86_cmodel == CM_SMALL_PIC
6860 || TARGET_PECOFF))
6861 || !flag_pic)
6862 return false;
6863 return true;
6866 /* Initialize large model PIC register. */
6868 static void
6869 ix86_init_large_pic_reg (unsigned int tmp_regno)
6871 rtx_code_label *label;
6872 rtx tmp_reg;
6874 gcc_assert (Pmode == DImode);
6875 label = gen_label_rtx ();
6876 emit_label (label);
6877 LABEL_PRESERVE_P (label) = 1;
6878 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6879 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6880 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6881 label));
6882 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6883 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6884 pic_offset_table_rtx, tmp_reg));
6885 const char *name = LABEL_NAME (label);
6886 PUT_CODE (label, NOTE);
6887 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
6888 NOTE_DELETED_LABEL_NAME (label) = name;
6891 /* Create and initialize PIC register if required. */
6892 static void
6893 ix86_init_pic_reg (void)
6895 edge entry_edge;
6896 rtx_insn *seq;
6898 if (!ix86_use_pseudo_pic_reg ())
6899 return;
6901 start_sequence ();
6903 if (TARGET_64BIT)
6905 if (ix86_cmodel == CM_LARGE_PIC)
6906 ix86_init_large_pic_reg (R11_REG);
6907 else
6908 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6910 else
6912 /* If there is a future mcount call in the function, it is more profitable
6913 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6914 rtx reg = crtl->profile
6915 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6916 : pic_offset_table_rtx;
6917 rtx_insn *insn = emit_insn (gen_set_got (reg));
6918 RTX_FRAME_RELATED_P (insn) = 1;
6919 if (crtl->profile)
6920 emit_move_insn (pic_offset_table_rtx, reg);
6921 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6924 seq = get_insns ();
6925 end_sequence ();
6927 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6928 insert_insn_on_edge (seq, entry_edge);
6929 commit_one_edge_insertion (entry_edge);
6932 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6933 for a call to a function whose data type is FNTYPE.
6934 For a library call, FNTYPE is 0. */
6936 void
6937 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6938 tree fntype, /* tree ptr for function decl */
6939 rtx libname, /* SYMBOL_REF of library name or 0 */
6940 tree fndecl,
6941 int caller)
6943 struct cgraph_local_info *i = NULL;
6944 struct cgraph_node *target = NULL;
6946 memset (cum, 0, sizeof (*cum));
6948 if (fndecl)
6950 target = cgraph_node::get (fndecl);
6951 if (target)
6953 target = target->function_symbol ();
6954 i = cgraph_node::local_info (target->decl);
6955 cum->call_abi = ix86_function_abi (target->decl);
6957 else
6958 cum->call_abi = ix86_function_abi (fndecl);
6960 else
6961 cum->call_abi = ix86_function_type_abi (fntype);
6963 cum->caller = caller;
6965 /* Set up the number of registers to use for passing arguments. */
6966 cum->nregs = ix86_regparm;
6967 if (TARGET_64BIT)
6969 cum->nregs = (cum->call_abi == SYSV_ABI
6970 ? X86_64_REGPARM_MAX
6971 : X86_64_MS_REGPARM_MAX);
6973 if (TARGET_SSE)
6975 cum->sse_nregs = SSE_REGPARM_MAX;
6976 if (TARGET_64BIT)
6978 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6979 ? X86_64_SSE_REGPARM_MAX
6980 : X86_64_MS_SSE_REGPARM_MAX);
6983 if (TARGET_MMX)
6984 cum->mmx_nregs = MMX_REGPARM_MAX;
6985 cum->warn_avx512f = true;
6986 cum->warn_avx = true;
6987 cum->warn_sse = true;
6988 cum->warn_mmx = true;
6990 /* Because types might mismatch between caller and callee, we need to
6991 use the actual type of the function for local calls.
6992 FIXME: cgraph_analyze can be told to actually record if the function uses
6993 va_start, so for local functions maybe_vaarg can be made aggressive,
6994 helping K&R code.
6995 FIXME: once the type system is fixed, we won't need this code anymore. */
6996 if (i && i->local && i->can_change_signature)
6997 fntype = TREE_TYPE (target->decl);
6998 cum->stdarg = stdarg_p (fntype);
6999 cum->maybe_vaarg = (fntype
7000 ? (!prototype_p (fntype) || stdarg_p (fntype))
7001 : !libname);
7003 cum->bnd_regno = FIRST_BND_REG;
7004 cum->bnds_in_bt = 0;
7005 cum->force_bnd_pass = 0;
7006 cum->decl = fndecl;
7008 if (!TARGET_64BIT)
7010 /* If there are variable arguments, then we won't pass anything
7011 in registers in 32-bit mode. */
7012 if (stdarg_p (fntype))
7014 cum->nregs = 0;
7015 /* Since in 32-bit mode variable arguments are always passed on
7016 the stack, there is a scratch register available for an indirect
7017 sibcall. */
7018 cfun->machine->arg_reg_available = true;
7019 cum->sse_nregs = 0;
7020 cum->mmx_nregs = 0;
7021 cum->warn_avx512f = false;
7022 cum->warn_avx = false;
7023 cum->warn_sse = false;
7024 cum->warn_mmx = false;
7025 return;
7028 /* Use ecx and edx registers if function has fastcall attribute,
7029 else look for regparm information. */
7030 if (fntype)
7032 unsigned int ccvt = ix86_get_callcvt (fntype);
7033 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7035 cum->nregs = 1;
7036 cum->fastcall = 1; /* Same first register as in fastcall. */
7038 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7040 cum->nregs = 2;
7041 cum->fastcall = 1;
7043 else
7044 cum->nregs = ix86_function_regparm (fntype, fndecl);
7047 /* Set up the number of SSE registers used for passing SFmode
7048 and DFmode arguments. Warn for mismatching ABI. */
7049 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7052 cfun->machine->arg_reg_available = (cum->nregs > 0);
7055 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7056 But in the case of vector types, it is some vector mode.
7058 When we have only some of our vector isa extensions enabled, then there
7059 are some modes for which vector_mode_supported_p is false. For these
7060 modes, the generic vector support in gcc will choose some non-vector mode
7061 in order to implement the type. By computing the natural mode, we'll
7062 select the proper ABI location for the operand and not depend on whatever
7063 the middle-end decides to do with these vector types.
7065 The middle-end can't deal with vector types > 16 bytes. In this
7066 case, we return the original mode and warn about the ABI change if CUM
7067 isn't NULL.
7069 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
7070 available for the function return value. */
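/* For example, a GNU vector type such as
     typedef int v8si __attribute__ ((vector_size (32)));
   gets natural mode V8SImode here; if AVX is not enabled, the psABI
   warning below fires and we fall back to the type's original
   TYPE_MODE instead. */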
7072 static machine_mode
7073 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7074 bool in_return)
7076 machine_mode mode = TYPE_MODE (type);
7078 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7080 HOST_WIDE_INT size = int_size_in_bytes (type);
7081 if ((size == 8 || size == 16 || size == 32 || size == 64)
7082 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7083 && TYPE_VECTOR_SUBPARTS (type) > 1)
7085 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7087 /* There are no XFmode vector modes. */
7088 if (innermode == XFmode)
7089 return mode;
7091 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7092 mode = MIN_MODE_VECTOR_FLOAT;
7093 else
7094 mode = MIN_MODE_VECTOR_INT;
7096 /* Get the mode which has this inner mode and number of units. */
7097 FOR_EACH_MODE_FROM (mode, mode)
7098 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7099 && GET_MODE_INNER (mode) == innermode)
7101 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7103 static bool warnedavx512f;
7104 static bool warnedavx512f_ret;
7106 if (cum && cum->warn_avx512f && !warnedavx512f)
7108 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7109 "without AVX512F enabled changes the ABI"))
7110 warnedavx512f = true;
7112 else if (in_return && !warnedavx512f_ret)
7114 if (warning (OPT_Wpsabi, "AVX512F vector return "
7115 "without AVX512F enabled changes the ABI"))
7116 warnedavx512f_ret = true;
7119 return TYPE_MODE (type);
7121 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7123 static bool warnedavx;
7124 static bool warnedavx_ret;
7126 if (cum && cum->warn_avx && !warnedavx)
7128 if (warning (OPT_Wpsabi, "AVX vector argument "
7129 "without AVX enabled changes the ABI"))
7130 warnedavx = true;
7132 else if (in_return && !warnedavx_ret)
7134 if (warning (OPT_Wpsabi, "AVX vector return "
7135 "without AVX enabled changes the ABI"))
7136 warnedavx_ret = true;
7139 return TYPE_MODE (type);
7141 else if (((size == 8 && TARGET_64BIT) || size == 16)
7142 && !TARGET_SSE
7143 && !TARGET_IAMCU)
7145 static bool warnedsse;
7146 static bool warnedsse_ret;
7148 if (cum && cum->warn_sse && !warnedsse)
7150 if (warning (OPT_Wpsabi, "SSE vector argument "
7151 "without SSE enabled changes the ABI"))
7152 warnedsse = true;
7154 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7156 if (warning (OPT_Wpsabi, "SSE vector return "
7157 "without SSE enabled changes the ABI"))
7158 warnedsse_ret = true;
7161 else if ((size == 8 && !TARGET_64BIT)
7162 && (!cfun
7163 || cfun->machine->func_type == TYPE_NORMAL)
7164 && !TARGET_MMX
7165 && !TARGET_IAMCU)
7167 static bool warnedmmx;
7168 static bool warnedmmx_ret;
7170 if (cum && cum->warn_mmx && !warnedmmx)
7172 if (warning (OPT_Wpsabi, "MMX vector argument "
7173 "without MMX enabled changes the ABI"))
7174 warnedmmx = true;
7176 else if (in_return && !warnedmmx_ret)
7178 if (warning (OPT_Wpsabi, "MMX vector return "
7179 "without MMX enabled changes the ABI"))
7180 warnedmmx_ret = true;
7183 return mode;
7186 gcc_unreachable ();
7190 return mode;
7193 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7194 this may not agree with the mode that the type system has chosen for the
7195 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7196 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7198 static rtx
7199 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7200 unsigned int regno)
7202 rtx tmp;
7204 if (orig_mode != BLKmode)
7205 tmp = gen_rtx_REG (orig_mode, regno);
7206 else
7208 tmp = gen_rtx_REG (mode, regno);
7209 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7210 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7213 return tmp;
7216 /* x86-64 register passing implementation. See the x86-64 ABI for details.
7217 The goal of this code is to classify each eightbyte of an incoming argument
7218 by register class and assign registers accordingly. */
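/* For instance, struct { double d; long l; } occupies two eightbytes
   classified as { SSE, INTEGER }, so D is passed in an XMM register
   and L in a general-purpose register. */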
7220 /* Return the union class of CLASS1 and CLASS2.
7221 See the x86-64 PS ABI for details. */
7223 static enum x86_64_reg_class
7224 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7226 /* Rule #1: If both classes are equal, this is the resulting class. */
7227 if (class1 == class2)
7228 return class1;
7230 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7231 the other class. */
7232 if (class1 == X86_64_NO_CLASS)
7233 return class2;
7234 if (class2 == X86_64_NO_CLASS)
7235 return class1;
7237 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7238 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7239 return X86_64_MEMORY_CLASS;
7241 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7242 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7243 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7244 return X86_64_INTEGERSI_CLASS;
7245 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7246 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7247 return X86_64_INTEGER_CLASS;
7249 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7250 MEMORY is used. */
7251 if (class1 == X86_64_X87_CLASS
7252 || class1 == X86_64_X87UP_CLASS
7253 || class1 == X86_64_COMPLEX_X87_CLASS
7254 || class2 == X86_64_X87_CLASS
7255 || class2 == X86_64_X87UP_CLASS
7256 || class2 == X86_64_COMPLEX_X87_CLASS)
7257 return X86_64_MEMORY_CLASS;
7259 /* Rule #6: Otherwise class SSE is used. */
7260 return X86_64_SSE_CLASS;
7263 /* Classify the argument of type TYPE and mode MODE.
7264 CLASSES will be filled by the register class used to pass each word
7265 of the operand. The number of words is returned. In case the parameter
7266 should be passed in memory, 0 is returned. As a special case for zero
7267 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7269 BIT_OFFSET is used internally for handling records and specifies the
7270 offset in bits modulo 512 to avoid overflow cases.
7272 See the x86-64 PS ABI for details.  */
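/* For illustration, a 16 byte struct { double d; int i; } classifies as

     classes[0] = X86_64_SSEDF_CLASS     (first eightbyte, the double)
     classes[1] = X86_64_INTEGER_CLASS   (second eightbyte, the int)

   and 2 is returned, so the value is passed in one SSE register and one
   general-purpose register.  */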
7275 static int
7276 classify_argument (machine_mode mode, const_tree type,
7277 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7279 HOST_WIDE_INT bytes =
7280 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7281 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7283 /* Variable sized entities are always passed/returned in memory. */
7284 if (bytes < 0)
7285 return 0;
7287 if (mode != VOIDmode
7288 && targetm.calls.must_pass_in_stack (mode, type))
7289 return 0;
7291 if (type && AGGREGATE_TYPE_P (type))
7293 int i;
7294 tree field;
7295 enum x86_64_reg_class subclasses[MAX_CLASSES];
7297 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7298 if (bytes > 64)
7299 return 0;
7301 for (i = 0; i < words; i++)
7302 classes[i] = X86_64_NO_CLASS;
7304 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7305 signal the memory class, so handle it as a special case. */
7306 if (!words)
7308 classes[0] = X86_64_NO_CLASS;
7309 return 1;
7312 /* Classify each field of record and merge classes. */
7313 switch (TREE_CODE (type))
7315 case RECORD_TYPE:
7316 /* And now merge the fields of structure. */
7317 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7319 if (TREE_CODE (field) == FIELD_DECL)
7321 int num;
7323 if (TREE_TYPE (field) == error_mark_node)
7324 continue;
7326 /* Bitfields are always classified as integer. Handle them
7327 early, since later code would consider them to be
7328 misaligned integers. */
7329 if (DECL_BIT_FIELD (field))
7331 for (i = (int_bit_position (field)
7332 + (bit_offset % 64)) / 8 / 8;
7333 i < ((int_bit_position (field) + (bit_offset % 64))
7334 + tree_to_shwi (DECL_SIZE (field))
7335 + 63) / 8 / 8; i++)
7336 classes[i] =
7337 merge_classes (X86_64_INTEGER_CLASS,
7338 classes[i]);
7340 else
7342 int pos;
7344 type = TREE_TYPE (field);
7346 /* Flexible array member is ignored. */
7347 if (TYPE_MODE (type) == BLKmode
7348 && TREE_CODE (type) == ARRAY_TYPE
7349 && TYPE_SIZE (type) == NULL_TREE
7350 && TYPE_DOMAIN (type) != NULL_TREE
7351 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7352 == NULL_TREE))
7354 static bool warned;
7356 if (!warned && warn_psabi)
7358 warned = true;
7359 inform (input_location,
7360 "the ABI of passing struct with"
7361 " a flexible array member has"
7362 " changed in GCC 4.4");
7364 continue;
7366 num = classify_argument (TYPE_MODE (type), type,
7367 subclasses,
7368 (int_bit_position (field)
7369 + bit_offset) % 512);
7370 if (!num)
7371 return 0;
7372 pos = (int_bit_position (field)
7373 + (bit_offset % 64)) / 8 / 8;
7374 for (i = 0; i < num && (i + pos) < words; i++)
7375 classes[i + pos] =
7376 merge_classes (subclasses[i], classes[i + pos]);
7380 break;
7382 case ARRAY_TYPE:
7383 /* Arrays are handled as small records. */
7385 int num;
7386 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7387 TREE_TYPE (type), subclasses, bit_offset);
7388 if (!num)
7389 return 0;
7391 /* The partial classes are now full classes. */
7392 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7393 subclasses[0] = X86_64_SSE_CLASS;
7394 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7395 && !((bit_offset % 64) == 0 && bytes == 4))
7396 subclasses[0] = X86_64_INTEGER_CLASS;
7398 for (i = 0; i < words; i++)
7399 classes[i] = subclasses[i % num];
7401 break;
7403 case UNION_TYPE:
7404 case QUAL_UNION_TYPE:
7405 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
7407 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7409 if (TREE_CODE (field) == FIELD_DECL)
7411 int num;
7413 if (TREE_TYPE (field) == error_mark_node)
7414 continue;
7416 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7417 TREE_TYPE (field), subclasses,
7418 bit_offset);
7419 if (!num)
7420 return 0;
7421 for (i = 0; i < num && i < words; i++)
7422 classes[i] = merge_classes (subclasses[i], classes[i]);
7425 break;
7427 default:
7428 gcc_unreachable ();
7431 if (words > 2)
7433 /* When size > 16 bytes, if the first class isn't
7434 X86_64_SSE_CLASS or any of the others isn't
7435 X86_64_SSEUP_CLASS, everything should be passed in
7436 memory. */
7437 if (classes[0] != X86_64_SSE_CLASS)
7438 return 0;
7440 for (i = 1; i < words; i++)
7441 if (classes[i] != X86_64_SSEUP_CLASS)
7442 return 0;
7445 /* Final merger cleanup. */
7446 for (i = 0; i < words; i++)
7448 /* If one class is MEMORY, everything should be passed in
7449 memory. */
7450 if (classes[i] == X86_64_MEMORY_CLASS)
7451 return 0;
7453 /* The X86_64_SSEUP_CLASS should always be preceded by
7454 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7455 if (classes[i] == X86_64_SSEUP_CLASS
7456 && classes[i - 1] != X86_64_SSE_CLASS
7457 && classes[i - 1] != X86_64_SSEUP_CLASS)
7459 /* The first one should never be X86_64_SSEUP_CLASS. */
7460 gcc_assert (i != 0);
7461 classes[i] = X86_64_SSE_CLASS;
7464 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7465 everything should be passed in memory. */
7466 if (classes[i] == X86_64_X87UP_CLASS
7467 && (classes[i - 1] != X86_64_X87_CLASS))
7469 static bool warned;
7471 /* The first one should never be X86_64_X87UP_CLASS. */
7472 gcc_assert (i != 0);
7473 if (!warned && warn_psabi)
7475 warned = true;
7476 inform (input_location,
7477 "the ABI of passing union with long double"
7478 " has changed in GCC 4.4");
7480 return 0;
7483 return words;
7486 /* Compute alignment needed. We align all types to natural boundaries with
7487 exception of XFmode that is aligned to 64bits. */
7488 if (mode != VOIDmode && mode != BLKmode)
7490 int mode_alignment = GET_MODE_BITSIZE (mode);
7492 if (mode == XFmode)
7493 mode_alignment = 128;
7494 else if (mode == XCmode)
7495 mode_alignment = 256;
7496 if (COMPLEX_MODE_P (mode))
7497 mode_alignment /= 2;
7498 /* Misaligned fields are always returned in memory. */
7499 if (bit_offset % mode_alignment)
7500 return 0;
7503 /* For V1xx modes, just use the base mode. */
7504 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7505 && GET_MODE_UNIT_SIZE (mode) == bytes)
7506 mode = GET_MODE_INNER (mode);
7508 /* Classification of atomic types. */
7509 switch (mode)
7511 case E_SDmode:
7512 case E_DDmode:
7513 classes[0] = X86_64_SSE_CLASS;
7514 return 1;
7515 case E_TDmode:
7516 classes[0] = X86_64_SSE_CLASS;
7517 classes[1] = X86_64_SSEUP_CLASS;
7518 return 2;
7519 case E_DImode:
7520 case E_SImode:
7521 case E_HImode:
7522 case E_QImode:
7523 case E_CSImode:
7524 case E_CHImode:
7525 case E_CQImode:
7527 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7529 /* Analyze last 128 bits only. */
7530 size = (size - 1) & 0x7f;
7532 if (size < 32)
7534 classes[0] = X86_64_INTEGERSI_CLASS;
7535 return 1;
7537 else if (size < 64)
7539 classes[0] = X86_64_INTEGER_CLASS;
7540 return 1;
7542 else if (size < 64+32)
7544 classes[0] = X86_64_INTEGER_CLASS;
7545 classes[1] = X86_64_INTEGERSI_CLASS;
7546 return 2;
7548 else if (size < 64+64)
7550 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7551 return 2;
7553 else
7554 gcc_unreachable ();
7556 case E_CDImode:
7557 case E_TImode:
7558 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7559 return 2;
7560 case E_COImode:
7561 case E_OImode:
7562 /* OImode shouldn't be used directly. */
7563 gcc_unreachable ();
7564 case E_CTImode:
7565 return 0;
7566 case E_SFmode:
7567 if (!(bit_offset % 64))
7568 classes[0] = X86_64_SSESF_CLASS;
7569 else
7570 classes[0] = X86_64_SSE_CLASS;
7571 return 1;
7572 case E_DFmode:
7573 classes[0] = X86_64_SSEDF_CLASS;
7574 return 1;
7575 case E_XFmode:
7576 classes[0] = X86_64_X87_CLASS;
7577 classes[1] = X86_64_X87UP_CLASS;
7578 return 2;
7579 case E_TFmode:
7580 classes[0] = X86_64_SSE_CLASS;
7581 classes[1] = X86_64_SSEUP_CLASS;
7582 return 2;
7583 case E_SCmode:
7584 classes[0] = X86_64_SSE_CLASS;
7585 if (!(bit_offset % 64))
7586 return 1;
7587 else
7589 static bool warned;
7591 if (!warned && warn_psabi)
7593 warned = true;
7594 inform (input_location,
7595 "the ABI of passing structure with complex float"
7596 " member has changed in GCC 4.4");
7598 classes[1] = X86_64_SSESF_CLASS;
7599 return 2;
7601 case E_DCmode:
7602 classes[0] = X86_64_SSEDF_CLASS;
7603 classes[1] = X86_64_SSEDF_CLASS;
7604 return 2;
7605 case E_XCmode:
7606 classes[0] = X86_64_COMPLEX_X87_CLASS;
7607 return 1;
7608 case E_TCmode:
7609 /* This mode is larger than 16 bytes. */
7610 return 0;
7611 case E_V8SFmode:
7612 case E_V8SImode:
7613 case E_V32QImode:
7614 case E_V16HImode:
7615 case E_V4DFmode:
7616 case E_V4DImode:
7617 classes[0] = X86_64_SSE_CLASS;
7618 classes[1] = X86_64_SSEUP_CLASS;
7619 classes[2] = X86_64_SSEUP_CLASS;
7620 classes[3] = X86_64_SSEUP_CLASS;
7621 return 4;
7622 case E_V8DFmode:
7623 case E_V16SFmode:
7624 case E_V8DImode:
7625 case E_V16SImode:
7626 case E_V32HImode:
7627 case E_V64QImode:
7628 classes[0] = X86_64_SSE_CLASS;
7629 classes[1] = X86_64_SSEUP_CLASS;
7630 classes[2] = X86_64_SSEUP_CLASS;
7631 classes[3] = X86_64_SSEUP_CLASS;
7632 classes[4] = X86_64_SSEUP_CLASS;
7633 classes[5] = X86_64_SSEUP_CLASS;
7634 classes[6] = X86_64_SSEUP_CLASS;
7635 classes[7] = X86_64_SSEUP_CLASS;
7636 return 8;
7637 case E_V4SFmode:
7638 case E_V4SImode:
7639 case E_V16QImode:
7640 case E_V8HImode:
7641 case E_V2DFmode:
7642 case E_V2DImode:
7643 classes[0] = X86_64_SSE_CLASS;
7644 classes[1] = X86_64_SSEUP_CLASS;
7645 return 2;
7646 case E_V1TImode:
7647 case E_V1DImode:
7648 case E_V2SFmode:
7649 case E_V2SImode:
7650 case E_V4HImode:
7651 case E_V8QImode:
7652 classes[0] = X86_64_SSE_CLASS;
7653 return 1;
7654 case E_BLKmode:
7655 case E_VOIDmode:
7656 return 0;
7657 default:
7658 gcc_assert (VECTOR_MODE_P (mode));
7660 if (bytes > 16)
7661 return 0;
7663 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7665 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7666 classes[0] = X86_64_INTEGERSI_CLASS;
7667 else
7668 classes[0] = X86_64_INTEGER_CLASS;
7669 classes[1] = X86_64_INTEGER_CLASS;
7670 return 1 + (bytes > 8);
7674 /* Examine the argument and set the number of registers required in each
7675 class. Return true iff the parameter should be passed in memory. */
7677 static bool
7678 examine_argument (machine_mode mode, const_tree type, int in_return,
7679 int *int_nregs, int *sse_nregs)
7681 enum x86_64_reg_class regclass[MAX_CLASSES];
7682 int n = classify_argument (mode, type, regclass, 0);
7684 *int_nregs = 0;
7685 *sse_nregs = 0;
7687 if (!n)
7688 return true;
7689 for (n--; n >= 0; n--)
7690 switch (regclass[n])
7692 case X86_64_INTEGER_CLASS:
7693 case X86_64_INTEGERSI_CLASS:
7694 (*int_nregs)++;
7695 break;
7696 case X86_64_SSE_CLASS:
7697 case X86_64_SSESF_CLASS:
7698 case X86_64_SSEDF_CLASS:
7699 (*sse_nregs)++;
7700 break;
7701 case X86_64_NO_CLASS:
7702 case X86_64_SSEUP_CLASS:
7703 break;
7704 case X86_64_X87_CLASS:
7705 case X86_64_X87UP_CLASS:
7706 case X86_64_COMPLEX_X87_CLASS:
7707 if (!in_return)
7708 return true;
7709 break;
7710 case X86_64_MEMORY_CLASS:
7711 gcc_unreachable ();
7714 return false;
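/* For example, for struct { double d; int i; } this sets *sse_nregs = 1
   and *int_nregs = 1 and returns false (the argument fits in registers),
   while a 32 byte struct of four longs classifies to zero words above
   and the function returns true (pass in memory).  */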
7717 /* Construct container for the argument used by GCC interface. See
7718 FUNCTION_ARG for the detailed description. */
7720 static rtx
7721 construct_container (machine_mode mode, machine_mode orig_mode,
7722 const_tree type, int in_return, int nintregs, int nsseregs,
7723 const int *intreg, int sse_regno)
7725 /* The following variables hold the static issued_error state. */
7726 static bool issued_sse_arg_error;
7727 static bool issued_sse_ret_error;
7728 static bool issued_x87_ret_error;
7730 machine_mode tmpmode;
7731 int bytes =
7732 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7733 enum x86_64_reg_class regclass[MAX_CLASSES];
7734 int n;
7735 int i;
7736 int nexps = 0;
7737 int needed_sseregs, needed_intregs;
7738 rtx exp[MAX_CLASSES];
7739 rtx ret;
7741 n = classify_argument (mode, type, regclass, 0);
7742 if (!n)
7743 return NULL;
7744 if (examine_argument (mode, type, in_return, &needed_intregs,
7745 &needed_sseregs))
7746 return NULL;
7747 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7748 return NULL;
7750 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7751 some less clueful developer tries to use floating-point anyway. */
7752 if (needed_sseregs && !TARGET_SSE)
7754 if (in_return)
7756 if (!issued_sse_ret_error)
7758 error ("SSE register return with SSE disabled");
7759 issued_sse_ret_error = true;
7762 else if (!issued_sse_arg_error)
7764 error ("SSE register argument with SSE disabled");
7765 issued_sse_arg_error = true;
7767 return NULL;
7770 /* Likewise, error if the ABI requires us to return values in the
7771 x87 registers and the user specified -mno-80387. */
7772 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7773 for (i = 0; i < n; i++)
7774 if (regclass[i] == X86_64_X87_CLASS
7775 || regclass[i] == X86_64_X87UP_CLASS
7776 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7778 if (!issued_x87_ret_error)
7780 error ("x87 register return with x87 disabled");
7781 issued_x87_ret_error = true;
7783 return NULL;
7786 /* First construct simple cases. Avoid SCmode, since we want to use
7787 a single register to pass this type. */
7788 if (n == 1 && mode != SCmode)
7789 switch (regclass[0])
7791 case X86_64_INTEGER_CLASS:
7792 case X86_64_INTEGERSI_CLASS:
7793 return gen_rtx_REG (mode, intreg[0]);
7794 case X86_64_SSE_CLASS:
7795 case X86_64_SSESF_CLASS:
7796 case X86_64_SSEDF_CLASS:
7797 if (mode != BLKmode)
7798 return gen_reg_or_parallel (mode, orig_mode,
7799 SSE_REGNO (sse_regno));
7800 break;
7801 case X86_64_X87_CLASS:
7802 case X86_64_COMPLEX_X87_CLASS:
7803 return gen_rtx_REG (mode, FIRST_STACK_REG);
7804 case X86_64_NO_CLASS:
7805 /* Zero sized array, struct or class. */
7806 return NULL;
7807 default:
7808 gcc_unreachable ();
7810 if (n == 2
7811 && regclass[0] == X86_64_SSE_CLASS
7812 && regclass[1] == X86_64_SSEUP_CLASS
7813 && mode != BLKmode)
7814 return gen_reg_or_parallel (mode, orig_mode,
7815 SSE_REGNO (sse_regno));
7816 if (n == 4
7817 && regclass[0] == X86_64_SSE_CLASS
7818 && regclass[1] == X86_64_SSEUP_CLASS
7819 && regclass[2] == X86_64_SSEUP_CLASS
7820 && regclass[3] == X86_64_SSEUP_CLASS
7821 && mode != BLKmode)
7822 return gen_reg_or_parallel (mode, orig_mode,
7823 SSE_REGNO (sse_regno));
7824 if (n == 8
7825 && regclass[0] == X86_64_SSE_CLASS
7826 && regclass[1] == X86_64_SSEUP_CLASS
7827 && regclass[2] == X86_64_SSEUP_CLASS
7828 && regclass[3] == X86_64_SSEUP_CLASS
7829 && regclass[4] == X86_64_SSEUP_CLASS
7830 && regclass[5] == X86_64_SSEUP_CLASS
7831 && regclass[6] == X86_64_SSEUP_CLASS
7832 && regclass[7] == X86_64_SSEUP_CLASS
7833 && mode != BLKmode)
7834 return gen_reg_or_parallel (mode, orig_mode,
7835 SSE_REGNO (sse_regno));
7836 if (n == 2
7837 && regclass[0] == X86_64_X87_CLASS
7838 && regclass[1] == X86_64_X87UP_CLASS)
7839 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7841 if (n == 2
7842 && regclass[0] == X86_64_INTEGER_CLASS
7843 && regclass[1] == X86_64_INTEGER_CLASS
7844 && (mode == CDImode || mode == TImode)
7845 && intreg[0] + 1 == intreg[1])
7846 return gen_rtx_REG (mode, intreg[0]);
7848 /* Otherwise figure out the entries of the PARALLEL. */
7849 for (i = 0; i < n; i++)
7851 int pos;
7853 switch (regclass[i])
7855 case X86_64_NO_CLASS:
7856 break;
7857 case X86_64_INTEGER_CLASS:
7858 case X86_64_INTEGERSI_CLASS:
7859 /* Merge TImodes on aligned occasions here too. */
7860 if (i * 8 + 8 > bytes)
7862 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
7863 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
7864 /* We've requested 24 bytes for which we
7865 don't have a mode. Use DImode. */
7866 tmpmode = DImode;
7868 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7869 tmpmode = SImode;
7870 else
7871 tmpmode = DImode;
7872 exp [nexps++]
7873 = gen_rtx_EXPR_LIST (VOIDmode,
7874 gen_rtx_REG (tmpmode, *intreg),
7875 GEN_INT (i*8));
7876 intreg++;
7877 break;
7878 case X86_64_SSESF_CLASS:
7879 exp [nexps++]
7880 = gen_rtx_EXPR_LIST (VOIDmode,
7881 gen_rtx_REG (SFmode,
7882 SSE_REGNO (sse_regno)),
7883 GEN_INT (i*8));
7884 sse_regno++;
7885 break;
7886 case X86_64_SSEDF_CLASS:
7887 exp [nexps++]
7888 = gen_rtx_EXPR_LIST (VOIDmode,
7889 gen_rtx_REG (DFmode,
7890 SSE_REGNO (sse_regno)),
7891 GEN_INT (i*8));
7892 sse_regno++;
7893 break;
7894 case X86_64_SSE_CLASS:
7895 pos = i;
7896 switch (n)
7898 case 1:
7899 tmpmode = DImode;
7900 break;
7901 case 2:
7902 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7904 tmpmode = TImode;
7905 i++;
7907 else
7908 tmpmode = DImode;
7909 break;
7910 case 4:
7911 gcc_assert (i == 0
7912 && regclass[1] == X86_64_SSEUP_CLASS
7913 && regclass[2] == X86_64_SSEUP_CLASS
7914 && regclass[3] == X86_64_SSEUP_CLASS);
7915 tmpmode = OImode;
7916 i += 3;
7917 break;
7918 case 8:
7919 gcc_assert (i == 0
7920 && regclass[1] == X86_64_SSEUP_CLASS
7921 && regclass[2] == X86_64_SSEUP_CLASS
7922 && regclass[3] == X86_64_SSEUP_CLASS
7923 && regclass[4] == X86_64_SSEUP_CLASS
7924 && regclass[5] == X86_64_SSEUP_CLASS
7925 && regclass[6] == X86_64_SSEUP_CLASS
7926 && regclass[7] == X86_64_SSEUP_CLASS);
7927 tmpmode = XImode;
7928 i += 7;
7929 break;
7930 default:
7931 gcc_unreachable ();
7933 exp [nexps++]
7934 = gen_rtx_EXPR_LIST (VOIDmode,
7935 gen_rtx_REG (tmpmode,
7936 SSE_REGNO (sse_regno)),
7937 GEN_INT (pos*8));
7938 sse_regno++;
7939 break;
7940 default:
7941 gcc_unreachable ();
7945 /* Empty aligned struct, union or class. */
7946 if (nexps == 0)
7947 return NULL;
7949 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7950 for (i = 0; i < nexps; i++)
7951 XVECEXP (ret, 0, i) = exp [i];
7952 return ret;
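/* Illustrative result: passing struct { double d; long l; } as the first
   argument of a SysV x86-64 function yields roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi.  */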
7955 /* Update the data in CUM to advance over an argument of mode MODE
7956 and data type TYPE. (TYPE is null for libcalls where that information
7957 may not be available.)
7959 Return the number of integer registers advanced over. */
7961 static int
7962 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7963 const_tree type, HOST_WIDE_INT bytes,
7964 HOST_WIDE_INT words)
7966 int res = 0;
7967 bool error_p = false;
7969 if (TARGET_IAMCU)
7971 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7972 bytes in registers. */
7973 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7974 goto pass_in_reg;
7975 return res;
7978 switch (mode)
7980 default:
7981 break;
7983 case E_BLKmode:
7984 if (bytes < 0)
7985 break;
7986 /* FALLTHRU */
7988 case E_DImode:
7989 case E_SImode:
7990 case E_HImode:
7991 case E_QImode:
7992 pass_in_reg:
7993 cum->words += words;
7994 cum->nregs -= words;
7995 cum->regno += words;
7996 if (cum->nregs >= 0)
7997 res = words;
7998 if (cum->nregs <= 0)
8000 cum->nregs = 0;
8001 cfun->machine->arg_reg_available = false;
8002 cum->regno = 0;
8004 break;
8006 case E_OImode:
8007 /* OImode shouldn't be used directly. */
8008 gcc_unreachable ();
8010 case E_DFmode:
8011 if (cum->float_in_sse == -1)
8012 error_p = true;
8013 if (cum->float_in_sse < 2)
8014 break;
8015 /* FALLTHRU */
8016 case E_SFmode:
8017 if (cum->float_in_sse == -1)
8018 error_p = true;
8019 if (cum->float_in_sse < 1)
8020 break;
8021 /* FALLTHRU */
8023 case E_V8SFmode:
8024 case E_V8SImode:
8025 case E_V64QImode:
8026 case E_V32HImode:
8027 case E_V16SImode:
8028 case E_V8DImode:
8029 case E_V16SFmode:
8030 case E_V8DFmode:
8031 case E_V32QImode:
8032 case E_V16HImode:
8033 case E_V4DFmode:
8034 case E_V4DImode:
8035 case E_TImode:
8036 case E_V16QImode:
8037 case E_V8HImode:
8038 case E_V4SImode:
8039 case E_V2DImode:
8040 case E_V4SFmode:
8041 case E_V2DFmode:
8042 if (!type || !AGGREGATE_TYPE_P (type))
8044 cum->sse_words += words;
8045 cum->sse_nregs -= 1;
8046 cum->sse_regno += 1;
8047 if (cum->sse_nregs <= 0)
8049 cum->sse_nregs = 0;
8050 cum->sse_regno = 0;
8053 break;
8055 case E_V8QImode:
8056 case E_V4HImode:
8057 case E_V2SImode:
8058 case E_V2SFmode:
8059 case E_V1TImode:
8060 case E_V1DImode:
8061 if (!type || !AGGREGATE_TYPE_P (type))
8063 cum->mmx_words += words;
8064 cum->mmx_nregs -= 1;
8065 cum->mmx_regno += 1;
8066 if (cum->mmx_nregs <= 0)
8068 cum->mmx_nregs = 0;
8069 cum->mmx_regno = 0;
8072 break;
8074 if (error_p)
8076 cum->float_in_sse = 0;
8077 error ("calling %qD with SSE calling convention without "
8078 "SSE/SSE2 enabled", cum->decl);
8079 sorry ("this is a GCC bug that can be worked around by adding "
8080 "attribute used to function called");
8083 return res;
8086 static int
8087 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8088 const_tree type, HOST_WIDE_INT words, bool named)
8090 int int_nregs, sse_nregs;
8092 /* Unnamed 512 and 256 bit vector mode parameters are passed on the stack. */
8093 if (!named && (VALID_AVX512F_REG_MODE (mode)
8094 || VALID_AVX256_REG_MODE (mode)))
8095 return 0;
8097 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8098 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8100 cum->nregs -= int_nregs;
8101 cum->sse_nregs -= sse_nregs;
8102 cum->regno += int_nregs;
8103 cum->sse_regno += sse_nregs;
8104 return int_nregs;
8106 else
8108 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8109 cum->words = ROUND_UP (cum->words, align);
8110 cum->words += words;
8111 return 0;
8115 static int
8116 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8117 HOST_WIDE_INT words)
8119 /* Otherwise, this should be passed indirect. */
8120 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8122 cum->words += words;
8123 if (cum->nregs > 0)
8125 cum->nregs -= 1;
8126 cum->regno += 1;
8127 return 1;
8129 return 0;
8132 /* Update the data in CUM to advance over an argument of mode MODE and
8133 data type TYPE. (TYPE is null for libcalls where that information
8134 may not be available.) */
8136 static void
8137 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8138 const_tree type, bool named)
8140 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8141 HOST_WIDE_INT bytes, words;
8142 int nregs;
8144 /* The argument of interrupt handler is a special case and is
8145 handled in ix86_function_arg. */
8146 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8147 return;
8149 if (mode == BLKmode)
8150 bytes = int_size_in_bytes (type);
8151 else
8152 bytes = GET_MODE_SIZE (mode);
8153 words = CEIL (bytes, UNITS_PER_WORD);
8155 if (type)
8156 mode = type_natural_mode (type, NULL, false);
8158 if ((type && POINTER_BOUNDS_TYPE_P (type))
8159 || POINTER_BOUNDS_MODE_P (mode))
8161 /* If we pass bounds in BT then just update the remaining bounds count. */
8162 if (cum->bnds_in_bt)
8164 cum->bnds_in_bt--;
8165 return;
8168 /* Update the remaining number of bounds to force. */
8169 if (cum->force_bnd_pass)
8170 cum->force_bnd_pass--;
8172 cum->bnd_regno++;
8174 return;
8177 /* The first arg not going to Bounds Tables resets this counter. */
8178 cum->bnds_in_bt = 0;
8179 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
8180 the passed and received types do not match. If bounds do not follow an
8181 unnamed arg, still pretend the required number of bounds were passed. */
8182 if (cum->force_bnd_pass)
8184 cum->bnd_regno += cum->force_bnd_pass;
8185 cum->force_bnd_pass = 0;
8188 if (TARGET_64BIT)
8190 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8192 if (call_abi == MS_ABI)
8193 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8194 else
8195 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8197 else
8198 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8200 /* For stdarg we expect bounds to be passed for each value passed
8201 in a register. */
8202 if (cum->stdarg)
8203 cum->force_bnd_pass = nregs;
8204 /* For pointers passed in memory we expect bounds passed in Bounds
8205 Table. */
8206 if (!nregs)
8208 /* Track if there are outgoing arguments on stack. */
8209 if (cum->caller)
8210 cfun->machine->outgoing_args_on_stack = true;
8212 cum->bnds_in_bt = chkp_type_bounds_count (type);
8216 /* Define where to put the arguments to a function.
8217 Value is zero to push the argument on the stack,
8218 or a hard register in which to store the argument.
8220 MODE is the argument's machine mode.
8221 TYPE is the data type of the argument (as a tree).
8222 This is null for libcalls where that information may
8223 not be available.
8224 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8225 the preceding args and about the function being called.
8226 NAMED is nonzero if this argument is a named parameter
8227 (otherwise it is an extra parameter matching an ellipsis). */
8229 static rtx
8230 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8231 machine_mode orig_mode, const_tree type,
8232 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8234 bool error_p = false;
8236 /* Avoid the AL settings for the Unix64 ABI. */
8237 if (mode == VOIDmode)
8238 return constm1_rtx;
8240 if (TARGET_IAMCU)
8242 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8243 bytes in registers. */
8244 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8245 goto pass_in_reg;
8246 return NULL_RTX;
8249 switch (mode)
8251 default:
8252 break;
8254 case E_BLKmode:
8255 if (bytes < 0)
8256 break;
8257 /* FALLTHRU */
8258 case E_DImode:
8259 case E_SImode:
8260 case E_HImode:
8261 case E_QImode:
8262 pass_in_reg:
8263 if (words <= cum->nregs)
8265 int regno = cum->regno;
8267 /* Fastcall allocates the first two DWORD (SImode) or
8268 smaller arguments to ECX and EDX if it isn't an
8269 aggregate type. */
8270 if (cum->fastcall)
8272 if (mode == BLKmode
8273 || mode == DImode
8274 || (type && AGGREGATE_TYPE_P (type)))
8275 break;
8277 /* ECX, not EAX, is the first allocated register. */
8278 if (regno == AX_REG)
8279 regno = CX_REG;
8281 return gen_rtx_REG (mode, regno);
8283 break;
8285 case E_DFmode:
8286 if (cum->float_in_sse == -1)
8287 error_p = true;
8288 if (cum->float_in_sse < 2)
8289 break;
8290 /* FALLTHRU */
8291 case E_SFmode:
8292 if (cum->float_in_sse == -1)
8293 error_p = true;
8294 if (cum->float_in_sse < 1)
8295 break;
8296 /* FALLTHRU */
8297 case E_TImode:
8298 /* In 32bit, we pass TImode in xmm registers. */
8299 case E_V16QImode:
8300 case E_V8HImode:
8301 case E_V4SImode:
8302 case E_V2DImode:
8303 case E_V4SFmode:
8304 case E_V2DFmode:
8305 if (!type || !AGGREGATE_TYPE_P (type))
8307 if (cum->sse_nregs)
8308 return gen_reg_or_parallel (mode, orig_mode,
8309 cum->sse_regno + FIRST_SSE_REG);
8311 break;
8313 case E_OImode:
8314 case E_XImode:
8315 /* OImode and XImode shouldn't be used directly. */
8316 gcc_unreachable ();
8318 case E_V64QImode:
8319 case E_V32HImode:
8320 case E_V16SImode:
8321 case E_V8DImode:
8322 case E_V16SFmode:
8323 case E_V8DFmode:
8324 case E_V8SFmode:
8325 case E_V8SImode:
8326 case E_V32QImode:
8327 case E_V16HImode:
8328 case E_V4DFmode:
8329 case E_V4DImode:
8330 if (!type || !AGGREGATE_TYPE_P (type))
8332 if (cum->sse_nregs)
8333 return gen_reg_or_parallel (mode, orig_mode,
8334 cum->sse_regno + FIRST_SSE_REG);
8336 break;
8338 case E_V8QImode:
8339 case E_V4HImode:
8340 case E_V2SImode:
8341 case E_V2SFmode:
8342 case E_V1TImode:
8343 case E_V1DImode:
8344 if (!type || !AGGREGATE_TYPE_P (type))
8346 if (cum->mmx_nregs)
8347 return gen_reg_or_parallel (mode, orig_mode,
8348 cum->mmx_regno + FIRST_MMX_REG);
8350 break;
8352 if (error_p)
8354 cum->float_in_sse = 0;
8355 error ("calling %qD with SSE calling convention without "
8356 "SSE/SSE2 enabled", cum->decl);
8357 sorry ("this is a GCC bug that can be worked around by adding "
8358 "attribute used to function called");
8361 return NULL_RTX;
8364 static rtx
8365 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8366 machine_mode orig_mode, const_tree type, bool named)
8368 /* Handle a hidden AL argument containing number of registers
8369 for varargs x86-64 functions. */
8370 if (mode == VOIDmode)
8371 return GEN_INT (cum->maybe_vaarg
8372 ? (cum->sse_nregs < 0
8373 ? X86_64_SSE_REGPARM_MAX
8374 : cum->sse_regno)
8375 : -1);
8377 switch (mode)
8379 default:
8380 break;
8382 case E_V8SFmode:
8383 case E_V8SImode:
8384 case E_V32QImode:
8385 case E_V16HImode:
8386 case E_V4DFmode:
8387 case E_V4DImode:
8388 case E_V16SFmode:
8389 case E_V16SImode:
8390 case E_V64QImode:
8391 case E_V32HImode:
8392 case E_V8DFmode:
8393 case E_V8DImode:
8394 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
8395 if (!named)
8396 return NULL;
8397 break;
8400 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8401 cum->sse_nregs,
8402 &x86_64_int_parameter_registers [cum->regno],
8403 cum->sse_regno);
8406 static rtx
8407 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8408 machine_mode orig_mode, bool named,
8409 HOST_WIDE_INT bytes)
8411 unsigned int regno;
8413 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8414 We use a value of -2 to specify that the current function call is MS_ABI. */
8415 if (mode == VOIDmode)
8416 return GEN_INT (-2);
8418 /* If we've run out of registers, it goes on the stack. */
8419 if (cum->nregs == 0)
8420 return NULL_RTX;
8422 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8424 /* Only floating point modes are passed in anything but integer regs. */
8425 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8427 if (named)
8428 regno = cum->regno + FIRST_SSE_REG;
8429 else
8431 rtx t1, t2;
8433 /* Unnamed floating parameters are passed in both the
8434 SSE and integer registers. */
8435 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8436 t2 = gen_rtx_REG (mode, regno);
8437 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8438 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8439 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8442 /* Handle aggregate types passed in registers. */
8443 if (orig_mode == BLKmode)
8445 if (bytes > 0 && bytes <= 8)
8446 mode = (bytes > 4 ? DImode : SImode);
8447 if (mode == BLKmode)
8448 mode = DImode;
8451 return gen_reg_or_parallel (mode, orig_mode, regno);
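/* Illustrative examples for the MS ABI: a named double in the second
   argument slot comes back as (reg:DF xmm1), while the same argument in
   a varargs call yields roughly

     (parallel [(expr_list (reg:DF xmm1) (const_int 0))
                (expr_list (reg:DF dx) (const_int 0))])

   so the value is loaded into both %xmm1 and %rdx, matching the comment
   above about unnamed floating parameters.  */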
8454 /* Return where to put the arguments to a function.
8455 Value is zero to push the argument on the stack, or a hard register in which to store the argument.
8457 MODE is the argument's machine mode. TYPE is the data type of the
8458 argument. It is null for libcalls where that information may not be
8459 available. CUM gives information about the preceding args and about
8460 the function being called. NAMED is nonzero if this argument is a
8461 named parameter (otherwise it is an extra parameter matching an
8462 ellipsis). */
8464 static rtx
8465 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8466 const_tree type, bool named)
8468 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8469 machine_mode mode = omode;
8470 HOST_WIDE_INT bytes, words;
8471 rtx arg;
8473 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8475 gcc_assert (type != NULL_TREE);
8476 if (POINTER_TYPE_P (type))
8478 /* This is the pointer argument. */
8479 gcc_assert (TYPE_MODE (type) == Pmode);
8480 /* It is at -WORD(AP) in the current frame in interrupt and
8481 exception handlers. */
8482 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8484 else
8486 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8487 && TREE_CODE (type) == INTEGER_TYPE
8488 && TYPE_MODE (type) == word_mode);
8489 /* The error code is the word-mode integer argument at
8490 -2 * WORD(AP) in the current frame of the exception
8491 handler. */
8492 arg = gen_rtx_MEM (word_mode,
8493 plus_constant (Pmode,
8494 arg_pointer_rtx,
8495 -2 * UNITS_PER_WORD));
8497 return arg;
8500 /* All pointer bounds arguments are handled separately here. */
8501 if ((type && POINTER_BOUNDS_TYPE_P (type))
8502 || POINTER_BOUNDS_MODE_P (mode))
8504 /* Return NULL if bounds are forced to go in Bounds Table. */
8505 if (cum->bnds_in_bt)
8506 arg = NULL;
8507 /* Return the next available bound reg if any. */
8508 else if (cum->bnd_regno <= LAST_BND_REG)
8509 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8510 /* Return the next special slot number otherwise. */
8511 else
8512 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8514 return arg;
8517 if (mode == BLKmode)
8518 bytes = int_size_in_bytes (type);
8519 else
8520 bytes = GET_MODE_SIZE (mode);
8521 words = CEIL (bytes, UNITS_PER_WORD);
8523 /* To simplify the code below, represent vector types with a vector mode
8524 even if MMX/SSE are not active. */
8525 if (type && TREE_CODE (type) == VECTOR_TYPE)
8526 mode = type_natural_mode (type, cum, false);
8528 if (TARGET_64BIT)
8530 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8532 if (call_abi == MS_ABI)
8533 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8534 else
8535 arg = function_arg_64 (cum, mode, omode, type, named);
8537 else
8538 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8540 /* Track if there are outgoing arguments on stack. */
8541 if (arg == NULL_RTX && cum->caller)
8542 cfun->machine->outgoing_args_on_stack = true;
8544 return arg;
8547 /* A C expression that indicates when an argument must be passed by
8548 reference. If nonzero for an argument, a copy of that argument is
8549 made in memory and a pointer to the argument is passed instead of
8550 the argument itself. The pointer is passed in whatever way is
8551 appropriate for passing a pointer to that type. */
8553 static bool
8554 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8555 const_tree type, bool)
8557 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8559 /* Bounds are never passed by reference. */
8560 if ((type && POINTER_BOUNDS_TYPE_P (type))
8561 || POINTER_BOUNDS_MODE_P (mode))
8562 return false;
8564 if (TARGET_64BIT)
8566 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8568 /* See Windows x64 Software Convention. */
8569 if (call_abi == MS_ABI)
8571 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8573 if (type)
8575 /* Arrays are passed by reference. */
8576 if (TREE_CODE (type) == ARRAY_TYPE)
8577 return true;
8579 if (RECORD_OR_UNION_TYPE_P (type))
8581 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8582 are passed by reference. */
8583 msize = int_size_in_bytes (type);
8587 /* __m128 is passed by reference. */
8588 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8590 else if (type && int_size_in_bytes (type) == -1)
8591 return true;
8594 return false;
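/* Examples under the Windows x64 convention: an __m128 argument and a
   12 byte struct are both passed by reference, while an 8 byte struct is
   passed by value in a register.  On the SysV x86-64 side only
   variable-sized types (int_size_in_bytes == -1) are forced through this
   path; everything else is handled by the classification code above.  */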
8597 /* Return true when TYPE should be 128bit aligned for 32bit argument
8598 passing ABI. XXX: This function is obsolete and is only used for
8599 checking psABI compatibility with previous versions of GCC. */
8601 static bool
8602 ix86_compat_aligned_value_p (const_tree type)
8604 machine_mode mode = TYPE_MODE (type);
8605 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8606 || mode == TDmode
8607 || mode == TFmode
8608 || mode == TCmode)
8609 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8610 return true;
8611 if (TYPE_ALIGN (type) < 128)
8612 return false;
8614 if (AGGREGATE_TYPE_P (type))
8616 /* Walk the aggregates recursively. */
8617 switch (TREE_CODE (type))
8619 case RECORD_TYPE:
8620 case UNION_TYPE:
8621 case QUAL_UNION_TYPE:
8623 tree field;
8625 /* Walk all the structure fields. */
8626 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8628 if (TREE_CODE (field) == FIELD_DECL
8629 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8630 return true;
8632 break;
8635 case ARRAY_TYPE:
8636 /* Just for use if some language passes arrays by value. */
8637 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8638 return true;
8639 break;
8641 default:
8642 gcc_unreachable ();
8645 return false;
8648 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8649 XXX: This function is obsolete and is only used for checking psABI
8650 compatibility with previous versions of GCC. */
8652 static unsigned int
8653 ix86_compat_function_arg_boundary (machine_mode mode,
8654 const_tree type, unsigned int align)
8656 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8657 natural boundaries. */
8658 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8660 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8661 make an exception for SSE modes since these require 128bit
8662 alignment.
8664 The handling here differs from field_alignment. ICC aligns MMX
8665 arguments to 4 byte boundaries, while structure fields are aligned
8666 to 8 byte boundaries. */
8667 if (!type)
8669 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8670 align = PARM_BOUNDARY;
8672 else
8674 if (!ix86_compat_aligned_value_p (type))
8675 align = PARM_BOUNDARY;
8678 if (align > BIGGEST_ALIGNMENT)
8679 align = BIGGEST_ALIGNMENT;
8680 return align;
8683 /* Return true when TYPE should be 128bit aligned for 32bit argument
8684 passing ABI. */
8686 static bool
8687 ix86_contains_aligned_value_p (const_tree type)
8689 machine_mode mode = TYPE_MODE (type);
8691 if (mode == XFmode || mode == XCmode)
8692 return false;
8694 if (TYPE_ALIGN (type) < 128)
8695 return false;
8697 if (AGGREGATE_TYPE_P (type))
8699 /* Walk the aggregates recursively. */
8700 switch (TREE_CODE (type))
8702 case RECORD_TYPE:
8703 case UNION_TYPE:
8704 case QUAL_UNION_TYPE:
8706 tree field;
8708 /* Walk all the structure fields. */
8709 for (field = TYPE_FIELDS (type);
8710 field;
8711 field = DECL_CHAIN (field))
8713 if (TREE_CODE (field) == FIELD_DECL
8714 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8715 return true;
8717 break;
8720 case ARRAY_TYPE:
8721 /* Just for use if some language passes arrays by value. */
8722 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8723 return true;
8724 break;
8726 default:
8727 gcc_unreachable ();
8730 else
8731 return TYPE_ALIGN (type) >= 128;
8733 return false;
8736 /* Gives the alignment boundary, in bits, of an argument with the
8737 specified mode and type. */
8739 static unsigned int
8740 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8742 unsigned int align;
8743 if (type)
8745 /* Since the main variant type is used for the call, convert TYPE to
8746 its main variant type. */
8747 type = TYPE_MAIN_VARIANT (type);
8748 align = TYPE_ALIGN (type);
8750 else
8751 align = GET_MODE_ALIGNMENT (mode);
8752 if (align < PARM_BOUNDARY)
8753 align = PARM_BOUNDARY;
8754 else
8756 static bool warned;
8757 unsigned int saved_align = align;
8759 if (!TARGET_64BIT)
8761 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8762 if (!type)
8764 if (mode == XFmode || mode == XCmode)
8765 align = PARM_BOUNDARY;
8767 else if (!ix86_contains_aligned_value_p (type))
8768 align = PARM_BOUNDARY;
8770 if (align < 128)
8771 align = PARM_BOUNDARY;
8774 if (warn_psabi
8775 && !warned
8776 && align != ix86_compat_function_arg_boundary (mode, type,
8777 saved_align))
8779 warned = true;
8780 inform (input_location,
8781 "The ABI for passing parameters with %d-byte"
8782 " alignment has changed in GCC 4.6",
8783 align / BITS_PER_UNIT);
8787 return align;
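/* For illustration: on 32-bit targets a double argument is aligned only
   to PARM_BOUNDARY (32 bits) here, while a 16 byte __m128 argument keeps
   its 128 bit alignment; on 64-bit targets the type's natural alignment
   is used, subject to the PARM_BOUNDARY minimum.  */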
8790 /* Return true if N is a possible register number of function value. */
8792 static bool
8793 ix86_function_value_regno_p (const unsigned int regno)
8795 switch (regno)
8797 case AX_REG:
8798 return true;
8799 case DX_REG:
8800 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8801 case DI_REG:
8802 case SI_REG:
8803 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8805 case BND0_REG:
8806 case BND1_REG:
8807 return chkp_function_instrumented_p (current_function_decl);
8809 /* Complex values are returned in %st(0)/%st(1) pair. */
8810 case ST0_REG:
8811 case ST1_REG:
8812 /* TODO: The function should depend on current function ABI but
8813 builtins.c would need updating then. Therefore we use the
8814 default ABI. */
8815 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8816 return false;
8817 return TARGET_FLOAT_RETURNS_IN_80387;
8819 /* Complex values are returned in %xmm0/%xmm1 pair. */
8820 case XMM0_REG:
8821 case XMM1_REG:
8822 return TARGET_SSE;
8824 case MM0_REG:
8825 if (TARGET_MACHO || TARGET_64BIT)
8826 return false;
8827 return TARGET_MMX;
8830 return false;
8833 /* Define how to find the value returned by a function.
8834 VALTYPE is the data type of the value (as a tree).
8835 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8836 otherwise, FUNC is 0. */
8838 static rtx
8839 function_value_32 (machine_mode orig_mode, machine_mode mode,
8840 const_tree fntype, const_tree fn)
8842 unsigned int regno;
8844 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8845 we normally prevent this case when mmx is not available. However
8846 some ABIs may require the result to be returned like DImode. */
8847 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8848 regno = FIRST_MMX_REG;
8850 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8851 we prevent this case when sse is not available. However some ABIs
8852 may require the result to be returned like integer TImode. */
8853 else if (mode == TImode
8854 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8855 regno = FIRST_SSE_REG;
8857 /* 32-byte vector modes in %ymm0. */
8858 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8859 regno = FIRST_SSE_REG;
8861 /* 64-byte vector modes in %zmm0. */
8862 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8863 regno = FIRST_SSE_REG;
8865 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8866 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8867 regno = FIRST_FLOAT_REG;
8868 else
8869 /* Most things go in %eax. */
8870 regno = AX_REG;
8872 /* Override FP return register with %xmm0 for local functions when
8873 SSE math is enabled or for functions with sseregparm attribute. */
8874 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8876 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8877 if (sse_level == -1)
8879 error ("calling %qD with SSE calling convention without "
8880 "SSE/SSE2 enabled", fn);
8881 sorry ("this is a GCC bug that can be worked around by adding "
8882 "attribute used to function called");
8884 else if ((sse_level >= 1 && mode == SFmode)
8885 || (sse_level == 2 && mode == DFmode))
8886 regno = FIRST_SSE_REG;
8889 /* OImode shouldn't be used directly. */
8890 gcc_assert (mode != OImode);
8892 return gen_rtx_REG (orig_mode, regno);
8895 static rtx
8896 function_value_64 (machine_mode orig_mode, machine_mode mode,
8897 const_tree valtype)
8899 rtx ret;
8901 /* Handle libcalls, which don't provide a type node. */
8902 if (valtype == NULL)
8904 unsigned int regno;
8906 switch (mode)
8908 case E_SFmode:
8909 case E_SCmode:
8910 case E_DFmode:
8911 case E_DCmode:
8912 case E_TFmode:
8913 case E_SDmode:
8914 case E_DDmode:
8915 case E_TDmode:
8916 regno = FIRST_SSE_REG;
8917 break;
8918 case E_XFmode:
8919 case E_XCmode:
8920 regno = FIRST_FLOAT_REG;
8921 break;
8922 case E_TCmode:
8923 return NULL;
8924 default:
8925 regno = AX_REG;
8928 return gen_rtx_REG (mode, regno);
8930 else if (POINTER_TYPE_P (valtype))
8932 /* Pointers are always returned in word_mode. */
8933 mode = word_mode;
8936 ret = construct_container (mode, orig_mode, valtype, 1,
8937 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8938 x86_64_int_return_registers, 0);
8940 /* For zero sized structures, construct_container returns NULL, but we
8941 need to keep the rest of the compiler happy by returning a meaningful value. */
8942 if (!ret)
8943 ret = gen_rtx_REG (orig_mode, AX_REG);
8945 return ret;
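/* Illustrative examples: returning _Complex double classifies as two
   X86_64_SSEDF_CLASS eightbytes, so the container above becomes a
   PARALLEL of (reg:DF xmm0) at offset 0 and (reg:DF xmm1) at offset 8;
   a pointer return is forced to word_mode and comes back as
   (reg:DI ax) on LP64.  */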
8948 static rtx
8949 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8950 const_tree valtype)
8952 unsigned int regno = AX_REG;
8954 if (TARGET_SSE)
8956 switch (GET_MODE_SIZE (mode))
8958 case 16:
8959 if (valtype != NULL_TREE
8960 && !VECTOR_INTEGER_TYPE_P (valtype)
8962 && !INTEGRAL_TYPE_P (valtype)
8963 && !VECTOR_FLOAT_TYPE_P (valtype))
8964 break;
8965 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8966 && !COMPLEX_MODE_P (mode))
8967 regno = FIRST_SSE_REG;
8968 break;
8969 case 8:
8970 case 4:
8971 if (mode == SFmode || mode == DFmode)
8972 regno = FIRST_SSE_REG;
8973 break;
8974 default:
8975 break;
8978 return gen_rtx_REG (orig_mode, regno);
8981 static rtx
8982 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8983 machine_mode orig_mode, machine_mode mode)
8985 const_tree fn, fntype;
8987 fn = NULL_TREE;
8988 if (fntype_or_decl && DECL_P (fntype_or_decl))
8989 fn = fntype_or_decl;
8990 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8992 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8993 || POINTER_BOUNDS_MODE_P (mode))
8994 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8995 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8996 return function_value_ms_64 (orig_mode, mode, valtype);
8997 else if (TARGET_64BIT)
8998 return function_value_64 (orig_mode, mode, valtype);
8999 else
9000 return function_value_32 (orig_mode, mode, fntype, fn);
9003 static rtx
9004 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9006 machine_mode mode, orig_mode;
9008 orig_mode = TYPE_MODE (valtype);
9009 mode = type_natural_mode (valtype, NULL, true);
9010 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9013 /* Return an RTX representing a place where a function returns
9014 or receives pointer bounds, or NULL if no bounds are returned.
9016 VALTYPE is a data type of a value returned by the function.
9018 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9019 or FUNCTION_TYPE of the function.
9021 If OUTGOING is false, return a place in which the caller will
9022 see the return value. Otherwise, return a place where a
9023 function returns a value. */
9025 static rtx
9026 ix86_function_value_bounds (const_tree valtype,
9027 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9028 bool outgoing ATTRIBUTE_UNUSED)
9030 rtx res = NULL_RTX;
9032 if (BOUNDED_TYPE_P (valtype))
9033 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9034 else if (chkp_type_has_pointer (valtype))
9036 bitmap slots;
9037 rtx bounds[2];
9038 bitmap_iterator bi;
9039 unsigned i, bnd_no = 0;
9041 bitmap_obstack_initialize (NULL);
9042 slots = BITMAP_ALLOC (NULL);
9043 chkp_find_bound_slots (valtype, slots);
9045 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9047 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9048 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9049 gcc_assert (bnd_no < 2);
9050 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9053 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9055 BITMAP_FREE (slots);
9056 bitmap_obstack_release (NULL);
9058 else
9059 res = NULL_RTX;
9061 return res;
9064 /* Pointer function arguments and return values are promoted to
9065 word_mode for normal functions. */
9067 static machine_mode
9068 ix86_promote_function_mode (const_tree type, machine_mode mode,
9069 int *punsignedp, const_tree fntype,
9070 int for_return)
9072 if (cfun->machine->func_type == TYPE_NORMAL
9073 && type != NULL_TREE
9074 && POINTER_TYPE_P (type))
9076 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9077 return word_mode;
9079 return default_promote_function_mode (type, mode, punsignedp, fntype,
9080 for_return);
9083 /* Return true if a structure, union or array with MODE containing FIELD
9084 should be accessed using BLKmode. */
9086 static bool
9087 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9089 /* Union with XFmode must be in BLKmode. */
9090 return (mode == XFmode
9091 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9092 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9096 ix86_libcall_value (machine_mode mode)
9098 return ix86_function_value_1 (NULL, NULL, mode, mode);
9101 /* Return true iff type is returned in memory. */
9103 static bool
9104 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9106 #ifdef SUBTARGET_RETURN_IN_MEMORY
9107 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9108 #else
9109 const machine_mode mode = type_natural_mode (type, NULL, true);
9110 HOST_WIDE_INT size;
9112 if (POINTER_BOUNDS_TYPE_P (type))
9113 return false;
9115 if (TARGET_64BIT)
9117 if (ix86_function_type_abi (fntype) == MS_ABI)
9119 size = int_size_in_bytes (type);
9121 /* __m128 is returned in xmm0. */
9122 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9123 || INTEGRAL_TYPE_P (type)
9124 || VECTOR_FLOAT_TYPE_P (type))
9125 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9126 && !COMPLEX_MODE_P (mode)
9127 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9128 return false;
9130 /* Otherwise, the size must be exactly in [1248]. */
9131 return size != 1 && size != 2 && size != 4 && size != 8;
9133 else
9135 int needed_intregs, needed_sseregs;
9137 return examine_argument (mode, type, 1,
9138 &needed_intregs, &needed_sseregs);
9141 else
9143 size = int_size_in_bytes (type);
9145 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9146 bytes in registers. */
9147 if (TARGET_IAMCU)
9148 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9150 if (mode == BLKmode)
9151 return true;
9153 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9154 return false;
9156 if (VECTOR_MODE_P (mode) || mode == TImode)
9158 /* User-created vectors small enough to fit in EAX. */
9159 if (size < 8)
9160 return false;
9162 /* Unless the ABI prescribes otherwise,
9163 MMX/3dNow values are returned in MM0 if available. */
9165 if (size == 8)
9166 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9168 /* SSE values are returned in XMM0 if available. */
9169 if (size == 16)
9170 return !TARGET_SSE;
9172 /* AVX values are returned in YMM0 if available. */
9173 if (size == 32)
9174 return !TARGET_AVX;
9176 /* AVX512F values are returned in ZMM0 if available. */
9177 if (size == 64)
9178 return !TARGET_AVX512F;
9181 if (mode == XFmode)
9182 return false;
9184 if (size > 12)
9185 return true;
9187 /* OImode shouldn't be used directly. */
9188 gcc_assert (mode != OImode);
9190 return false;
9192 #endif
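/* Examples: on ia32 a 12 byte struct { int a, b, c; } is returned in
   memory, a 16 byte __m128 vector is returned in %xmm0 when SSE is
   enabled, and long double (XFmode) is returned in %st(0) by default;
   on the 64-bit SysV side the decision simply follows examine_argument
   above.  */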
9196 /* Create the va_list data type. */
9198 static tree
9199 ix86_build_builtin_va_list_64 (void)
9201 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9203 record = lang_hooks.types.make_type (RECORD_TYPE);
9204 type_decl = build_decl (BUILTINS_LOCATION,
9205 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9207 f_gpr = build_decl (BUILTINS_LOCATION,
9208 FIELD_DECL, get_identifier ("gp_offset"),
9209 unsigned_type_node);
9210 f_fpr = build_decl (BUILTINS_LOCATION,
9211 FIELD_DECL, get_identifier ("fp_offset"),
9212 unsigned_type_node);
9213 f_ovf = build_decl (BUILTINS_LOCATION,
9214 FIELD_DECL, get_identifier ("overflow_arg_area"),
9215 ptr_type_node);
9216 f_sav = build_decl (BUILTINS_LOCATION,
9217 FIELD_DECL, get_identifier ("reg_save_area"),
9218 ptr_type_node);
9220 va_list_gpr_counter_field = f_gpr;
9221 va_list_fpr_counter_field = f_fpr;
9223 DECL_FIELD_CONTEXT (f_gpr) = record;
9224 DECL_FIELD_CONTEXT (f_fpr) = record;
9225 DECL_FIELD_CONTEXT (f_ovf) = record;
9226 DECL_FIELD_CONTEXT (f_sav) = record;
9228 TYPE_STUB_DECL (record) = type_decl;
9229 TYPE_NAME (record) = type_decl;
9230 TYPE_FIELDS (record) = f_gpr;
9231 DECL_CHAIN (f_gpr) = f_fpr;
9232 DECL_CHAIN (f_fpr) = f_ovf;
9233 DECL_CHAIN (f_ovf) = f_sav;
9235 layout_type (record);
9237 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9238 NULL_TREE, TYPE_ATTRIBUTES (record));
9240 /* The correct type is an array type of one element. */
9241 return build_array_type (record, build_index_type (size_zero_node));
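/* For reference, the record built above matches the C level layout that
   the SysV x86-64 ABI mandates for va_list:

     typedef struct __va_list_tag {
       unsigned int gp_offset;        (offset into the GPR save area)
       unsigned int fp_offset;        (offset into the SSE save area)
       void *overflow_arg_area;       (arguments passed on the stack)
       void *reg_save_area;           (the register save area itself)
     } __builtin_va_list[1];  */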
9244 /* Setup the builtin va_list data type and for 64-bit the additional
9245 calling convention specific va_list data types. */
9247 static tree
9248 ix86_build_builtin_va_list (void)
9250 if (TARGET_64BIT)
9252 /* Initialize ABI specific va_list builtin types.
9254 In lto1, we can encounter two va_list types:
9255 - one as a result of the type-merge across TUs, and
9256 - the one constructed here.
9257 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9258 a type identity check in canonical_va_list_type based on
9259 TYPE_MAIN_VARIANT (which we used to have) will not work.
9260 Instead, we tag each va_list_type_node with its unique attribute, and
9261 look for the attribute in the type identity check in
9262 canonical_va_list_type.
9264 Tagging sysv_va_list_type_node directly with the attribute is
9265 problematic since it's an array of one record, which will degrade into a
9266 pointer to record when used as parameter (see build_va_arg comments for
9267 an example), dropping the attribute in the process. So we tag the
9268 record instead. */
9270 /* For SYSV_ABI we use an array of one record. */
9271 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9273 /* For MS_ABI we use plain pointer to argument area. */
9274 tree char_ptr_type = build_pointer_type (char_type_node);
9275 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9276 TYPE_ATTRIBUTES (char_ptr_type));
9277 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9279 return ((ix86_abi == MS_ABI)
9280 ? ms_va_list_type_node
9281 : sysv_va_list_type_node);
9283 else
9285 /* For i386 we use plain pointer to argument area. */
9286 return build_pointer_type (char_type_node);
9290 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9292 static void
9293 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9295 rtx save_area, mem;
9296 alias_set_type set;
9297 int i, max;
9299 /* GPR size of varargs save area. */
9300 if (cfun->va_list_gpr_size)
9301 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9302 else
9303 ix86_varargs_gpr_size = 0;
9305 /* FPR size of varargs save area. We don't need it if we don't pass
9306 anything in SSE registers. */
9307 if (TARGET_SSE && cfun->va_list_fpr_size)
9308 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9309 else
9310 ix86_varargs_fpr_size = 0;
9312 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9313 return;
9315 save_area = frame_pointer_rtx;
9316 set = get_varargs_alias_set ();
9318 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9319 if (max > X86_64_REGPARM_MAX)
9320 max = X86_64_REGPARM_MAX;
9322 for (i = cum->regno; i < max; i++)
9324 mem = gen_rtx_MEM (word_mode,
9325 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9326 MEM_NOTRAP_P (mem) = 1;
9327 set_mem_alias_set (mem, set);
9328 emit_move_insn (mem,
9329 gen_rtx_REG (word_mode,
9330 x86_64_int_parameter_registers[i]));
9333 if (ix86_varargs_fpr_size)
9335 machine_mode smode;
9336 rtx_code_label *label;
9337 rtx test;
9339 /* Now emit code to save SSE registers. The AX parameter contains number
9340 of SSE parameter registers used to call this function, though all we
9341 actually check here is the zero/non-zero status. */
9343 label = gen_label_rtx ();
9344 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9345 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9346 label));
9348 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9349 we used movdqa (i.e. TImode) instead? Perhaps even better would
9350 be if we could determine the real mode of the data, via a hook
9351 into pass_stdarg. Ignore all that for now. */
9352 smode = V4SFmode;
9353 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9354 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9356 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9357 if (max > X86_64_SSE_REGPARM_MAX)
9358 max = X86_64_SSE_REGPARM_MAX;
9360 for (i = cum->sse_regno; i < max; ++i)
9362 mem = plus_constant (Pmode, save_area,
9363 i * 16 + ix86_varargs_gpr_size);
9364 mem = gen_rtx_MEM (smode, mem);
9365 MEM_NOTRAP_P (mem) = 1;
9366 set_mem_alias_set (mem, set);
9367 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9369 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9372 emit_label (label);
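/* Resulting save area layout (relative to the frame pointer): six 8 byte
   GPR slots for %rdi, %rsi, %rdx, %rcx, %r8 and %r9 at offsets 0..40,
   followed by eight 16 byte SSE slots for %xmm0-%xmm7 starting at offset
   ix86_varargs_gpr_size (48 when the whole GPR area is kept).  */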
9376 static void
9377 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9379 alias_set_type set = get_varargs_alias_set ();
9380 int i;
9382 /* Reset to zero, as there might be a sysv va_arg used
9383 before. */
9384 ix86_varargs_gpr_size = 0;
9385 ix86_varargs_fpr_size = 0;
9387 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9389 rtx reg, mem;
9391 mem = gen_rtx_MEM (Pmode,
9392 plus_constant (Pmode, virtual_incoming_args_rtx,
9393 i * UNITS_PER_WORD));
9394 MEM_NOTRAP_P (mem) = 1;
9395 set_mem_alias_set (mem, set);
9397 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9398 emit_move_insn (mem, reg);
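/* Implement TARGET_SETUP_INCOMING_VARARGS: for stdarg functions advance
   past the last named argument, then dispatch to the ABI-specific worker
   above.  */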
9402 static void
9403 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9404 tree type, int *, int no_rtl)
9406 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9407 CUMULATIVE_ARGS next_cum;
9408 tree fntype;
9410 /* This argument doesn't appear to be used anymore, which is good,
9411 because the old code here didn't suppress rtl generation. */
9412 gcc_assert (!no_rtl);
9414 if (!TARGET_64BIT)
9415 return;
9417 fntype = TREE_TYPE (current_function_decl);
9419 /* For varargs, we do not want to skip the dummy va_dcl argument.
9420 For stdargs, we do want to skip the last named argument. */
9421 next_cum = *cum;
9422 if (stdarg_p (fntype))
9423 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9424 true);
9426 if (cum->call_abi == MS_ABI)
9427 setup_incoming_varargs_ms_64 (&next_cum);
9428 else
9429 setup_incoming_varargs_64 (&next_cum);
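/* Worker for the incoming-vararg bounds setup (MPX): when the function is
   instrumented, store the incoming bounds for each named integer argument
   register spilled to the register save area; the bounds come from the
   bound registers, or via BNDLDX once those are exhausted.  */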
9432 static void
9433 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9434 machine_mode mode,
9435 tree type,
9436 int *pretend_size ATTRIBUTE_UNUSED,
9437 int no_rtl)
9439 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9440 CUMULATIVE_ARGS next_cum;
9441 tree fntype;
9442 rtx save_area;
9443 int bnd_reg, i, max;
9445 gcc_assert (!no_rtl);
9447 /* Do nothing if we use plain pointer to argument area. */
9448 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9449 return;
9451 fntype = TREE_TYPE (current_function_decl);
9453 /* For varargs, we do not want to skip the dummy va_dcl argument.
9454 For stdargs, we do want to skip the last named argument. */
9455 next_cum = *cum;
9456 if (stdarg_p (fntype))
9457 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9458 true);
9459 save_area = frame_pointer_rtx;
9461 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9462 if (max > X86_64_REGPARM_MAX)
9463 max = X86_64_REGPARM_MAX;
9465 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9466 if (chkp_function_instrumented_p (current_function_decl))
9467 for (i = cum->regno; i < max; i++)
9469 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9470 rtx ptr = gen_rtx_REG (Pmode,
9471 x86_64_int_parameter_registers[i]);
9472 rtx bounds;
9474 if (bnd_reg <= LAST_BND_REG)
9475 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9476 else
9478 rtx ldx_addr =
9479 plus_constant (Pmode, arg_pointer_rtx,
9480 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9481 bounds = gen_reg_rtx (BNDmode);
9482 emit_insn (BNDmode == BND64mode
9483 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9484 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9487 emit_insn (BNDmode == BND64mode
9488 ? gen_bnd64_stx (addr, ptr, bounds)
9489 : gen_bnd32_stx (addr, ptr, bounds));
9491 bnd_reg++;
9496 /* Checks if TYPE is of kind va_list char *. */
9498 static bool
9499 is_va_list_char_pointer (tree type)
9501 tree canonic;
9503 /* For 32-bit it is always true. */
9504 if (!TARGET_64BIT)
9505 return true;
9506 canonic = ix86_canonical_va_list_type (type);
9507 return (canonic == ms_va_list_type_node
9508 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9511 /* Implement va_start. */
9513 static void
9514 ix86_va_start (tree valist, rtx nextarg)
9516 HOST_WIDE_INT words, n_gpr, n_fpr;
9517 tree f_gpr, f_fpr, f_ovf, f_sav;
9518 tree gpr, fpr, ovf, sav, t;
9519 tree type;
9520 rtx ovf_rtx;
9522 if (flag_split_stack
9523 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9525 unsigned int scratch_regno;
9527 /* When we are splitting the stack, we can't refer to the stack
9528 arguments using internal_arg_pointer, because they may be on
9529 the old stack. The split stack prologue will arrange to
9530 leave a pointer to the old stack arguments in a scratch
9531 register, which we here copy to a pseudo-register. The split
9532 stack prologue can't set the pseudo-register directly because
9533 it (the prologue) runs before any registers have been saved. */
9535 scratch_regno = split_stack_prologue_scratch_regno ();
9536 if (scratch_regno != INVALID_REGNUM)
9538 rtx reg;
9539 rtx_insn *seq;
9541 reg = gen_reg_rtx (Pmode);
9542 cfun->machine->split_stack_varargs_pointer = reg;
9544 start_sequence ();
9545 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9546 seq = get_insns ();
9547 end_sequence ();
9549 push_topmost_sequence ();
9550 emit_insn_after (seq, entry_of_function ());
9551 pop_topmost_sequence ();
9555 /* Only 64bit target needs something special. */
9556 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9558 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9559 std_expand_builtin_va_start (valist, nextarg);
9560 else
9562 rtx va_r, next;
9564 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9565 next = expand_binop (ptr_mode, add_optab,
9566 cfun->machine->split_stack_varargs_pointer,
9567 crtl->args.arg_offset_rtx,
9568 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9569 convert_move (va_r, next, 0);
9571 /* Store zero bounds for va_list. */
9572 if (chkp_function_instrumented_p (current_function_decl))
9573 chkp_expand_bounds_reset_for_mem (valist,
9574 make_tree (TREE_TYPE (valist),
9575 next));
9578 return;
9581 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9582 f_fpr = DECL_CHAIN (f_gpr);
9583 f_ovf = DECL_CHAIN (f_fpr);
9584 f_sav = DECL_CHAIN (f_ovf);
9586 valist = build_simple_mem_ref (valist);
9587 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9588 /* The following should be folded into the MEM_REF offset. */
9589 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9590 f_gpr, NULL_TREE);
9591 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9592 f_fpr, NULL_TREE);
9593 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9594 f_ovf, NULL_TREE);
9595 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9596 f_sav, NULL_TREE);
9598 /* Count number of gp and fp argument registers used. */
9599 words = crtl->args.info.words;
9600 n_gpr = crtl->args.info.regno;
9601 n_fpr = crtl->args.info.sse_regno;
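/* In the register save area the GPRs come first, 8 bytes each, followed
   by the SSE registers, 16 bytes each; hence gpr_offset = n_gpr * 8 and
   fpr_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16 below.  */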
9603 if (cfun->va_list_gpr_size)
9605 type = TREE_TYPE (gpr);
9606 t = build2 (MODIFY_EXPR, type,
9607 gpr, build_int_cst (type, n_gpr * 8));
9608 TREE_SIDE_EFFECTS (t) = 1;
9609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9612 if (TARGET_SSE && cfun->va_list_fpr_size)
9614 type = TREE_TYPE (fpr);
9615 t = build2 (MODIFY_EXPR, type, fpr,
9616 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9617 TREE_SIDE_EFFECTS (t) = 1;
9618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9621 /* Find the overflow area. */
9622 type = TREE_TYPE (ovf);
9623 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9624 ovf_rtx = crtl->args.internal_arg_pointer;
9625 else
9626 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9627 t = make_tree (type, ovf_rtx);
9628 if (words != 0)
9629 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9631 /* Store zero bounds for overflow area pointer. */
9632 if (chkp_function_instrumented_p (current_function_decl))
9633 chkp_expand_bounds_reset_for_mem (ovf, t);
9635 t = build2 (MODIFY_EXPR, type, ovf, t);
9636 TREE_SIDE_EFFECTS (t) = 1;
9637 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9639 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9641 /* Find the register save area.
9642 The function prologue saves it right above the stack frame. */
9643 type = TREE_TYPE (sav);
9644 t = make_tree (type, frame_pointer_rtx);
9645 if (!ix86_varargs_gpr_size)
9646 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9648 /* Store zero bounds for save area pointer. */
9649 if (chkp_function_instrumented_p (current_function_decl))
9650 chkp_expand_bounds_reset_for_mem (sav, t);
9652 t = build2 (MODIFY_EXPR, type, sav, t);
9653 TREE_SIDE_EFFECTS (t) = 1;
9654 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9658 /* Implement va_arg. */
9660 static tree
9661 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9662 gimple_seq *post_p)
9664 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9665 tree f_gpr, f_fpr, f_ovf, f_sav;
9666 tree gpr, fpr, ovf, sav, t;
9667 int size, rsize;
9668 tree lab_false, lab_over = NULL_TREE;
9669 tree addr, t2;
9670 rtx container;
9671 int indirect_p = 0;
9672 tree ptrtype;
9673 machine_mode nat_mode;
9674 unsigned int arg_boundary;
9676 /* Only 64bit target needs something special. */
9677 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9678 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9680 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9681 f_fpr = DECL_CHAIN (f_gpr);
9682 f_ovf = DECL_CHAIN (f_fpr);
9683 f_sav = DECL_CHAIN (f_ovf);
9685 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9686 valist, f_gpr, NULL_TREE);
9688 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9689 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9690 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9692 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9693 if (indirect_p)
9694 type = build_pointer_type (type);
9695 size = int_size_in_bytes (type);
9696 rsize = CEIL (size, UNITS_PER_WORD);
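/* RSIZE is the value's size in stack words; it is used below to advance
   the overflow-area pointer past the value.  */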
9698 nat_mode = type_natural_mode (type, NULL, false);
9699 switch (nat_mode)
9701 case E_V8SFmode:
9702 case E_V8SImode:
9703 case E_V32QImode:
9704 case E_V16HImode:
9705 case E_V4DFmode:
9706 case E_V4DImode:
9707 case E_V16SFmode:
9708 case E_V16SImode:
9709 case E_V64QImode:
9710 case E_V32HImode:
9711 case E_V8DFmode:
9712 case E_V8DImode:
9713 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9714 if (!TARGET_64BIT_MS_ABI)
9716 container = NULL;
9717 break;
9719 /* FALLTHRU */
9721 default:
9722 container = construct_container (nat_mode, TYPE_MODE (type),
9723 type, 0, X86_64_REGPARM_MAX,
9724 X86_64_SSE_REGPARM_MAX, intreg,
9725 0);
9726 break;
9729 /* Pull the value out of the saved registers. */
9731 addr = create_tmp_var (ptr_type_node, "addr");
9733 if (container)
9735 int needed_intregs, needed_sseregs;
9736 bool need_temp;
9737 tree int_addr, sse_addr;
9739 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9740 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9742 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9744 need_temp = (!REG_P (container)
9745 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9746 || TYPE_ALIGN (type) > 128));
9748 /* If we are passing a structure, verify that it is a consecutive block
9749 on the register save area. If not, we need to do moves. */
9750 if (!need_temp && !REG_P (container))
9752 /* Verify that all registers are strictly consecutive. */
9753 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9755 int i;
9757 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9759 rtx slot = XVECEXP (container, 0, i);
9760 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9761 || INTVAL (XEXP (slot, 1)) != i * 16)
9762 need_temp = true;
9765 else
9767 int i;
9769 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9771 rtx slot = XVECEXP (container, 0, i);
9772 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9773 || INTVAL (XEXP (slot, 1)) != i * 8)
9774 need_temp = true;
9778 if (!need_temp)
9780 int_addr = addr;
9781 sse_addr = addr;
9783 else
9785 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9786 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9789 /* First ensure that we fit completely in registers. */
9790 if (needed_intregs)
9792 t = build_int_cst (TREE_TYPE (gpr),
9793 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9794 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9795 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9796 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9797 gimplify_and_add (t, pre_p);
9799 if (needed_sseregs)
9801 t = build_int_cst (TREE_TYPE (fpr),
9802 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9803 + X86_64_REGPARM_MAX * 8);
9804 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9805 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9806 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9807 gimplify_and_add (t, pre_p);
9810 /* Compute index to start of area used for integer regs. */
9811 if (needed_intregs)
9813 /* int_addr = gpr + sav; */
9814 t = fold_build_pointer_plus (sav, gpr);
9815 gimplify_assign (int_addr, t, pre_p);
9817 if (needed_sseregs)
9819 /* sse_addr = fpr + sav; */
9820 t = fold_build_pointer_plus (sav, fpr);
9821 gimplify_assign (sse_addr, t, pre_p);
9823 if (need_temp)
9825 int i, prev_size = 0;
9826 tree temp = create_tmp_var (type, "va_arg_tmp");
9828 /* addr = &temp; */
9829 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9830 gimplify_assign (addr, t, pre_p);
9832 for (i = 0; i < XVECLEN (container, 0); i++)
9834 rtx slot = XVECEXP (container, 0, i);
9835 rtx reg = XEXP (slot, 0);
9836 machine_mode mode = GET_MODE (reg);
9837 tree piece_type;
9838 tree addr_type;
9839 tree daddr_type;
9840 tree src_addr, src;
9841 int src_offset;
9842 tree dest_addr, dest;
9843 int cur_size = GET_MODE_SIZE (mode);
9845 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9846 prev_size = INTVAL (XEXP (slot, 1));
9847 if (prev_size + cur_size > size)
9849 cur_size = size - prev_size;
9850 unsigned int nbits = cur_size * BITS_PER_UNIT;
9851 if (!int_mode_for_size (nbits, 1).exists (&mode))
9852 mode = QImode;
9854 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9855 if (mode == GET_MODE (reg))
9856 addr_type = build_pointer_type (piece_type);
9857 else
9858 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9859 true);
9860 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9861 true);
9863 if (SSE_REGNO_P (REGNO (reg)))
9865 src_addr = sse_addr;
9866 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9868 else
9870 src_addr = int_addr;
9871 src_offset = REGNO (reg) * 8;
9873 src_addr = fold_convert (addr_type, src_addr);
9874 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9876 dest_addr = fold_convert (daddr_type, addr);
9877 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9878 if (cur_size == GET_MODE_SIZE (mode))
9880 src = build_va_arg_indirect_ref (src_addr);
9881 dest = build_va_arg_indirect_ref (dest_addr);
9883 gimplify_assign (dest, src, pre_p);
9885 else
9887 tree copy
9888 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9889 3, dest_addr, src_addr,
9890 size_int (cur_size));
9891 gimplify_and_add (copy, pre_p);
9893 prev_size += cur_size;
9897 if (needed_intregs)
9899 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9900 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9901 gimplify_assign (gpr, t, pre_p);
9904 if (needed_sseregs)
9906 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9907 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9908 gimplify_assign (unshare_expr (fpr), t, pre_p);
9911 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9913 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9916 /* ... otherwise out of the overflow area. */
9918 /* When we align a parameter on the stack for the caller, if its
9919 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9920 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
9921 with the caller. */
9922 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9923 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9924 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9926 /* Care for on-stack alignment if needed. */
9927 if (arg_boundary <= 64 || size == 0)
9928 t = ovf;
9929 else
9931 HOST_WIDE_INT align = arg_boundary / 8;
9932 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9933 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9934 build_int_cst (TREE_TYPE (t), -align));
9937 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9938 gimplify_assign (addr, t, pre_p);
9940 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9941 gimplify_assign (unshare_expr (ovf), t, pre_p);
9943 if (container)
9944 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9946 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9947 addr = fold_convert (ptrtype, addr);
9949 if (indirect_p)
9950 addr = build_va_arg_indirect_ref (addr);
9951 return build_va_arg_indirect_ref (addr);
9954 /* Return true if OPNUM's MEM should be matched
9955 in movabs* patterns. */
9957 bool
9958 ix86_check_movabs (rtx insn, int opnum)
9960 rtx set, mem;
9962 set = PATTERN (insn);
9963 if (GET_CODE (set) == PARALLEL)
9964 set = XVECEXP (set, 0, 0);
9965 gcc_assert (GET_CODE (set) == SET);
9966 mem = XEXP (set, opnum);
9967 while (SUBREG_P (mem))
9968 mem = SUBREG_REG (mem);
9969 gcc_assert (MEM_P (mem));
9970 return volatile_ok || !MEM_VOLATILE_P (mem);
9973 /* Return false if INSN contains a MEM with a non-default address space. */
9974 bool
9975 ix86_check_no_addr_space (rtx insn)
9977 subrtx_var_iterator::array_type array;
9978 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
9980 rtx x = *iter;
9981 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
9982 return false;
9984 return true;
9987 /* Initialize the table of extra 80387 mathematical constants. */
9989 static void
9990 init_ext_80387_constants (void)
9992 static const char * cst[5] =
9994 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9995 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9996 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9997 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9998 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10000 int i;
10002 for (i = 0; i < 5; i++)
10004 real_from_string (&ext_80387_constants_table[i], cst[i]);
10005 /* Ensure each constant is rounded to XFmode precision. */
10006 real_convert (&ext_80387_constants_table[i],
10007 XFmode, &ext_80387_constants_table[i]);
10010 ext_80387_constants_init = 1;
10013 /* Return non-zero if the constant is something that
10014 can be loaded with a special instruction. */
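/* The returned value encodes the constant: -1 if X is not an 80387
   float constant at all, 0 if no special load exists, 1 for fldz,
   2 for fld1, 3-7 for the extended constants (fldlg2, fldln2, fldl2e,
   fldl2t, fldpi), and 8/9 for -0.0 and -1.0, which are split into
   fldz;fchs and fld1;fchs sequences.  */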
10016 int
10017 standard_80387_constant_p (rtx x)
10019 machine_mode mode = GET_MODE (x);
10021 const REAL_VALUE_TYPE *r;
10023 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10024 return -1;
10026 if (x == CONST0_RTX (mode))
10027 return 1;
10028 if (x == CONST1_RTX (mode))
10029 return 2;
10031 r = CONST_DOUBLE_REAL_VALUE (x);
10033 /* For XFmode constants, try to find a special 80387 instruction when
10034 optimizing for size or on those CPUs that benefit from them. */
10035 if (mode == XFmode
10036 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10038 int i;
10040 if (! ext_80387_constants_init)
10041 init_ext_80387_constants ();
10043 for (i = 0; i < 5; i++)
10044 if (real_identical (r, &ext_80387_constants_table[i]))
10045 return i + 3;
10048 /* Load of the constant -0.0 or -1.0 will be split as
10049 fldz;fchs or fld1;fchs sequence. */
10050 if (real_isnegzero (r))
10051 return 8;
10052 if (real_identical (r, &dconstm1))
10053 return 9;
10055 return 0;
10058 /* Return the opcode of the special instruction to be used to load
10059 the constant X. */
10061 const char *
10062 standard_80387_constant_opcode (rtx x)
10064 switch (standard_80387_constant_p (x))
10066 case 1:
10067 return "fldz";
10068 case 2:
10069 return "fld1";
10070 case 3:
10071 return "fldlg2";
10072 case 4:
10073 return "fldln2";
10074 case 5:
10075 return "fldl2e";
10076 case 6:
10077 return "fldl2t";
10078 case 7:
10079 return "fldpi";
10080 case 8:
10081 case 9:
10082 return "#";
10083 default:
10084 gcc_unreachable ();
10088 /* Return the CONST_DOUBLE representing the 80387 constant that is
10089 loaded by the specified special instruction. The argument IDX
10090 matches the return value from standard_80387_constant_p. */
10092 rtx
10093 standard_80387_constant_rtx (int idx)
10095 int i;
10097 if (! ext_80387_constants_init)
10098 init_ext_80387_constants ();
10100 switch (idx)
10102 case 3:
10103 case 4:
10104 case 5:
10105 case 6:
10106 case 7:
10107 i = idx - 3;
10108 break;
10110 default:
10111 gcc_unreachable ();
10114 return const_double_from_real_value (ext_80387_constants_table[i],
10115 XFmode);
10118 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
10119 in supported SSE/AVX vector mode. */
10121 int
10122 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10124 machine_mode mode;
10126 if (!TARGET_SSE)
10127 return 0;
10129 mode = GET_MODE (x);
10131 if (x == const0_rtx || const0_operand (x, mode))
10132 return 1;
10134 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10136 /* VOIDmode integer constant, get mode from the predicate. */
10137 if (mode == VOIDmode)
10138 mode = pred_mode;
10140 switch (GET_MODE_SIZE (mode))
10142 case 64:
10143 if (TARGET_AVX512F)
10144 return 2;
10145 break;
10146 case 32:
10147 if (TARGET_AVX2)
10148 return 2;
10149 break;
10150 case 16:
10151 if (TARGET_SSE2)
10152 return 2;
10153 break;
10154 case 0:
10155 /* VOIDmode */
10156 gcc_unreachable ();
10157 default:
10158 break;
10162 return 0;
10165 /* Return the opcode of the special instruction to be used to load
10166 the constant X. */
10168 const char *
10169 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10171 machine_mode mode;
10173 gcc_assert (TARGET_SSE);
10175 mode = GET_MODE (x);
10177 if (x == const0_rtx || const0_operand (x, mode))
10179 switch (get_attr_mode (insn))
10181 case MODE_XI:
10182 return "vpxord\t%g0, %g0, %g0";
10183 case MODE_OI:
10184 return (TARGET_AVX512VL
10185 ? "vpxord\t%x0, %x0, %x0"
10186 : "vpxor\t%x0, %x0, %x0");
10187 case MODE_TI:
10188 return (TARGET_AVX512VL
10189 ? "vpxord\t%t0, %t0, %t0"
10190 : "%vpxor\t%0, %d0");
10192 case MODE_V8DF:
10193 return (TARGET_AVX512DQ
10194 ? "vxorpd\t%g0, %g0, %g0"
10195 : "vpxorq\t%g0, %g0, %g0");
10196 case MODE_V4DF:
10197 return "vxorpd\t%x0, %x0, %x0";
10198 case MODE_V2DF:
10199 return "%vxorpd\t%0, %d0";
10201 case MODE_V16SF:
10202 return (TARGET_AVX512DQ
10203 ? "vxorps\t%g0, %g0, %g0"
10204 : "vpxord\t%g0, %g0, %g0");
10205 case MODE_V8SF:
10206 return "vxorps\t%x0, %x0, %x0";
10207 case MODE_V4SF:
10208 return "%vxorps\t%0, %d0";
10210 default:
10211 gcc_unreachable ();
10214 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10216 enum attr_mode insn_mode = get_attr_mode (insn);
10218 switch (insn_mode)
10220 case MODE_XI:
10221 case MODE_V8DF:
10222 case MODE_V16SF:
10223 gcc_assert (TARGET_AVX512F);
10224 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10226 case MODE_OI:
10227 case MODE_V4DF:
10228 case MODE_V8SF:
10229 gcc_assert (TARGET_AVX2);
10230 /* FALLTHRU */
10231 case MODE_TI:
10232 case MODE_V2DF:
10233 case MODE_V4SF:
10234 gcc_assert (TARGET_SSE2);
10235 return (TARGET_AVX
10236 ? "vpcmpeqd\t%0, %0, %0"
10237 : "pcmpeqd\t%0, %0");
10239 default:
10240 gcc_unreachable ();
10244 gcc_unreachable ();
10247 /* Returns true if INSN can be transformed from a memory load
10248 to a supported FP constant load. */
10250 bool
10251 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10253 rtx src = find_constant_src (insn);
10255 gcc_assert (REG_P (dst));
10257 if (src == NULL
10258 || (SSE_REGNO_P (REGNO (dst))
10259 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10260 || (STACK_REGNO_P (REGNO (dst))
10261 && standard_80387_constant_p (src) < 1))
10262 return false;
10264 return true;
10267 /* Returns true if OP contains a symbol reference. */
10269 bool
10270 symbolic_reference_mentioned_p (rtx op)
10272 const char *fmt;
10273 int i;
10275 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10276 return true;
10278 fmt = GET_RTX_FORMAT (GET_CODE (op));
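/* Walk the operand format: 'E' is a vector of sub-expressions, 'e' a
   single sub-expression; recurse into both looking for a SYMBOL_REF or
   LABEL_REF.  */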
10279 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10281 if (fmt[i] == 'E')
10283 int j;
10285 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10286 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10287 return true;
10290 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10291 return true;
10294 return false;
10297 /* Return true if it is appropriate to emit `ret' instructions in the
10298 body of a function. Do this only if the epilogue is simple, needing a
10299 couple of insns. Prior to reloading, we can't tell how many registers
10300 must be saved, so return false then. Return false if there is no frame
10301 marker to de-allocate. */
10303 bool
10304 ix86_can_use_return_insn_p (void)
10306 struct ix86_frame frame;
10308 if (ix86_function_naked (current_function_decl))
10309 return false;
10311 /* Don't use `ret' instruction in interrupt handler. */
10312 if (! reload_completed
10313 || frame_pointer_needed
10314 || cfun->machine->func_type != TYPE_NORMAL)
10315 return 0;
10317 /* Don't allow more than 32k pop, since that's all we can do
10318 with one instruction. */
10319 if (crtl->args.pops_args && crtl->args.size >= 32768)
10320 return 0;
10322 frame = cfun->machine->frame;
10323 return (frame.stack_pointer_offset == UNITS_PER_WORD
10324 && (frame.nregs + frame.nsseregs) == 0);
10327 /* Value should be nonzero if functions must have frame pointers.
10328 Zero means the frame pointer need not be set up (and parms may
10329 be accessed via the stack pointer) in functions that seem suitable. */
10331 static bool
10332 ix86_frame_pointer_required (void)
10334 /* If we accessed previous frames, then the generated code expects
10335 to be able to access the saved ebp value in our frame. */
10336 if (cfun->machine->accesses_prev_frame)
10337 return true;
10339 /* Several x86 OSes need a frame pointer for other reasons,
10340 usually pertaining to setjmp. */
10341 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10342 return true;
10344 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
10345 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10346 return true;
10348 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
10349 stack allocation is 4GB. */
10350 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10351 return true;
10353 /* SSE saves require a frame pointer when the stack is misaligned. */
10354 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10355 return true;
10357 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10358 turns off the frame pointer by default. Turn it back on now if
10359 we've not got a leaf function. */
10360 if (TARGET_OMIT_LEAF_FRAME_POINTER
10361 && (!crtl->is_leaf
10362 || ix86_current_function_calls_tls_descriptor))
10363 return true;
10365 if (crtl->profile && !flag_fentry)
10366 return true;
10368 return false;
10371 /* Record that the current function accesses previous call frames. */
10373 void
10374 ix86_setup_frame_addresses (void)
10376 cfun->machine->accesses_prev_frame = 1;
10379 #ifndef USE_HIDDEN_LINKONCE
10380 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10381 # define USE_HIDDEN_LINKONCE 1
10382 # else
10383 # define USE_HIDDEN_LINKONCE 0
10384 # endif
10385 #endif
10387 static int pic_labels_used;
10389 /* Fills in the label name that should be used for a pc thunk for
10390 the given register. */
10392 static void
10393 get_pc_thunk_name (char name[32], unsigned int regno)
10395 gcc_assert (!TARGET_64BIT);
10397 if (USE_HIDDEN_LINKONCE)
10398 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10399 else
10400 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10404 /* This function generates code for -fpic that loads %ebx with
10405 the return address of the caller and then returns. */
10407 static void
10408 ix86_code_end (void)
10410 rtx xops[2];
10411 int regno;
10413 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
10415 char name[32];
10416 tree decl;
10418 if (!(pic_labels_used & (1 << regno)))
10419 continue;
10421 get_pc_thunk_name (name, regno);
10423 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10424 get_identifier (name),
10425 build_function_type_list (void_type_node, NULL_TREE));
10426 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10427 NULL_TREE, void_type_node);
10428 TREE_PUBLIC (decl) = 1;
10429 TREE_STATIC (decl) = 1;
10430 DECL_IGNORED_P (decl) = 1;
10432 #if TARGET_MACHO
10433 if (TARGET_MACHO)
10435 switch_to_section (darwin_sections[picbase_thunk_section]);
10436 fputs ("\t.weak_definition\t", asm_out_file);
10437 assemble_name (asm_out_file, name);
10438 fputs ("\n\t.private_extern\t", asm_out_file);
10439 assemble_name (asm_out_file, name);
10440 putc ('\n', asm_out_file);
10441 ASM_OUTPUT_LABEL (asm_out_file, name);
10442 DECL_WEAK (decl) = 1;
10444 else
10445 #endif
10446 if (USE_HIDDEN_LINKONCE)
10448 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10450 targetm.asm_out.unique_section (decl, 0);
10451 switch_to_section (get_named_section (decl, NULL, 0));
10453 targetm.asm_out.globalize_label (asm_out_file, name);
10454 fputs ("\t.hidden\t", asm_out_file);
10455 assemble_name (asm_out_file, name);
10456 putc ('\n', asm_out_file);
10457 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10459 else
10461 switch_to_section (text_section);
10462 ASM_OUTPUT_LABEL (asm_out_file, name);
10465 DECL_INITIAL (decl) = make_node (BLOCK);
10466 current_function_decl = decl;
10467 allocate_struct_function (decl, false);
10468 init_function_start (decl);
10469 /* We're about to hide the function body from callees of final_* by
10470 emitting it directly; tell them we're a thunk, if they care. */
10471 cfun->is_thunk = true;
10472 first_function_block_is_cold = false;
10473 /* Make sure unwind info is emitted for the thunk if needed. */
10474 final_start_function (emit_barrier (), asm_out_file, 1);
10476 /* Pad stack IP move with 4 instructions (two NOPs count
10477 as one instruction). */
10478 if (TARGET_PAD_SHORT_FUNCTION)
10480 int i = 8;
10482 while (i--)
10483 fputs ("\tnop\n", asm_out_file);
10486 xops[0] = gen_rtx_REG (Pmode, regno);
10487 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10488 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
10489 output_asm_insn ("%!ret", NULL);
10490 final_end_function ();
10491 init_insn_lengths ();
10492 free_after_compilation (cfun);
10493 set_cfun (NULL);
10494 current_function_decl = NULL;
10497 if (flag_split_stack)
10498 file_end_indicate_split_stack ();
10501 /* Emit code for the SET_GOT patterns. */
10503 const char *
10504 output_set_got (rtx dest, rtx label)
10506 rtx xops[3];
10508 xops[0] = dest;
10510 if (TARGET_VXWORKS_RTP && flag_pic)
10512 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
10513 xops[2] = gen_rtx_MEM (Pmode,
10514 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
10515 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
10517 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
10518 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
10519 an unadorned address. */
10520 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
10521 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
10522 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
10523 return "";
10526 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
10528 if (flag_pic)
10530 char name[32];
10531 get_pc_thunk_name (name, REGNO (dest));
10532 pic_labels_used |= 1 << REGNO (dest);
10534 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
10535 xops[2] = gen_rtx_MEM (QImode, xops[2]);
10536 output_asm_insn ("%!call\t%X2", xops);
10538 #if TARGET_MACHO
10539 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10540 This is what will be referenced by the Mach-O PIC subsystem. */
10541 if (machopic_should_output_picbase_label () || !label)
10542 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10544 /* When we are restoring the pic base at the site of a nonlocal label,
10545 and we decided to emit the pic base above, we will still output a
10546 local label used for calculating the correction offset (even though
10547 the offset will be 0 in that case). */
10548 if (label)
10549 targetm.asm_out.internal_label (asm_out_file, "L",
10550 CODE_LABEL_NUMBER (label));
10551 #endif
10553 else
10555 if (TARGET_MACHO)
10556 /* We don't need a pic base, we're not producing pic. */
10557 gcc_unreachable ();
10559 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
10560 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
10561 targetm.asm_out.internal_label (asm_out_file, "L",
10562 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
10565 if (!TARGET_MACHO)
10566 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10568 return "";
10571 /* Generate a "push" pattern for input ARG. */
10573 static rtx
10574 gen_push (rtx arg)
10576 struct machine_function *m = cfun->machine;
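/* A push moves the stack pointer down by one word; track that in the
   frame state, including the CFA offset when the CFA is still computed
   from the stack pointer.  */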
10578 if (m->fs.cfa_reg == stack_pointer_rtx)
10579 m->fs.cfa_offset += UNITS_PER_WORD;
10580 m->fs.sp_offset += UNITS_PER_WORD;
10582 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10583 arg = gen_rtx_REG (word_mode, REGNO (arg));
10585 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10586 gen_rtx_PRE_DEC (Pmode,
10587 stack_pointer_rtx)),
10588 arg);
10591 /* Generate a "pop" pattern for input ARG. */
10593 static rtx
10594 gen_pop (rtx arg)
10596 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10597 arg = gen_rtx_REG (word_mode, REGNO (arg));
10599 return gen_rtx_SET (arg,
10600 gen_rtx_MEM (word_mode,
10601 gen_rtx_POST_INC (Pmode,
10602 stack_pointer_rtx)));
10605 /* Return >= 0 if there is an unused call-clobbered register available
10606 for the entire function. */
10608 static unsigned int
10609 ix86_select_alt_pic_regnum (void)
10611 if (ix86_use_pseudo_pic_reg ())
10612 return INVALID_REGNUM;
10614 if (crtl->is_leaf
10615 && !crtl->profile
10616 && !ix86_current_function_calls_tls_descriptor)
10618 int i, drap;
10619 /* Can't use the same register for both PIC and DRAP. */
10620 if (crtl->drap_reg)
10621 drap = REGNO (crtl->drap_reg);
10622 else
10623 drap = -1;
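/* Candidates are the call-clobbered registers %eax, %edx and %ecx
   (hard regs 0-2); pick the first unused one, skipping the DRAP
   register.  */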
10624 for (i = 2; i >= 0; --i)
10625 if (i != drap && !df_regs_ever_live_p (i))
10626 return i;
10629 return INVALID_REGNUM;
10632 /* Return true if REGNO is used by the epilogue. */
10634 bool
10635 ix86_epilogue_uses (int regno)
10637 /* If there are no caller-saved registers, we preserve all registers,
10638 except for MMX and x87 registers which aren't supported when saving
10639 and restoring registers. Don't explicitly save SP register since
10640 it is always preserved. */
10641 return (epilogue_completed
10642 && cfun->machine->no_caller_saved_registers
10643 && !fixed_regs[regno]
10644 && !STACK_REGNO_P (regno)
10645 && !MMX_REGNO_P (regno));
10648 /* Return nonzero if register REGNO can be used as a scratch register
10649 in peephole2. */
10651 static bool
10652 ix86_hard_regno_scratch_ok (unsigned int regno)
10654 /* If there are no caller-saved registers, we can't use any register
10655 as a scratch register after epilogue and use REGNO as scratch
10656 register only if it has been used before to avoid saving and
10657 restoring it. */
10658 return (!cfun->machine->no_caller_saved_registers
10659 || (!epilogue_completed
10660 && df_regs_ever_live_p (regno)));
10663 /* Return true if register class CL should be an additional allocno
10664 class. */
10666 static bool
10667 ix86_additional_allocno_class_p (reg_class_t cl)
10669 return cl == MOD4_SSE_REGS;
10672 /* Return TRUE if we need to save REGNO. */
10674 static bool
10675 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
10677 /* If there are no caller-saved registers, we preserve all registers,
10678 except for MMX and x87 registers which aren't supported when saving
10679 and restoring registers. Don't explicitly save SP register since
10680 it is always preserved. */
10681 if (cfun->machine->no_caller_saved_registers)
10683 /* Don't preserve registers used for function return value. */
10684 rtx reg = crtl->return_rtx;
10685 if (reg)
10687 unsigned int i = REGNO (reg);
10688 unsigned int nregs = REG_NREGS (reg);
10689 while (nregs-- > 0)
10690 if ((i + nregs) == regno)
10691 return false;
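/* Likewise do not preserve the register used to return bounds (MPX).  */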
10693 reg = crtl->return_bnd;
10694 if (reg)
10696 i = REGNO (reg);
10697 nregs = REG_NREGS (reg);
10698 while (nregs-- > 0)
10699 if ((i + nregs) == regno)
10700 return false;
10704 return (df_regs_ever_live_p (regno)
10705 && !fixed_regs[regno]
10706 && !STACK_REGNO_P (regno)
10707 && !MMX_REGNO_P (regno)
10708 && (regno != HARD_FRAME_POINTER_REGNUM
10709 || !frame_pointer_needed));
10712 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10713 && pic_offset_table_rtx)
10715 if (ix86_use_pseudo_pic_reg ())
10717 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10718 _mcount in prologue. */
10719 if (!TARGET_64BIT && flag_pic && crtl->profile)
10720 return true;
10722 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10723 || crtl->profile
10724 || crtl->calls_eh_return
10725 || crtl->uses_const_pool
10726 || cfun->has_nonlocal_label)
10727 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10730 if (crtl->calls_eh_return && maybe_eh_return)
10732 unsigned i;
10733 for (i = 0; ; i++)
10735 unsigned test = EH_RETURN_DATA_REGNO (i);
10736 if (test == INVALID_REGNUM)
10737 break;
10738 if (test == regno)
10739 return true;
10743 if (ignore_outlined && cfun->machine->call_ms2sysv)
10745 unsigned count = cfun->machine->call_ms2sysv_extra_regs
10746 + xlogue_layout::MIN_REGS;
10747 if (xlogue_layout::is_stub_managed_reg (regno, count))
10748 return false;
10751 if (crtl->drap_reg
10752 && regno == REGNO (crtl->drap_reg)
10753 && !cfun->machine->no_drap_save_restore)
10754 return true;
10756 return (df_regs_ever_live_p (regno)
10757 && !call_used_regs[regno]
10758 && !fixed_regs[regno]
10759 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10762 /* Return number of saved general purpose registers. */
10764 static int
10765 ix86_nsaved_regs (void)
10767 int nregs = 0;
10768 int regno;
10770 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10771 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
10772 nregs ++;
10773 return nregs;
10776 /* Return number of saved SSE registers. */
10778 static int
10779 ix86_nsaved_sseregs (void)
10781 int nregs = 0;
10782 int regno;
10784 if (!TARGET_64BIT_MS_ABI)
10785 return 0;
10786 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10787 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
10788 nregs ++;
10789 return nregs;
10792 /* Given FROM and TO register numbers, say whether this elimination is
10793 allowed. If stack alignment is needed, we can only replace argument
10794 pointer with hard frame pointer, or replace frame pointer with stack
10795 pointer. Otherwise, frame pointer elimination is automatically
10796 handled and all other eliminations are valid. */
10798 static bool
10799 ix86_can_eliminate (const int from, const int to)
10801 if (stack_realign_fp)
10802 return ((from == ARG_POINTER_REGNUM
10803 && to == HARD_FRAME_POINTER_REGNUM)
10804 || (from == FRAME_POINTER_REGNUM
10805 && to == STACK_POINTER_REGNUM));
10806 else
10807 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10810 /* Return the offset between two registers, one to be eliminated, and the other
10811 its replacement, at the start of a routine. */
10813 HOST_WIDE_INT
10814 ix86_initial_elimination_offset (int from, int to)
10816 struct ix86_frame frame = cfun->machine->frame;
10818 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10819 return frame.hard_frame_pointer_offset;
10820 else if (from == FRAME_POINTER_REGNUM
10821 && to == HARD_FRAME_POINTER_REGNUM)
10822 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10823 else
10825 gcc_assert (to == STACK_POINTER_REGNUM);
10827 if (from == ARG_POINTER_REGNUM)
10828 return frame.stack_pointer_offset;
10830 gcc_assert (from == FRAME_POINTER_REGNUM);
10831 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10835 /* In a dynamically-aligned function, we can't know the offset from
10836 stack pointer to frame pointer, so we must ensure that setjmp
10837 eliminates fp against the hard fp (%ebp) rather than trying to
10838 index from %esp up to the top of the frame across a gap that is
10839 of unknown (at compile-time) size. */
10840 static rtx
10841 ix86_builtin_setjmp_frame_value (void)
10843 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10846 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
10847 static void warn_once_call_ms2sysv_xlogues (const char *feature)
10849 static bool warned_once = false;
10850 if (!warned_once)
10852 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
10853 feature);
10854 warned_once = true;
10858 /* When using -fsplit-stack, the allocation routines set a field in
10859 the TCB to the bottom of the stack plus this much space, measured
10860 in bytes. */
10862 #define SPLIT_STACK_AVAILABLE 256
10864 /* Fill structure ix86_frame about frame of currently computed function. */
10866 static void
10867 ix86_compute_frame_layout (void)
10869 struct ix86_frame *frame = &cfun->machine->frame;
10870 struct machine_function *m = cfun->machine;
10871 unsigned HOST_WIDE_INT stack_alignment_needed;
10872 HOST_WIDE_INT offset;
10873 unsigned HOST_WIDE_INT preferred_alignment;
10874 HOST_WIDE_INT size = get_frame_size ();
10875 HOST_WIDE_INT to_allocate;
10877 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
10878 * ms_abi functions that call a sysv function. We now need to prune away
10879 * cases where it should be disabled. */
10880 if (TARGET_64BIT && m->call_ms2sysv)
10882 gcc_assert (TARGET_64BIT_MS_ABI);
10883 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
10884 gcc_assert (!TARGET_SEH);
10885 gcc_assert (TARGET_SSE);
10886 gcc_assert (!ix86_using_red_zone ());
10888 if (crtl->calls_eh_return)
10890 gcc_assert (!reload_completed);
10891 m->call_ms2sysv = false;
10892 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
10895 else if (ix86_static_chain_on_stack)
10897 gcc_assert (!reload_completed);
10898 m->call_ms2sysv = false;
10899 warn_once_call_ms2sysv_xlogues ("static call chains");
10902 /* Finally, compute which registers the stub will manage. */
10903 else
10905 unsigned count = xlogue_layout::count_stub_managed_regs ();
10906 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
10907 m->call_ms2sysv_pad_in = 0;
10911 frame->nregs = ix86_nsaved_regs ();
10912 frame->nsseregs = ix86_nsaved_sseregs ();
10914 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
10915 except for function prologues, leaf functions and when the default
10916 incoming stack boundary is overridden at the command line or via the
10917 force_align_arg_pointer attribute. */
10918 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10919 && (!crtl->is_leaf || cfun->calls_alloca != 0
10920 || ix86_current_function_calls_tls_descriptor
10921 || ix86_incoming_stack_boundary < 128))
10923 crtl->preferred_stack_boundary = 128;
10924 crtl->stack_alignment_needed = 128;
10927 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10928 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10930 gcc_assert (!size || stack_alignment_needed);
10931 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10932 gcc_assert (preferred_alignment <= stack_alignment_needed);
10934 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
10935 gcc_assert (TARGET_64BIT || !frame->nsseregs);
10936 if (TARGET_64BIT && m->call_ms2sysv)
10938 gcc_assert (stack_alignment_needed >= 16);
10939 gcc_assert (!frame->nsseregs);
10942 /* For SEH we have to limit the amount of code movement into the prologue.
10943 At present we do this via a BLOCKAGE, at which point there's very little
10944 scheduling that can be done, which means that there's very little point
10945 in doing anything except PUSHs. */
10946 if (TARGET_SEH)
10947 m->use_fast_prologue_epilogue = false;
10948 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
10950 int count = frame->nregs;
10951 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10953 /* The fast prologue uses move instead of push to save registers. This
10954 is significantly longer, but also executes faster as modern hardware
10955 can execute the moves in parallel, but can't do that for push/pop.
10957 Be careful about choosing which prologue to emit: when the function
10958 takes many instructions to execute we may use the slow version, as well
10959 as when the function is known to be outside a hot spot (this is known
10960 with profile feedback only). Weight the size of the function by the
10961 number of registers to save, as it is cheap to use one or two push
10962 instructions but very slow to use many of them. */
10963 if (count)
10964 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10965 if (node->frequency < NODE_FREQUENCY_NORMAL
10966 || (flag_branch_probabilities
10967 && node->frequency < NODE_FREQUENCY_HOT))
10968 m->use_fast_prologue_epilogue = false;
10969 else
10970 m->use_fast_prologue_epilogue
10971 = !expensive_function_p (count);
10974 frame->save_regs_using_mov
10975 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
10976 /* If static stack checking is enabled and done with probes,
10977 the registers need to be saved before allocating the frame. */
10978 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10980 /* Skip return address and error code in exception handler. */
10981 offset = INCOMING_FRAME_SP_OFFSET;
10983 /* Skip pushed static chain. */
10984 if (ix86_static_chain_on_stack)
10985 offset += UNITS_PER_WORD;
10987 /* Skip saved base pointer. */
10988 if (frame_pointer_needed)
10989 offset += UNITS_PER_WORD;
10990 frame->hfp_save_offset = offset;
10992 /* The traditional frame pointer location is at the top of the frame. */
10993 frame->hard_frame_pointer_offset = offset;
10995 /* Register save area */
10996 offset += frame->nregs * UNITS_PER_WORD;
10997 frame->reg_save_offset = offset;
10999 /* On SEH target, registers are pushed just before the frame pointer
11000 location. */
11001 if (TARGET_SEH)
11002 frame->hard_frame_pointer_offset = offset;
11004 /* Calculate the size of the va-arg area (not including padding, if any). */
11005 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11007 if (stack_realign_fp)
11009 /* We may need a 16-byte aligned stack for the remainder of the
11010 register save area, but the stack frame for the local function
11011 may require a greater alignment if using AVX/2/512. In order
11012 to avoid wasting space, we first calculate the space needed for
11013 the rest of the register saves, add that to the stack pointer,
11014 and then realign the stack to the boundary of the start of the
11015 frame for the local function. */
11016 HOST_WIDE_INT space_needed = 0;
11017 HOST_WIDE_INT sse_reg_space_needed = 0;
11019 if (TARGET_64BIT)
11021 if (m->call_ms2sysv)
11023 m->call_ms2sysv_pad_in = 0;
11024 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11027 else if (frame->nsseregs)
11028 /* The only ABI that has saved SSE registers (Win64) also has a
11029 16-byte aligned default stack. However, many programs violate
11030 the ABI, and Wine64 forces stack realignment to compensate. */
11031 space_needed = frame->nsseregs * 16;
11033 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11035 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11036 we round anyway to be pedantic. */
11037 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11039 else
11040 space_needed = frame->va_arg_size;
11042 /* Record the allocation size required prior to the realignment AND. */
11043 frame->stack_realign_allocate = space_needed;
11045 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11046 before this point are not directly comparable with values below
11047 this point. Use sp_valid_at to determine if the stack pointer is
11048 valid for a given offset, fp_valid_at for the frame pointer, or
11049 choose_baseaddr to have a base register chosen for you.
11051 Note that the result of (frame->stack_realign_offset
11052 & (stack_alignment_needed - 1)) may not equal zero. */
11053 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11054 frame->stack_realign_offset = offset - space_needed;
11055 frame->sse_reg_save_offset = frame->stack_realign_offset
11056 + sse_reg_space_needed;
11058 else
11060 frame->stack_realign_offset = offset;
11062 if (TARGET_64BIT && m->call_ms2sysv)
11064 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11065 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11068 /* Align and set SSE register save area. */
11069 else if (frame->nsseregs)
11071 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11072 required and the DRAP re-alignment boundary is at least 16 bytes,
11073 then we want the SSE register save area properly aligned. */
11074 if (ix86_incoming_stack_boundary >= 128
11075 || (stack_realign_drap && stack_alignment_needed >= 16))
11076 offset = ROUND_UP (offset, 16);
11077 offset += frame->nsseregs * 16;
11079 frame->sse_reg_save_offset = offset;
11080 offset += frame->va_arg_size;
11083 /* Align start of frame for local function. */
11084 if (m->call_ms2sysv
11085 || frame->va_arg_size != 0
11086 || size != 0
11087 || !crtl->is_leaf
11088 || cfun->calls_alloca
11089 || ix86_current_function_calls_tls_descriptor)
11090 offset = ROUND_UP (offset, stack_alignment_needed);
11092 /* Frame pointer points here. */
11093 frame->frame_pointer_offset = offset;
11095 offset += size;
11097 /* Add outgoing arguments area. Can be skipped if we eliminated
11098 all the function calls as dead code.
11099 Skipping is however impossible when function calls alloca. Alloca
11100 expander assumes that last crtl->outgoing_args_size
11101 of stack frame are unused. */
11102 if (ACCUMULATE_OUTGOING_ARGS
11103 && (!crtl->is_leaf || cfun->calls_alloca
11104 || ix86_current_function_calls_tls_descriptor))
11106 offset += crtl->outgoing_args_size;
11107 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11109 else
11110 frame->outgoing_arguments_size = 0;
11112 /* Align stack boundary. Only needed if we're calling another function
11113 or using alloca. */
11114 if (!crtl->is_leaf || cfun->calls_alloca
11115 || ix86_current_function_calls_tls_descriptor)
11116 offset = ROUND_UP (offset, preferred_alignment);
11118 /* We've reached end of stack frame. */
11119 frame->stack_pointer_offset = offset;
11121 /* Size prologue needs to allocate. */
11122 to_allocate = offset - frame->sse_reg_save_offset;
11124 if ((!to_allocate && frame->nregs <= 1)
11125 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
11126 frame->save_regs_using_mov = false;
11128 if (ix86_using_red_zone ()
11129 && crtl->sp_is_unchanging
11130 && crtl->is_leaf
11131 && !ix86_pc_thunk_call_expanded
11132 && !ix86_current_function_calls_tls_descriptor)
11134 frame->red_zone_size = to_allocate;
11135 if (frame->save_regs_using_mov)
11136 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11137 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11138 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11140 else
11141 frame->red_zone_size = 0;
11142 frame->stack_pointer_offset -= frame->red_zone_size;
11144 /* The SEH frame pointer location is near the bottom of the frame.
11145 This is enforced by the fact that the difference between the
11146 stack pointer and the frame pointer is limited to 240 bytes in
11147 the unwind data structure. */
11148 if (TARGET_SEH)
11150 HOST_WIDE_INT diff;
11152 /* If we can leave the frame pointer where it is, do so. Also, returns
11153 the establisher frame for __builtin_frame_address (0). */
11154 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11155 if (diff <= SEH_MAX_FRAME_SIZE
11156 && (diff > 240 || (diff & 15) != 0)
11157 && !crtl->accesses_prior_frames)
11159 /* Ideally we'd determine what portion of the local stack frame
11160 (within the constraint of the lowest 240) is most heavily used.
11161 But without that complication, simply bias the frame pointer
11162 by 128 bytes so as to maximize the amount of the local stack
11163 frame that is addressable with 8-bit offsets. */
11164 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11169 /* This is semi-inlined memory_address_length, but simplified
11170 since we know that we're always dealing with reg+offset, and
11171 to avoid having to create and discard all that rtl. */
11173 static inline int
11174 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11176 int len = 4;
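/* LEN counts the extra address-encoding bytes: 0, 1 or 4 displacement
   bytes, plus one more when a SIB byte is required; start from the worst
   case of a 32-bit displacement.  */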
11178 if (offset == 0)
11180 /* EBP and R13 cannot be encoded without an offset. */
11181 len = (regno == BP_REG || regno == R13_REG);
11183 else if (IN_RANGE (offset, -128, 127))
11184 len = 1;
11186 /* ESP and R12 must be encoded with a SIB byte. */
11187 if (regno == SP_REG || regno == R12_REG)
11188 len++;
11190 return len;
11193 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11194 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11196 static bool
11197 sp_valid_at (HOST_WIDE_INT cfa_offset)
11199 const struct machine_frame_state &fs = cfun->machine->fs;
11200 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11202 /* Validate that the cfa_offset isn't in a "no-man's land". */
11203 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11204 return false;
11206 return fs.sp_valid;
11209 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
11210 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11212 static inline bool
11213 fp_valid_at (HOST_WIDE_INT cfa_offset)
11215 const struct machine_frame_state &fs = cfun->machine->fs;
11216 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
11218 /* Validate that the cfa_offset isn't in a "no-man's land". */
11219 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
11220 return false;
11222 return fs.fp_valid;
11225 /* Choose a base register based upon alignment requested, speed and/or
11226 size. */
11228 static void
11229 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
11230 HOST_WIDE_INT &base_offset,
11231 unsigned int align_requested, unsigned int *align)
11233 const struct machine_function *m = cfun->machine;
11234 unsigned int hfp_align;
11235 unsigned int drap_align;
11236 unsigned int sp_align;
11237 bool hfp_ok = fp_valid_at (cfa_offset);
11238 bool drap_ok = m->fs.drap_valid;
11239 bool sp_ok = sp_valid_at (cfa_offset);
11241 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
11243 /* Filter out any registers that don't meet the requested alignment
11244 criteria. */
11245 if (align_requested)
11247 if (m->fs.realigned)
11248 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
11249 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
11250 notes (which we would need to use a realigned stack pointer),
11251 so disable on SEH targets. */
11252 else if (m->fs.sp_realigned)
11253 sp_align = crtl->stack_alignment_needed;
11255 hfp_ok = hfp_ok && hfp_align >= align_requested;
11256 drap_ok = drap_ok && drap_align >= align_requested;
11257 sp_ok = sp_ok && sp_align >= align_requested;
11260 if (m->use_fast_prologue_epilogue)
11262 /* Choose the base register most likely to allow the most scheduling
11263 opportunities. Generally FP is valid throughout the function,
11264 while DRAP must be reloaded within the epilogue. But choose either
11265 over the SP due to increased encoding size. */
11267 if (hfp_ok)
11269 base_reg = hard_frame_pointer_rtx;
11270 base_offset = m->fs.fp_offset - cfa_offset;
11272 else if (drap_ok)
11274 base_reg = crtl->drap_reg;
11275 base_offset = 0 - cfa_offset;
11277 else if (sp_ok)
11279 base_reg = stack_pointer_rtx;
11280 base_offset = m->fs.sp_offset - cfa_offset;
11283 else
11285 HOST_WIDE_INT toffset;
11286 int len = 16, tlen;
11288 /* Choose the base register with the smallest address encoding.
11289 With a tie, choose FP > DRAP > SP. */
11290 if (sp_ok)
11292 base_reg = stack_pointer_rtx;
11293 base_offset = m->fs.sp_offset - cfa_offset;
11294 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11296 if (drap_ok)
11298 toffset = 0 - cfa_offset;
11299 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11300 if (tlen <= len)
11302 base_reg = crtl->drap_reg;
11303 base_offset = toffset;
11304 len = tlen;
11307 if (hfp_ok)
11309 toffset = m->fs.fp_offset - cfa_offset;
11310 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11311 if (tlen <= len)
11313 base_reg = hard_frame_pointer_rtx;
11314 base_offset = toffset;
11315 len = tlen;
11320 /* Set the align return value. */
11321 if (align)
11323 if (base_reg == stack_pointer_rtx)
11324 *align = sp_align;
11325 else if (base_reg == crtl->drap_reg)
11326 *align = drap_align;
11327 else if (base_reg == hard_frame_pointer_rtx)
11328 *align = hfp_align;
11332 /* Return an RTX that points to CFA_OFFSET within the stack frame and
11333 the alignment of the address. If ALIGN is non-null, it should point to
11334 an alignment value (in bits) that is preferred or zero and will
11335 receive the alignment of the base register that was selected,
11336 irrespective of whether or not CFA_OFFSET is a multiple of that
11337 alignment value.
11339 The valid base registers are taken from CFUN->MACHINE->FS. */
11341 static rtx
11342 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
11344 rtx base_reg = NULL;
11345 HOST_WIDE_INT base_offset = 0;
11347 /* If a specific alignment is requested, try to get a base register
11348 with that alignment first. */
11349 if (align && *align)
11350 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
11352 if (!base_reg)
11353 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
11355 gcc_assert (base_reg != NULL);
11356 return plus_constant (Pmode, base_reg, base_offset);
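/* For illustration: a register saved 16 bytes below a valid hard frame
   pointer would typically come back from here as an address of the form
   (plus (reg fp) (const_int -16)); when the computed offset is zero,
   plus_constant simply returns the bare base register.  */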
11359 /* Emit code to save registers in the prologue. */
11361 static void
11362 ix86_emit_save_regs (void)
11364 unsigned int regno;
11365 rtx_insn *insn;
11367 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11368 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11370 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11371 RTX_FRAME_RELATED_P (insn) = 1;
11375 /* Emit a single register save at CFA - CFA_OFFSET. */
11377 static void
11378 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11379 HOST_WIDE_INT cfa_offset)
11381 struct machine_function *m = cfun->machine;
11382 rtx reg = gen_rtx_REG (mode, regno);
11383 rtx mem, addr, base, insn;
11384 unsigned int align = GET_MODE_ALIGNMENT (mode);
11386 addr = choose_baseaddr (cfa_offset, &align);
11387 mem = gen_frame_mem (mode, addr);
11389 /* The location alignment depends upon the base register. */
11390 align = MIN (GET_MODE_ALIGNMENT (mode), align);
11391 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
11392 set_mem_align (mem, align);
11394 insn = emit_insn (gen_rtx_SET (mem, reg));
11395 RTX_FRAME_RELATED_P (insn) = 1;
11397 base = addr;
11398 if (GET_CODE (base) == PLUS)
11399 base = XEXP (base, 0);
11400 gcc_checking_assert (REG_P (base));
11402 /* When saving registers into a re-aligned local stack frame, avoid
11403 any tricky guessing by dwarf2out. */
11404 if (m->fs.realigned)
11406 gcc_checking_assert (stack_realign_drap);
11408 if (regno == REGNO (crtl->drap_reg))
11410 /* A bit of a hack. We force the DRAP register to be saved in
11411 the re-aligned stack frame, which provides us with a copy
11412 of the CFA that will last past the prologue. Install it. */
11413 gcc_checking_assert (cfun->machine->fs.fp_valid);
11414 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11415 cfun->machine->fs.fp_offset - cfa_offset);
11416 mem = gen_rtx_MEM (mode, addr);
11417 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11419 else
11421 /* The frame pointer is a stable reference within the
11422 aligned frame. Use it. */
11423 gcc_checking_assert (cfun->machine->fs.fp_valid);
11424 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11425 cfun->machine->fs.fp_offset - cfa_offset);
11426 mem = gen_rtx_MEM (mode, addr);
11427 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11431 else if (base == stack_pointer_rtx && m->fs.sp_realigned
11432 && cfa_offset >= m->fs.sp_realigned_offset)
11434 gcc_checking_assert (stack_realign_fp);
11435 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11438 /* The memory may not be relative to the current CFA register,
11439 which means that we may need to generate a new pattern for
11440 use by the unwind info. */
11441 else if (base != m->fs.cfa_reg)
11443 addr = plus_constant (Pmode, m->fs.cfa_reg,
11444 m->fs.cfa_offset - cfa_offset);
11445 mem = gen_rtx_MEM (mode, addr);
11446 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11450 /* Emit code to save registers using MOV insns.
11451 First register is stored at CFA - CFA_OFFSET. */
11452 static void
11453 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11455 unsigned int regno;
11457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11458 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11460 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11461 cfa_offset -= UNITS_PER_WORD;
11465 /* Emit code to save SSE registers using MOV insns.
11466 First register is stored at CFA - CFA_OFFSET. */
11467 static void
11468 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11470 unsigned int regno;
11472 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11473 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11475 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11476 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11480 static GTY(()) rtx queued_cfa_restores;
11482 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
11483 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11484 Don't add the note if the previously saved value will be left untouched
11485 within the stack red-zone until return, as unwinders can find the same value
11486 in the register and on the stack. */
11488 static void
11489 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11491 if (!crtl->shrink_wrapped
11492 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11493 return;
11495 if (insn)
11497 add_reg_note (insn, REG_CFA_RESTORE, reg);
11498 RTX_FRAME_RELATED_P (insn) = 1;
11500 else
11501 queued_cfa_restores
11502 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11505 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11507 static void
11508 ix86_add_queued_cfa_restore_notes (rtx insn)
11510 rtx last;
11511 if (!queued_cfa_restores)
11512 return;
11513 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11515 XEXP (last, 1) = REG_NOTES (insn);
11516 REG_NOTES (insn) = queued_cfa_restores;
11517 queued_cfa_restores = NULL_RTX;
11518 RTX_FRAME_RELATED_P (insn) = 1;
11521 /* Expand prologue or epilogue stack adjustment.
11522 The pattern exists to put a dependency on all ebp-based memory accesses.
11523 STYLE should be negative if instructions should be marked as frame related,
11524 zero if %r11 register is live and cannot be freely used and positive
11525 otherwise. */
11527 static rtx
11528 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11529 int style, bool set_cfa)
11531 struct machine_function *m = cfun->machine;
11532 rtx insn;
11533 bool add_frame_related_expr = false;
11535 if (Pmode == SImode)
11536 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11537 else if (x86_64_immediate_operand (offset, DImode))
11538 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11539 else
11541 rtx tmp;
11542 /* r11 is used by indirect sibcall return as well, set before the
11543 epilogue and used after the epilogue. */
11544 if (style)
11545 tmp = gen_rtx_REG (DImode, R11_REG);
11546 else
11548 gcc_assert (src != hard_frame_pointer_rtx
11549 && dest != hard_frame_pointer_rtx);
11550 tmp = hard_frame_pointer_rtx;
11552 insn = emit_insn (gen_rtx_SET (tmp, offset));
11553 if (style < 0)
11554 add_frame_related_expr = true;
11556 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11559 insn = emit_insn (insn);
11560 if (style >= 0)
11561 ix86_add_queued_cfa_restore_notes (insn);
11563 if (set_cfa)
11565 rtx r;
11567 gcc_assert (m->fs.cfa_reg == src);
11568 m->fs.cfa_offset += INTVAL (offset);
11569 m->fs.cfa_reg = dest;
11571 r = gen_rtx_PLUS (Pmode, src, offset);
11572 r = gen_rtx_SET (dest, r);
11573 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11574 RTX_FRAME_RELATED_P (insn) = 1;
11576 else if (style < 0)
11578 RTX_FRAME_RELATED_P (insn) = 1;
11579 if (add_frame_related_expr)
11581 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11582 r = gen_rtx_SET (dest, r);
11583 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11587 if (dest == stack_pointer_rtx)
11589 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11590 bool valid = m->fs.sp_valid;
11591 bool realigned = m->fs.sp_realigned;
11593 if (src == hard_frame_pointer_rtx)
11595 valid = m->fs.fp_valid;
11596 realigned = false;
11597 ooffset = m->fs.fp_offset;
11599 else if (src == crtl->drap_reg)
11601 valid = m->fs.drap_valid;
11602 realigned = false;
11603 ooffset = 0;
11605 else
11607 /* Else there are two possibilities: SP itself, which we set
11608 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11609 taken care of by hand along the eh_return path. */
11610 gcc_checking_assert (src == stack_pointer_rtx
11611 || offset == const0_rtx);
11614 m->fs.sp_offset = ooffset - INTVAL (offset);
11615 m->fs.sp_valid = valid;
11616 m->fs.sp_realigned = realigned;
11618 return insn;
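/* As used throughout this file, a prologue call such as
     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-64), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);
   allocates 64 bytes, marks the insn frame related (STYLE is negative),
   and keeps m->fs.sp_offset (and the CFA, while SP is still the CFA
   register) in sync with the adjustment.  */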
11621 /* Find an available register to be used as the dynamic realign argument
11622 pointer register. Such a register will be written in the prologue and
11623 used at the beginning of the body, so it must not be
11624 1. a parameter passing register.
11625 2. the GOT pointer.
11626 We reuse the static-chain register if it is available. Otherwise, we
11627 use DI for i386 and R13 for x86-64. We chose R13 since it has a
11628 shorter encoding.
11630 Return: the regno of chosen register. */
11632 static unsigned int
11633 find_drap_reg (void)
11635 tree decl = cfun->decl;
11637 /* Always use callee-saved register if there are no caller-saved
11638 registers. */
11639 if (TARGET_64BIT)
11641 /* Use R13 for a nested function or a function that needs a static chain.
11642 Since a function with a tail call may use any caller-saved
11643 register in the epilogue, DRAP must not use a caller-saved
11644 register in such a case. */
11645 if (DECL_STATIC_CHAIN (decl)
11646 || cfun->machine->no_caller_saved_registers
11647 || crtl->tail_call_emit)
11648 return R13_REG;
11650 return R10_REG;
11652 else
11654 /* Use DI for a nested function or a function that needs a static chain.
11655 Since a function with a tail call may use any caller-saved
11656 register in the epilogue, DRAP must not use a caller-saved
11657 register in such a case. */
11658 if (DECL_STATIC_CHAIN (decl)
11659 || cfun->machine->no_caller_saved_registers
11660 || crtl->tail_call_emit)
11661 return DI_REG;
11663 /* Reuse static chain register if it isn't used for parameter
11664 passing. */
11665 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11667 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11668 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11669 return CX_REG;
11671 return DI_REG;
11675 /* Handle a "force_align_arg_pointer" attribute. */
11677 static tree
11678 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11679 tree, int, bool *no_add_attrs)
11681 if (TREE_CODE (*node) != FUNCTION_TYPE
11682 && TREE_CODE (*node) != METHOD_TYPE
11683 && TREE_CODE (*node) != FIELD_DECL
11684 && TREE_CODE (*node) != TYPE_DECL)
11686 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11687 name);
11688 *no_add_attrs = true;
11691 return NULL_TREE;
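/* In user code the attribute handled above is written, for example, as
     void callback (void) __attribute__ ((force_align_arg_pointer));
   which makes that one function realign the stack on entry instead of
   assuming the default incoming alignment.  */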
11694 /* Return minimum incoming stack alignment. */
11696 static unsigned int
11697 ix86_minimum_incoming_stack_boundary (bool sibcall)
11699 unsigned int incoming_stack_boundary;
11701 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
11702 if (cfun->machine->func_type != TYPE_NORMAL)
11703 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
11704 /* Prefer the one specified at command line. */
11705 else if (ix86_user_incoming_stack_boundary)
11706 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11707 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
11708 if -mstackrealign is used, this isn't for a sibcall check, and the
11709 estimated stack alignment is 128 bits. */
11710 else if (!sibcall
11711 && ix86_force_align_arg_pointer
11712 && crtl->stack_alignment_estimated == 128)
11713 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11714 else
11715 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11717 /* Incoming stack alignment can be changed on individual functions
11718 via force_align_arg_pointer attribute. We use the smallest
11719 incoming stack boundary. */
11720 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11721 && lookup_attribute (ix86_force_align_arg_pointer_string,
11722 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11723 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11725 /* The incoming stack frame has to be aligned at least at
11726 parm_stack_boundary. */
11727 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11728 incoming_stack_boundary = crtl->parm_stack_boundary;
11730 /* Stack at entrance of main is aligned by runtime. We use the
11731 smallest incoming stack boundary. */
11732 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
11733 && DECL_NAME (current_function_decl)
11734 && MAIN_NAME_P (DECL_NAME (current_function_decl))
11735 && DECL_FILE_SCOPE_P (current_function_decl))
11736 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
11738 return incoming_stack_boundary;
11741 /* Update incoming stack boundary and estimated stack alignment. */
11743 static void
11744 ix86_update_stack_boundary (void)
11746 ix86_incoming_stack_boundary
11747 = ix86_minimum_incoming_stack_boundary (false);
11749 /* x86_64 vararg needs 16byte stack alignment for register save
11750 area. */
11751 if (TARGET_64BIT
11752 && cfun->stdarg
11753 && crtl->stack_alignment_estimated < 128)
11754 crtl->stack_alignment_estimated = 128;
11756 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
11757 if (ix86_tls_descriptor_calls_expanded_in_cfun
11758 && crtl->preferred_stack_boundary < 128)
11759 crtl->preferred_stack_boundary = 128;
11762 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
11763 needed or an rtx for DRAP otherwise. */
11765 static rtx
11766 ix86_get_drap_rtx (void)
11768 /* We must use DRAP if there are outgoing arguments on stack and
11769 ACCUMULATE_OUTGOING_ARGS is false. */
11770 if (ix86_force_drap
11771 || (cfun->machine->outgoing_args_on_stack
11772 && !ACCUMULATE_OUTGOING_ARGS))
11773 crtl->need_drap = true;
11775 if (stack_realign_drap)
11777 /* Assign DRAP to vDRAP and return vDRAP. */
11778 unsigned int regno = find_drap_reg ();
11779 rtx drap_vreg;
11780 rtx arg_ptr;
11781 rtx_insn *seq, *insn;
11783 arg_ptr = gen_rtx_REG (Pmode, regno);
11784 crtl->drap_reg = arg_ptr;
11786 start_sequence ();
11787 drap_vreg = copy_to_reg (arg_ptr);
11788 seq = get_insns ();
11789 end_sequence ();
11791 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
11792 if (!optimize)
11794 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
11795 RTX_FRAME_RELATED_P (insn) = 1;
11797 return drap_vreg;
11799 else
11800 return NULL;
11803 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
11805 static rtx
11806 ix86_internal_arg_pointer (void)
11808 return virtual_incoming_args_rtx;
11811 struct scratch_reg {
11812 rtx reg;
11813 bool saved;
11816 /* Return a short-lived scratch register for use on function entry.
11817 In 32-bit mode, it is valid only after the registers are saved
11818 in the prologue. This register must be released by means of
11819 release_scratch_register_on_entry once it is dead. */
11821 static void
11822 get_scratch_register_on_entry (struct scratch_reg *sr)
11824 int regno;
11826 sr->saved = false;
11828 if (TARGET_64BIT)
11830 /* We always use R11 in 64-bit mode. */
11831 regno = R11_REG;
11833 else
11835 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
11836 bool fastcall_p
11837 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
11838 bool thiscall_p
11839 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
11840 bool static_chain_p = DECL_STATIC_CHAIN (decl);
11841 int regparm = ix86_function_regparm (fntype, decl);
11842 int drap_regno
11843 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
11845 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
11846 for the static chain register. */
11847 if ((regparm < 1 || (fastcall_p && !static_chain_p))
11848 && drap_regno != AX_REG)
11849 regno = AX_REG;
11850 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
11851 for the static chain register. */
11852 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
11853 regno = AX_REG;
11854 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
11855 regno = DX_REG;
11856 /* ecx is the static chain register. */
11857 else if (regparm < 3 && !fastcall_p && !thiscall_p
11858 && !static_chain_p
11859 && drap_regno != CX_REG)
11860 regno = CX_REG;
11861 else if (ix86_save_reg (BX_REG, true, false))
11862 regno = BX_REG;
11863 /* esi is the static chain register. */
11864 else if (!(regparm == 3 && static_chain_p)
11865 && ix86_save_reg (SI_REG, true, false))
11866 regno = SI_REG;
11867 else if (ix86_save_reg (DI_REG, true, false))
11868 regno = DI_REG;
11869 else
11871 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
11872 sr->saved = true;
11876 sr->reg = gen_rtx_REG (Pmode, regno);
11877 if (sr->saved)
11879 rtx_insn *insn = emit_insn (gen_push (sr->reg));
11880 RTX_FRAME_RELATED_P (insn) = 1;
11884 /* Release a scratch register obtained from the preceding function. */
11886 static void
11887 release_scratch_register_on_entry (struct scratch_reg *sr)
11889 if (sr->saved)
11891 struct machine_function *m = cfun->machine;
11892 rtx x, insn = emit_insn (gen_pop (sr->reg));
11894 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
11895 RTX_FRAME_RELATED_P (insn) = 1;
11896 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
11897 x = gen_rtx_SET (stack_pointer_rtx, x);
11898 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
11899 m->fs.sp_offset -= UNITS_PER_WORD;
11903 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
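/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 this amounts to
   4096 bytes, i.e. one probe per page on typical configurations.  */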
11905 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
11907 This differs from the next routine in that it tries hard to prevent
11908 attacks that jump the stack guard. Thus it is never allowed to allocate
11909 more than PROBE_INTERVAL bytes of stack space without a suitable
11910 probe. */
11912 static void
11913 ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
11915 struct machine_function *m = cfun->machine;
11917 /* If this function does not statically allocate stack space, then
11918 no probes are needed. */
11919 if (!size)
11921 /* However, the allocation of space via pushes for register
11922 saves could be viewed as allocating space, but without the
11923 need to probe. */
11924 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
11925 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11926 else
11927 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11928 return;
11931 /* If we are a noreturn function, then we have to consider the
11932 possibility that we're called via a jump rather than a call.
11934 Thus we don't have the implicit probe generated by saving the
11935 return address into the stack at the call. Thus, the stack
11936 pointer could be anywhere in the guard page. The safe thing
11937 to do is emit a probe now.
11939 ?!? This should be revamped to work like aarch64 and s390 where
11940 we track the offset from the most recent probe. Normally that
11941 offset would be zero. For a noreturn function we would reset
11942 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
11943 we just probe when we cross PROBE_INTERVAL. */
11944 if (TREE_THIS_VOLATILE (cfun->decl))
11946 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11947 -GET_MODE_SIZE (word_mode)));
11948 emit_insn (gen_blockage ());
11951 /* If we allocate less than the size of the guard statically,
11952 then no probing is necessary, but we do need to allocate
11953 the stack. */
11954 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
11956 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11957 GEN_INT (-size), -1,
11958 m->fs.cfa_reg == stack_pointer_rtx);
11959 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11960 return;
11963 /* We're allocating a large enough stack frame that we need to
11964 emit probes. Either emit them inline or in a loop depending
11965 on the size. */
11966 HOST_WIDE_INT probe_interval
11967 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11968 if (size <= 4 * probe_interval)
11970 HOST_WIDE_INT i;
11971 for (i = probe_interval; i <= size; i += probe_interval)
11973 /* Allocate PROBE_INTERVAL bytes. */
11974 rtx insn
11975 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11976 GEN_INT (-PROBE_INTERVAL), -1,
11977 m->fs.cfa_reg == stack_pointer_rtx);
11978 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11980 /* And probe at *sp. */
11981 emit_stack_probe (stack_pointer_rtx);
11982 emit_insn (gen_blockage ());
11985 /* We need to allocate space for the residual, but we do not need
11986 to probe the residual. */
11987 HOST_WIDE_INT residual = (i - probe_interval - size);
11988 if (residual)
11989 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11990 GEN_INT (residual), -1,
11991 m->fs.cfa_reg == stack_pointer_rtx);
11992 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11994 else
11996 struct scratch_reg sr;
11997 get_scratch_register_on_entry (&sr);
11999 /* Step 1: round SIZE down to a multiple of the interval. */
12000 HOST_WIDE_INT rounded_size = size & -probe_interval;
12002 /* Step 2: compute final value of the loop counter. Use lea if
12003 possible. */
12004 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12005 rtx insn;
12006 if (address_no_seg_operand (addr, Pmode))
12007 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12008 else
12010 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12011 insn = emit_insn (gen_rtx_SET (sr.reg,
12012 gen_rtx_PLUS (Pmode, sr.reg,
12013 stack_pointer_rtx)));
12015 if (m->fs.cfa_reg == stack_pointer_rtx)
12017 add_reg_note (insn, REG_CFA_DEF_CFA,
12018 plus_constant (Pmode, sr.reg,
12019 m->fs.cfa_offset + rounded_size));
12020 RTX_FRAME_RELATED_P (insn) = 1;
12023 /* Step 3: the loop. */
12024 rtx size_rtx = GEN_INT (rounded_size);
12025 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12026 size_rtx));
12027 if (m->fs.cfa_reg == stack_pointer_rtx)
12029 m->fs.cfa_offset += rounded_size;
12030 add_reg_note (insn, REG_CFA_DEF_CFA,
12031 plus_constant (Pmode, stack_pointer_rtx,
12032 m->fs.cfa_offset));
12033 RTX_FRAME_RELATED_P (insn) = 1;
12035 m->fs.sp_offset += rounded_size;
12036 emit_insn (gen_blockage ());
12038 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12039 is equal to ROUNDED_SIZE. */
12041 if (size != rounded_size)
12042 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12043 GEN_INT (rounded_size - size), -1,
12044 m->fs.cfa_reg == stack_pointer_rtx);
12045 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12047 release_scratch_register_on_entry (&sr);
12050 /* Make sure nothing is scheduled before we are done. */
12051 emit_insn (gen_blockage ());
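/* A rough sketch of the cases above, assuming the default 4 KiB guard size
   and 4 KiB probe interval: a 2 KiB frame is allocated with no probe at all,
   a 12 KiB frame gets three inline allocate-and-probe pairs, and a 64 KiB
   frame is rounded down to sixteen iterations of the probe loop with any
   remainder allocated (but not probed) afterwards.  */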
12054 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12056 static void
12057 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12059 /* We skip the probe for the first interval + a small dope of 4 words and
12060 probe that many bytes past the specified size to maintain a protection
12061 area at the bottom of the stack. */
12062 const int dope = 4 * UNITS_PER_WORD;
12063 rtx size_rtx = GEN_INT (size), last;
12065 /* See if we have a constant small number of probes to generate. If so,
12066 that's the easy case. The run-time loop is made up of 9 insns in the
12067 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12068 for n # of intervals. */
12069 if (size <= 4 * PROBE_INTERVAL)
12071 HOST_WIDE_INT i, adjust;
12072 bool first_probe = true;
12074 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12075 values of N from 1 until it exceeds SIZE. If only one probe is
12076 needed, this will not generate any code. Then adjust and probe
12077 to PROBE_INTERVAL + SIZE. */
12078 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12080 if (first_probe)
12082 adjust = 2 * PROBE_INTERVAL + dope;
12083 first_probe = false;
12085 else
12086 adjust = PROBE_INTERVAL;
12088 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12089 plus_constant (Pmode, stack_pointer_rtx,
12090 -adjust)));
12091 emit_stack_probe (stack_pointer_rtx);
12094 if (first_probe)
12095 adjust = size + PROBE_INTERVAL + dope;
12096 else
12097 adjust = size + PROBE_INTERVAL - i;
12099 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12100 plus_constant (Pmode, stack_pointer_rtx,
12101 -adjust)));
12102 emit_stack_probe (stack_pointer_rtx);
12104 /* Adjust back to account for the additional first interval. */
12105 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12106 plus_constant (Pmode, stack_pointer_rtx,
12107 PROBE_INTERVAL + dope)));
12110 /* Otherwise, do the same as above, but in a loop. Note that we must be
12111 extra careful with variables wrapping around because we might be at
12112 the very top (or the very bottom) of the address space and we have
12113 to be able to handle this case properly; in particular, we use an
12114 equality test for the loop condition. */
12115 else
12117 HOST_WIDE_INT rounded_size;
12118 struct scratch_reg sr;
12120 get_scratch_register_on_entry (&sr);
12123 /* Step 1: round SIZE to the previous multiple of the interval. */
12125 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12128 /* Step 2: compute initial and final value of the loop counter. */
12130 /* SP = SP_0 + PROBE_INTERVAL. */
12131 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12132 plus_constant (Pmode, stack_pointer_rtx,
12133 - (PROBE_INTERVAL + dope))));
12135 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12136 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12137 emit_insn (gen_rtx_SET (sr.reg,
12138 plus_constant (Pmode, stack_pointer_rtx,
12139 -rounded_size)));
12140 else
12142 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12143 emit_insn (gen_rtx_SET (sr.reg,
12144 gen_rtx_PLUS (Pmode, sr.reg,
12145 stack_pointer_rtx)));
12149 /* Step 3: the loop
12153 SP = SP + PROBE_INTERVAL
12154 probe at SP
12156 while (SP != LAST_ADDR)
12158 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12159 values of N from 1 until it is equal to ROUNDED_SIZE. */
12161 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12164 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12165 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12167 if (size != rounded_size)
12169 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12170 plus_constant (Pmode, stack_pointer_rtx,
12171 rounded_size - size)));
12172 emit_stack_probe (stack_pointer_rtx);
12175 /* Adjust back to account for the additional first interval. */
12176 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12177 plus_constant (Pmode, stack_pointer_rtx,
12178 PROBE_INTERVAL + dope)));
12180 release_scratch_register_on_entry (&sr);
12183 /* Even if the stack pointer isn't the CFA register, we need to correctly
12184 describe the adjustments made to it, in particular differentiate the
12185 frame-related ones from the frame-unrelated ones. */
12186 if (size > 0)
12188 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12189 XVECEXP (expr, 0, 0)
12190 = gen_rtx_SET (stack_pointer_rtx,
12191 plus_constant (Pmode, stack_pointer_rtx, -size));
12192 XVECEXP (expr, 0, 1)
12193 = gen_rtx_SET (stack_pointer_rtx,
12194 plus_constant (Pmode, stack_pointer_rtx,
12195 PROBE_INTERVAL + dope + size));
12196 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12197 RTX_FRAME_RELATED_P (last) = 1;
12199 cfun->machine->fs.sp_offset += size;
12202 /* Make sure nothing is scheduled before we are done. */
12203 emit_insn (gen_blockage ());
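/* A worked example of the small-size path above: with SIZE = 10000, a
   4096-byte probe interval and dope = 32 (x86-64), the prologue drops SP
   by 2*4096+32 and probes, drops by 4096 and probes, drops by 1808 and
   probes, then adds back 4096+32, for a net adjustment of exactly 10000
   bytes.  */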
12206 /* Adjust the stack pointer up to REG while probing it. */
12208 const char *
12209 output_adjust_stack_and_probe (rtx reg)
12211 static int labelno = 0;
12212 char loop_lab[32];
12213 rtx xops[2];
12215 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12217 /* Loop. */
12218 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12220 /* SP = SP + PROBE_INTERVAL. */
12221 xops[0] = stack_pointer_rtx;
12222 xops[1] = GEN_INT (PROBE_INTERVAL);
12223 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12225 /* Probe at SP. */
12226 xops[1] = const0_rtx;
12227 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12229 /* Test if SP == LAST_ADDR. */
12230 xops[0] = stack_pointer_rtx;
12231 xops[1] = reg;
12232 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12234 /* Branch. */
12235 fputs ("\tjne\t", asm_out_file);
12236 assemble_name_raw (asm_out_file, loop_lab);
12237 fputc ('\n', asm_out_file);
12239 return "";
12242 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12243 inclusive. These are offsets from the current stack pointer. */
12245 static void
12246 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12248 /* See if we have a constant small number of probes to generate. If so,
12249 that's the easy case. The run-time loop is made up of 6 insns in the
12250 generic case while the compile-time loop is made up of n insns for n #
12251 of intervals. */
12252 if (size <= 6 * PROBE_INTERVAL)
12254 HOST_WIDE_INT i;
12256 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12257 it exceeds SIZE. If only one probe is needed, this will not
12258 generate any code. Then probe at FIRST + SIZE. */
12259 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12260 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12261 -(first + i)));
12263 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12264 -(first + size)));
12267 /* Otherwise, do the same as above, but in a loop. Note that we must be
12268 extra careful with variables wrapping around because we might be at
12269 the very top (or the very bottom) of the address space and we have
12270 to be able to handle this case properly; in particular, we use an
12271 equality test for the loop condition. */
12272 else
12274 HOST_WIDE_INT rounded_size, last;
12275 struct scratch_reg sr;
12277 get_scratch_register_on_entry (&sr);
12280 /* Step 1: round SIZE to the previous multiple of the interval. */
12282 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12285 /* Step 2: compute initial and final value of the loop counter. */
12287 /* TEST_OFFSET = FIRST. */
12288 emit_move_insn (sr.reg, GEN_INT (-first));
12290 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12291 last = first + rounded_size;
12294 /* Step 3: the loop
12298 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12299 probe at TEST_ADDR
12301 while (TEST_ADDR != LAST_ADDR)
12303 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12304 until it is equal to ROUNDED_SIZE. */
12306 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12309 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12310 that SIZE is equal to ROUNDED_SIZE. */
12312 if (size != rounded_size)
12313 emit_stack_probe (plus_constant (Pmode,
12314 gen_rtx_PLUS (Pmode,
12315 stack_pointer_rtx,
12316 sr.reg),
12317 rounded_size - size));
12319 release_scratch_register_on_entry (&sr);
12322 /* Make sure nothing is scheduled before we are done. */
12323 emit_insn (gen_blockage ());
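/* For example, with FIRST = 64, SIZE = 8192 and a 4096-byte probe interval,
   the small-count path above emits probes at SP - 4160 and SP - 8256, i.e.
   at FIRST + PROBE_INTERVAL and FIRST + SIZE below the stack pointer.  */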
12326 /* Probe a range of stack addresses from REG to END, inclusive. These are
12327 offsets from the current stack pointer. */
12329 const char *
12330 output_probe_stack_range (rtx reg, rtx end)
12332 static int labelno = 0;
12333 char loop_lab[32];
12334 rtx xops[3];
12336 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12338 /* Loop. */
12339 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12341 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12342 xops[0] = reg;
12343 xops[1] = GEN_INT (PROBE_INTERVAL);
12344 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12346 /* Probe at TEST_ADDR. */
12347 xops[0] = stack_pointer_rtx;
12348 xops[1] = reg;
12349 xops[2] = const0_rtx;
12350 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12352 /* Test if TEST_ADDR == LAST_ADDR. */
12353 xops[0] = reg;
12354 xops[1] = end;
12355 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12357 /* Branch. */
12358 fputs ("\tjne\t", asm_out_file);
12359 assemble_name_raw (asm_out_file, loop_lab);
12360 fputc ('\n', asm_out_file);
12362 return "";
12365 /* Finalize the stack_realign_needed and frame_pointer_needed flags, which
12366 guide the prologue/epilogue to be generated in the correct form. */
12368 static void
12369 ix86_finalize_stack_frame_flags (void)
12371 /* Check if stack realign is really needed after reload, and
12372 store the result in cfun. */
12373 unsigned int incoming_stack_boundary
12374 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12375 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12376 unsigned int stack_alignment
12377 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12378 ? crtl->max_used_stack_slot_alignment
12379 : crtl->stack_alignment_needed);
12380 unsigned int stack_realign
12381 = (incoming_stack_boundary < stack_alignment);
12382 bool recompute_frame_layout_p = false;
12384 if (crtl->stack_realign_finalized)
12386 /* After stack_realign_needed is finalized, we can no longer
12387 change it. */
12388 gcc_assert (crtl->stack_realign_needed == stack_realign);
12389 return;
12392 /* If the only reason for frame_pointer_needed is that we conservatively
12393 assumed stack realignment might be needed or -fno-omit-frame-pointer
12394 is used, but in the end nothing that needed the stack alignment had
12395 been spilled nor any stack access made, clear frame_pointer_needed and say we
12396 don't need stack realignment. */
12397 if ((stack_realign || !flag_omit_frame_pointer)
12398 && frame_pointer_needed
12399 && crtl->is_leaf
12400 && crtl->sp_is_unchanging
12401 && !ix86_current_function_calls_tls_descriptor
12402 && !crtl->accesses_prior_frames
12403 && !cfun->calls_alloca
12404 && !crtl->calls_eh_return
12405 /* See ira_setup_eliminable_regset for the rationale. */
12406 && !(STACK_CHECK_MOVING_SP
12407 && flag_stack_check
12408 && flag_exceptions
12409 && cfun->can_throw_non_call_exceptions)
12410 && !ix86_frame_pointer_required ()
12411 && get_frame_size () == 0
12412 && ix86_nsaved_sseregs () == 0
12413 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12415 HARD_REG_SET set_up_by_prologue, prologue_used;
12416 basic_block bb;
12418 CLEAR_HARD_REG_SET (prologue_used);
12419 CLEAR_HARD_REG_SET (set_up_by_prologue);
12420 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12421 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12422 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12423 HARD_FRAME_POINTER_REGNUM);
12425 /* The preferred stack alignment is the minimum stack alignment. */
12426 if (stack_alignment > crtl->preferred_stack_boundary)
12427 stack_alignment = crtl->preferred_stack_boundary;
12429 bool require_stack_frame = false;
12431 FOR_EACH_BB_FN (bb, cfun)
12433 rtx_insn *insn;
12434 FOR_BB_INSNS (bb, insn)
12435 if (NONDEBUG_INSN_P (insn)
12436 && requires_stack_frame_p (insn, prologue_used,
12437 set_up_by_prologue))
12439 require_stack_frame = true;
12441 if (stack_realign)
12443 /* Find the maximum stack alignment. */
12444 subrtx_iterator::array_type array;
12445 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12446 if (MEM_P (*iter)
12447 && (reg_mentioned_p (stack_pointer_rtx,
12448 *iter)
12449 || reg_mentioned_p (frame_pointer_rtx,
12450 *iter)))
12452 unsigned int alignment = MEM_ALIGN (*iter);
12453 if (alignment > stack_alignment)
12454 stack_alignment = alignment;
12460 if (require_stack_frame)
12462 /* Stack frame is required. If stack alignment needed is less
12463 than incoming stack boundary, don't realign stack. */
12464 stack_realign = incoming_stack_boundary < stack_alignment;
12465 if (!stack_realign)
12467 crtl->max_used_stack_slot_alignment
12468 = incoming_stack_boundary;
12469 crtl->stack_alignment_needed
12470 = incoming_stack_boundary;
12471 /* Also update preferred_stack_boundary for leaf
12472 functions. */
12473 crtl->preferred_stack_boundary
12474 = incoming_stack_boundary;
12477 else
12479 /* If drap has been set, but it actually isn't live at the
12480 start of the function, there is no reason to set it up. */
12481 if (crtl->drap_reg)
12483 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12484 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
12485 REGNO (crtl->drap_reg)))
12487 crtl->drap_reg = NULL_RTX;
12488 crtl->need_drap = false;
12491 else
12492 cfun->machine->no_drap_save_restore = true;
12494 frame_pointer_needed = false;
12495 stack_realign = false;
12496 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12497 crtl->stack_alignment_needed = incoming_stack_boundary;
12498 crtl->stack_alignment_estimated = incoming_stack_boundary;
12499 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12500 crtl->preferred_stack_boundary = incoming_stack_boundary;
12501 df_finish_pass (true);
12502 df_scan_alloc (NULL);
12503 df_scan_blocks ();
12504 df_compute_regs_ever_live (true);
12505 df_analyze ();
12507 if (flag_var_tracking)
12509 /* Since frame pointer is no longer available, replace it with
12510 stack pointer - UNITS_PER_WORD in debug insns. */
12511 df_ref ref, next;
12512 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
12513 ref; ref = next)
12515 rtx_insn *insn = DF_REF_INSN (ref);
12516 /* Make sure the next ref is for a different instruction,
12517 so that we're not affected by the rescan. */
12518 next = DF_REF_NEXT_REG (ref);
12519 while (next && DF_REF_INSN (next) == insn)
12520 next = DF_REF_NEXT_REG (next);
12522 if (DEBUG_INSN_P (insn))
12524 bool changed = false;
12525 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
12527 rtx *loc = DF_REF_LOC (ref);
12528 if (*loc == hard_frame_pointer_rtx)
12530 *loc = plus_constant (Pmode,
12531 stack_pointer_rtx,
12532 -UNITS_PER_WORD);
12533 changed = true;
12536 if (changed)
12537 df_insn_rescan (insn);
12542 recompute_frame_layout_p = true;
12546 if (crtl->stack_realign_needed != stack_realign)
12547 recompute_frame_layout_p = true;
12548 crtl->stack_realign_needed = stack_realign;
12549 crtl->stack_realign_finalized = true;
12550 if (recompute_frame_layout_p)
12551 ix86_compute_frame_layout ();
12554 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12556 static void
12557 ix86_elim_entry_set_got (rtx reg)
12559 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12560 rtx_insn *c_insn = BB_HEAD (bb);
12561 if (!NONDEBUG_INSN_P (c_insn))
12562 c_insn = next_nonnote_nondebug_insn (c_insn);
12563 if (c_insn && NONJUMP_INSN_P (c_insn))
12565 rtx pat = PATTERN (c_insn);
12566 if (GET_CODE (pat) == PARALLEL)
12568 rtx vec = XVECEXP (pat, 0, 0);
12569 if (GET_CODE (vec) == SET
12570 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12571 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12572 delete_insn (c_insn);
12577 static rtx
12578 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
12580 rtx addr, mem;
12582 if (offset)
12583 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
12584 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
12585 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
12588 static inline rtx
12589 gen_frame_load (rtx reg, rtx frame_reg, int offset)
12591 return gen_frame_set (reg, frame_reg, offset, false);
12594 static inline rtx
12595 gen_frame_store (rtx reg, rtx frame_reg, int offset)
12597 return gen_frame_set (reg, frame_reg, offset, true);
12600 static void
12601 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
12603 struct machine_function *m = cfun->machine;
12604 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
12605 + m->call_ms2sysv_extra_regs;
12606 rtvec v = rtvec_alloc (ncregs + 1);
12607 unsigned int align, i, vi = 0;
12608 rtx_insn *insn;
12609 rtx sym, addr;
12610 rtx rax = gen_rtx_REG (word_mode, AX_REG);
12611 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
12612 HOST_WIDE_INT allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12614 /* AL should only be live with sysv_abi. */
12615 gcc_assert (!ix86_eax_live_at_start_p ());
12617 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
12618 irrespective of whether we've actually realigned the stack or not. */
12619 align = GET_MODE_ALIGNMENT (V4SFmode);
12620 addr = choose_baseaddr (frame.stack_realign_offset
12621 + xlogue.get_stub_ptr_offset (), &align);
12622 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
12623 emit_insn (gen_rtx_SET (rax, addr));
12625 /* Allocate stack if not already done. */
12626 if (allocate > 0)
12627 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12628 GEN_INT (-allocate), -1, false);
12630 /* Get the stub symbol. */
12631 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
12632 : XLOGUE_STUB_SAVE);
12633 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
12635 for (i = 0; i < ncregs; ++i)
12637 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
12638 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
12639 r.regno);
12640 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
12643 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
12645 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
12646 RTX_FRAME_RELATED_P (insn) = true;
12649 /* Expand the prologue into a bunch of separate insns. */
12651 void
12652 ix86_expand_prologue (void)
12654 struct machine_function *m = cfun->machine;
12655 rtx insn, t;
12656 struct ix86_frame frame;
12657 HOST_WIDE_INT allocate;
12658 bool int_registers_saved;
12659 bool sse_registers_saved;
12660 rtx static_chain = NULL_RTX;
12662 if (ix86_function_naked (current_function_decl))
12663 return;
12665 ix86_finalize_stack_frame_flags ();
12667 /* DRAP should not coexist with stack_realign_fp */
12668 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12670 memset (&m->fs, 0, sizeof (m->fs));
12672 /* Initialize CFA state for before the prologue. */
12673 m->fs.cfa_reg = stack_pointer_rtx;
12674 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12676 /* Track SP offset to the CFA. We continue tracking this after we've
12677 swapped the CFA register away from SP. In the case of re-alignment
12678 this is fudged; we're interested in offsets within the local frame. */
12679 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12680 m->fs.sp_valid = true;
12681 m->fs.sp_realigned = false;
12683 frame = m->frame;
12685 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12687 /* We should have already generated an error for any use of
12688 ms_hook on a nested function. */
12689 gcc_checking_assert (!ix86_static_chain_on_stack);
12691 /* Check if profiling is active and we shall use the profiling before
12692 prologue variant. If so, issue a sorry. */
12693 if (crtl->profile && flag_fentry != 0)
12694 sorry ("ms_hook_prologue attribute isn%'t compatible "
12695 "with -mfentry for 32-bit");
12697 /* In ix86_asm_output_function_label we emitted:
12698 8b ff movl.s %edi,%edi
12699 55 push %ebp
12700 8b ec movl.s %esp,%ebp
12702 This matches the hookable function prologue in Win32 API
12703 functions in Microsoft Windows XP Service Pack 2 and newer.
12704 Wine uses this to enable Windows apps to hook the Win32 API
12705 functions provided by Wine.
12707 What that means is that we've already set up the frame pointer. */
12709 if (frame_pointer_needed
12710 && !(crtl->drap_reg && crtl->stack_realign_needed))
12712 rtx push, mov;
12714 /* We've decided to use the frame pointer already set up.
12715 Describe this to the unwinder by pretending that both
12716 push and mov insns happen right here.
12718 Putting the unwind info here at the end of the ms_hook
12719 is done so that we can make absolutely certain we get
12720 the required byte sequence at the start of the function,
12721 rather than relying on an assembler that can produce
12722 the exact encoding required.
12724 However it does mean (in the unpatched case) that we have
12725 a 1 insn window where the asynchronous unwind info is
12726 incorrect. However, if we placed the unwind info at
12727 its correct location we would have incorrect unwind info
12728 in the patched case. Which is probably all moot since
12729 I don't expect Wine generates dwarf2 unwind info for the
12730 system libraries that use this feature. */
12732 insn = emit_insn (gen_blockage ());
12734 push = gen_push (hard_frame_pointer_rtx);
12735 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12736 stack_pointer_rtx);
12737 RTX_FRAME_RELATED_P (push) = 1;
12738 RTX_FRAME_RELATED_P (mov) = 1;
12740 RTX_FRAME_RELATED_P (insn) = 1;
12741 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12742 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12744 /* Note that gen_push incremented m->fs.cfa_offset, even
12745 though we didn't emit the push insn here. */
12746 m->fs.cfa_reg = hard_frame_pointer_rtx;
12747 m->fs.fp_offset = m->fs.cfa_offset;
12748 m->fs.fp_valid = true;
12750 else
12752 /* The frame pointer is not needed so pop %ebp again.
12753 This leaves us with a pristine state. */
12754 emit_insn (gen_pop (hard_frame_pointer_rtx));
12758 /* The first insn of a function that accepts its static chain on the
12759 stack is to push the register that would be filled in by a direct
12760 call. This insn will be skipped by the trampoline. */
12761 else if (ix86_static_chain_on_stack)
12763 static_chain = ix86_static_chain (cfun->decl, false);
12764 insn = emit_insn (gen_push (static_chain));
12765 emit_insn (gen_blockage ());
12767 /* We don't want to interpret this push insn as a register save,
12768 only as a stack adjustment. The real copy of the register as
12769 a save will be done later, if needed. */
12770 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12771 t = gen_rtx_SET (stack_pointer_rtx, t);
12772 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12773 RTX_FRAME_RELATED_P (insn) = 1;
12776 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12777 DRAP is needed and stack realignment is really needed after reload. */
12778 if (stack_realign_drap)
12780 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12782 /* Can't use DRAP in interrupt function. */
12783 if (cfun->machine->func_type != TYPE_NORMAL)
12784 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
12785 "in interrupt service routine. This may be worked "
12786 "around by avoiding functions with aggregate return.");
12788 /* Only need to push parameter pointer reg if it is caller saved. */
12789 if (!call_used_regs[REGNO (crtl->drap_reg)])
12791 /* Push arg pointer reg */
12792 insn = emit_insn (gen_push (crtl->drap_reg));
12793 RTX_FRAME_RELATED_P (insn) = 1;
12796 /* Grab the argument pointer. */
12797 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12798 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12799 RTX_FRAME_RELATED_P (insn) = 1;
12800 m->fs.cfa_reg = crtl->drap_reg;
12801 m->fs.cfa_offset = 0;
12803 /* Align the stack. */
12804 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12805 stack_pointer_rtx,
12806 GEN_INT (-align_bytes)));
12807 RTX_FRAME_RELATED_P (insn) = 1;
12809 /* Replicate the return address on the stack so that return
12810 address can be reached via (argp - 1) slot. This is needed
12811 to implement macro RETURN_ADDR_RTX and intrinsic function
12812 expand_builtin_return_addr etc. */
12813 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12814 t = gen_frame_mem (word_mode, t);
12815 insn = emit_insn (gen_push (t));
12816 RTX_FRAME_RELATED_P (insn) = 1;
12818 /* For the purposes of frame and register save area addressing,
12819 we've started over with a new frame. */
12820 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12821 m->fs.realigned = true;
12823 if (static_chain)
12825 /* Replicate static chain on the stack so that static chain
12826 can be reached via (argp - 2) slot. This is needed for
12827 nested function with stack realignment. */
12828 insn = emit_insn (gen_push (static_chain));
12829 RTX_FRAME_RELATED_P (insn) = 1;
12833 int_registers_saved = (frame.nregs == 0);
12834 sse_registers_saved = (frame.nsseregs == 0);
12836 if (frame_pointer_needed && !m->fs.fp_valid)
12838 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12839 slower on all targets. Also sdb doesn't like it. */
12840 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12841 RTX_FRAME_RELATED_P (insn) = 1;
12843 /* Push registers now, before setting the frame pointer
12844 on SEH target. */
12845 if (!int_registers_saved
12846 && TARGET_SEH
12847 && !frame.save_regs_using_mov)
12849 ix86_emit_save_regs ();
12850 int_registers_saved = true;
12851 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12854 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12856 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12857 RTX_FRAME_RELATED_P (insn) = 1;
12859 if (m->fs.cfa_reg == stack_pointer_rtx)
12860 m->fs.cfa_reg = hard_frame_pointer_rtx;
12861 m->fs.fp_offset = m->fs.sp_offset;
12862 m->fs.fp_valid = true;
12866 if (!int_registers_saved)
12868 /* If saving registers via PUSH, do so now. */
12869 if (!frame.save_regs_using_mov)
12871 ix86_emit_save_regs ();
12872 int_registers_saved = true;
12873 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12876 /* When using red zone we may start register saving before allocating
12877 the stack frame saving one cycle of the prologue. However, avoid
12878 doing this if we have to probe the stack; at least on x86_64 the
12879 stack probe can turn into a call that clobbers a red zone location. */
12880 else if (ix86_using_red_zone ()
12881 && (! TARGET_STACK_PROBE
12882 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12884 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12885 int_registers_saved = true;
12889 if (stack_realign_fp)
12891 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12892 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12894 /* Record last valid frame pointer offset. */
12895 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
12897 /* The computation of the size of the re-aligned stack frame means
12898 that we must allocate the size of the register save area before
12899 performing the actual alignment. Otherwise we cannot guarantee
12900 that there's enough storage above the realignment point. */
12901 allocate = frame.reg_save_offset - m->fs.sp_offset
12902 + frame.stack_realign_allocate;
12903 if (allocate)
12904 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12905 GEN_INT (-allocate), -1, false);
12907 /* Align the stack. */
12908 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12909 stack_pointer_rtx,
12910 GEN_INT (-align_bytes)));
12911 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12912 m->fs.sp_realigned_offset = m->fs.sp_offset
12913 - frame.stack_realign_allocate;
12914 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
12915 Beyond this point, stack access should be done via choose_baseaddr or
12916 by using sp_valid_at and fp_valid_at to determine the correct base
12917 register. Henceforth, any CFA offset should be thought of as logical
12918 and not physical. */
12919 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
12920 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
12921 m->fs.sp_realigned = true;
12923 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
12924 is needed to describe where a register is saved using a realigned
12925 stack pointer, so we need to invalidate the stack pointer for that
12926 target. */
12927 if (TARGET_SEH)
12928 m->fs.sp_valid = false;
12931 if (m->call_ms2sysv)
12932 ix86_emit_outlined_ms2sysv_save (frame);
12934 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12936 if (flag_stack_usage_info)
12938 /* We start to count from ARG_POINTER. */
12939 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12941 /* If it was realigned, take into account the fake frame. */
12942 if (stack_realign_drap)
12944 if (ix86_static_chain_on_stack)
12945 stack_size += UNITS_PER_WORD;
12947 if (!call_used_regs[REGNO (crtl->drap_reg)])
12948 stack_size += UNITS_PER_WORD;
12950 /* This over-estimates by 1 minimal-stack-alignment-unit but
12951 mitigates that by counting in the new return address slot. */
12952 current_function_dynamic_stack_size
12953 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12956 current_function_static_stack_size = stack_size;
12959 /* On SEH target with very large frame size, allocate an area to save
12960 SSE registers (as the very large allocation won't be described). */
12961 if (TARGET_SEH
12962 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12963 && !sse_registers_saved)
12965 HOST_WIDE_INT sse_size =
12966 frame.sse_reg_save_offset - frame.reg_save_offset;
12968 gcc_assert (int_registers_saved);
12970 /* No need to do stack checking as the area will be immediately
12971 written. */
12972 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12973 GEN_INT (-sse_size), -1,
12974 m->fs.cfa_reg == stack_pointer_rtx);
12975 allocate -= sse_size;
12976 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12977 sse_registers_saved = true;
12980 /* The stack has already been decremented by the instruction calling us
12981 so probe if the size is non-negative to preserve the protection area. */
12982 if (allocate >= 0
12983 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
12984 || flag_stack_clash_protection))
12986 /* We expect the GP registers to be saved when probes are used. */
12987 gcc_assert (int_registers_saved);
12989 if (flag_stack_clash_protection)
12991 ix86_adjust_stack_and_probe_stack_clash (allocate);
12992 allocate = 0;
12994 else if (STACK_CHECK_MOVING_SP)
12996 if (!(crtl->is_leaf && !cfun->calls_alloca
12997 && allocate <= PROBE_INTERVAL))
12999 ix86_adjust_stack_and_probe (allocate);
13000 allocate = 0;
13003 else
13005 HOST_WIDE_INT size = allocate;
13007 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13008 size = 0x80000000 - get_stack_check_protect () - 1;
13010 if (TARGET_STACK_PROBE)
13012 if (crtl->is_leaf && !cfun->calls_alloca)
13014 if (size > PROBE_INTERVAL)
13015 ix86_emit_probe_stack_range (0, size);
13017 else
13018 ix86_emit_probe_stack_range (0,
13019 size + get_stack_check_protect ());
13021 else
13023 if (crtl->is_leaf && !cfun->calls_alloca)
13025 if (size > PROBE_INTERVAL
13026 && size > get_stack_check_protect ())
13027 ix86_emit_probe_stack_range (get_stack_check_protect (),
13028 size - get_stack_check_protect ());
13030 else
13031 ix86_emit_probe_stack_range (get_stack_check_protect (), size);
13036 if (allocate == 0)
13038 else if (!ix86_target_stack_probe ()
13039 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13041 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13042 GEN_INT (-allocate), -1,
13043 m->fs.cfa_reg == stack_pointer_rtx);
13045 else
13047 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13048 rtx r10 = NULL;
13049 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13050 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13051 bool eax_live = ix86_eax_live_at_start_p ();
13052 bool r10_live = false;
13054 if (TARGET_64BIT)
13055 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13057 if (eax_live)
13059 insn = emit_insn (gen_push (eax));
13060 allocate -= UNITS_PER_WORD;
13061 /* Note that SEH directives need to continue tracking the stack
13062 pointer even after the frame pointer has been set up. */
13063 if (sp_is_cfa_reg || TARGET_SEH)
13065 if (sp_is_cfa_reg)
13066 m->fs.cfa_offset += UNITS_PER_WORD;
13067 RTX_FRAME_RELATED_P (insn) = 1;
13068 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13069 gen_rtx_SET (stack_pointer_rtx,
13070 plus_constant (Pmode, stack_pointer_rtx,
13071 -UNITS_PER_WORD)));
13075 if (r10_live)
13077 r10 = gen_rtx_REG (Pmode, R10_REG);
13078 insn = emit_insn (gen_push (r10));
13079 allocate -= UNITS_PER_WORD;
13080 if (sp_is_cfa_reg || TARGET_SEH)
13082 if (sp_is_cfa_reg)
13083 m->fs.cfa_offset += UNITS_PER_WORD;
13084 RTX_FRAME_RELATED_P (insn) = 1;
13085 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13086 gen_rtx_SET (stack_pointer_rtx,
13087 plus_constant (Pmode, stack_pointer_rtx,
13088 -UNITS_PER_WORD)));
13092 emit_move_insn (eax, GEN_INT (allocate));
13093 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13095 /* Use the fact that AX still contains ALLOCATE. */
13096 adjust_stack_insn = (Pmode == DImode
13097 ? gen_pro_epilogue_adjust_stack_di_sub
13098 : gen_pro_epilogue_adjust_stack_si_sub);
13100 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13101 stack_pointer_rtx, eax));
13103 if (sp_is_cfa_reg || TARGET_SEH)
13105 if (sp_is_cfa_reg)
13106 m->fs.cfa_offset += allocate;
13107 RTX_FRAME_RELATED_P (insn) = 1;
13108 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13109 gen_rtx_SET (stack_pointer_rtx,
13110 plus_constant (Pmode, stack_pointer_rtx,
13111 -allocate)));
13113 m->fs.sp_offset += allocate;
13115 /* Use stack_pointer_rtx for relative addressing so that code
13116 works for realigned stack, too. */
13117 if (r10_live && eax_live)
13119 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13120 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13121 gen_frame_mem (word_mode, t));
13122 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13123 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13124 gen_frame_mem (word_mode, t));
13126 else if (eax_live || r10_live)
13128 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13129 emit_move_insn (gen_rtx_REG (word_mode,
13130 (eax_live ? AX_REG : R10_REG)),
13131 gen_frame_mem (word_mode, t));
13134 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13136 /* If we haven't already set up the frame pointer, do so now. */
13137 if (frame_pointer_needed && !m->fs.fp_valid)
13139 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13140 GEN_INT (frame.stack_pointer_offset
13141 - frame.hard_frame_pointer_offset));
13142 insn = emit_insn (insn);
13143 RTX_FRAME_RELATED_P (insn) = 1;
13144 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13146 if (m->fs.cfa_reg == stack_pointer_rtx)
13147 m->fs.cfa_reg = hard_frame_pointer_rtx;
13148 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13149 m->fs.fp_valid = true;
13152 if (!int_registers_saved)
13153 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13154 if (!sse_registers_saved)
13155 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13157 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
13158 in the prologue. */
13159 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13161 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13162 insn = emit_insn (gen_set_got (pic));
13163 RTX_FRAME_RELATED_P (insn) = 1;
13164 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13165 emit_insn (gen_prologue_use (pic));
13166 /* Delete an already emitted SET_GOT if it exists and is allocated to
13167 REAL_PIC_OFFSET_TABLE_REGNUM. */
13168 ix86_elim_entry_set_got (pic);
13171 if (crtl->drap_reg && !crtl->stack_realign_needed)
13173 /* vDRAP is set up, but after reload it turns out stack realignment
13174 isn't necessary; here we emit the prologue to set up DRAP
13175 without the stack realignment adjustment. */
13176 t = choose_baseaddr (0, NULL);
13177 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13180 /* Prevent instructions from being scheduled into the register save push
13181 sequence when access to the redzone area is done through the frame pointer.
13182 The offset between the frame pointer and the stack pointer is calculated
13183 relative to the value of the stack pointer at the end of the function
13184 prologue, and moving instructions that access the redzone area via the frame
13185 pointer inside the push sequence violates this assumption. */
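/* Illustrative example (not emitted code, assumes the 128-byte x86-64 red
   zone): a store such as 'movq %rax, -120(%rbp)' is only guaranteed to lie
   within the protected red zone once %rsp has reached its final prologue
   value; scheduled in between the register-save pushes it could sit more
   than 128 bytes below the then-current %rsp and be clobbered by a signal
   handler, hence the memory blockage below. */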
13186 if (frame_pointer_needed && frame.red_zone_size)
13187 emit_insn (gen_memory_blockage ());
13189 /* SEH requires that the prologue end within 256 bytes of the start of
13190 the function. Prevent instruction schedules that would extend that.
13191 Further, prevent alloca modifications to the stack pointer from being
13192 combined with prologue modifications. */
13193 if (TARGET_SEH)
13194 emit_insn (gen_prologue_use (stack_pointer_rtx));
13197 /* Emit code to restore REG using a POP insn. */
13199 static void
13200 ix86_emit_restore_reg_using_pop (rtx reg)
13202 struct machine_function *m = cfun->machine;
13203 rtx_insn *insn = emit_insn (gen_pop (reg));
13205 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13206 m->fs.sp_offset -= UNITS_PER_WORD;
13208 if (m->fs.cfa_reg == crtl->drap_reg
13209 && REGNO (reg) == REGNO (crtl->drap_reg))
13211 /* Previously we'd represented the CFA as an expression
13212 like *(%ebp - 8). We've just popped that value from
13213 the stack, which means we need to reset the CFA to
13214 the drap register. This will remain until we restore
13215 the stack pointer. */
13216 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13217 RTX_FRAME_RELATED_P (insn) = 1;
13219 /* This means that the DRAP register is valid for addressing too. */
13220 m->fs.drap_valid = true;
13221 return;
13224 if (m->fs.cfa_reg == stack_pointer_rtx)
13226 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13227 x = gen_rtx_SET (stack_pointer_rtx, x);
13228 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13229 RTX_FRAME_RELATED_P (insn) = 1;
13231 m->fs.cfa_offset -= UNITS_PER_WORD;
13234 /* When the frame pointer is the CFA, and we pop it, we are
13235 swapping back to the stack pointer as the CFA. This happens
13236 for stack frames that don't allocate other data, so we assume
13237 the stack pointer is now pointing at the return address, i.e.
13238 the function entry state, which makes the offset be 1 word. */
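/* For instance (illustrative): in a frame that only pushed %rbp, the CFA
   before the pop is %rbp + 2*UNITS_PER_WORD; after 'pop %rbp' it becomes
   %rsp + UNITS_PER_WORD, i.e. the slot holding the return address. */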
13239 if (reg == hard_frame_pointer_rtx)
13241 m->fs.fp_valid = false;
13242 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13244 m->fs.cfa_reg = stack_pointer_rtx;
13245 m->fs.cfa_offset -= UNITS_PER_WORD;
13247 add_reg_note (insn, REG_CFA_DEF_CFA,
13248 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13249 GEN_INT (m->fs.cfa_offset)));
13250 RTX_FRAME_RELATED_P (insn) = 1;
13255 /* Emit code to restore saved registers using POP insns. */
13257 static void
13258 ix86_emit_restore_regs_using_pop (void)
13260 unsigned int regno;
13262 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13263 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
13264 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13267 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
13268 omit the emit and only attach the notes. */
13270 static void
13271 ix86_emit_leave (rtx_insn *insn)
13273 struct machine_function *m = cfun->machine;
13274 if (!insn)
13275 insn = emit_insn (ix86_gen_leave ());
13277 ix86_add_queued_cfa_restore_notes (insn);
13279 gcc_assert (m->fs.fp_valid);
13280 m->fs.sp_valid = true;
13281 m->fs.sp_realigned = false;
13282 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13283 m->fs.fp_valid = false;
13285 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13287 m->fs.cfa_reg = stack_pointer_rtx;
13288 m->fs.cfa_offset = m->fs.sp_offset;
13290 add_reg_note (insn, REG_CFA_DEF_CFA,
13291 plus_constant (Pmode, stack_pointer_rtx,
13292 m->fs.sp_offset));
13293 RTX_FRAME_RELATED_P (insn) = 1;
13295 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13296 m->fs.fp_offset);
13299 /* Emit code to restore saved registers using MOV insns.
13300 First register is restored from CFA - CFA_OFFSET. */
13301 static void
13302 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13303 bool maybe_eh_return)
13305 struct machine_function *m = cfun->machine;
13306 unsigned int regno;
13308 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13309 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13311 rtx reg = gen_rtx_REG (word_mode, regno);
13312 rtx mem;
13313 rtx_insn *insn;
13315 mem = choose_baseaddr (cfa_offset, NULL);
13316 mem = gen_frame_mem (word_mode, mem);
13317 insn = emit_move_insn (reg, mem);
13319 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13321 /* Previously we'd represented the CFA as an expression
13322 like *(%ebp - 8). We've just reloaded that value from
13323 the stack, which means we need to reset the CFA to
13324 the drap register. This will remain until we restore
13325 the stack pointer. */
13326 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13327 RTX_FRAME_RELATED_P (insn) = 1;
13329 /* This means that the DRAP register is valid for addressing. */
13330 m->fs.drap_valid = true;
13332 else
13333 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13335 cfa_offset -= UNITS_PER_WORD;
13339 /* Emit code to restore saved SSE registers using MOV insns.
13340 First register is restored from CFA - CFA_OFFSET. */
13341 static void
13342 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13343 bool maybe_eh_return)
13345 unsigned int regno;
13347 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13348 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13350 rtx reg = gen_rtx_REG (V4SFmode, regno);
13351 rtx mem;
13352 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
13354 mem = choose_baseaddr (cfa_offset, &align);
13355 mem = gen_rtx_MEM (V4SFmode, mem);
13357 /* The location alignment depends upon the base register. */
13358 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
13359 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
13360 set_mem_align (mem, align);
13361 emit_insn (gen_rtx_SET (reg, mem));
13363 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13365 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13369 static void
13370 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
13371 bool use_call, int style)
13373 struct machine_function *m = cfun->machine;
13374 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13375 + m->call_ms2sysv_extra_regs;
13376 rtvec v;
13377 unsigned int elems_needed, align, i, vi = 0;
13378 rtx_insn *insn;
13379 rtx sym, tmp;
13380 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
13381 rtx r10 = NULL_RTX;
13382 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13383 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
13384 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
13385 rtx rsi_frame_load = NULL_RTX;
13386 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
13387 enum xlogue_stub stub;
13389 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
13391 /* If using a realigned stack, we should never start with padding. */
13392 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
13394 /* Set up RSI as the stub's base pointer. */
13395 align = GET_MODE_ALIGNMENT (V4SFmode);
13396 tmp = choose_baseaddr (rsi_offset, &align);
13397 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13398 emit_insn (gen_rtx_SET (rsi, tmp));
13400 /* Get a symbol for the stub. */
13401 if (frame_pointer_needed)
13402 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
13403 : XLOGUE_STUB_RESTORE_HFP_TAIL;
13404 else
13405 stub = use_call ? XLOGUE_STUB_RESTORE
13406 : XLOGUE_STUB_RESTORE_TAIL;
13407 sym = xlogue.get_stub_rtx (stub);
13409 elems_needed = ncregs;
13410 if (use_call)
13411 elems_needed += 1;
13412 else
13413 elems_needed += frame_pointer_needed ? 5 : 3;
13414 v = rtvec_alloc (elems_needed);
13416 /* We call the epilogue stub when we need to pop incoming args or we are
13417 doing a sibling call as the tail. Otherwise, we emit a jmp to the
13418 epilogue stub, and that jmp is the tail-call. */
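/* Illustratively (stub names omitted): the use_call form emits a
   'call <restore stub>' so that control returns here and we can still pop
   incoming args or emit the sibcall; the tail form instead emits
   'jmp <restore-tail stub>' and the stub performs the final return itself. */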
13419 if (use_call)
13420 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13421 else
13423 RTVEC_ELT (v, vi++) = ret_rtx;
13424 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13425 if (frame_pointer_needed)
13427 rtx rbp = gen_rtx_REG (DImode, BP_REG);
13428 gcc_assert (m->fs.fp_valid);
13429 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
13431 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
13432 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
13433 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
13434 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
13435 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
13437 else
13439 /* If no hard frame pointer, we set R10 to the SP restore value. */
13440 gcc_assert (!m->fs.fp_valid);
13441 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13442 gcc_assert (m->fs.sp_valid);
13444 r10 = gen_rtx_REG (DImode, R10_REG);
13445 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
13446 emit_insn (gen_rtx_SET (r10, tmp));
13448 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
13452 /* Generate frame load insns and restore notes. */
13453 for (i = 0; i < ncregs; ++i)
13455 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13456 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
13457 rtx reg, frame_load;
13459 reg = gen_rtx_REG (mode, r.regno);
13460 frame_load = gen_frame_load (reg, rsi, r.offset);
13462 /* Save RSI frame load insn & note to add last. */
13463 if (r.regno == SI_REG)
13465 gcc_assert (!rsi_frame_load);
13466 rsi_frame_load = frame_load;
13467 rsi_restore_offset = r.offset;
13469 else
13471 RTVEC_ELT (v, vi++) = frame_load;
13472 ix86_add_cfa_restore_note (NULL, reg, r.offset);
13476 /* Add RSI frame load & restore note at the end. */
13477 gcc_assert (rsi_frame_load);
13478 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
13479 RTVEC_ELT (v, vi++) = rsi_frame_load;
13480 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
13481 rsi_restore_offset);
13483 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
13484 if (!use_call && !frame_pointer_needed)
13486 gcc_assert (m->fs.sp_valid);
13487 gcc_assert (!m->fs.sp_realigned);
13489 /* At this point, R10 should point to frame.stack_realign_offset. */
13490 if (m->fs.cfa_reg == stack_pointer_rtx)
13491 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
13492 m->fs.sp_offset = frame.stack_realign_offset;
13495 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
13496 tmp = gen_rtx_PARALLEL (VOIDmode, v);
13497 if (use_call)
13498 insn = emit_insn (tmp);
13499 else
13501 insn = emit_jump_insn (tmp);
13502 JUMP_LABEL (insn) = ret_rtx;
13504 if (frame_pointer_needed)
13505 ix86_emit_leave (insn);
13506 else
13508 /* Need CFA adjust note. */
13509 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
13510 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
13514 RTX_FRAME_RELATED_P (insn) = true;
13515 ix86_add_queued_cfa_restore_notes (insn);
13517 /* If we're not doing a tail-call, we need to adjust the stack. */
13518 if (use_call && m->fs.sp_valid)
13520 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
13521 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13522 GEN_INT (dealloc), style,
13523 m->fs.cfa_reg == stack_pointer_rtx);
13527 /* Restore function stack, frame, and registers. */
13529 void
13530 ix86_expand_epilogue (int style)
13532 struct machine_function *m = cfun->machine;
13533 struct machine_frame_state frame_state_save = m->fs;
13534 struct ix86_frame frame;
13535 bool restore_regs_via_mov;
13536 bool using_drap;
13537 bool restore_stub_is_tail = false;
13539 if (ix86_function_naked (current_function_decl))
13541 /* The program should not reach this point. */
13542 emit_insn (gen_ud2 ());
13543 return;
13546 ix86_finalize_stack_frame_flags ();
13547 frame = m->frame;
13549 m->fs.sp_realigned = stack_realign_fp;
13550 m->fs.sp_valid = stack_realign_fp
13551 || !frame_pointer_needed
13552 || crtl->sp_is_unchanging;
13553 gcc_assert (!m->fs.sp_valid
13554 || m->fs.sp_offset == frame.stack_pointer_offset);
13556 /* The FP must be valid if the frame pointer is present. */
13557 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13558 gcc_assert (!m->fs.fp_valid
13559 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13561 /* We must have *some* valid pointer to the stack frame. */
13562 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13564 /* The DRAP is never valid at this point. */
13565 gcc_assert (!m->fs.drap_valid);
13567 /* See the comment about red zone and frame
13568 pointer usage in ix86_expand_prologue. */
13569 if (frame_pointer_needed && frame.red_zone_size)
13570 emit_insn (gen_memory_blockage ());
13572 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13573 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13575 /* Determine the CFA offset of the end of the red-zone. */
13576 m->fs.red_zone_offset = 0;
13577 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13579 /* The red-zone begins below the return address and the error code in
13580 an exception handler. */
13581 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
13583 /* When the register save area is in the aligned portion of
13584 the stack, determine the maximum runtime displacement that
13585 matches up with the aligned frame. */
13586 if (stack_realign_drap)
13587 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13588 + UNITS_PER_WORD);
13591 /* Special care must be taken for the normal return case of a function
13592 using eh_return: the eax and edx registers are marked as saved, but
13593 not restored along this path. Adjust the save location to match. */
13594 if (crtl->calls_eh_return && style != 2)
13595 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
13597 /* EH_RETURN requires the use of moves to function properly. */
13598 if (crtl->calls_eh_return)
13599 restore_regs_via_mov = true;
13600 /* SEH requires the use of pops to identify the epilogue. */
13601 else if (TARGET_SEH)
13602 restore_regs_via_mov = false;
13603 /* If we're only restoring one register and sp cannot be used, then
13604 use a move instruction to restore the register, since it's
13605 less work than reloading sp and popping the register. */
13606 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
13607 restore_regs_via_mov = true;
13608 else if (TARGET_EPILOGUE_USING_MOVE
13609 && cfun->machine->use_fast_prologue_epilogue
13610 && (frame.nregs > 1
13611 || m->fs.sp_offset != frame.reg_save_offset))
13612 restore_regs_via_mov = true;
13613 else if (frame_pointer_needed
13614 && !frame.nregs
13615 && m->fs.sp_offset != frame.reg_save_offset)
13616 restore_regs_via_mov = true;
13617 else if (frame_pointer_needed
13618 && TARGET_USE_LEAVE
13619 && cfun->machine->use_fast_prologue_epilogue
13620 && frame.nregs == 1)
13621 restore_regs_via_mov = true;
13622 else
13623 restore_regs_via_mov = false;
13625 if (restore_regs_via_mov || frame.nsseregs)
13627 /* Ensure that the entire register save area is addressable via
13628 the stack pointer, if we will restore SSE regs via sp. */
13629 if (TARGET_64BIT
13630 && m->fs.sp_offset > 0x7fffffff
13631 && sp_valid_at (frame.stack_realign_offset)
13632 && (frame.nsseregs + frame.nregs) != 0)
13634 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13635 GEN_INT (m->fs.sp_offset
13636 - frame.sse_reg_save_offset),
13637 style,
13638 m->fs.cfa_reg == stack_pointer_rtx);
13642 /* If there are any SSE registers to restore, then we have to do it
13643 via moves, since there's obviously no pop for SSE regs. */
13644 if (frame.nsseregs)
13645 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13646 style == 2);
13648 if (m->call_ms2sysv)
13650 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
13652 /* We cannot use a tail-call for the stub if:
13653 1. We have to pop incoming args,
13654 2. We have additional int regs to restore,
13655 3. A sibling call will be the tail-call, or
13656 4. We are emitting an eh_return_internal epilogue.
13658 TODO: Item 4 has not yet been tested!
13660 If any of the above are true, we will call the stub rather than
13661 jump to it. */
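/* Illustrative mapping to the expression below: item 1 is pop_incoming_args,
   item 2 is frame.nregs, and items 3/4 are covered by style != 1, since
   sibcall epilogues use style 0 and eh_return epilogues use style 2. */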
13662 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
13663 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
13666 /* If using an out-of-line stub that is a tail-call, then... */
13667 if (m->call_ms2sysv && restore_stub_is_tail)
13669 /* TODO: paranoid tests. (remove eventually) */
13670 gcc_assert (m->fs.sp_valid);
13671 gcc_assert (!m->fs.sp_realigned);
13672 gcc_assert (!m->fs.fp_valid);
13673 gcc_assert (!m->fs.realigned);
13674 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
13675 gcc_assert (!crtl->drap_reg);
13676 gcc_assert (!frame.nregs);
13678 else if (restore_regs_via_mov)
13680 rtx t;
13682 if (frame.nregs)
13683 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13685 /* eh_return epilogues need %ecx added to the stack pointer. */
13686 if (style == 2)
13688 rtx sa = EH_RETURN_STACKADJ_RTX;
13689 rtx_insn *insn;
13691 /* %ecx can't be used for both DRAP register and eh_return. */
13692 if (crtl->drap_reg)
13693 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
13695 /* regparm nested functions don't work with eh_return. */
13696 gcc_assert (!ix86_static_chain_on_stack);
13698 if (frame_pointer_needed)
13700 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13701 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13702 emit_insn (gen_rtx_SET (sa, t));
13704 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13705 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13707 /* Note that we use SA as a temporary CFA, as the return
13708 address is at the proper place relative to it. We
13709 pretend this happens at the FP restore insn because
13710 prior to this insn the FP would be stored at the wrong
13711 offset relative to SA, and after this insn we have no
13712 other reasonable register to use for the CFA. We don't
13713 bother resetting the CFA to the SP for the duration of
13714 the return insn. */
13715 add_reg_note (insn, REG_CFA_DEF_CFA,
13716 plus_constant (Pmode, sa, UNITS_PER_WORD));
13717 ix86_add_queued_cfa_restore_notes (insn);
13718 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13719 RTX_FRAME_RELATED_P (insn) = 1;
13721 m->fs.cfa_reg = sa;
13722 m->fs.cfa_offset = UNITS_PER_WORD;
13723 m->fs.fp_valid = false;
13725 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13726 const0_rtx, style, false);
13728 else
13730 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13731 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13732 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13733 ix86_add_queued_cfa_restore_notes (insn);
13735 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13736 if (m->fs.cfa_offset != UNITS_PER_WORD)
13738 m->fs.cfa_offset = UNITS_PER_WORD;
13739 add_reg_note (insn, REG_CFA_DEF_CFA,
13740 plus_constant (Pmode, stack_pointer_rtx,
13741 UNITS_PER_WORD));
13742 RTX_FRAME_RELATED_P (insn) = 1;
13745 m->fs.sp_offset = UNITS_PER_WORD;
13746 m->fs.sp_valid = true;
13747 m->fs.sp_realigned = false;
13750 else
13752 /* SEH requires that the function end with (1) a stack adjustment
13753 if necessary, (2) a sequence of pops, and (3) a return or
13754 jump instruction. Prevent insns from the function body from
13755 being scheduled into this sequence. */
13756 if (TARGET_SEH)
13758 /* Prevent a catch region from being adjacent to the standard
13759 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
13760 several other flags that would be interesting to test are
13761 set up yet. */
13762 if (flag_non_call_exceptions)
13763 emit_insn (gen_nops (const1_rtx));
13764 else
13765 emit_insn (gen_blockage ());
13768 /* The first step is to deallocate the stack frame so that we can
13769 pop the registers. If the stack pointer was realigned, it needs
13770 to be restored now. Also do it on SEH targets for a very large
13771 frame, as the emitted instructions aren't allowed by the ABI
13772 in epilogues. */
13773 if (!m->fs.sp_valid || m->fs.sp_realigned
13774 || (TARGET_SEH
13775 && (m->fs.sp_offset - frame.reg_save_offset
13776 >= SEH_MAX_FRAME_SIZE)))
13778 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13779 GEN_INT (m->fs.fp_offset
13780 - frame.reg_save_offset),
13781 style, false);
13783 else if (m->fs.sp_offset != frame.reg_save_offset)
13785 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13786 GEN_INT (m->fs.sp_offset
13787 - frame.reg_save_offset),
13788 style,
13789 m->fs.cfa_reg == stack_pointer_rtx);
13792 ix86_emit_restore_regs_using_pop ();
13795 /* If we used a frame pointer and haven't already got rid of it,
13796 then do so now. */
13797 if (m->fs.fp_valid)
13799 /* If the stack pointer is valid and pointing at the frame
13800 pointer store address, then we only need a pop. */
13801 if (sp_valid_at (frame.hfp_save_offset)
13802 && m->fs.sp_offset == frame.hfp_save_offset)
13803 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13804 /* Leave results in shorter dependency chains on CPUs that are
13805 able to grok it fast. */
13806 else if (TARGET_USE_LEAVE
13807 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13808 || !cfun->machine->use_fast_prologue_epilogue)
13809 ix86_emit_leave (NULL);
13810 else
13812 pro_epilogue_adjust_stack (stack_pointer_rtx,
13813 hard_frame_pointer_rtx,
13814 const0_rtx, style, !using_drap);
13815 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13819 if (using_drap)
13821 int param_ptr_offset = UNITS_PER_WORD;
13822 rtx_insn *insn;
13824 gcc_assert (stack_realign_drap);
13826 if (ix86_static_chain_on_stack)
13827 param_ptr_offset += UNITS_PER_WORD;
13828 if (!call_used_regs[REGNO (crtl->drap_reg)])
13829 param_ptr_offset += UNITS_PER_WORD;
13831 insn = emit_insn (gen_rtx_SET
13832 (stack_pointer_rtx,
13833 gen_rtx_PLUS (Pmode,
13834 crtl->drap_reg,
13835 GEN_INT (-param_ptr_offset))));
13836 m->fs.cfa_reg = stack_pointer_rtx;
13837 m->fs.cfa_offset = param_ptr_offset;
13838 m->fs.sp_offset = param_ptr_offset;
13839 m->fs.realigned = false;
13841 add_reg_note (insn, REG_CFA_DEF_CFA,
13842 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13843 GEN_INT (param_ptr_offset)));
13844 RTX_FRAME_RELATED_P (insn) = 1;
13846 if (!call_used_regs[REGNO (crtl->drap_reg)])
13847 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13850 /* At this point the stack pointer must be valid, and we must have
13851 restored all of the registers. We may not have deallocated the
13852 entire stack frame. We've delayed this until now because it may
13853 be possible to merge the local stack deallocation with the
13854 deallocation forced by ix86_static_chain_on_stack. */
13855 gcc_assert (m->fs.sp_valid);
13856 gcc_assert (!m->fs.sp_realigned);
13857 gcc_assert (!m->fs.fp_valid);
13858 gcc_assert (!m->fs.realigned);
13859 if (m->fs.sp_offset != UNITS_PER_WORD)
13861 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13862 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13863 style, true);
13865 else
13866 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13868 /* Sibcall epilogues don't want a return instruction. */
13869 if (style == 0)
13871 m->fs = frame_state_save;
13872 return;
13875 if (cfun->machine->func_type != TYPE_NORMAL)
13876 emit_jump_insn (gen_interrupt_return ());
13877 else if (crtl->args.pops_args && crtl->args.size)
13879 rtx popc = GEN_INT (crtl->args.pops_args);
13881 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
13882 address, do an explicit add, and jump indirectly to the caller. */
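/* A sketch of the emitted sequence (illustrative, not literal output):
   popl %ecx
   addl $<pops_args>, %esp
   jmp *%ecx */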
13884 if (crtl->args.pops_args >= 65536)
13886 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13887 rtx_insn *insn;
13889 /* There is no "pascal" calling convention in any 64bit ABI. */
13890 gcc_assert (!TARGET_64BIT);
13892 insn = emit_insn (gen_pop (ecx));
13893 m->fs.cfa_offset -= UNITS_PER_WORD;
13894 m->fs.sp_offset -= UNITS_PER_WORD;
13896 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13897 x = gen_rtx_SET (stack_pointer_rtx, x);
13898 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13899 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13900 RTX_FRAME_RELATED_P (insn) = 1;
13902 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13903 popc, -1, true);
13904 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13906 else
13907 emit_jump_insn (gen_simple_return_pop_internal (popc));
13909 else if (!m->call_ms2sysv || !restore_stub_is_tail)
13910 emit_jump_insn (gen_simple_return_internal ());
13912 /* Restore the state back to the state from the prologue,
13913 so that it's correct for the next epilogue. */
13914 m->fs = frame_state_save;
13917 /* Reset from the function's potential modifications. */
13919 static void
13920 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
13922 if (pic_offset_table_rtx
13923 && !ix86_use_pseudo_pic_reg ())
13924 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13926 if (TARGET_MACHO)
13928 rtx_insn *insn = get_last_insn ();
13929 rtx_insn *deleted_debug_label = NULL;
13931 /* Mach-O doesn't support labels at the end of objects, so if
13932 it looks like we might want one, take special action.
13933 First, collect any sequence of deleted debug labels. */
13934 while (insn
13935 && NOTE_P (insn)
13936 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13938 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13939 notes only, instead set their CODE_LABEL_NUMBER to -1,
13940 otherwise there would be code generation differences
13941 between -g and -g0. */
13942 if (NOTE_P (insn) && NOTE_KIND (insn)
13943 == NOTE_INSN_DELETED_DEBUG_LABEL)
13944 deleted_debug_label = insn;
13945 insn = PREV_INSN (insn);
13948 /* If we have:
13949 label:
13950 barrier
13951 then this needs to be detected, so skip past the barrier. */
13953 if (insn && BARRIER_P (insn))
13954 insn = PREV_INSN (insn);
13956 /* Up to now we've only seen notes or barriers. */
13957 if (insn)
13959 if (LABEL_P (insn)
13960 || (NOTE_P (insn)
13961 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
13962 /* Trailing label. */
13963 fputs ("\tnop\n", file);
13964 else if (cfun && ! cfun->is_thunk)
13966 /* See if we have a completely empty function body, skipping
13967 the special case of the picbase thunk emitted as asm. */
13968 while (insn && ! INSN_P (insn))
13969 insn = PREV_INSN (insn);
13970 /* If we don't find any insns, we've got an empty function body;
13971 i.e. completely empty, without a return or branch. This is
13972 taken as the case where a function body has been removed
13973 because it contains an inline __builtin_unreachable(). GCC
13974 declares that reaching __builtin_unreachable() means UB so
13975 we're not obliged to do anything special; however, we want
13976 non-zero-sized function bodies. To meet this, and help the
13977 user out, let's trap the case. */
13978 if (insn == NULL)
13979 fputs ("\tud2\n", file);
13982 else if (deleted_debug_label)
13983 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13984 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13985 CODE_LABEL_NUMBER (insn) = -1;
13989 /* Return a scratch register to use in the split stack prologue. The
13990 split stack prologue is used for -fsplit-stack. It is the first
13991 instructions in the function, even before the regular prologue.
13992 The scratch register can be any caller-saved register which is not
13993 used for parameters or for the static chain. */
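/* Rough summary of the choice below (descriptive only): 64-bit targets use
   %r11; fastcall functions use %eax (unsupported when nested); thiscall uses
   %edx, or %eax with a static chain; otherwise %ecx, falling back to %edx for
   a nested function with at most one register parameter; two register
   parameters with a nested function, or three register parameters, are
   rejected with sorry (). */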
13995 static unsigned int
13996 split_stack_prologue_scratch_regno (void)
13998 if (TARGET_64BIT)
13999 return R11_REG;
14000 else
14002 bool is_fastcall, is_thiscall;
14003 int regparm;
14005 is_fastcall = (lookup_attribute ("fastcall",
14006 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14007 != NULL);
14008 is_thiscall = (lookup_attribute ("thiscall",
14009 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14010 != NULL);
14011 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14013 if (is_fastcall)
14015 if (DECL_STATIC_CHAIN (cfun->decl))
14017 sorry ("-fsplit-stack does not support fastcall with "
14018 "nested function");
14019 return INVALID_REGNUM;
14021 return AX_REG;
14023 else if (is_thiscall)
14025 if (!DECL_STATIC_CHAIN (cfun->decl))
14026 return DX_REG;
14027 return AX_REG;
14029 else if (regparm < 3)
14031 if (!DECL_STATIC_CHAIN (cfun->decl))
14032 return CX_REG;
14033 else
14035 if (regparm >= 2)
14037 sorry ("-fsplit-stack does not support 2 register "
14038 "parameters for a nested function");
14039 return INVALID_REGNUM;
14041 return DX_REG;
14044 else
14046 /* FIXME: We could make this work by pushing a register
14047 around the addition and comparison. */
14048 sorry ("-fsplit-stack does not support 3 register parameters");
14049 return INVALID_REGNUM;
14054 /* A SYMBOL_REF for the function which allocates new stack space for
14055 -fsplit-stack. */
14057 static GTY(()) rtx split_stack_fn;
14059 /* A SYMBOL_REF for the __morestack function to call when using the
14060 large model. */
14062 static GTY(()) rtx split_stack_fn_large;
14064 /* Return location of the stack guard value in the TLS block. */
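/* On GNU/Linux targets, for example, the guard lives at a fixed,
   target-defined offset in the TCB, so the operand built below is a
   %fs- (64-bit) or %gs- (32-bit) relative memory reference; the exact
   offset comes from TARGET_THREAD_SPLIT_STACK_OFFSET. */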
14067 ix86_split_stack_guard (void)
14069 int offset;
14070 addr_space_t as = DEFAULT_TLS_SEG_REG;
14071 rtx r;
14073 gcc_assert (flag_split_stack);
14075 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14076 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14077 #else
14078 gcc_unreachable ();
14079 #endif
14081 r = GEN_INT (offset);
14082 r = gen_const_mem (Pmode, r);
14083 set_mem_addr_space (r, as);
14085 return r;
14088 /* Handle -fsplit-stack. These are the first instructions in the
14089 function, even before the regular prologue. */
14091 void
14092 ix86_expand_split_stack_prologue (void)
14094 struct ix86_frame frame;
14095 HOST_WIDE_INT allocate;
14096 unsigned HOST_WIDE_INT args_size;
14097 rtx_code_label *label;
14098 rtx limit, current, allocate_rtx, call_insn, call_fusage;
14099 rtx scratch_reg = NULL_RTX;
14100 rtx_code_label *varargs_label = NULL;
14101 rtx fn;
14103 gcc_assert (flag_split_stack && reload_completed);
14105 ix86_finalize_stack_frame_flags ();
14106 frame = cfun->machine->frame;
14107 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
14109 /* This is the label we will branch to if we have enough stack
14110 space. We expect the basic block reordering pass to reverse this
14111 branch if optimizing, so that we branch in the unlikely case. */
14112 label = gen_label_rtx ();
14114 /* We need to compare the stack pointer minus the frame size with
14115 the stack boundary in the TCB. The stack boundary always gives
14116 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
14117 can compare directly. Otherwise we need to do an addition. */
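/* Illustrative shapes of the check (64-bit sketch, not literal output):
   small frames: cmp %fs:<guard>, %rsp ; jae .Lhave_stack
   large frames: lea -<frame>(%rsp), %r11 ; cmp %fs:<guard>, %r11 ; jae ...
   where %r11 is the scratch register chosen below. */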
14119 limit = ix86_split_stack_guard ();
14121 if (allocate < SPLIT_STACK_AVAILABLE)
14122 current = stack_pointer_rtx;
14123 else
14125 unsigned int scratch_regno;
14126 rtx offset;
14128 /* We need a scratch register to hold the stack pointer minus
14129 the required frame size. Since this is the very start of the
14130 function, the scratch register can be any caller-saved
14131 register which is not used for parameters. */
14132 offset = GEN_INT (- allocate);
14133 scratch_regno = split_stack_prologue_scratch_regno ();
14134 if (scratch_regno == INVALID_REGNUM)
14135 return;
14136 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14137 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
14139 /* We don't use ix86_gen_add3 in this case because it will
14140 want to split to lea, but when not optimizing the insn
14141 will not be split after this point. */
14142 emit_insn (gen_rtx_SET (scratch_reg,
14143 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14144 offset)));
14146 else
14148 emit_move_insn (scratch_reg, offset);
14149 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
14150 stack_pointer_rtx));
14152 current = scratch_reg;
14155 ix86_expand_branch (GEU, current, limit, label);
14156 rtx_insn *jump_insn = get_last_insn ();
14157 JUMP_LABEL (jump_insn) = label;
14159 /* Mark the jump as very likely to be taken. */
14160 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
14162 if (split_stack_fn == NULL_RTX)
14164 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
14165 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
14167 fn = split_stack_fn;
14169 /* Get more stack space. We pass in the desired stack space and the
14170 size of the arguments to copy to the new stack. In 32-bit mode
14171 we push the parameters; __morestack will return on a new stack
14172 anyhow. In 64-bit mode we pass the parameters in r10 and
14173 r11. */
14174 allocate_rtx = GEN_INT (allocate);
14175 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
14176 call_fusage = NULL_RTX;
14177 rtx pop = NULL_RTX;
14178 if (TARGET_64BIT)
14180 rtx reg10, reg11;
14182 reg10 = gen_rtx_REG (Pmode, R10_REG);
14183 reg11 = gen_rtx_REG (Pmode, R11_REG);
14185 /* If this function uses a static chain, it will be in %r10.
14186 Preserve it across the call to __morestack. */
14187 if (DECL_STATIC_CHAIN (cfun->decl))
14189 rtx rax;
14191 rax = gen_rtx_REG (word_mode, AX_REG);
14192 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
14193 use_reg (&call_fusage, rax);
14196 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
14197 && !TARGET_PECOFF)
14199 HOST_WIDE_INT argval;
14201 gcc_assert (Pmode == DImode);
14202 /* When using the large model we need to load the address
14203 into a register, and we've run out of registers. So we
14204 switch to a different calling convention, and we call a
14205 different function: __morestack_large. We pass the
14206 argument size in the upper 32 bits of r10 and pass the
14207 frame size in the lower 32 bits. */
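/* Worked example (illustrative values): with args_size == 0x20 and
   allocate == 0x1000, r10 is loaded with 0x0000002000001000, i.e. the
   argument size in bits 63:32 and the frame size in bits 31:0. */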
14208 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
14209 gcc_assert ((args_size & 0xffffffff) == args_size);
14211 if (split_stack_fn_large == NULL_RTX)
14213 split_stack_fn_large =
14214 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
14215 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
14217 if (ix86_cmodel == CM_LARGE_PIC)
14219 rtx_code_label *label;
14220 rtx x;
14222 label = gen_label_rtx ();
14223 emit_label (label);
14224 LABEL_PRESERVE_P (label) = 1;
14225 emit_insn (gen_set_rip_rex64 (reg10, label));
14226 emit_insn (gen_set_got_offset_rex64 (reg11, label));
14227 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
14228 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
14229 UNSPEC_GOT);
14230 x = gen_rtx_CONST (Pmode, x);
14231 emit_move_insn (reg11, x);
14232 x = gen_rtx_PLUS (Pmode, reg10, reg11);
14233 x = gen_const_mem (Pmode, x);
14234 emit_move_insn (reg11, x);
14236 else
14237 emit_move_insn (reg11, split_stack_fn_large);
14239 fn = reg11;
14241 argval = ((args_size << 16) << 16) + allocate;
14242 emit_move_insn (reg10, GEN_INT (argval));
14244 else
14246 emit_move_insn (reg10, allocate_rtx);
14247 emit_move_insn (reg11, GEN_INT (args_size));
14248 use_reg (&call_fusage, reg11);
14251 use_reg (&call_fusage, reg10);
14253 else
14255 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
14256 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
14257 insn = emit_insn (gen_push (allocate_rtx));
14258 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
14259 pop = GEN_INT (2 * UNITS_PER_WORD);
14261 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
14262 GEN_INT (UNITS_PER_WORD), constm1_rtx,
14263 pop, false);
14264 add_function_usage_to (call_insn, call_fusage);
14265 if (!TARGET_64BIT)
14266 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
14267 /* Indicate that this function can't jump to non-local gotos. */
14268 make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
14270 /* In order to make call/return prediction work right, we now need
14271 to execute a return instruction. See
14272 libgcc/config/i386/morestack.S for the details on how this works.
14274 For flow purposes gcc must not see this as a return
14275 instruction--we need control flow to continue at the subsequent
14276 label. Therefore, we use an unspec. */
14277 gcc_assert (crtl->args.pops_args < 65536);
14278 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
14280 /* If we are in 64-bit mode and this function uses a static chain,
14281 we saved %r10 in %rax before calling __morestack. */
14282 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
14283 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14284 gen_rtx_REG (word_mode, AX_REG));
14286 /* If this function calls va_start, we need to store a pointer to
14287 the arguments on the old stack, because they may not have been
14288 all copied to the new stack. At this point the old stack can be
14289 found at the frame pointer value used by __morestack, because
14290 __morestack has set that up before calling back to us. Here we
14291 store that pointer in a scratch register, and in
14292 ix86_expand_prologue we store the scratch register in a stack
14293 slot. */
14294 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14296 unsigned int scratch_regno;
14297 rtx frame_reg;
14298 int words;
14300 scratch_regno = split_stack_prologue_scratch_regno ();
14301 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14302 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14304 /* 64-bit:
14305 fp -> old fp value
14306 return address within this function
14307 return address of caller of this function
14308 stack arguments
14309 So we add three words to get to the stack arguments.
14311 32-bit:
14312 fp -> old fp value
14313 return address within this function
14314 first argument to __morestack
14315 second argument to __morestack
14316 return address of caller of this function
14317 stack arguments
14318 So we add five words to get to the stack arguments.
14320 words = TARGET_64BIT ? 3 : 5;
14321 emit_insn (gen_rtx_SET (scratch_reg,
14322 gen_rtx_PLUS (Pmode, frame_reg,
14323 GEN_INT (words * UNITS_PER_WORD))));
14325 varargs_label = gen_label_rtx ();
14326 emit_jump_insn (gen_jump (varargs_label));
14327 JUMP_LABEL (get_last_insn ()) = varargs_label;
14329 emit_barrier ();
14332 emit_label (label);
14333 LABEL_NUSES (label) = 1;
14335 /* If this function calls va_start, we now have to set the scratch
14336 register for the case where we do not call __morestack. In this
14337 case we need to set it based on the stack pointer. */
14338 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14340 emit_insn (gen_rtx_SET (scratch_reg,
14341 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14342 GEN_INT (UNITS_PER_WORD))));
14344 emit_label (varargs_label);
14345 LABEL_NUSES (varargs_label) = 1;
14349 /* We may have to tell the dataflow pass that the split stack prologue
14350 is initializing a scratch register. */
14352 static void
14353 ix86_live_on_entry (bitmap regs)
14355 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14357 gcc_assert (flag_split_stack);
14358 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14362 /* Extract the parts of an RTL expression that is a valid memory address
14363 for an instruction. Return 0 if the structure of the address is
14364 grossly off. Return -1 if the address contains ASHIFT, so it is not
14365 strictly valid, but is still used for computing the length of a lea instruction. */
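/* Worked example (illustrative): the address
   (plus (plus (reg %rax) (mult (reg %rbx) (const_int 4))) (const_int 8)),
   i.e. 8(%rax,%rbx,4), decomposes to base = %rax, index = %rbx, scale = 4,
   disp = 8, seg = generic, and the function returns 1. An lea source that
   uses ASHIFT instead of MULT decomposes the same way but returns -1. */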
14368 ix86_decompose_address (rtx addr, struct ix86_address *out)
14370 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14371 rtx base_reg, index_reg;
14372 HOST_WIDE_INT scale = 1;
14373 rtx scale_rtx = NULL_RTX;
14374 rtx tmp;
14375 int retval = 1;
14376 addr_space_t seg = ADDR_SPACE_GENERIC;
14378 /* Allow zero-extended SImode addresses,
14379 they will be emitted with addr32 prefix. */
14380 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14382 if (GET_CODE (addr) == ZERO_EXTEND
14383 && GET_MODE (XEXP (addr, 0)) == SImode)
14385 addr = XEXP (addr, 0);
14386 if (CONST_INT_P (addr))
14387 return 0;
14389 else if (GET_CODE (addr) == AND
14390 && const_32bit_mask (XEXP (addr, 1), DImode))
14392 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
14393 if (addr == NULL_RTX)
14394 return 0;
14396 if (CONST_INT_P (addr))
14397 return 0;
14401 /* Allow SImode subregs of DImode addresses,
14402 they will be emitted with addr32 prefix. */
14403 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14405 if (SUBREG_P (addr)
14406 && GET_MODE (SUBREG_REG (addr)) == DImode)
14408 addr = SUBREG_REG (addr);
14409 if (CONST_INT_P (addr))
14410 return 0;
14414 if (REG_P (addr))
14415 base = addr;
14416 else if (SUBREG_P (addr))
14418 if (REG_P (SUBREG_REG (addr)))
14419 base = addr;
14420 else
14421 return 0;
14423 else if (GET_CODE (addr) == PLUS)
14425 rtx addends[4], op;
14426 int n = 0, i;
14428 op = addr;
14431 if (n >= 4)
14432 return 0;
14433 addends[n++] = XEXP (op, 1);
14434 op = XEXP (op, 0);
14436 while (GET_CODE (op) == PLUS);
14437 if (n >= 4)
14438 return 0;
14439 addends[n] = op;
14441 for (i = n; i >= 0; --i)
14443 op = addends[i];
14444 switch (GET_CODE (op))
14446 case MULT:
14447 if (index)
14448 return 0;
14449 index = XEXP (op, 0);
14450 scale_rtx = XEXP (op, 1);
14451 break;
14453 case ASHIFT:
14454 if (index)
14455 return 0;
14456 index = XEXP (op, 0);
14457 tmp = XEXP (op, 1);
14458 if (!CONST_INT_P (tmp))
14459 return 0;
14460 scale = INTVAL (tmp);
14461 if ((unsigned HOST_WIDE_INT) scale > 3)
14462 return 0;
14463 scale = 1 << scale;
14464 break;
14466 case ZERO_EXTEND:
14467 op = XEXP (op, 0);
14468 if (GET_CODE (op) != UNSPEC)
14469 return 0;
14470 /* FALLTHRU */
14472 case UNSPEC:
14473 if (XINT (op, 1) == UNSPEC_TP
14474 && TARGET_TLS_DIRECT_SEG_REFS
14475 && seg == ADDR_SPACE_GENERIC)
14476 seg = DEFAULT_TLS_SEG_REG;
14477 else
14478 return 0;
14479 break;
14481 case SUBREG:
14482 if (!REG_P (SUBREG_REG (op)))
14483 return 0;
14484 /* FALLTHRU */
14486 case REG:
14487 if (!base)
14488 base = op;
14489 else if (!index)
14490 index = op;
14491 else
14492 return 0;
14493 break;
14495 case CONST:
14496 case CONST_INT:
14497 case SYMBOL_REF:
14498 case LABEL_REF:
14499 if (disp)
14500 return 0;
14501 disp = op;
14502 break;
14504 default:
14505 return 0;
14509 else if (GET_CODE (addr) == MULT)
14511 index = XEXP (addr, 0); /* index*scale */
14512 scale_rtx = XEXP (addr, 1);
14514 else if (GET_CODE (addr) == ASHIFT)
14516 /* We're called for lea too, which implements ashift on occasion. */
14517 index = XEXP (addr, 0);
14518 tmp = XEXP (addr, 1);
14519 if (!CONST_INT_P (tmp))
14520 return 0;
14521 scale = INTVAL (tmp);
14522 if ((unsigned HOST_WIDE_INT) scale > 3)
14523 return 0;
14524 scale = 1 << scale;
14525 retval = -1;
14527 else
14528 disp = addr; /* displacement */
14530 if (index)
14532 if (REG_P (index))
14534 else if (SUBREG_P (index)
14535 && REG_P (SUBREG_REG (index)))
14537 else
14538 return 0;
14541 /* Extract the integral value of scale. */
14542 if (scale_rtx)
14544 if (!CONST_INT_P (scale_rtx))
14545 return 0;
14546 scale = INTVAL (scale_rtx);
14549 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14550 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14552 /* Avoid useless 0 displacement. */
14553 if (disp == const0_rtx && (base || index))
14554 disp = NULL_RTX;
14556 /* Allow arg pointer and stack pointer as index if there is no scaling. */
14557 if (base_reg && index_reg && scale == 1
14558 && (REGNO (index_reg) == ARG_POINTER_REGNUM
14559 || REGNO (index_reg) == FRAME_POINTER_REGNUM
14560 || REGNO (index_reg) == SP_REG))
14562 std::swap (base, index);
14563 std::swap (base_reg, index_reg);
14566 /* Special case: %ebp cannot be encoded as a base without a displacement.
14567 Similarly %r13. */
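/* Hardware background (descriptive only): in ModRM/SIB encoding, a base
   register field of 0b101 with mod == 00 means "disp32, no base" (or
   RIP-relative in 64-bit mode), so %ebp/%rbp and %r13 as a base always need
   at least an explicit zero disp8. */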
14568 if (!disp && base_reg
14569 && (REGNO (base_reg) == ARG_POINTER_REGNUM
14570 || REGNO (base_reg) == FRAME_POINTER_REGNUM
14571 || REGNO (base_reg) == BP_REG
14572 || REGNO (base_reg) == R13_REG))
14573 disp = const0_rtx;
14575 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14576 Avoid this by transforming to [%esi+0].
14577 Reload calls address legitimization without cfun defined, so we need
14578 to test cfun for being non-NULL. */
14579 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14580 && base_reg && !index_reg && !disp
14581 && REGNO (base_reg) == SI_REG)
14582 disp = const0_rtx;
14584 /* Special case: encode reg+reg instead of reg*2. */
14585 if (!base && index && scale == 2)
14586 base = index, base_reg = index_reg, scale = 1;
14588 /* Special case: scaling cannot be encoded without base or displacement. */
14589 if (!base && !disp && index && scale != 1)
14590 disp = const0_rtx;
14592 out->base = base;
14593 out->index = index;
14594 out->disp = disp;
14595 out->scale = scale;
14596 out->seg = seg;
14598 return retval;
14601 /* Return the cost of the memory address x.
14602 For i386, it is better to use a complex address than let gcc copy
14603 the address into a reg and make a new pseudo. But not if the address
14604 requires two regs - that would mean more pseudos with longer
14605 lifetimes. */
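/* For example (descriptive of the logic below): before reload, a purely
   symbolic address with no registers costs 1, an address whose base is a
   pseudo costs 2, and base+index through two pseudos costs 3; the affected
   K6 ModR/M forms add a further 10 on top of that. */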
14606 static int
14607 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14609 struct ix86_address parts;
14610 int cost = 1;
14611 int ok = ix86_decompose_address (x, &parts);
14613 gcc_assert (ok);
14615 if (parts.base && SUBREG_P (parts.base))
14616 parts.base = SUBREG_REG (parts.base);
14617 if (parts.index && SUBREG_P (parts.index))
14618 parts.index = SUBREG_REG (parts.index);
14620 /* Attempt to minimize the number of registers in the address by increasing
14621 the address cost for each used register. We don't increase the address cost
14622 for "pic_offset_table_rtx". When a memory operand using "pic_offset_table_rtx"
14623 is not invariant itself, it most likely means that the base or index is not
14624 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14625 which is not profitable for x86. */
14626 if (parts.base
14627 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14628 && (current_pass->type == GIMPLE_PASS
14629 || !pic_offset_table_rtx
14630 || !REG_P (parts.base)
14631 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14632 cost++;
14634 if (parts.index
14635 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14636 && (current_pass->type == GIMPLE_PASS
14637 || !pic_offset_table_rtx
14638 || !REG_P (parts.index)
14639 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14640 cost++;
14642 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
14643 since its predecode logic can't detect the length of such instructions
14644 and it degenerates to vector decoding. Increase the cost of such
14645 addresses here. The penalty is at least 2 cycles. It may be worthwhile
14646 to split such addresses or even refuse such addresses at all.
14648 The following addressing modes are affected:
14649 [base+scale*index]
14650 [scale*index+disp]
14651 [base+index]
14653 The first and last case may be avoidable by explicitly coding a zero in the
14654 memory address, but I don't have an AMD-K6 machine handy to check this
14655 theory. */
14657 if (TARGET_K6
14658 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14659 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14660 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14661 cost += 10;
14663 return cost;
14666 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
14667 this is used to form addresses to local data when -fPIC is in
14668 use. */
14670 static bool
14671 darwin_local_data_pic (rtx disp)
14673 return (GET_CODE (disp) == UNSPEC
14674 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14677 /* True if operand X should be loaded from GOT. */
14679 bool
14680 ix86_force_load_from_GOT_p (rtx x)
14682 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
14683 && !TARGET_PECOFF && !TARGET_MACHO
14684 && !flag_plt && !flag_pic
14685 && ix86_cmodel != CM_LARGE
14686 && GET_CODE (x) == SYMBOL_REF
14687 && SYMBOL_REF_FUNCTION_P (x)
14688 && !SYMBOL_REF_LOCAL_P (x));
14691 /* Determine if a given RTX is a valid constant. We already know this
14692 satisfies CONSTANT_P. */
14694 static bool
14695 ix86_legitimate_constant_p (machine_mode mode, rtx x)
14697 /* Pointer bounds constants are not valid. */
14698 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14699 return false;
14701 switch (GET_CODE (x))
14703 case CONST:
14704 x = XEXP (x, 0);
14706 if (GET_CODE (x) == PLUS)
14708 if (!CONST_INT_P (XEXP (x, 1)))
14709 return false;
14710 x = XEXP (x, 0);
14713 if (TARGET_MACHO && darwin_local_data_pic (x))
14714 return true;
14716 /* Only some unspecs are valid as "constants". */
14717 if (GET_CODE (x) == UNSPEC)
14718 switch (XINT (x, 1))
14720 case UNSPEC_GOT:
14721 case UNSPEC_GOTOFF:
14722 case UNSPEC_PLTOFF:
14723 return TARGET_64BIT;
14724 case UNSPEC_TPOFF:
14725 case UNSPEC_NTPOFF:
14726 x = XVECEXP (x, 0, 0);
14727 return (GET_CODE (x) == SYMBOL_REF
14728 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14729 case UNSPEC_DTPOFF:
14730 x = XVECEXP (x, 0, 0);
14731 return (GET_CODE (x) == SYMBOL_REF
14732 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14733 default:
14734 return false;
14737 /* We must have drilled down to a symbol. */
14738 if (GET_CODE (x) == LABEL_REF)
14739 return true;
14740 if (GET_CODE (x) != SYMBOL_REF)
14741 return false;
14742 /* FALLTHRU */
14744 case SYMBOL_REF:
14745 /* TLS symbols are never valid. */
14746 if (SYMBOL_REF_TLS_MODEL (x))
14747 return false;
14749 /* DLLIMPORT symbols are never valid. */
14750 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14751 && SYMBOL_REF_DLLIMPORT_P (x))
14752 return false;
14754 #if TARGET_MACHO
14755 /* mdynamic-no-pic */
14756 if (MACHO_DYNAMIC_NO_PIC_P)
14757 return machopic_symbol_defined_p (x);
14758 #endif
14760 /* External function address should be loaded
14761 via the GOT slot to avoid PLT. */
14762 if (ix86_force_load_from_GOT_p (x))
14763 return false;
14765 break;
14767 CASE_CONST_SCALAR_INT:
14768 switch (mode)
14770 case E_TImode:
14771 if (TARGET_64BIT)
14772 return true;
14773 /* FALLTHRU */
14774 case E_OImode:
14775 case E_XImode:
14776 if (!standard_sse_constant_p (x, mode))
14777 return false;
14778 default:
14779 break;
14781 break;
14783 case CONST_VECTOR:
14784 if (!standard_sse_constant_p (x, mode))
14785 return false;
14787 default:
14788 break;
14791 /* Otherwise we handle everything else in the move patterns. */
14792 return true;
14795 /* Determine if it's legal to put X into the constant pool. This
14796 is not possible for the address of thread-local symbols, which
14797 is checked above. */
14799 static bool
14800 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14802 /* We can put any immediate constant in memory. */
14803 switch (GET_CODE (x))
14805 CASE_CONST_ANY:
14806 return false;
14808 default:
14809 break;
14812 return !ix86_legitimate_constant_p (mode, x);
14815 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
14816 otherwise zero. */
14818 static bool
14819 is_imported_p (rtx x)
14821 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14822 || GET_CODE (x) != SYMBOL_REF)
14823 return false;
14825 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14829 /* Nonzero if the constant value X is a legitimate general operand
14830 when generating PIC code. It is given that flag_pic is on and
14831 that X satisfies CONSTANT_P. */
14833 bool
14834 legitimate_pic_operand_p (rtx x)
14836 rtx inner;
14838 switch (GET_CODE (x))
14840 case CONST:
14841 inner = XEXP (x, 0);
14842 if (GET_CODE (inner) == PLUS
14843 && CONST_INT_P (XEXP (inner, 1)))
14844 inner = XEXP (inner, 0);
14846 /* Only some unspecs are valid as "constants". */
14847 if (GET_CODE (inner) == UNSPEC)
14848 switch (XINT (inner, 1))
14850 case UNSPEC_GOT:
14851 case UNSPEC_GOTOFF:
14852 case UNSPEC_PLTOFF:
14853 return TARGET_64BIT;
14854 case UNSPEC_TPOFF:
14855 x = XVECEXP (inner, 0, 0);
14856 return (GET_CODE (x) == SYMBOL_REF
14857 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14858 case UNSPEC_MACHOPIC_OFFSET:
14859 return legitimate_pic_address_disp_p (x);
14860 default:
14861 return false;
14863 /* FALLTHRU */
14865 case SYMBOL_REF:
14866 case LABEL_REF:
14867 return legitimate_pic_address_disp_p (x);
14869 default:
14870 return true;
14874 /* Determine if a given CONST RTX is a valid memory displacement
14875 in PIC mode. */
14877 bool
14878 legitimate_pic_address_disp_p (rtx disp)
14880 bool saw_plus;
14882 /* In 64bit mode we can allow direct addresses of symbols and labels
14883 when they are not dynamic symbols. */
14884 if (TARGET_64BIT)
14886 rtx op0 = disp, op1;
14888 switch (GET_CODE (disp))
14890 case LABEL_REF:
14891 return true;
14893 case CONST:
14894 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14895 break;
14896 op0 = XEXP (XEXP (disp, 0), 0);
14897 op1 = XEXP (XEXP (disp, 0), 1);
14898 if (!CONST_INT_P (op1)
14899 || INTVAL (op1) >= 16*1024*1024
14900 || INTVAL (op1) < -16*1024*1024)
14901 break;
14902 if (GET_CODE (op0) == LABEL_REF)
14903 return true;
14904 if (GET_CODE (op0) == CONST
14905 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14906 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14907 return true;
14908 if (GET_CODE (op0) == UNSPEC
14909 && XINT (op0, 1) == UNSPEC_PCREL)
14910 return true;
14911 if (GET_CODE (op0) != SYMBOL_REF)
14912 break;
14913 /* FALLTHRU */
14915 case SYMBOL_REF:
14916 /* TLS references should always be enclosed in UNSPEC.
14917 The dllimported symbol always needs to be resolved. */
14918 if (SYMBOL_REF_TLS_MODEL (op0)
14919 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14920 return false;
14922 if (TARGET_PECOFF)
14924 if (is_imported_p (op0))
14925 return true;
14927 if (SYMBOL_REF_FAR_ADDR_P (op0)
14928 || !SYMBOL_REF_LOCAL_P (op0))
14929 break;
14931 /* Function symbols need to be resolved only for
14932 the large model.
14933 For the small model we don't need to resolve anything
14934 here. */
14935 if ((ix86_cmodel != CM_LARGE_PIC
14936 && SYMBOL_REF_FUNCTION_P (op0))
14937 || ix86_cmodel == CM_SMALL_PIC)
14938 return true;
14939 /* Non-external symbols don't need to be resolved for
14940 the large and medium models. */
14941 if ((ix86_cmodel == CM_LARGE_PIC
14942 || ix86_cmodel == CM_MEDIUM_PIC)
14943 && !SYMBOL_REF_EXTERNAL_P (op0))
14944 return true;
14946 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14947 && (SYMBOL_REF_LOCAL_P (op0)
14948 || (HAVE_LD_PIE_COPYRELOC
14949 && flag_pie
14950 && !SYMBOL_REF_WEAK (op0)
14951 && !SYMBOL_REF_FUNCTION_P (op0)))
14952 && ix86_cmodel != CM_LARGE_PIC)
14953 return true;
14954 break;
14956 default:
14957 break;
14960 if (GET_CODE (disp) != CONST)
14961 return false;
14962 disp = XEXP (disp, 0);
14964 if (TARGET_64BIT)
14966 /* It is unsafe to allow PLUS expressions here. This limits the allowed
14967 distance to GOT table entries. We should not need these anyway. */
14968 if (GET_CODE (disp) != UNSPEC
14969 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14970 && XINT (disp, 1) != UNSPEC_GOTOFF
14971 && XINT (disp, 1) != UNSPEC_PCREL
14972 && XINT (disp, 1) != UNSPEC_PLTOFF))
14973 return false;
14975 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14976 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
14977 return false;
14978 return true;
14981 saw_plus = false;
14982 if (GET_CODE (disp) == PLUS)
14984 if (!CONST_INT_P (XEXP (disp, 1)))
14985 return false;
14986 disp = XEXP (disp, 0);
14987 saw_plus = true;
14990 if (TARGET_MACHO && darwin_local_data_pic (disp))
14991 return true;
14993 if (GET_CODE (disp) != UNSPEC)
14994 return false;
14996 switch (XINT (disp, 1))
14998 case UNSPEC_GOT:
14999 if (saw_plus)
15000 return false;
15001 /* We need to check for both symbols and labels because VxWorks loads
15002 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15003 details. */
15004 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15005 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15006 case UNSPEC_GOTOFF:
15007 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15008 While the ABI also specifies a 32bit relocation, we don't produce it in
15009 the small PIC model at all. */
15010 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15011 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15012 && !TARGET_64BIT)
15013 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15014 return false;
15015 case UNSPEC_GOTTPOFF:
15016 case UNSPEC_GOTNTPOFF:
15017 case UNSPEC_INDNTPOFF:
15018 if (saw_plus)
15019 return false;
15020 disp = XVECEXP (disp, 0, 0);
15021 return (GET_CODE (disp) == SYMBOL_REF
15022 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15023 case UNSPEC_NTPOFF:
15024 disp = XVECEXP (disp, 0, 0);
15025 return (GET_CODE (disp) == SYMBOL_REF
15026 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15027 case UNSPEC_DTPOFF:
15028 disp = XVECEXP (disp, 0, 0);
15029 return (GET_CODE (disp) == SYMBOL_REF
15030 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15033 return false;
15036 /* Determine if op is suitable RTX for an address register.
15037 Return naked register if a register or a register subreg is
15038 found, otherwise return NULL_RTX. */
15040 static rtx
15041 ix86_validate_address_register (rtx op)
15043 machine_mode mode = GET_MODE (op);
15045 /* Only SImode or DImode registers can form the address. */
15046 if (mode != SImode && mode != DImode)
15047 return NULL_RTX;
15049 if (REG_P (op))
15050 return op;
15051 else if (SUBREG_P (op))
15053 rtx reg = SUBREG_REG (op);
15055 if (!REG_P (reg))
15056 return NULL_RTX;
15058 mode = GET_MODE (reg);
15060 /* Don't allow SUBREGs that span more than a word. It can
15061 lead to spill failures when the register is one word out
15062 of a two word structure. */
15063 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15064 return NULL_RTX;
15066 /* Allow only SUBREGs of non-eliminable hard registers. */
15067 if (register_no_elim_operand (reg, mode))
15068 return reg;
15071 /* Op is not a register. */
15072 return NULL_RTX;
15075 /* Recognizes RTL expressions that are valid memory addresses for an
15076 instruction. The MODE argument is the machine mode for the MEM
15077 expression that wants to use this address.
15079 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
15080 convert common non-canonical forms to canonical form so that they will
15081 be recognized. */
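/* As an illustrative sketch, a canonical address is base + index*scale + disp,
   e.g. the RTL
     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
   which ix86_decompose_address splits into the parts validated below.  */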
15083 static bool
15084 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
15086 struct ix86_address parts;
15087 rtx base, index, disp;
15088 HOST_WIDE_INT scale;
15089 addr_space_t seg;
15091 if (ix86_decompose_address (addr, &parts) <= 0)
15092 /* Decomposition failed. */
15093 return false;
15095 base = parts.base;
15096 index = parts.index;
15097 disp = parts.disp;
15098 scale = parts.scale;
15099 seg = parts.seg;
15101 /* Validate base register. */
15102 if (base)
15104 rtx reg = ix86_validate_address_register (base);
15106 if (reg == NULL_RTX)
15107 return false;
15109 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
15110 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
15111 /* Base is not valid. */
15112 return false;
15115 /* Validate index register. */
15116 if (index)
15118 rtx reg = ix86_validate_address_register (index);
15120 if (reg == NULL_RTX)
15121 return false;
15123 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
15124 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
15125 /* Index is not valid. */
15126 return false;
15129 /* Index and base should have the same mode. */
15130 if (base && index
15131 && GET_MODE (base) != GET_MODE (index))
15132 return false;
15134 /* Address override works only on the (%reg) part of %fs:(%reg). */
15135 if (seg != ADDR_SPACE_GENERIC
15136 && ((base && GET_MODE (base) != word_mode)
15137 || (index && GET_MODE (index) != word_mode)))
15138 return false;
15140 /* Validate scale factor. */
15141 if (scale != 1)
15143 if (!index)
15144 /* Scale without index. */
15145 return false;
15147 if (scale != 2 && scale != 4 && scale != 8)
15148 /* Scale is not a valid multiplier. */
15149 return false;
15152 /* Validate displacement. */
15153 if (disp)
15155 if (GET_CODE (disp) == CONST
15156 && GET_CODE (XEXP (disp, 0)) == UNSPEC
15157 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
15158 switch (XINT (XEXP (disp, 0), 1))
15160 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
15161 when used. While the ABI also specifies 32bit relocations, we
15162 don't produce them at all and use IP-relative addressing instead.
15163 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
15164 should be loaded via the GOT. */
15165 case UNSPEC_GOT:
15166 if (!TARGET_64BIT
15167 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15168 goto is_legitimate_pic;
15169 /* FALLTHRU */
15170 case UNSPEC_GOTOFF:
15171 gcc_assert (flag_pic);
15172 if (!TARGET_64BIT)
15173 goto is_legitimate_pic;
15175 /* 64bit address unspec. */
15176 return false;
15178 case UNSPEC_GOTPCREL:
15179 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15180 goto is_legitimate_pic;
15181 /* FALLTHRU */
15182 case UNSPEC_PCREL:
15183 gcc_assert (flag_pic);
15184 goto is_legitimate_pic;
15186 case UNSPEC_GOTTPOFF:
15187 case UNSPEC_GOTNTPOFF:
15188 case UNSPEC_INDNTPOFF:
15189 case UNSPEC_NTPOFF:
15190 case UNSPEC_DTPOFF:
15191 break;
15193 default:
15194 /* Invalid address unspec. */
15195 return false;
15198 else if (SYMBOLIC_CONST (disp)
15199 && (flag_pic
15200 || (TARGET_MACHO
15201 #if TARGET_MACHO
15202 && MACHOPIC_INDIRECT
15203 && !machopic_operand_p (disp)
15204 #endif
15208 is_legitimate_pic:
15209 if (TARGET_64BIT && (index || base))
15211 /* foo@dtpoff(%rX) is ok. */
15212 if (GET_CODE (disp) != CONST
15213 || GET_CODE (XEXP (disp, 0)) != PLUS
15214 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
15215 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
15216 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
15217 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
15218 /* Non-constant pic memory reference. */
15219 return false;
15221 else if ((!TARGET_MACHO || flag_pic)
15222 && ! legitimate_pic_address_disp_p (disp))
15223 /* Displacement is an invalid pic construct. */
15224 return false;
15225 #if TARGET_MACHO
15226 else if (MACHO_DYNAMIC_NO_PIC_P
15227 && !ix86_legitimate_constant_p (Pmode, disp))
15228 /* Displacement must be referenced via non_lazy_pointer. */
15229 return false;
15230 #endif
15232 /* This code used to verify that a symbolic pic displacement
15233 includes the pic_offset_table_rtx register.
15235 While this is a good idea, unfortunately these constructs may
15236 be created by "adds using lea" optimization for incorrect
15237 code like:
15239 int a;
15240 int foo(int i)
15242 return *(&a+i);
15245 This code is nonsensical, but results in addressing the
15246 GOT table with a pic_offset_table_rtx base. We can't
15247 just refuse it easily, since it gets matched by the
15248 "addsi3" pattern, which is later split to an lea when the
15249 output register differs from the input. While this
15250 could be handled by a separate addsi pattern for this case
15251 that never results in an lea, disabling this test seems to be
15252 the easier and correct fix for the crash. */
15254 else if (GET_CODE (disp) != LABEL_REF
15255 && !CONST_INT_P (disp)
15256 && (GET_CODE (disp) != CONST
15257 || !ix86_legitimate_constant_p (Pmode, disp))
15258 && (GET_CODE (disp) != SYMBOL_REF
15259 || !ix86_legitimate_constant_p (Pmode, disp)))
15260 /* Displacement is not constant. */
15261 return false;
15262 else if (TARGET_64BIT
15263 && !x86_64_immediate_operand (disp, VOIDmode))
15264 /* Displacement is out of range. */
15265 return false;
15266 /* In x32 mode, constant addresses are sign extended to 64bit, so
15267 we have to prevent addresses from 0x80000000 to 0xffffffff. */
15268 else if (TARGET_X32 && !(index || base)
15269 && CONST_INT_P (disp)
15270 && val_signbit_known_set_p (SImode, INTVAL (disp)))
15271 return false;
15274 /* Everything looks valid. */
15275 return true;
15278 /* Determine if a given RTX is a valid constant address. */
15280 bool
15281 constant_address_p (rtx x)
15283 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
15286 /* Return a unique alias set for the GOT. */
15288 static alias_set_type
15289 ix86_GOT_alias_set (void)
15291 static alias_set_type set = -1;
15292 if (set == -1)
15293 set = new_alias_set ();
15294 return set;
15297 /* Return a legitimate reference for ORIG (an address) using the
15298 register REG. If REG is 0, a new pseudo is generated.
15300 There are two types of references that must be handled:
15302 1. Global data references must load the address from the GOT, via
15303 the PIC reg. An insn is emitted to do this load, and the reg is
15304 returned.
15306 2. Static data references, constant pool addresses, and code labels
15307 compute the address as an offset from the GOT, whose base is in
15308 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
15309 differentiate them from global data objects. The returned
15310 address is the PIC reg + an unspec constant.
15312 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
15313 reg also appears in the address. */
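/* Illustrative 32bit examples, assuming %ebx holds the PIC register:
   a global object is loaded from the GOT,
     movl foo@GOT(%ebx), %reg
	(mem (plus (reg ebx) (const (unspec [foo] UNSPEC_GOT))))
   while a local object is addressed as an offset from the GOT base,
     leal foo@GOTOFF(%ebx), %reg
	(plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   The exact RTL depends on the target and code model.  */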
15315 static rtx
15316 legitimize_pic_address (rtx orig, rtx reg)
15318 rtx addr = orig;
15319 rtx new_rtx = orig;
15321 #if TARGET_MACHO
15322 if (TARGET_MACHO && !TARGET_64BIT)
15324 if (reg == 0)
15325 reg = gen_reg_rtx (Pmode);
15326 /* Use the generic Mach-O PIC machinery. */
15327 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15329 #endif
15331 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15333 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15334 if (tmp)
15335 return tmp;
15338 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15339 new_rtx = addr;
15340 else if ((!TARGET_64BIT
15341 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
15342 && !TARGET_PECOFF
15343 && gotoff_operand (addr, Pmode))
15345 /* This symbol may be referenced via a displacement
15346 from the PIC base address (@GOTOFF). */
15347 if (GET_CODE (addr) == CONST)
15348 addr = XEXP (addr, 0);
15350 if (GET_CODE (addr) == PLUS)
15352 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15353 UNSPEC_GOTOFF);
15354 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15356 else
15357 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15359 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15361 if (TARGET_64BIT)
15362 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15364 if (reg != 0)
15366 gcc_assert (REG_P (reg));
15367 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
15368 new_rtx, reg, 1, OPTAB_DIRECT);
15370 else
15371 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15373 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15374 /* We can't use @GOTOFF for text labels
15375 on VxWorks, see gotoff_operand. */
15376 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15378 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15379 if (tmp)
15380 return tmp;
15382 /* For x64 PE-COFF there is no GOT table,
15383 so we use the address directly. */
15384 if (TARGET_64BIT && TARGET_PECOFF)
15386 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15387 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15389 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15391 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
15392 UNSPEC_GOTPCREL);
15393 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15394 new_rtx = gen_const_mem (Pmode, new_rtx);
15395 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15397 else
15399 /* This symbol must be referenced via a load
15400 from the Global Offset Table (@GOT). */
15401 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15402 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15403 if (TARGET_64BIT)
15404 new_rtx = force_reg (Pmode, new_rtx);
15405 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15406 new_rtx = gen_const_mem (Pmode, new_rtx);
15407 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15410 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15412 else
15414 if (CONST_INT_P (addr)
15415 && !x86_64_immediate_operand (addr, VOIDmode))
15416 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
15417 else if (GET_CODE (addr) == CONST)
15419 addr = XEXP (addr, 0);
15421 /* We must match stuff we generate before. Assume the only
15422 unspecs that can get here are ours. Not that we could do
15423 anything with them anyway.... */
15424 if (GET_CODE (addr) == UNSPEC
15425 || (GET_CODE (addr) == PLUS
15426 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15427 return orig;
15428 gcc_assert (GET_CODE (addr) == PLUS);
15431 if (GET_CODE (addr) == PLUS)
15433 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15435 /* Check first to see if this is a constant
15436 offset from a @GOTOFF symbol reference. */
15437 if (!TARGET_PECOFF
15438 && gotoff_operand (op0, Pmode)
15439 && CONST_INT_P (op1))
15441 if (!TARGET_64BIT)
15443 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15444 UNSPEC_GOTOFF);
15445 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15446 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15448 if (reg != 0)
15450 gcc_assert (REG_P (reg));
15451 new_rtx = expand_simple_binop (Pmode, PLUS,
15452 pic_offset_table_rtx,
15453 new_rtx, reg, 1,
15454 OPTAB_DIRECT);
15456 else
15457 new_rtx
15458 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15460 else
15462 if (INTVAL (op1) < -16*1024*1024
15463 || INTVAL (op1) >= 16*1024*1024)
15465 if (!x86_64_immediate_operand (op1, Pmode))
15466 op1 = force_reg (Pmode, op1);
15468 new_rtx
15469 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15473 else
15475 rtx base = legitimize_pic_address (op0, reg);
15476 machine_mode mode = GET_MODE (base);
15477 new_rtx
15478 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15480 if (CONST_INT_P (new_rtx))
15482 if (INTVAL (new_rtx) < -16*1024*1024
15483 || INTVAL (new_rtx) >= 16*1024*1024)
15485 if (!x86_64_immediate_operand (new_rtx, mode))
15486 new_rtx = force_reg (mode, new_rtx);
15488 new_rtx
15489 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15491 else
15492 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15494 else
15496 /* For %rip addressing, we have to use
15497 just disp32, neither base nor index. */
15498 if (TARGET_64BIT
15499 && (GET_CODE (base) == SYMBOL_REF
15500 || GET_CODE (base) == LABEL_REF))
15501 base = force_reg (mode, base);
15502 if (GET_CODE (new_rtx) == PLUS
15503 && CONSTANT_P (XEXP (new_rtx, 1)))
15505 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15506 new_rtx = XEXP (new_rtx, 1);
15508 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15513 return new_rtx;
15516 /* Load the thread pointer. If TO_REG is true, force it into a register. */
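/* The thread-pointer read is represented as (unspec [(const_int 0)] UNSPEC_TP)
   and corresponds roughly to %fs:0 on 64bit and %gs:0 on 32bit GNU/Linux;
   this note is illustrative only.  */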
15518 static rtx
15519 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15521 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15523 if (GET_MODE (tp) != tp_mode)
15525 gcc_assert (GET_MODE (tp) == SImode);
15526 gcc_assert (tp_mode == DImode);
15528 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15531 if (to_reg)
15532 tp = copy_to_mode_reg (tp_mode, tp);
15534 return tp;
15537 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15539 static GTY(()) rtx ix86_tls_symbol;
15541 static rtx
15542 ix86_tls_get_addr (void)
15544 if (!ix86_tls_symbol)
15546 const char *sym
15547 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15548 ? "___tls_get_addr" : "__tls_get_addr");
15550 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15553 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15555 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15556 UNSPEC_PLTOFF);
15557 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15558 gen_rtx_CONST (Pmode, unspec));
15561 return ix86_tls_symbol;
15564 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15566 static GTY(()) rtx ix86_tls_module_base_symbol;
15569 ix86_tls_module_base (void)
15571 if (!ix86_tls_module_base_symbol)
15573 ix86_tls_module_base_symbol
15574 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15576 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15577 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15580 return ix86_tls_module_base_symbol;
15583 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15584 false if we expect this to be used for a memory address and true if
15585 we expect to load the address into a register. */
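/* Roughly, on x86-64 GNU/Linux the models handled below expand to the
   following code sequences (illustrative only; the exact output depends
   on flags, the assembler and the linker):
     global dynamic:  leaq x@tlsgd(%rip), %rdi; call __tls_get_addr@PLT
     local dynamic:   leaq x@tlsld(%rip), %rdi; call __tls_get_addr@PLT;
		      leaq x@dtpoff(%rax), %reg
     initial exec:    movq x@gottpoff(%rip), %reg; movq %fs:(%reg), ...
     local exec:      movq %fs:0, %reg; ... x@tpoff(%reg)  */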
15587 static rtx
15588 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15590 rtx dest, base, off;
15591 rtx pic = NULL_RTX, tp = NULL_RTX;
15592 machine_mode tp_mode = Pmode;
15593 int type;
15595 /* Fall back to the global dynamic model if the toolchain cannot support local
15596 dynamic. */
15597 if (TARGET_SUN_TLS && !TARGET_64BIT
15598 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15599 && model == TLS_MODEL_LOCAL_DYNAMIC)
15600 model = TLS_MODEL_GLOBAL_DYNAMIC;
15602 switch (model)
15604 case TLS_MODEL_GLOBAL_DYNAMIC:
15605 dest = gen_reg_rtx (Pmode);
15607 if (!TARGET_64BIT)
15609 if (flag_pic && !TARGET_PECOFF)
15610 pic = pic_offset_table_rtx;
15611 else
15613 pic = gen_reg_rtx (Pmode);
15614 emit_insn (gen_set_got (pic));
15618 if (TARGET_GNU2_TLS)
15620 if (TARGET_64BIT)
15621 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15622 else
15623 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15625 tp = get_thread_pointer (Pmode, true);
15626 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15628 if (GET_MODE (x) != Pmode)
15629 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15631 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15633 else
15635 rtx caddr = ix86_tls_get_addr ();
15637 if (TARGET_64BIT)
15639 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15640 rtx_insn *insns;
15642 start_sequence ();
15643 emit_call_insn
15644 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15645 insns = get_insns ();
15646 end_sequence ();
15648 if (GET_MODE (x) != Pmode)
15649 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15651 RTL_CONST_CALL_P (insns) = 1;
15652 emit_libcall_block (insns, dest, rax, x);
15654 else
15655 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15657 break;
15659 case TLS_MODEL_LOCAL_DYNAMIC:
15660 base = gen_reg_rtx (Pmode);
15662 if (!TARGET_64BIT)
15664 if (flag_pic)
15665 pic = pic_offset_table_rtx;
15666 else
15668 pic = gen_reg_rtx (Pmode);
15669 emit_insn (gen_set_got (pic));
15673 if (TARGET_GNU2_TLS)
15675 rtx tmp = ix86_tls_module_base ();
15677 if (TARGET_64BIT)
15678 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15679 else
15680 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15682 tp = get_thread_pointer (Pmode, true);
15683 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15684 gen_rtx_MINUS (Pmode, tmp, tp));
15686 else
15688 rtx caddr = ix86_tls_get_addr ();
15690 if (TARGET_64BIT)
15692 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15693 rtx_insn *insns;
15694 rtx eqv;
15696 start_sequence ();
15697 emit_call_insn
15698 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15699 insns = get_insns ();
15700 end_sequence ();
15702 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15703 share the LD_BASE result with other LD model accesses. */
15704 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15705 UNSPEC_TLS_LD_BASE);
15707 RTL_CONST_CALL_P (insns) = 1;
15708 emit_libcall_block (insns, base, rax, eqv);
15710 else
15711 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
15714 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15715 off = gen_rtx_CONST (Pmode, off);
15717 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15719 if (TARGET_GNU2_TLS)
15721 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15723 if (GET_MODE (x) != Pmode)
15724 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15726 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15728 break;
15730 case TLS_MODEL_INITIAL_EXEC:
15731 if (TARGET_64BIT)
15733 if (TARGET_SUN_TLS && !TARGET_X32)
15735 /* The Sun linker took the AMD64 TLS spec literally
15736 and can only handle %rax as destination of the
15737 initial-exec code sequence. */
15739 dest = gen_reg_rtx (DImode);
15740 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15741 return dest;
15744 /* Generate DImode references to avoid %fs:(%reg32)
15745 problems and linker IE->LE relaxation bug. */
15746 tp_mode = DImode;
15747 pic = NULL;
15748 type = UNSPEC_GOTNTPOFF;
15750 else if (flag_pic)
15752 pic = pic_offset_table_rtx;
15753 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15755 else if (!TARGET_ANY_GNU_TLS)
15757 pic = gen_reg_rtx (Pmode);
15758 emit_insn (gen_set_got (pic));
15759 type = UNSPEC_GOTTPOFF;
15761 else
15763 pic = NULL;
15764 type = UNSPEC_INDNTPOFF;
15767 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15768 off = gen_rtx_CONST (tp_mode, off);
15769 if (pic)
15770 off = gen_rtx_PLUS (tp_mode, pic, off);
15771 off = gen_const_mem (tp_mode, off);
15772 set_mem_alias_set (off, ix86_GOT_alias_set ());
15774 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15776 base = get_thread_pointer (tp_mode,
15777 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15778 off = force_reg (tp_mode, off);
15779 dest = gen_rtx_PLUS (tp_mode, base, off);
15780 if (tp_mode != Pmode)
15781 dest = convert_to_mode (Pmode, dest, 1);
15783 else
15785 base = get_thread_pointer (Pmode, true);
15786 dest = gen_reg_rtx (Pmode);
15787 emit_insn (ix86_gen_sub3 (dest, base, off));
15789 break;
15791 case TLS_MODEL_LOCAL_EXEC:
15792 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15793 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15794 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15795 off = gen_rtx_CONST (Pmode, off);
15797 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15799 base = get_thread_pointer (Pmode,
15800 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15801 return gen_rtx_PLUS (Pmode, base, off);
15803 else
15805 base = get_thread_pointer (Pmode, true);
15806 dest = gen_reg_rtx (Pmode);
15807 emit_insn (ix86_gen_sub3 (dest, base, off));
15809 break;
15811 default:
15812 gcc_unreachable ();
15815 return dest;
15818 /* Return true if OP refers to a TLS address. */
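/* I.e. detect a memory address containing the thread-pointer unspec, for
   example (mem (plus (unspec [(const_int 0)] UNSPEC_TP) (const ...@NTPOFF))).
   The form shown is illustrative only.  */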
15819 bool
15820 ix86_tls_address_pattern_p (rtx op)
15822 subrtx_var_iterator::array_type array;
15823 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
15825 rtx op = *iter;
15826 if (MEM_P (op))
15828 rtx *x = &XEXP (op, 0);
15829 while (GET_CODE (*x) == PLUS)
15831 int i;
15832 for (i = 0; i < 2; i++)
15834 rtx u = XEXP (*x, i);
15835 if (GET_CODE (u) == ZERO_EXTEND)
15836 u = XEXP (u, 0);
15837 if (GET_CODE (u) == UNSPEC
15838 && XINT (u, 1) == UNSPEC_TP)
15839 return true;
15841 x = &XEXP (*x, 0);
15844 iter.skip_subrtxes ();
15848 return false;
15851 /* Rewrite *LOC so that it refers to a default TLS address space. */
15852 void
15853 ix86_rewrite_tls_address_1 (rtx *loc)
15855 subrtx_ptr_iterator::array_type array;
15856 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
15858 rtx *loc = *iter;
15859 if (MEM_P (*loc))
15861 rtx addr = XEXP (*loc, 0);
15862 rtx *x = &addr;
15863 while (GET_CODE (*x) == PLUS)
15865 int i;
15866 for (i = 0; i < 2; i++)
15868 rtx u = XEXP (*x, i);
15869 if (GET_CODE (u) == ZERO_EXTEND)
15870 u = XEXP (u, 0);
15871 if (GET_CODE (u) == UNSPEC
15872 && XINT (u, 1) == UNSPEC_TP)
15874 addr_space_t as = DEFAULT_TLS_SEG_REG;
15876 *x = XEXP (*x, 1 - i);
15878 *loc = replace_equiv_address_nv (*loc, addr, true);
15879 set_mem_addr_space (*loc, as);
15880 return;
15883 x = &XEXP (*x, 0);
15886 iter.skip_subrtxes ();
15891 /* Rewrite an instruction pattern involving a TLS address
15892 so that it refers to a default TLS address space. */
15894 ix86_rewrite_tls_address (rtx pattern)
15896 pattern = copy_insn (pattern);
15897 ix86_rewrite_tls_address_1 (&pattern);
15898 return pattern;
15901 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15902 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15903 unique refptr-DECL symbol corresponding to symbol DECL. */
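/* For example, a variable "bar" marked dllimport is accessed through the
   import pointer __imp_bar (or __imp__bar when a user label prefix is in
   use), while an external "bar" in the medium/large code model gets a local
   ".refptr.bar" indirection instead.  The names are illustrative only.  */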
15905 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15907 static inline hashval_t hash (tree_map *m) { return m->hash; }
15908 static inline bool
15909 equal (tree_map *a, tree_map *b)
15911 return a->base.from == b->base.from;
15914 static int
15915 keep_cache_entry (tree_map *&m)
15917 return ggc_marked_p (m->base.from);
15921 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
15923 static tree
15924 get_dllimport_decl (tree decl, bool beimport)
15926 struct tree_map *h, in;
15927 const char *name;
15928 const char *prefix;
15929 size_t namelen, prefixlen;
15930 char *imp_name;
15931 tree to;
15932 rtx rtl;
15934 if (!dllimport_map)
15935 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15937 in.hash = htab_hash_pointer (decl);
15938 in.base.from = decl;
15939 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
15940 h = *loc;
15941 if (h)
15942 return h->to;
15944 *loc = h = ggc_alloc<tree_map> ();
15945 h->hash = in.hash;
15946 h->base.from = decl;
15947 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15948 VAR_DECL, NULL, ptr_type_node);
15949 DECL_ARTIFICIAL (to) = 1;
15950 DECL_IGNORED_P (to) = 1;
15951 DECL_EXTERNAL (to) = 1;
15952 TREE_READONLY (to) = 1;
15954 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15955 name = targetm.strip_name_encoding (name);
15956 if (beimport)
15957 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15958 ? "*__imp_" : "*__imp__";
15959 else
15960 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15961 namelen = strlen (name);
15962 prefixlen = strlen (prefix);
15963 imp_name = (char *) alloca (namelen + prefixlen + 1);
15964 memcpy (imp_name, prefix, prefixlen);
15965 memcpy (imp_name + prefixlen, name, namelen + 1);
15967 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15968 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15969 SET_SYMBOL_REF_DECL (rtl, to);
15970 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15971 if (!beimport)
15973 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15974 #ifdef SUB_TARGET_RECORD_STUB
15975 SUB_TARGET_RECORD_STUB (name);
15976 #endif
15979 rtl = gen_const_mem (Pmode, rtl);
15980 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15982 SET_DECL_RTL (to, rtl);
15983 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15985 return to;
15988 /* Expand SYMBOL into its corresponding far-address symbol.
15989 WANT_REG is true if we require the result be a register. */
15991 static rtx
15992 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
15994 tree imp_decl;
15995 rtx x;
15997 gcc_assert (SYMBOL_REF_DECL (symbol));
15998 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16000 x = DECL_RTL (imp_decl);
16001 if (want_reg)
16002 x = force_reg (Pmode, x);
16003 return x;
16006 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16007 true if we require the result be a register. */
16009 static rtx
16010 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16012 tree imp_decl;
16013 rtx x;
16015 gcc_assert (SYMBOL_REF_DECL (symbol));
16016 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16018 x = DECL_RTL (imp_decl);
16019 if (want_reg)
16020 x = force_reg (Pmode, x);
16021 return x;
16024 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
16025 is true if we require the result be a register. */
16027 static rtx
16028 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16030 if (!TARGET_PECOFF)
16031 return NULL_RTX;
16033 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16035 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
16036 return legitimize_dllimport_symbol (addr, inreg);
16037 if (GET_CODE (addr) == CONST
16038 && GET_CODE (XEXP (addr, 0)) == PLUS
16039 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16040 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
16042 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
16043 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16047 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
16048 return NULL_RTX;
16049 if (GET_CODE (addr) == SYMBOL_REF
16050 && !is_imported_p (addr)
16051 && SYMBOL_REF_EXTERNAL_P (addr)
16052 && SYMBOL_REF_DECL (addr))
16053 return legitimize_pe_coff_extern_decl (addr, inreg);
16055 if (GET_CODE (addr) == CONST
16056 && GET_CODE (XEXP (addr, 0)) == PLUS
16057 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16058 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
16059 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
16060 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
16062 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
16063 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16065 return NULL_RTX;
16068 /* Try machine-dependent ways of modifying an illegitimate address
16069 to be legitimate. If we find one, return the new, valid address.
16070 This macro is used in only one place: `memory_address' in explow.c.
16072 OLDX is the address as it was before break_out_memory_refs was called.
16073 In some cases it is useful to look at this to decide what needs to be done.
16075 It is always safe for this macro to do nothing. It exists to recognize
16076 opportunities to optimize the output.
16078 For the 80386, we handle X+REG by loading X into a register R and
16079 using R+REG. R will go in a general reg and indexing will be used.
16080 However, if REG is a broken-out memory address or multiplication,
16081 nothing needs to be done because REG can certainly go in a general reg.
16083 When -fpic is used, special handling is needed for symbolic references.
16084 See comments by legitimize_pic_address in i386.c for details. */
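/* For instance, (plus (symbol_ref "x") (reg)) is rewritten as
   (plus (reg R) (reg)) after loading x into a fresh register R, and a
   shifted index such as (ashift (reg) (const_int 2)) inside an address is
   canonicalized to (mult (reg) (const_int 4)).  Illustrative cases only.  */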
16086 static rtx
16087 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
16089 bool changed = false;
16090 unsigned log;
16092 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
16093 if (log)
16094 return legitimize_tls_address (x, (enum tls_model) log, false);
16095 if (GET_CODE (x) == CONST
16096 && GET_CODE (XEXP (x, 0)) == PLUS
16097 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
16098 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
16100 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
16101 (enum tls_model) log, false);
16102 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
16105 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16107 rtx tmp = legitimize_pe_coff_symbol (x, true);
16108 if (tmp)
16109 return tmp;
16112 if (flag_pic && SYMBOLIC_CONST (x))
16113 return legitimize_pic_address (x, 0);
16115 #if TARGET_MACHO
16116 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
16117 return machopic_indirect_data_reference (x, 0);
16118 #endif
16120 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
16121 if (GET_CODE (x) == ASHIFT
16122 && CONST_INT_P (XEXP (x, 1))
16123 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
16125 changed = true;
16126 log = INTVAL (XEXP (x, 1));
16127 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
16128 GEN_INT (1 << log));
16131 if (GET_CODE (x) == PLUS)
16133 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16135 if (GET_CODE (XEXP (x, 0)) == ASHIFT
16136 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16137 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
16139 changed = true;
16140 log = INTVAL (XEXP (XEXP (x, 0), 1));
16141 XEXP (x, 0) = gen_rtx_MULT (Pmode,
16142 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
16143 GEN_INT (1 << log));
16146 if (GET_CODE (XEXP (x, 1)) == ASHIFT
16147 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
16148 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
16150 changed = true;
16151 log = INTVAL (XEXP (XEXP (x, 1), 1));
16152 XEXP (x, 1) = gen_rtx_MULT (Pmode,
16153 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
16154 GEN_INT (1 << log));
16157 /* Put multiply first if it isn't already. */
16158 if (GET_CODE (XEXP (x, 1)) == MULT)
16160 std::swap (XEXP (x, 0), XEXP (x, 1));
16161 changed = true;
16164 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16165 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16166 created by virtual register instantiation, register elimination, and
16167 similar optimizations. */
16168 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
16170 changed = true;
16171 x = gen_rtx_PLUS (Pmode,
16172 gen_rtx_PLUS (Pmode, XEXP (x, 0),
16173 XEXP (XEXP (x, 1), 0)),
16174 XEXP (XEXP (x, 1), 1));
16177 /* Canonicalize
16178 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16179 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16180 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
16181 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16182 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
16183 && CONSTANT_P (XEXP (x, 1)))
16185 rtx constant;
16186 rtx other = NULL_RTX;
16188 if (CONST_INT_P (XEXP (x, 1)))
16190 constant = XEXP (x, 1);
16191 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
16193 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
16195 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
16196 other = XEXP (x, 1);
16198 else
16199 constant = 0;
16201 if (constant)
16203 changed = true;
16204 x = gen_rtx_PLUS (Pmode,
16205 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
16206 XEXP (XEXP (XEXP (x, 0), 1), 0)),
16207 plus_constant (Pmode, other,
16208 INTVAL (constant)));
16212 if (changed && ix86_legitimate_address_p (mode, x, false))
16213 return x;
16215 if (GET_CODE (XEXP (x, 0)) == MULT)
16217 changed = true;
16218 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
16221 if (GET_CODE (XEXP (x, 1)) == MULT)
16223 changed = true;
16224 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
16227 if (changed
16228 && REG_P (XEXP (x, 1))
16229 && REG_P (XEXP (x, 0)))
16230 return x;
16232 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
16234 changed = true;
16235 x = legitimize_pic_address (x, 0);
16238 if (changed && ix86_legitimate_address_p (mode, x, false))
16239 return x;
16241 if (REG_P (XEXP (x, 0)))
16243 rtx temp = gen_reg_rtx (Pmode);
16244 rtx val = force_operand (XEXP (x, 1), temp);
16245 if (val != temp)
16247 val = convert_to_mode (Pmode, val, 1);
16248 emit_move_insn (temp, val);
16251 XEXP (x, 1) = temp;
16252 return x;
16255 else if (REG_P (XEXP (x, 1)))
16257 rtx temp = gen_reg_rtx (Pmode);
16258 rtx val = force_operand (XEXP (x, 0), temp);
16259 if (val != temp)
16261 val = convert_to_mode (Pmode, val, 1);
16262 emit_move_insn (temp, val);
16265 XEXP (x, 0) = temp;
16266 return x;
16270 return x;
16273 /* Print an integer constant expression in assembler syntax. Addition
16274 and subtraction are the only arithmetic that may appear in these
16275 expressions. FILE is the stdio stream to write to, X is the rtx, and
16276 CODE is the operand print code from the output string. */
16278 static void
16279 output_pic_addr_const (FILE *file, rtx x, int code)
16281 char buf[256];
16283 switch (GET_CODE (x))
16285 case PC:
16286 gcc_assert (flag_pic);
16287 putc ('.', file);
16288 break;
16290 case SYMBOL_REF:
16291 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
16292 output_addr_const (file, x);
16293 else
16295 const char *name = XSTR (x, 0);
16297 /* Mark the decl as referenced so that cgraph will
16298 output the function. */
16299 if (SYMBOL_REF_DECL (x))
16300 mark_decl_referenced (SYMBOL_REF_DECL (x));
16302 #if TARGET_MACHO
16303 if (MACHOPIC_INDIRECT
16304 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
16305 name = machopic_indirection_name (x, /*stub_p=*/true);
16306 #endif
16307 assemble_name (file, name);
16309 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
16310 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
16311 fputs ("@PLT", file);
16312 break;
16314 case LABEL_REF:
16315 x = XEXP (x, 0);
16316 /* FALLTHRU */
16317 case CODE_LABEL:
16318 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
16319 assemble_name (asm_out_file, buf);
16320 break;
16322 case CONST_INT:
16323 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16324 break;
16326 case CONST:
16327 /* This used to output parentheses around the expression,
16328 but that does not work on the 386 (either ATT or BSD assembler). */
16329 output_pic_addr_const (file, XEXP (x, 0), code);
16330 break;
16332 case CONST_DOUBLE:
16333 /* We can't handle floating point constants;
16334 TARGET_PRINT_OPERAND must handle them. */
16335 output_operand_lossage ("floating constant misused");
16336 break;
16338 case PLUS:
16339 /* Some assemblers need integer constants to appear first. */
16340 if (CONST_INT_P (XEXP (x, 0)))
16342 output_pic_addr_const (file, XEXP (x, 0), code);
16343 putc ('+', file);
16344 output_pic_addr_const (file, XEXP (x, 1), code);
16346 else
16348 gcc_assert (CONST_INT_P (XEXP (x, 1)));
16349 output_pic_addr_const (file, XEXP (x, 1), code);
16350 putc ('+', file);
16351 output_pic_addr_const (file, XEXP (x, 0), code);
16353 break;
16355 case MINUS:
16356 if (!TARGET_MACHO)
16357 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
16358 output_pic_addr_const (file, XEXP (x, 0), code);
16359 putc ('-', file);
16360 output_pic_addr_const (file, XEXP (x, 1), code);
16361 if (!TARGET_MACHO)
16362 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
16363 break;
16365 case UNSPEC:
16366 gcc_assert (XVECLEN (x, 0) == 1);
16367 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16368 switch (XINT (x, 1))
16370 case UNSPEC_GOT:
16371 fputs ("@GOT", file);
16372 break;
16373 case UNSPEC_GOTOFF:
16374 fputs ("@GOTOFF", file);
16375 break;
16376 case UNSPEC_PLTOFF:
16377 fputs ("@PLTOFF", file);
16378 break;
16379 case UNSPEC_PCREL:
16380 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16381 "(%rip)" : "[rip]", file);
16382 break;
16383 case UNSPEC_GOTPCREL:
16384 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16385 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16386 break;
16387 case UNSPEC_GOTTPOFF:
16388 /* FIXME: This might be @TPOFF in Sun ld too. */
16389 fputs ("@gottpoff", file);
16390 break;
16391 case UNSPEC_TPOFF:
16392 fputs ("@tpoff", file);
16393 break;
16394 case UNSPEC_NTPOFF:
16395 if (TARGET_64BIT)
16396 fputs ("@tpoff", file);
16397 else
16398 fputs ("@ntpoff", file);
16399 break;
16400 case UNSPEC_DTPOFF:
16401 fputs ("@dtpoff", file);
16402 break;
16403 case UNSPEC_GOTNTPOFF:
16404 if (TARGET_64BIT)
16405 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16406 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16407 else
16408 fputs ("@gotntpoff", file);
16409 break;
16410 case UNSPEC_INDNTPOFF:
16411 fputs ("@indntpoff", file);
16412 break;
16413 #if TARGET_MACHO
16414 case UNSPEC_MACHOPIC_OFFSET:
16415 putc ('-', file);
16416 machopic_output_function_base_name (file);
16417 break;
16418 #endif
16419 default:
16420 output_operand_lossage ("invalid UNSPEC as operand");
16421 break;
16423 break;
16425 default:
16426 output_operand_lossage ("invalid expression as operand");
16430 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16431 We need to emit DTP-relative relocations. */
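/* E.g. for a 4-byte entry this emits ".long sym@dtpoff" and for an 8-byte
   entry ".long sym@dtpoff, 0" (illustrative; ASM_LONG is the target's
   ".long" directive).  */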
16433 static void ATTRIBUTE_UNUSED
16434 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16436 fputs (ASM_LONG, file);
16437 output_addr_const (file, x);
16438 fputs ("@dtpoff", file);
16439 switch (size)
16441 case 4:
16442 break;
16443 case 8:
16444 fputs (", 0", file);
16445 break;
16446 default:
16447 gcc_unreachable ();
16451 /* Return true if X is a representation of the PIC register. This copes
16452 with calls from ix86_find_base_term, where the register might have
16453 been replaced by a cselib value. */
16455 static bool
16456 ix86_pic_register_p (rtx x)
16458 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16459 return (pic_offset_table_rtx
16460 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16461 else if (!REG_P (x))
16462 return false;
16463 else if (pic_offset_table_rtx)
16465 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16466 return true;
16467 if (HARD_REGISTER_P (x)
16468 && !HARD_REGISTER_P (pic_offset_table_rtx)
16469 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16470 return true;
16471 return false;
16473 else
16474 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16477 /* Helper function for ix86_delegitimize_address.
16478 Attempt to delegitimize TLS local-exec accesses. */
16480 static rtx
16481 ix86_delegitimize_tls_address (rtx orig_x)
16483 rtx x = orig_x, unspec;
16484 struct ix86_address addr;
16486 if (!TARGET_TLS_DIRECT_SEG_REFS)
16487 return orig_x;
16488 if (MEM_P (x))
16489 x = XEXP (x, 0);
16490 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16491 return orig_x;
16492 if (ix86_decompose_address (x, &addr) == 0
16493 || addr.seg != DEFAULT_TLS_SEG_REG
16494 || addr.disp == NULL_RTX
16495 || GET_CODE (addr.disp) != CONST)
16496 return orig_x;
16497 unspec = XEXP (addr.disp, 0);
16498 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16499 unspec = XEXP (unspec, 0);
16500 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16501 return orig_x;
16502 x = XVECEXP (unspec, 0, 0);
16503 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16504 if (unspec != XEXP (addr.disp, 0))
16505 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16506 if (addr.index)
16508 rtx idx = addr.index;
16509 if (addr.scale != 1)
16510 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16511 x = gen_rtx_PLUS (Pmode, idx, x);
16513 if (addr.base)
16514 x = gen_rtx_PLUS (Pmode, addr.base, x);
16515 if (MEM_P (orig_x))
16516 x = replace_equiv_address_nv (orig_x, x);
16517 return x;
16520 /* In the name of slightly smaller debug output, and to cater to
16521 general assembler lossage, recognize PIC+GOTOFF and turn it back
16522 into a direct symbol reference.
16524 On Darwin, this is necessary to avoid a crash, because Darwin
16525 has a different PIC label for each routine but the DWARF debugging
16526 information is not associated with any particular routine, so it's
16527 necessary to remove references to the PIC label from RTL stored by
16528 the DWARF output code.
16530 This helper is used in the normal ix86_delegitimize_address
16531 entrypoint (e.g. used in the target delegitimization hook) and
16532 in ix86_find_base_term. As compile time memory optimization, we
16533 avoid allocating rtxes that will not change anything on the outcome
16534 of the callers (find_base_value and find_base_term). */
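/* For instance, (plus (reg ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is turned back into (symbol_ref "foo"), and a 64bit
   (mem (const (unspec [foo] UNSPEC_GOTPCREL))) GOT load is reduced to foo
   itself.  Illustrative examples only.  */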
16536 static inline rtx
16537 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
16539 rtx orig_x = delegitimize_mem_from_attrs (x);
16540 /* addend is NULL or some rtx if x is something+GOTOFF where
16541 something doesn't include the PIC register. */
16542 rtx addend = NULL_RTX;
16543 /* reg_addend is NULL or a multiple of some register. */
16544 rtx reg_addend = NULL_RTX;
16545 /* const_addend is NULL or a const_int. */
16546 rtx const_addend = NULL_RTX;
16547 /* This is the result, or NULL. */
16548 rtx result = NULL_RTX;
16550 x = orig_x;
16552 if (MEM_P (x))
16553 x = XEXP (x, 0);
16555 if (TARGET_64BIT)
16557 if (GET_CODE (x) == CONST
16558 && GET_CODE (XEXP (x, 0)) == PLUS
16559 && GET_MODE (XEXP (x, 0)) == Pmode
16560 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16561 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16562 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16564 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
16565 base. A CONST can't be arg_pointer_rtx based. */
16566 if (base_term_p && MEM_P (orig_x))
16567 return orig_x;
16568 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16569 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16570 if (MEM_P (orig_x))
16571 x = replace_equiv_address_nv (orig_x, x);
16572 return x;
16575 if (GET_CODE (x) == CONST
16576 && GET_CODE (XEXP (x, 0)) == UNSPEC
16577 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16578 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16579 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16581 x = XVECEXP (XEXP (x, 0), 0, 0);
16582 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16584 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
16585 if (x == NULL_RTX)
16586 return orig_x;
16588 return x;
16591 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16592 return ix86_delegitimize_tls_address (orig_x);
16594 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16595 and -mcmodel=medium -fpic. */
16598 if (GET_CODE (x) != PLUS
16599 || GET_CODE (XEXP (x, 1)) != CONST)
16600 return ix86_delegitimize_tls_address (orig_x);
16602 if (ix86_pic_register_p (XEXP (x, 0)))
16603 /* %ebx + GOT/GOTOFF */
16605 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16607 /* %ebx + %reg * scale + GOT/GOTOFF */
16608 reg_addend = XEXP (x, 0);
16609 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16610 reg_addend = XEXP (reg_addend, 1);
16611 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16612 reg_addend = XEXP (reg_addend, 0);
16613 else
16615 reg_addend = NULL_RTX;
16616 addend = XEXP (x, 0);
16619 else
16620 addend = XEXP (x, 0);
16622 x = XEXP (XEXP (x, 1), 0);
16623 if (GET_CODE (x) == PLUS
16624 && CONST_INT_P (XEXP (x, 1)))
16626 const_addend = XEXP (x, 1);
16627 x = XEXP (x, 0);
16630 if (GET_CODE (x) == UNSPEC
16631 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16632 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16633 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16634 && !MEM_P (orig_x) && !addend)))
16635 result = XVECEXP (x, 0, 0);
16637 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16638 && !MEM_P (orig_x))
16639 result = XVECEXP (x, 0, 0);
16641 if (! result)
16642 return ix86_delegitimize_tls_address (orig_x);
16644 /* For (PLUS something CONST_INT) both find_base_{value,term} just
16645 recurse on the first operand. */
16646 if (const_addend && !base_term_p)
16647 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16648 if (reg_addend)
16649 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16650 if (addend)
16652 /* If the rest of original X doesn't involve the PIC register, add
16653 addend and subtract pic_offset_table_rtx. This can happen e.g.
16654 for code like:
16655 leal (%ebx, %ecx, 4), %ecx
16657 movl foo@GOTOFF(%ecx), %edx
16658 in which case we return (%ecx - %ebx) + foo
16659 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16660 and reload has completed. */
16661 if (pic_offset_table_rtx
16662 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16663 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16664 pic_offset_table_rtx),
16665 result);
16666 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16668 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16669 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16670 result = gen_rtx_PLUS (Pmode, tmp, result);
16672 else
16673 return orig_x;
16675 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16677 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
16678 if (result == NULL_RTX)
16679 return orig_x;
16681 return result;
16684 /* The normal instantiation of the above template. */
16686 static rtx
16687 ix86_delegitimize_address (rtx x)
16689 return ix86_delegitimize_address_1 (x, false);
16692 /* If X is a machine specific address (i.e. a symbol or label being
16693 referenced as a displacement from the GOT implemented using an
16694 UNSPEC), then return the base term. Otherwise return X. */
16697 ix86_find_base_term (rtx x)
16699 rtx term;
16701 if (TARGET_64BIT)
16703 if (GET_CODE (x) != CONST)
16704 return x;
16705 term = XEXP (x, 0);
16706 if (GET_CODE (term) == PLUS
16707 && CONST_INT_P (XEXP (term, 1)))
16708 term = XEXP (term, 0);
16709 if (GET_CODE (term) != UNSPEC
16710 || (XINT (term, 1) != UNSPEC_GOTPCREL
16711 && XINT (term, 1) != UNSPEC_PCREL))
16712 return x;
16714 return XVECEXP (term, 0, 0);
16717 return ix86_delegitimize_address_1 (x, true);
16720 static void
16721 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16722 bool fp, FILE *file)
16724 const char *suffix;
16726 if (mode == CCFPmode || mode == CCFPUmode)
16728 code = ix86_fp_compare_code_to_integer (code);
16729 mode = CCmode;
16731 if (reverse)
16732 code = reverse_condition (code);
16734 switch (code)
16736 case EQ:
16737 gcc_assert (mode != CCGZmode);
16738 switch (mode)
16740 case E_CCAmode:
16741 suffix = "a";
16742 break;
16743 case E_CCCmode:
16744 suffix = "c";
16745 break;
16746 case E_CCOmode:
16747 suffix = "o";
16748 break;
16749 case E_CCPmode:
16750 suffix = "p";
16751 break;
16752 case E_CCSmode:
16753 suffix = "s";
16754 break;
16755 default:
16756 suffix = "e";
16757 break;
16759 break;
16760 case NE:
16761 gcc_assert (mode != CCGZmode);
16762 switch (mode)
16764 case E_CCAmode:
16765 suffix = "na";
16766 break;
16767 case E_CCCmode:
16768 suffix = "nc";
16769 break;
16770 case E_CCOmode:
16771 suffix = "no";
16772 break;
16773 case E_CCPmode:
16774 suffix = "np";
16775 break;
16776 case E_CCSmode:
16777 suffix = "ns";
16778 break;
16779 default:
16780 suffix = "ne";
16781 break;
16783 break;
16784 case GT:
16785 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16786 suffix = "g";
16787 break;
16788 case GTU:
16789 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16790 Those same assemblers have the same but opposite lossage on cmov. */
16791 if (mode == CCmode)
16792 suffix = fp ? "nbe" : "a";
16793 else
16794 gcc_unreachable ();
16795 break;
16796 case LT:
16797 switch (mode)
16799 case E_CCNOmode:
16800 case E_CCGOCmode:
16801 suffix = "s";
16802 break;
16804 case E_CCmode:
16805 case E_CCGCmode:
16806 case E_CCGZmode:
16807 suffix = "l";
16808 break;
16810 default:
16811 gcc_unreachable ();
16813 break;
16814 case LTU:
16815 if (mode == CCmode || mode == CCGZmode)
16816 suffix = "b";
16817 else if (mode == CCCmode)
16818 suffix = fp ? "b" : "c";
16819 else
16820 gcc_unreachable ();
16821 break;
16822 case GE:
16823 switch (mode)
16825 case E_CCNOmode:
16826 case E_CCGOCmode:
16827 suffix = "ns";
16828 break;
16830 case E_CCmode:
16831 case E_CCGCmode:
16832 case E_CCGZmode:
16833 suffix = "ge";
16834 break;
16836 default:
16837 gcc_unreachable ();
16839 break;
16840 case GEU:
16841 if (mode == CCmode || mode == CCGZmode)
16842 suffix = "nb";
16843 else if (mode == CCCmode)
16844 suffix = fp ? "nb" : "nc";
16845 else
16846 gcc_unreachable ();
16847 break;
16848 case LE:
16849 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16850 suffix = "le";
16851 break;
16852 case LEU:
16853 if (mode == CCmode)
16854 suffix = "be";
16855 else
16856 gcc_unreachable ();
16857 break;
16858 case UNORDERED:
16859 suffix = fp ? "u" : "p";
16860 break;
16861 case ORDERED:
16862 suffix = fp ? "nu" : "np";
16863 break;
16864 default:
16865 gcc_unreachable ();
16867 fputs (suffix, file);
16870 /* Print the name of register X to FILE based on its machine mode and number.
16871 If CODE is 'w', pretend the mode is HImode.
16872 If CODE is 'b', pretend the mode is QImode.
16873 If CODE is 'k', pretend the mode is SImode.
16874 If CODE is 'q', pretend the mode is DImode.
16875 If CODE is 'x', pretend the mode is V4SFmode.
16876 If CODE is 't', pretend the mode is V8SFmode.
16877 If CODE is 'g', pretend the mode is V16SFmode.
16878 If CODE is 'h', pretend the reg is the 'high' byte register.
16879 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
16880 If CODE is 'd', duplicate the operand for AVX instruction.
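   For example, with operand (reg:DI 0), i.e. %rax, code 'b' prints "%al",
   'w' prints "%ax", 'k' prints "%eax" and 'q' prints "%rax" (AT&T syntax;
   illustrative only).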
16883 void
16884 print_reg (rtx x, int code, FILE *file)
16886 const char *reg;
16887 int msize;
16888 unsigned int regno;
16889 bool duplicated;
16891 if (ASSEMBLER_DIALECT == ASM_ATT)
16892 putc ('%', file);
16894 if (x == pc_rtx)
16896 gcc_assert (TARGET_64BIT);
16897 fputs ("rip", file);
16898 return;
16901 if (code == 'y' && STACK_TOP_P (x))
16903 fputs ("st(0)", file);
16904 return;
16907 if (code == 'w')
16908 msize = 2;
16909 else if (code == 'b')
16910 msize = 1;
16911 else if (code == 'k')
16912 msize = 4;
16913 else if (code == 'q')
16914 msize = 8;
16915 else if (code == 'h')
16916 msize = 0;
16917 else if (code == 'x')
16918 msize = 16;
16919 else if (code == 't')
16920 msize = 32;
16921 else if (code == 'g')
16922 msize = 64;
16923 else
16924 msize = GET_MODE_SIZE (GET_MODE (x));
16926 regno = REGNO (x);
16928 if (regno == ARG_POINTER_REGNUM
16929 || regno == FRAME_POINTER_REGNUM
16930 || regno == FPSR_REG
16931 || regno == FPCR_REG)
16933 output_operand_lossage
16934 ("invalid use of register '%s'", reg_names[regno]);
16935 return;
16937 else if (regno == FLAGS_REG)
16939 output_operand_lossage ("invalid use of asm flag output");
16940 return;
16943 duplicated = code == 'd' && TARGET_AVX;
16945 switch (msize)
16947 case 16:
16948 case 12:
16949 case 8:
16950 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
16951 warning (0, "unsupported size for integer register");
16952 /* FALLTHRU */
16953 case 4:
16954 if (LEGACY_INT_REGNO_P (regno))
16955 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
16956 /* FALLTHRU */
16957 case 2:
16958 normal:
16959 reg = hi_reg_name[regno];
16960 break;
16961 case 1:
16962 if (regno >= ARRAY_SIZE (qi_reg_name))
16963 goto normal;
16964 if (!ANY_QI_REGNO_P (regno))
16965 error ("unsupported size for integer register");
16966 reg = qi_reg_name[regno];
16967 break;
16968 case 0:
16969 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16970 goto normal;
16971 reg = qi_high_reg_name[regno];
16972 break;
16973 case 32:
16974 case 64:
16975 if (SSE_REGNO_P (regno))
16977 gcc_assert (!duplicated);
16978 putc (msize == 32 ? 'y' : 'z', file);
16979 reg = hi_reg_name[regno] + 1;
16980 break;
16982 goto normal;
16983 default:
16984 gcc_unreachable ();
16987 fputs (reg, file);
16989 /* Irritatingly, AMD extended registers use
16990 a different naming convention: "r%d[bwd]". */
16991 if (REX_INT_REGNO_P (regno))
16993 gcc_assert (TARGET_64BIT);
16994 switch (msize)
16996 case 0:
16997 error ("extended registers have no high halves");
16998 break;
16999 case 1:
17000 putc ('b', file);
17001 break;
17002 case 2:
17003 putc ('w', file);
17004 break;
17005 case 4:
17006 putc ('d', file);
17007 break;
17008 case 8:
17009 /* no suffix */
17010 break;
17011 default:
17012 error ("unsupported operand size for extended register");
17013 break;
17015 return;
17018 if (duplicated)
17020 if (ASSEMBLER_DIALECT == ASM_ATT)
17021 fprintf (file, ", %%%s", reg);
17022 else
17023 fprintf (file, ", %s", reg);
17027 /* Meaning of CODE:
17028 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
17029 C -- print opcode suffix for set/cmov insn.
17030 c -- like C, but print reversed condition
17031 F,f -- likewise, but for floating-point.
17032 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
17033 otherwise nothing
17034 R -- print embedded rounding and sae.
17035 r -- print only sae.
17036 z -- print the opcode suffix for the size of the current operand.
17037 Z -- likewise, with special suffixes for x87 instructions.
17038 * -- print a star (in certain assembler syntax)
17039 A -- print an absolute memory reference.
17040 E -- print address with DImode register names if TARGET_64BIT.
17041 w -- print the operand as if it's a "word" (HImode) even if it isn't.
17042 s -- print a shift double count, followed by the assembler's argument
17043 delimiter.
17044 b -- print the QImode name of the register for the indicated operand.
17045 %b0 would print %al if operands[0] is reg 0.
17046 w -- likewise, print the HImode name of the register.
17047 k -- likewise, print the SImode name of the register.
17048 q -- likewise, print the DImode name of the register.
17049 x -- likewise, print the V4SFmode name of the register.
17050 t -- likewise, print the V8SFmode name of the register.
17051 g -- likewise, print the V16SFmode name of the register.
17052 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
17053 y -- print "st(0)" instead of "st" as a register.
17054 d -- print duplicated register operand for AVX instruction.
17055 D -- print condition for SSE cmp instruction.
17056 P -- if PIC, print an @PLT suffix.
17057 p -- print raw symbol name.
17058 X -- don't print any sort of PIC '@' suffix for a symbol.
17059 & -- print some in-use local-dynamic symbol name.
17060 H -- print a memory address offset by 8; used for sse high-parts
17061 Y -- print condition for XOP pcom* instruction.
17062 + -- print a branch hint as 'cs' or 'ds' prefix
17063 ; -- print a semicolon (after prefixes due to bug in older gas).
17064 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
17065 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
17066 ! -- print MPX prefix for jxx/call/ret instructions if required.
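/* Illustrative example (AT&T syntax assumed): in a template such as
   "add%z0\t{%2, %0|%0, %2}" with a DImode register destination, %z0 expands
   to 'q' and the output might read "addq\t%rdx, %rax"; with -masm=intel the
   alternative after '|' is used instead.  */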
17069 void
17070 ix86_print_operand (FILE *file, rtx x, int code)
17072 if (code)
17074 switch (code)
17076 case 'A':
17077 switch (ASSEMBLER_DIALECT)
17079 case ASM_ATT:
17080 putc ('*', file);
17081 break;
17083 case ASM_INTEL:
17084 /* Intel syntax. For absolute addresses, registers should not
17085 be surrounded by brackets.  */
17086 if (!REG_P (x))
17088 putc ('[', file);
17089 ix86_print_operand (file, x, 0);
17090 putc (']', file);
17091 return;
17093 break;
17095 default:
17096 gcc_unreachable ();
17099 ix86_print_operand (file, x, 0);
17100 return;
17102 case 'E':
17103 /* Wrap address in an UNSPEC to declare special handling. */
17104 if (TARGET_64BIT)
17105 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
17107 output_address (VOIDmode, x);
17108 return;
17110 case 'L':
17111 if (ASSEMBLER_DIALECT == ASM_ATT)
17112 putc ('l', file);
17113 return;
17115 case 'W':
17116 if (ASSEMBLER_DIALECT == ASM_ATT)
17117 putc ('w', file);
17118 return;
17120 case 'B':
17121 if (ASSEMBLER_DIALECT == ASM_ATT)
17122 putc ('b', file);
17123 return;
17125 case 'Q':
17126 if (ASSEMBLER_DIALECT == ASM_ATT)
17127 putc ('l', file);
17128 return;
17130 case 'S':
17131 if (ASSEMBLER_DIALECT == ASM_ATT)
17132 putc ('s', file);
17133 return;
17135 case 'T':
17136 if (ASSEMBLER_DIALECT == ASM_ATT)
17137 putc ('t', file);
17138 return;
17140 case 'O':
17141 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17142 if (ASSEMBLER_DIALECT != ASM_ATT)
17143 return;
17145 switch (GET_MODE_SIZE (GET_MODE (x)))
17147 case 2:
17148 putc ('w', file);
17149 break;
17151 case 4:
17152 putc ('l', file);
17153 break;
17155 case 8:
17156 putc ('q', file);
17157 break;
17159 default:
17160 output_operand_lossage ("invalid operand size for operand "
17161 "code 'O'");
17162 return;
17165 putc ('.', file);
17166 #endif
17167 return;
17169 case 'z':
17170 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17172 /* Opcodes don't get size suffixes if using Intel syntax.  */
17173 if (ASSEMBLER_DIALECT == ASM_INTEL)
17174 return;
17176 switch (GET_MODE_SIZE (GET_MODE (x)))
17178 case 1:
17179 putc ('b', file);
17180 return;
17182 case 2:
17183 putc ('w', file);
17184 return;
17186 case 4:
17187 putc ('l', file);
17188 return;
17190 case 8:
17191 putc ('q', file);
17192 return;
17194 default:
17195 output_operand_lossage ("invalid operand size for operand "
17196 "code 'z'");
17197 return;
17201 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17202 warning (0, "non-integer operand used with operand code 'z'");
17203 /* FALLTHRU */
17205 case 'Z':
17206 /* 387 opcodes don't get size suffixes if using Intel syntax.  */
17207 if (ASSEMBLER_DIALECT == ASM_INTEL)
17208 return;
17210 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17212 switch (GET_MODE_SIZE (GET_MODE (x)))
17214 case 2:
17215 #ifdef HAVE_AS_IX86_FILDS
17216 putc ('s', file);
17217 #endif
17218 return;
17220 case 4:
17221 putc ('l', file);
17222 return;
17224 case 8:
17225 #ifdef HAVE_AS_IX86_FILDQ
17226 putc ('q', file);
17227 #else
17228 fputs ("ll", file);
17229 #endif
17230 return;
17232 default:
17233 break;
17236 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17238 /* 387 opcodes don't get size suffixes
17239 if the operands are registers. */
17240 if (STACK_REG_P (x))
17241 return;
17243 switch (GET_MODE_SIZE (GET_MODE (x)))
17245 case 4:
17246 putc ('s', file);
17247 return;
17249 case 8:
17250 putc ('l', file);
17251 return;
17253 case 12:
17254 case 16:
17255 putc ('t', file);
17256 return;
17258 default:
17259 break;
17262 else
17264 output_operand_lossage ("invalid operand type used with "
17265 "operand code 'Z'");
17266 return;
17269 output_operand_lossage ("invalid operand size for operand code 'Z'");
17270 return;
17272 case 'd':
17273 case 'b':
17274 case 'w':
17275 case 'k':
17276 case 'q':
17277 case 'h':
17278 case 't':
17279 case 'g':
17280 case 'y':
17281 case 'x':
17282 case 'X':
17283 case 'P':
17284 case 'p':
17285 break;
17287 case 's':
17288 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
17290 ix86_print_operand (file, x, 0);
17291 fputs (", ", file);
17293 return;
17295 case 'Y':
17296 switch (GET_CODE (x))
17298 case NE:
17299 fputs ("neq", file);
17300 break;
17301 case EQ:
17302 fputs ("eq", file);
17303 break;
17304 case GE:
17305 case GEU:
17306 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
17307 break;
17308 case GT:
17309 case GTU:
17310 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
17311 break;
17312 case LE:
17313 case LEU:
17314 fputs ("le", file);
17315 break;
17316 case LT:
17317 case LTU:
17318 fputs ("lt", file);
17319 break;
17320 case UNORDERED:
17321 fputs ("unord", file);
17322 break;
17323 case ORDERED:
17324 fputs ("ord", file);
17325 break;
17326 case UNEQ:
17327 fputs ("ueq", file);
17328 break;
17329 case UNGE:
17330 fputs ("nlt", file);
17331 break;
17332 case UNGT:
17333 fputs ("nle", file);
17334 break;
17335 case UNLE:
17336 fputs ("ule", file);
17337 break;
17338 case UNLT:
17339 fputs ("ult", file);
17340 break;
17341 case LTGT:
17342 fputs ("une", file);
17343 break;
17344 default:
17345 output_operand_lossage ("operand is not a condition code, "
17346 "invalid operand code 'Y'");
17347 return;
17349 return;
17351 case 'D':
17352 /* A little bit of braindamage here.  The SSE compare instructions
17353 use completely different names for the comparisons than the
17354 fp conditional moves do.  */
17355 switch (GET_CODE (x))
17357 case UNEQ:
17358 if (TARGET_AVX)
17360 fputs ("eq_us", file);
17361 break;
17363 /* FALLTHRU */
17364 case EQ:
17365 fputs ("eq", file);
17366 break;
17367 case UNLT:
17368 if (TARGET_AVX)
17370 fputs ("nge", file);
17371 break;
17373 /* FALLTHRU */
17374 case LT:
17375 fputs ("lt", file);
17376 break;
17377 case UNLE:
17378 if (TARGET_AVX)
17380 fputs ("ngt", file);
17381 break;
17383 /* FALLTHRU */
17384 case LE:
17385 fputs ("le", file);
17386 break;
17387 case UNORDERED:
17388 fputs ("unord", file);
17389 break;
17390 case LTGT:
17391 if (TARGET_AVX)
17393 fputs ("neq_oq", file);
17394 break;
17396 /* FALLTHRU */
17397 case NE:
17398 fputs ("neq", file);
17399 break;
17400 case GE:
17401 if (TARGET_AVX)
17403 fputs ("ge", file);
17404 break;
17406 /* FALLTHRU */
17407 case UNGE:
17408 fputs ("nlt", file);
17409 break;
17410 case GT:
17411 if (TARGET_AVX)
17413 fputs ("gt", file);
17414 break;
17416 /* FALLTHRU */
17417 case UNGT:
17418 fputs ("nle", file);
17419 break;
17420 case ORDERED:
17421 fputs ("ord", file);
17422 break;
17423 default:
17424 output_operand_lossage ("operand is not a condition code, "
17425 "invalid operand code 'D'");
17426 return;
17428 return;
17430 case 'F':
17431 case 'f':
17432 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17433 if (ASSEMBLER_DIALECT == ASM_ATT)
17434 putc ('.', file);
17435 gcc_fallthrough ();
17436 #endif
17438 case 'C':
17439 case 'c':
17440 if (!COMPARISON_P (x))
17442 output_operand_lossage ("operand is not a condition code, "
17443 "invalid operand code '%c'", code);
17444 return;
17446 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17447 code == 'c' || code == 'f',
17448 code == 'F' || code == 'f',
17449 file);
17450 return;
17452 case 'H':
17453 if (!offsettable_memref_p (x))
17455 output_operand_lossage ("operand is not an offsettable memory "
17456 "reference, invalid operand code 'H'");
17457 return;
17459 /* It doesn't actually matter what mode we use here, as we're
17460 only going to use this for printing. */
17461 x = adjust_address_nv (x, DImode, 8);
17462 /* Output 'qword ptr' for intel assembler dialect. */
17463 if (ASSEMBLER_DIALECT == ASM_INTEL)
17464 code = 'q';
17465 break;
17467 case 'K':
17468 if (!CONST_INT_P (x))
17470 output_operand_lossage ("operand is not an integer, invalid "
17471 "operand code 'K'");
17472 return;
17475 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17476 #ifdef HAVE_AS_IX86_HLE
17477 fputs ("xacquire ", file);
17478 #else
17479 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17480 #endif
17481 else if (INTVAL (x) & IX86_HLE_RELEASE)
17482 #ifdef HAVE_AS_IX86_HLE
17483 fputs ("xrelease ", file);
17484 #else
17485 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17486 #endif
17487 /* We do not want to print the value of the operand.  */
17488 return;
17490 case 'N':
17491 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17492 fputs ("{z}", file);
17493 return;
17495 case 'r':
17496 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
17498 output_operand_lossage ("operand is not a specific integer, "
17499 "invalid operand code 'r'");
17500 return;
17503 if (ASSEMBLER_DIALECT == ASM_INTEL)
17504 fputs (", ", file);
17506 fputs ("{sae}", file);
17508 if (ASSEMBLER_DIALECT == ASM_ATT)
17509 fputs (", ", file);
17511 return;
17513 case 'R':
17514 if (!CONST_INT_P (x))
17516 output_operand_lossage ("operand is not an integer, invalid "
17517 "operand code 'R'");
17518 return;
17521 if (ASSEMBLER_DIALECT == ASM_INTEL)
17522 fputs (", ", file);
17524 switch (INTVAL (x))
17526 case ROUND_NEAREST_INT | ROUND_SAE:
17527 fputs ("{rn-sae}", file);
17528 break;
17529 case ROUND_NEG_INF | ROUND_SAE:
17530 fputs ("{rd-sae}", file);
17531 break;
17532 case ROUND_POS_INF | ROUND_SAE:
17533 fputs ("{ru-sae}", file);
17534 break;
17535 case ROUND_ZERO | ROUND_SAE:
17536 fputs ("{rz-sae}", file);
17537 break;
17538 default:
17539 output_operand_lossage ("operand is not a specific integer, "
17540 "invalid operand code 'R'");
17543 if (ASSEMBLER_DIALECT == ASM_ATT)
17544 fputs (", ", file);
17546 return;
17548 case '*':
17549 if (ASSEMBLER_DIALECT == ASM_ATT)
17550 putc ('*', file);
17551 return;
17553 case '&':
17555 const char *name = get_some_local_dynamic_name ();
17556 if (name == NULL)
17557 output_operand_lossage ("'%%&' used without any "
17558 "local dynamic TLS references");
17559 else
17560 assemble_name (file, name);
17561 return;
17564 case '+':
17566 rtx x;
17568 if (!optimize
17569 || optimize_function_for_size_p (cfun)
17570 || !TARGET_BRANCH_PREDICTION_HINTS)
17571 return;
17573 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17574 if (x)
17576 int pred_val = profile_probability::from_reg_br_prob_note
17577 (XINT (x, 0)).to_reg_br_prob_base ();
17579 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17580 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17582 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17583 bool cputaken
17584 = final_forward_branch_p (current_output_insn) == 0;
17586 /* Emit hints only in the case the default branch prediction
17587 heuristics would fail. */
17588 if (taken != cputaken)
17590 /* We use 3e (DS) prefix for taken branches and
17591 2e (CS) prefix for not taken branches. */
17592 if (taken)
17593 fputs ("ds ; ", file);
17594 else
17595 fputs ("cs ; ", file);
17599 return;
17602 case ';':
17603 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17604 putc (';', file);
17605 #endif
17606 return;
17608 case '~':
17609 putc (TARGET_AVX2 ? 'i' : 'f', file);
17610 return;
17612 case '^':
17613 if (TARGET_64BIT && Pmode != word_mode)
17614 fputs ("addr32 ", file);
17615 return;
17617 case '!':
17618 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17619 fputs ("bnd ", file);
17620 return;
17622 default:
17623 output_operand_lossage ("invalid operand code '%c'", code);
17627 if (REG_P (x))
17628 print_reg (x, code, file);
17630 else if (MEM_P (x))
17632 rtx addr = XEXP (x, 0);
17634 /* No `byte ptr' prefix for call instructions ... */
17635 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17637 machine_mode mode = GET_MODE (x);
17638 const char *size;
17640 /* Check for explicit size override codes. */
17641 if (code == 'b')
17642 size = "BYTE";
17643 else if (code == 'w')
17644 size = "WORD";
17645 else if (code == 'k')
17646 size = "DWORD";
17647 else if (code == 'q')
17648 size = "QWORD";
17649 else if (code == 'x')
17650 size = "XMMWORD";
17651 else if (code == 't')
17652 size = "YMMWORD";
17653 else if (code == 'g')
17654 size = "ZMMWORD";
17655 else if (mode == BLKmode)
17656 /* ... or BLKmode operands, when not overridden. */
17657 size = NULL;
17658 else
17659 switch (GET_MODE_SIZE (mode))
17661 case 1: size = "BYTE"; break;
17662 case 2: size = "WORD"; break;
17663 case 4: size = "DWORD"; break;
17664 case 8: size = "QWORD"; break;
17665 case 12: size = "TBYTE"; break;
17666 case 16:
17667 if (mode == XFmode)
17668 size = "TBYTE";
17669 else
17670 size = "XMMWORD";
17671 break;
17672 case 32: size = "YMMWORD"; break;
17673 case 64: size = "ZMMWORD"; break;
17674 default:
17675 gcc_unreachable ();
17677 if (size)
17679 fputs (size, file);
17680 fputs (" PTR ", file);
17684 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17685 output_operand_lossage ("invalid constraints for operand");
17686 else
17687 ix86_print_operand_address_as
17688 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
17691 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17693 long l;
17695 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17697 if (ASSEMBLER_DIALECT == ASM_ATT)
17698 putc ('$', file);
17699 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17700 if (code == 'q')
17701 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17702 (unsigned long long) (int) l);
17703 else
17704 fprintf (file, "0x%08x", (unsigned int) l);
17707 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17709 long l[2];
17711 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17713 if (ASSEMBLER_DIALECT == ASM_ATT)
17714 putc ('$', file);
17715 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17718 /* These float cases don't actually occur as immediate operands. */
17719 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17721 char dstr[30];
17723 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17724 fputs (dstr, file);
17727 else
17729 /* We have patterns that allow zero sets of memory, for instance.
17730 In 64-bit mode, we should probably support all 8-byte vectors,
17731 since we can in fact encode that into an immediate. */
17732 if (GET_CODE (x) == CONST_VECTOR)
17734 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
17735 x = const0_rtx;
17738 if (code != 'P' && code != 'p')
17740 if (CONST_INT_P (x))
17742 if (ASSEMBLER_DIALECT == ASM_ATT)
17743 putc ('$', file);
17745 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17746 || GET_CODE (x) == LABEL_REF)
17748 if (ASSEMBLER_DIALECT == ASM_ATT)
17749 putc ('$', file);
17750 else
17751 fputs ("OFFSET FLAT:", file);
17754 if (CONST_INT_P (x))
17755 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17756 else if (flag_pic || MACHOPIC_INDIRECT)
17757 output_pic_addr_const (file, x, code);
17758 else
17759 output_addr_const (file, x);
17763 static bool
17764 ix86_print_operand_punct_valid_p (unsigned char code)
17766 return (code == '*' || code == '+' || code == '&' || code == ';'
17767 || code == '~' || code == '^' || code == '!');
17770 /* Print a memory operand whose address is ADDR. */
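/* For example (illustrative): a base+index*scale+displacement address is
   printed as "-4(%rbp,%rax,4)" in AT&T syntax and as "[rbp-4+rax*4]" in
   Intel syntax; any segment override such as %gs: is printed first.  */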
17772 static void
17773 ix86_print_operand_address_as (FILE *file, rtx addr,
17774 addr_space_t as, bool no_rip)
17776 struct ix86_address parts;
17777 rtx base, index, disp;
17778 int scale;
17779 int ok;
17780 bool vsib = false;
17781 int code = 0;
17783 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17785 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17786 gcc_assert (parts.index == NULL_RTX);
17787 parts.index = XVECEXP (addr, 0, 1);
17788 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17789 addr = XVECEXP (addr, 0, 0);
17790 vsib = true;
17792 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17794 gcc_assert (TARGET_64BIT);
17795 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17796 code = 'q';
17798 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17800 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17801 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17802 if (parts.base != NULL_RTX)
17804 parts.index = parts.base;
17805 parts.scale = 1;
17807 parts.base = XVECEXP (addr, 0, 0);
17808 addr = XVECEXP (addr, 0, 0);
17810 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17812 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17813 gcc_assert (parts.index == NULL_RTX);
17814 parts.index = XVECEXP (addr, 0, 1);
17815 addr = XVECEXP (addr, 0, 0);
17817 else
17818 ok = ix86_decompose_address (addr, &parts);
17820 gcc_assert (ok);
17822 base = parts.base;
17823 index = parts.index;
17824 disp = parts.disp;
17825 scale = parts.scale;
17827 if (ADDR_SPACE_GENERIC_P (as))
17828 as = parts.seg;
17829 else
17830 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
17832 if (!ADDR_SPACE_GENERIC_P (as))
17834 const char *string;
17836 if (as == ADDR_SPACE_SEG_FS)
17837 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17838 else if (as == ADDR_SPACE_SEG_GS)
17839 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17840 else
17841 gcc_unreachable ();
17842 fputs (string, file);
17845 /* Use one byte shorter RIP relative addressing for 64bit mode. */
17846 if (TARGET_64BIT && !base && !index && !no_rip)
17848 rtx symbol = disp;
17850 if (GET_CODE (disp) == CONST
17851 && GET_CODE (XEXP (disp, 0)) == PLUS
17852 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17853 symbol = XEXP (XEXP (disp, 0), 0);
17855 if (GET_CODE (symbol) == LABEL_REF
17856 || (GET_CODE (symbol) == SYMBOL_REF
17857 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17858 base = pc_rtx;
17861 if (!base && !index)
17863 /* Displacement only requires special attention. */
17864 if (CONST_INT_P (disp))
17866 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
17867 fputs ("ds:", file);
17868 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17870 /* Load the external function address via the GOT slot to avoid PLT. */
17871 else if (GET_CODE (disp) == CONST
17872 && GET_CODE (XEXP (disp, 0)) == UNSPEC
17873 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
17874 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
17875 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
17876 output_pic_addr_const (file, disp, 0);
17877 else if (flag_pic)
17878 output_pic_addr_const (file, disp, 0);
17879 else
17880 output_addr_const (file, disp);
17882 else
17884 /* Print SImode register names to force addr32 prefix. */
17885 if (SImode_address_operand (addr, VOIDmode))
17887 if (flag_checking)
17889 gcc_assert (TARGET_64BIT);
17890 switch (GET_CODE (addr))
17892 case SUBREG:
17893 gcc_assert (GET_MODE (addr) == SImode);
17894 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17895 break;
17896 case ZERO_EXTEND:
17897 case AND:
17898 gcc_assert (GET_MODE (addr) == DImode);
17899 break;
17900 default:
17901 gcc_unreachable ();
17904 gcc_assert (!code);
17905 code = 'k';
17907 else if (code == 0
17908 && TARGET_X32
17909 && disp
17910 && CONST_INT_P (disp)
17911 && INTVAL (disp) < -16*1024*1024)
17913 /* X32 runs in 64-bit mode, where displacement, DISP, in
17914 address DISP(%r64), is encoded as 32-bit immediate sign-
17915 extended from 32-bit to 64-bit. For -0x40000300(%r64),
17916 address is %r64 + 0xffffffffbffffd00. When %r64 <
17917 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17918 which is invalid for x32. The correct address is %r64
17919 - 0x40000300 == 0xf7ffdd64. To properly encode
17920 -0x40000300(%r64) for x32, we zero-extend negative
17921 displacement by forcing addr32 prefix which truncates
17922 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
17923 zero-extend all negative displacements, including -1(%rsp).
17924 However, for small negative displacements, sign-extension
17925 won't cause overflow. We only zero-extend negative
17926 displacements if they are < -16*1024*1024, which is also used
17927 to check legitimate address displacements for PIC. */
17928 code = 'k';
17931 /* Since the upper 32 bits of RSP are always zero for x32,
17932 we can encode %esp as %rsp to avoid 0x67 prefix if
17933 there is no index register. */
17934 if (TARGET_X32 && Pmode == SImode
17935 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
17936 code = 'q';
17938 if (ASSEMBLER_DIALECT == ASM_ATT)
17940 if (disp)
17942 if (flag_pic)
17943 output_pic_addr_const (file, disp, 0);
17944 else if (GET_CODE (disp) == LABEL_REF)
17945 output_asm_label (disp);
17946 else
17947 output_addr_const (file, disp);
17950 putc ('(', file);
17951 if (base)
17952 print_reg (base, code, file);
17953 if (index)
17955 putc (',', file);
17956 print_reg (index, vsib ? 0 : code, file);
17957 if (scale != 1 || vsib)
17958 fprintf (file, ",%d", scale);
17960 putc (')', file);
17962 else
17964 rtx offset = NULL_RTX;
17966 if (disp)
17968 /* Pull out the offset of a symbol; print any symbol itself. */
17969 if (GET_CODE (disp) == CONST
17970 && GET_CODE (XEXP (disp, 0)) == PLUS
17971 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17973 offset = XEXP (XEXP (disp, 0), 1);
17974 disp = gen_rtx_CONST (VOIDmode,
17975 XEXP (XEXP (disp, 0), 0));
17978 if (flag_pic)
17979 output_pic_addr_const (file, disp, 0);
17980 else if (GET_CODE (disp) == LABEL_REF)
17981 output_asm_label (disp);
17982 else if (CONST_INT_P (disp))
17983 offset = disp;
17984 else
17985 output_addr_const (file, disp);
17988 putc ('[', file);
17989 if (base)
17991 print_reg (base, code, file);
17992 if (offset)
17994 if (INTVAL (offset) >= 0)
17995 putc ('+', file);
17996 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17999 else if (offset)
18000 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18001 else
18002 putc ('0', file);
18004 if (index)
18006 putc ('+', file);
18007 print_reg (index, vsib ? 0 : code, file);
18008 if (scale != 1 || vsib)
18009 fprintf (file, "*%d", scale);
18011 putc (']', file);
18016 static void
18017 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
18019 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
18022 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
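/* For instance, an UNSPEC_NTPOFF reference to symbol foo is printed as
   "foo@tpoff" on 64-bit targets and as "foo@ntpoff" on 32-bit ones, per
   the cases below.  */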
18024 static bool
18025 i386_asm_output_addr_const_extra (FILE *file, rtx x)
18027 rtx op;
18029 if (GET_CODE (x) != UNSPEC)
18030 return false;
18032 op = XVECEXP (x, 0, 0);
18033 switch (XINT (x, 1))
18035 case UNSPEC_GOTTPOFF:
18036 output_addr_const (file, op);
18037 /* FIXME: This might be @TPOFF in Sun ld. */
18038 fputs ("@gottpoff", file);
18039 break;
18040 case UNSPEC_TPOFF:
18041 output_addr_const (file, op);
18042 fputs ("@tpoff", file);
18043 break;
18044 case UNSPEC_NTPOFF:
18045 output_addr_const (file, op);
18046 if (TARGET_64BIT)
18047 fputs ("@tpoff", file);
18048 else
18049 fputs ("@ntpoff", file);
18050 break;
18051 case UNSPEC_DTPOFF:
18052 output_addr_const (file, op);
18053 fputs ("@dtpoff", file);
18054 break;
18055 case UNSPEC_GOTNTPOFF:
18056 output_addr_const (file, op);
18057 if (TARGET_64BIT)
18058 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
18059 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
18060 else
18061 fputs ("@gotntpoff", file);
18062 break;
18063 case UNSPEC_INDNTPOFF:
18064 output_addr_const (file, op);
18065 fputs ("@indntpoff", file);
18066 break;
18067 #if TARGET_MACHO
18068 case UNSPEC_MACHOPIC_OFFSET:
18069 output_addr_const (file, op);
18070 putc ('-', file);
18071 machopic_output_function_base_name (file);
18072 break;
18073 #endif
18075 default:
18076 return false;
18079 return true;
18082 /* Split one or more double-mode RTL references into pairs of half-mode
18083 references. The RTL can be REG, offsettable MEM, integer constant, or
18084 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
18085 split and "num" is its length. lo_half and hi_half are output arrays
18086 that parallel "operands". */
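/* For instance (illustrative): splitting a DImode MEM on a 32-bit target
   yields lo_half = adjust_address (op, SImode, 0) and
   hi_half = adjust_address (op, SImode, 4), as done below.  */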
18088 void
18089 split_double_mode (machine_mode mode, rtx operands[],
18090 int num, rtx lo_half[], rtx hi_half[])
18092 machine_mode half_mode;
18093 unsigned int byte;
18095 switch (mode)
18097 case E_TImode:
18098 half_mode = DImode;
18099 break;
18100 case E_DImode:
18101 half_mode = SImode;
18102 break;
18103 default:
18104 gcc_unreachable ();
18107 byte = GET_MODE_SIZE (half_mode);
18109 while (num--)
18111 rtx op = operands[num];
18113 /* simplify_subreg refuses to split volatile memory addresses,
18114 but we still have to handle them.  */
18115 if (MEM_P (op))
18117 lo_half[num] = adjust_address (op, half_mode, 0);
18118 hi_half[num] = adjust_address (op, half_mode, byte);
18120 else
18122 lo_half[num] = simplify_gen_subreg (half_mode, op,
18123 GET_MODE (op) == VOIDmode
18124 ? mode : GET_MODE (op), 0);
18125 hi_half[num] = simplify_gen_subreg (half_mode, op,
18126 GET_MODE (op) == VOIDmode
18127 ? mode : GET_MODE (op), byte);
18132 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
18133 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
18134 is the expression of the binary operation. The output may either be
18135 emitted here, or returned to the caller, like all output_* functions.
18137 There is no guarantee that the operands are the same mode, as they
18138 might be within FLOAT or FLOAT_EXTEND expressions. */
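/* As an informal example: an SSE SFmode add with AVX enabled builds the
   buffer below as "%v" + "add" + "ss" + "\t{%2, %1, %0|%0, %1, %2}", i.e. a
   vaddss template, while the x87 paths append the p/r/%Z variants chosen
   further down.  */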
18140 #ifndef SYSV386_COMPAT
18141 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
18142 wants to fix the assemblers because that causes incompatibility
18143 with gcc. No-one wants to fix gcc because that causes
18144 incompatibility with assemblers... You can use the option of
18145 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
18146 #define SYSV386_COMPAT 1
18147 #endif
18149 const char *
18150 output_387_binary_op (rtx_insn *insn, rtx *operands)
18152 static char buf[40];
18153 const char *p;
18154 bool is_sse
18155 = (SSE_REG_P (operands[0])
18156 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
18158 if (is_sse)
18159 p = "%v";
18160 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18161 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18162 p = "fi";
18163 else
18164 p = "f";
18166 strcpy (buf, p);
18168 switch (GET_CODE (operands[3]))
18170 case PLUS:
18171 p = "add"; break;
18172 case MINUS:
18173 p = "sub"; break;
18174 case MULT:
18175 p = "mul"; break;
18176 case DIV:
18177 p = "div"; break;
18178 default:
18179 gcc_unreachable ();
18182 strcat (buf, p);
18184 if (is_sse)
18186 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
18187 strcat (buf, p);
18189 if (TARGET_AVX)
18190 p = "\t{%2, %1, %0|%0, %1, %2}";
18191 else
18192 p = "\t{%2, %0|%0, %2}";
18194 strcat (buf, p);
18195 return buf;
18198 /* Even if we do not want to check the inputs, this documents the input
18199 constraints, which helps in understanding the following code.  */
18200 if (flag_checking)
18202 if (STACK_REG_P (operands[0])
18203 && ((REG_P (operands[1])
18204 && REGNO (operands[0]) == REGNO (operands[1])
18205 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
18206 || (REG_P (operands[2])
18207 && REGNO (operands[0]) == REGNO (operands[2])
18208 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
18209 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
18210 ; /* ok */
18211 else
18212 gcc_unreachable ();
18215 switch (GET_CODE (operands[3]))
18217 case MULT:
18218 case PLUS:
18219 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
18220 std::swap (operands[1], operands[2]);
18222 /* We know operands[0] == operands[1].  */
18224 if (MEM_P (operands[2]))
18226 p = "%Z2\t%2";
18227 break;
18230 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18232 if (STACK_TOP_P (operands[0]))
18233 /* How is it that we are storing to a dead operand[2]?
18234 Well, presumably operands[1] is dead too. We can't
18235 store the result to st(0) as st(0) gets popped on this
18236 instruction. Instead store to operands[2] (which I
18237 think has to be st(1)). st(1) will be popped later.
18238 gcc <= 2.8.1 didn't have this check and generated
18239 assembly code that the Unixware assembler rejected. */
18240 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18241 else
18242 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18243 break;
18246 if (STACK_TOP_P (operands[0]))
18247 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18248 else
18249 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18250 break;
18252 case MINUS:
18253 case DIV:
18254 if (MEM_P (operands[1]))
18256 p = "r%Z1\t%1";
18257 break;
18260 if (MEM_P (operands[2]))
18262 p = "%Z2\t%2";
18263 break;
18266 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18268 #if SYSV386_COMPAT
18269 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
18270 derived assemblers, confusingly reverse the direction of
18271 the operation for fsub{r} and fdiv{r} when the
18272 destination register is not st(0). The Intel assembler
18273 doesn't have this brain damage. Read !SYSV386_COMPAT to
18274 figure out what the hardware really does. */
18275 if (STACK_TOP_P (operands[0]))
18276 p = "{p\t%0, %2|rp\t%2, %0}";
18277 else
18278 p = "{rp\t%2, %0|p\t%0, %2}";
18279 #else
18280 if (STACK_TOP_P (operands[0]))
18281 /* As above for fmul/fadd, we can't store to st(0). */
18282 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18283 else
18284 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18285 #endif
18286 break;
18289 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18291 #if SYSV386_COMPAT
18292 if (STACK_TOP_P (operands[0]))
18293 p = "{rp\t%0, %1|p\t%1, %0}";
18294 else
18295 p = "{p\t%1, %0|rp\t%0, %1}";
18296 #else
18297 if (STACK_TOP_P (operands[0]))
18298 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
18299 else
18300 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
18301 #endif
18302 break;
18305 if (STACK_TOP_P (operands[0]))
18307 if (STACK_TOP_P (operands[1]))
18308 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18309 else
18310 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
18311 break;
18313 else if (STACK_TOP_P (operands[1]))
18315 #if SYSV386_COMPAT
18316 p = "{\t%1, %0|r\t%0, %1}";
18317 #else
18318 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
18319 #endif
18321 else
18323 #if SYSV386_COMPAT
18324 p = "{r\t%2, %0|\t%0, %2}";
18325 #else
18326 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18327 #endif
18329 break;
18331 default:
18332 gcc_unreachable ();
18335 strcat (buf, p);
18336 return buf;
18339 /* Return needed mode for entity in optimize_mode_switching pass. */
18341 static int
18342 ix86_dirflag_mode_needed (rtx_insn *insn)
18344 if (CALL_P (insn))
18346 if (cfun->machine->func_type == TYPE_NORMAL)
18347 return X86_DIRFLAG_ANY;
18348 else
18349 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
18350 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
18353 if (recog_memoized (insn) < 0)
18354 return X86_DIRFLAG_ANY;
18356 if (get_attr_type (insn) == TYPE_STR)
18358 /* Emit cld instruction if stringops are used in the function. */
18359 if (cfun->machine->func_type == TYPE_NORMAL)
18360 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
18361 else
18362 return X86_DIRFLAG_RESET;
18365 return X86_DIRFLAG_ANY;
18368 /* Check if a 256bit AVX register is referenced inside of EXP. */
18370 static bool
18371 ix86_check_avx256_register (const_rtx exp)
18373 if (SUBREG_P (exp))
18374 exp = SUBREG_REG (exp);
18376 return (REG_P (exp)
18377 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
18380 /* Return needed mode for entity in optimize_mode_switching pass. */
18382 static int
18383 ix86_avx_u128_mode_needed (rtx_insn *insn)
18385 if (CALL_P (insn))
18387 rtx link;
18389 /* Needed mode is set to AVX_U128_CLEAN if there are
18390 no 256bit modes used in function arguments. */
18391 for (link = CALL_INSN_FUNCTION_USAGE (insn);
18392 link;
18393 link = XEXP (link, 1))
18395 if (GET_CODE (XEXP (link, 0)) == USE)
18397 rtx arg = XEXP (XEXP (link, 0), 0);
18399 if (ix86_check_avx256_register (arg))
18400 return AVX_U128_DIRTY;
18404 return AVX_U128_CLEAN;
18407 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
18408 changes state only when a 256bit register is written to, but we need
18409 to prevent the compiler from moving the optimal insertion point above
18410 an eventual read from a 256bit register.  */
18411 subrtx_iterator::array_type array;
18412 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
18413 if (ix86_check_avx256_register (*iter))
18414 return AVX_U128_DIRTY;
18416 return AVX_U128_ANY;
18419 /* Return mode that i387 must be switched into
18420 prior to the execution of insn. */
18422 static int
18423 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18425 enum attr_i387_cw mode;
18427 /* The mode UNINITIALIZED is used to store the control word after a
18428 function call or ASM pattern.  The mode ANY specifies that the function
18429 has no requirements on the control word and makes no changes in the
18430 bits we are interested in. */
18432 if (CALL_P (insn)
18433 || (NONJUMP_INSN_P (insn)
18434 && (asm_noperands (PATTERN (insn)) >= 0
18435 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18436 return I387_CW_UNINITIALIZED;
18438 if (recog_memoized (insn) < 0)
18439 return I387_CW_ANY;
18441 mode = get_attr_i387_cw (insn);
18443 switch (entity)
18445 case I387_TRUNC:
18446 if (mode == I387_CW_TRUNC)
18447 return mode;
18448 break;
18450 case I387_FLOOR:
18451 if (mode == I387_CW_FLOOR)
18452 return mode;
18453 break;
18455 case I387_CEIL:
18456 if (mode == I387_CW_CEIL)
18457 return mode;
18458 break;
18460 case I387_MASK_PM:
18461 if (mode == I387_CW_MASK_PM)
18462 return mode;
18463 break;
18465 default:
18466 gcc_unreachable ();
18469 return I387_CW_ANY;
18472 /* Return mode that entity must be switched into
18473 prior to the execution of insn. */
18475 static int
18476 ix86_mode_needed (int entity, rtx_insn *insn)
18478 switch (entity)
18480 case X86_DIRFLAG:
18481 return ix86_dirflag_mode_needed (insn);
18482 case AVX_U128:
18483 return ix86_avx_u128_mode_needed (insn);
18484 case I387_TRUNC:
18485 case I387_FLOOR:
18486 case I387_CEIL:
18487 case I387_MASK_PM:
18488 return ix86_i387_mode_needed (entity, insn);
18489 default:
18490 gcc_unreachable ();
18492 return 0;
18495 /* Check if a 256bit AVX register is referenced in stores. */
18497 static void
18498 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18500 if (ix86_check_avx256_register (dest))
18502 bool *used = (bool *) data;
18503 *used = true;
18507 /* Calculate mode of upper 128bit AVX registers after the insn. */
18509 static int
18510 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18512 rtx pat = PATTERN (insn);
18514 if (vzeroupper_operation (pat, VOIDmode)
18515 || vzeroall_operation (pat, VOIDmode))
18516 return AVX_U128_CLEAN;
18518 /* We know that the state is clean after a CALL insn if there are no
18519 256bit registers used in the function return register. */
18520 if (CALL_P (insn))
18522 bool avx_reg256_found = false;
18523 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18525 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18528 /* Otherwise, return current mode. Remember that if insn
18529 references AVX 256bit registers, the mode was already changed
18530 to DIRTY from MODE_NEEDED. */
18531 return mode;
18534 /* Return the mode that an insn results in. */
18536 static int
18537 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18539 switch (entity)
18541 case X86_DIRFLAG:
18542 return mode;
18543 case AVX_U128:
18544 return ix86_avx_u128_mode_after (mode, insn);
18545 case I387_TRUNC:
18546 case I387_FLOOR:
18547 case I387_CEIL:
18548 case I387_MASK_PM:
18549 return mode;
18550 default:
18551 gcc_unreachable ();
18555 static int
18556 ix86_dirflag_mode_entry (void)
18558 /* For TARGET_CLD or in the interrupt handler we can't assume
18559 direction flag state at function entry. */
18560 if (TARGET_CLD
18561 || cfun->machine->func_type != TYPE_NORMAL)
18562 return X86_DIRFLAG_ANY;
18564 return X86_DIRFLAG_RESET;
18567 static int
18568 ix86_avx_u128_mode_entry (void)
18570 tree arg;
18572 /* Entry mode is set to AVX_U128_DIRTY if there are
18573 256bit modes used in function arguments. */
18574 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18575 arg = TREE_CHAIN (arg))
18577 rtx incoming = DECL_INCOMING_RTL (arg);
18579 if (incoming && ix86_check_avx256_register (incoming))
18580 return AVX_U128_DIRTY;
18583 return AVX_U128_CLEAN;
18586 /* Return a mode that ENTITY is assumed to be
18587 switched to at function entry. */
18589 static int
18590 ix86_mode_entry (int entity)
18592 switch (entity)
18594 case X86_DIRFLAG:
18595 return ix86_dirflag_mode_entry ();
18596 case AVX_U128:
18597 return ix86_avx_u128_mode_entry ();
18598 case I387_TRUNC:
18599 case I387_FLOOR:
18600 case I387_CEIL:
18601 case I387_MASK_PM:
18602 return I387_CW_ANY;
18603 default:
18604 gcc_unreachable ();
18608 static int
18609 ix86_avx_u128_mode_exit (void)
18611 rtx reg = crtl->return_rtx;
18613 /* Exit mode is set to AVX_U128_DIRTY if there are
18614 256bit modes used in the function return register. */
18615 if (reg && ix86_check_avx256_register (reg))
18616 return AVX_U128_DIRTY;
18618 return AVX_U128_CLEAN;
18621 /* Return a mode that ENTITY is assumed to be
18622 switched to at function exit. */
18624 static int
18625 ix86_mode_exit (int entity)
18627 switch (entity)
18629 case X86_DIRFLAG:
18630 return X86_DIRFLAG_ANY;
18631 case AVX_U128:
18632 return ix86_avx_u128_mode_exit ();
18633 case I387_TRUNC:
18634 case I387_FLOOR:
18635 case I387_CEIL:
18636 case I387_MASK_PM:
18637 return I387_CW_ANY;
18638 default:
18639 gcc_unreachable ();
18643 static int
18644 ix86_mode_priority (int, int n)
18646 return n;
18649 /* Output code to initialize control word copies used by trunc?f?i and
18650 rounding patterns. CURRENT_MODE is set to current control word,
18651 while NEW_MODE is set to new control word. */
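/* Bits 10-11 of the x87 control word select the rounding mode (00 = to
   nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5 masks the
   precision exception; the 0x0400, 0x0800, 0x0c00 and 0x0020 constants
   below encode exactly those fields.  */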
18653 static void
18654 emit_i387_cw_initialization (int mode)
18656 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18657 rtx new_mode;
18659 enum ix86_stack_slot slot;
18661 rtx reg = gen_reg_rtx (HImode);
18663 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18664 emit_move_insn (reg, copy_rtx (stored_mode));
18666 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18667 || optimize_insn_for_size_p ())
18669 switch (mode)
18671 case I387_CW_TRUNC:
18672 /* round toward zero (truncate) */
18673 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18674 slot = SLOT_CW_TRUNC;
18675 break;
18677 case I387_CW_FLOOR:
18678 /* round down toward -oo */
18679 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18680 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18681 slot = SLOT_CW_FLOOR;
18682 break;
18684 case I387_CW_CEIL:
18685 /* round up toward +oo */
18686 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18687 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18688 slot = SLOT_CW_CEIL;
18689 break;
18691 case I387_CW_MASK_PM:
18692 /* mask precision exception for nearbyint() */
18693 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18694 slot = SLOT_CW_MASK_PM;
18695 break;
18697 default:
18698 gcc_unreachable ();
18701 else
18703 switch (mode)
18705 case I387_CW_TRUNC:
18706 /* round toward zero (truncate) */
18707 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18708 slot = SLOT_CW_TRUNC;
18709 break;
18711 case I387_CW_FLOOR:
18712 /* round down toward -oo */
18713 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18714 slot = SLOT_CW_FLOOR;
18715 break;
18717 case I387_CW_CEIL:
18718 /* round up toward +oo */
18719 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18720 slot = SLOT_CW_CEIL;
18721 break;
18723 case I387_CW_MASK_PM:
18724 /* mask precision exception for nearbyint() */
18725 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18726 slot = SLOT_CW_MASK_PM;
18727 break;
18729 default:
18730 gcc_unreachable ();
18734 gcc_assert (slot < MAX_386_STACK_LOCALS);
18736 new_mode = assign_386_stack_local (HImode, slot);
18737 emit_move_insn (new_mode, reg);
18740 /* Emit vzeroupper. */
18742 void
18743 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18745 int i;
18747 /* Cancel automatic vzeroupper insertion if there are
18748 live call-saved SSE registers at the insertion point. */
18750 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18751 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18752 return;
18754 if (TARGET_64BIT)
18755 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18756 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18757 return;
18759 emit_insn (gen_avx_vzeroupper ());
18764 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
18765 is the set of hard registers live at the point where the insn(s)
18766 are to be inserted. */
18768 static void
18769 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18770 HARD_REG_SET regs_live)
18772 switch (entity)
18774 case X86_DIRFLAG:
18775 if (mode == X86_DIRFLAG_RESET)
18776 emit_insn (gen_cld ());
18777 break;
18778 case AVX_U128:
18779 if (mode == AVX_U128_CLEAN)
18780 ix86_avx_emit_vzeroupper (regs_live);
18781 break;
18782 case I387_TRUNC:
18783 case I387_FLOOR:
18784 case I387_CEIL:
18785 case I387_MASK_PM:
18786 if (mode != I387_CW_ANY
18787 && mode != I387_CW_UNINITIALIZED)
18788 emit_i387_cw_initialization (mode);
18789 break;
18790 default:
18791 gcc_unreachable ();
18795 /* Output code for INSN to convert a float to a signed int. OPERANDS
18796 are the insn operands. The output may be [HSD]Imode and the input
18797 operand may be [SDX]Fmode. */
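/* Roughly (illustrative): for a DImode result whose input does not die on
   the stack top, the emitted sequence is "fld %st(0)", "fldcw <truncating
   control word>", "fistpq <dest>", "fldcw <saved control word>", with the
   control-word operands taken from the insn.  */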
18799 const char *
18800 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18802 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
18803 bool dimode_p = GET_MODE (operands[0]) == DImode;
18804 int round_mode = get_attr_i387_cw (insn);
18806 static char buf[40];
18807 const char *p;
18809 /* Jump through a hoop or two for DImode, since the hardware has no
18810 non-popping instruction. We used to do this a different way, but
18811 that was somewhat fragile and broke with post-reload splitters. */
18812 if ((dimode_p || fisttp) && !stack_top_dies)
18813 output_asm_insn ("fld\t%y1", operands);
18815 gcc_assert (STACK_TOP_P (operands[1]));
18816 gcc_assert (MEM_P (operands[0]));
18817 gcc_assert (GET_MODE (operands[1]) != TFmode);
18819 if (fisttp)
18820 return "fisttp%Z0\t%0";
18822 strcpy (buf, "fist");
18824 if (round_mode != I387_CW_ANY)
18825 output_asm_insn ("fldcw\t%3", operands);
18827 p = "p%Z0\t%0";
18828 strcat (buf, p + !(stack_top_dies || dimode_p));
18830 output_asm_insn (buf, operands);
18832 if (round_mode != I387_CW_ANY)
18833 output_asm_insn ("fldcw\t%2", operands);
18835 return "";
18838 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18839 have the values zero or one, indicates the ffreep insn's operand
18840 from the OPERANDS array. */
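/* When the assembler lacks ffreep support (no HAVE_AS_IX86_FFREEP), the raw
   encoding is emitted instead: "ffreep %st(i)" is the two bytes 0xdf 0xc0+i,
   which is what the ASM_SHORT "0xc%ddf" constant below produces in
   little-endian memory.  */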
18842 static const char *
18843 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18845 if (TARGET_USE_FFREEP)
18846 #ifdef HAVE_AS_IX86_FFREEP
18847 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
18848 #else
18850 static char retval[32];
18851 int regno = REGNO (operands[opno]);
18853 gcc_assert (STACK_REGNO_P (regno));
18855 regno -= FIRST_STACK_REG;
18857 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
18858 return retval;
18860 #endif
18862 return opno ? "fstp\t%y1" : "fstp\t%y0";
18866 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18867 should be used. UNORDERED_P is true when fucom should be used. */
18869 const char *
18870 output_fp_compare (rtx_insn *insn, rtx *operands,
18871 bool eflags_p, bool unordered_p)
18873 rtx *xops = eflags_p ? &operands[0] : &operands[1];
18874 bool stack_top_dies;
18876 static char buf[40];
18877 const char *p;
18879 gcc_assert (STACK_TOP_P (xops[0]));
18881 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
18883 if (eflags_p)
18885 p = unordered_p ? "fucomi" : "fcomi";
18886 strcpy (buf, p);
18888 p = "p\t{%y1, %0|%0, %y1}";
18889 strcat (buf, p + !stack_top_dies);
18891 return buf;
18894 if (STACK_REG_P (xops[1])
18895 && stack_top_dies
18896 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
18898 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
18900 /* If the top of the 387 stack dies, and the other operand
18901 is also a stack register that dies, then this must be a
18902 `fcompp' float compare. */
18903 p = unordered_p ? "fucompp" : "fcompp";
18904 strcpy (buf, p);
18906 else if (const0_operand (xops[1], VOIDmode))
18908 gcc_assert (!unordered_p);
18909 strcpy (buf, "ftst");
18911 else
18913 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
18915 gcc_assert (!unordered_p);
18916 p = "ficom";
18918 else
18919 p = unordered_p ? "fucom" : "fcom";
18921 strcpy (buf, p);
18923 p = "p%Z2\t%y2";
18924 strcat (buf, p + !stack_top_dies);
18927 output_asm_insn (buf, operands);
18928 return "fnstsw\t%0";
18931 void
18932 ix86_output_addr_vec_elt (FILE *file, int value)
18934 const char *directive = ASM_LONG;
18936 #ifdef ASM_QUAD
18937 if (TARGET_LP64)
18938 directive = ASM_QUAD;
18939 #else
18940 gcc_assert (!TARGET_64BIT);
18941 #endif
18943 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
18946 void
18947 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18949 const char *directive = ASM_LONG;
18951 #ifdef ASM_QUAD
18952 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18953 directive = ASM_QUAD;
18954 #else
18955 gcc_assert (!TARGET_64BIT);
18956 #endif
18957 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
18958 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18959 fprintf (file, "%s%s%d-%s%d\n",
18960 directive, LPREFIX, value, LPREFIX, rel);
18961 else if (HAVE_AS_GOTOFF_IN_DATA)
18962 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18963 #if TARGET_MACHO
18964 else if (TARGET_MACHO)
18966 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18967 machopic_output_function_base_name (file);
18968 putc ('\n', file);
18970 #endif
18971 else
18972 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18973 GOT_SYMBOL_NAME, LPREFIX, value);
18976 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
18977 for the target. */
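/* The xor form clobbers the flags, hence the explicit CLOBBER of FLAGS_REG
   added to the parallel below; the plain "mov $0" form is only used for
   tunings that set TARGET_USE_MOV0 and when not optimizing for size.  */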
18979 void
18980 ix86_expand_clear (rtx dest)
18982 rtx tmp;
18984 /* We play register width games, which are only valid after reload. */
18985 gcc_assert (reload_completed);
18987 /* Avoid HImode and its attendant prefix byte. */
18988 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18989 dest = gen_rtx_REG (SImode, REGNO (dest));
18990 tmp = gen_rtx_SET (dest, const0_rtx);
18992 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18994 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18995 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18998 emit_insn (tmp);
19001 /* X is an unchanging MEM. If it is a constant pool reference, return
19002 the constant pool rtx, else NULL. */
19005 maybe_get_pool_constant (rtx x)
19007 x = ix86_delegitimize_address (XEXP (x, 0));
19009 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
19010 return get_pool_constant (x);
19012 return NULL_RTX;
19015 void
19016 ix86_expand_move (machine_mode mode, rtx operands[])
19018 rtx op0, op1;
19019 rtx tmp, addend = NULL_RTX;
19020 enum tls_model model;
19022 op0 = operands[0];
19023 op1 = operands[1];
19025 switch (GET_CODE (op1))
19027 case CONST:
19028 tmp = XEXP (op1, 0);
19030 if (GET_CODE (tmp) != PLUS
19031 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19032 break;
19034 op1 = XEXP (tmp, 0);
19035 addend = XEXP (tmp, 1);
19036 /* FALLTHRU */
19038 case SYMBOL_REF:
19039 model = SYMBOL_REF_TLS_MODEL (op1);
19041 if (model)
19042 op1 = legitimize_tls_address (op1, model, true);
19043 else if (ix86_force_load_from_GOT_p (op1))
19045 /* Load the external function address via GOT slot to avoid PLT. */
19046 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
19047 (TARGET_64BIT
19048 ? UNSPEC_GOTPCREL
19049 : UNSPEC_GOT));
19050 op1 = gen_rtx_CONST (Pmode, op1);
19051 op1 = gen_const_mem (Pmode, op1);
19052 set_mem_alias_set (op1, ix86_GOT_alias_set ());
19054 else
19056 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
19057 if (tmp)
19059 op1 = tmp;
19060 if (!addend)
19061 break;
19063 else
19065 op1 = operands[1];
19066 break;
19070 if (addend)
19072 op1 = force_operand (op1, NULL_RTX);
19073 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
19074 op0, 1, OPTAB_DIRECT);
19076 else
19077 op1 = force_operand (op1, op0);
19079 if (op1 == op0)
19080 return;
19082 op1 = convert_to_mode (mode, op1, 1);
19084 default:
19085 break;
19088 if ((flag_pic || MACHOPIC_INDIRECT)
19089 && symbolic_operand (op1, mode))
19091 if (TARGET_MACHO && !TARGET_64BIT)
19093 #if TARGET_MACHO
19094 /* dynamic-no-pic */
19095 if (MACHOPIC_INDIRECT)
19097 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
19098 ? op0 : gen_reg_rtx (Pmode);
19099 op1 = machopic_indirect_data_reference (op1, temp);
19100 if (MACHOPIC_PURE)
19101 op1 = machopic_legitimize_pic_address (op1, mode,
19102 temp == op1 ? 0 : temp);
19104 if (op0 != op1 && GET_CODE (op0) != MEM)
19106 rtx insn = gen_rtx_SET (op0, op1);
19107 emit_insn (insn);
19108 return;
19110 if (GET_CODE (op0) == MEM)
19111 op1 = force_reg (Pmode, op1);
19112 else
19114 rtx temp = op0;
19115 if (GET_CODE (temp) != REG)
19116 temp = gen_reg_rtx (Pmode);
19117 temp = legitimize_pic_address (op1, temp);
19118 if (temp == op0)
19119 return;
19120 op1 = temp;
19122 /* dynamic-no-pic */
19123 #endif
19125 else
19127 if (MEM_P (op0))
19128 op1 = force_reg (mode, op1);
19129 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
19131 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
19132 op1 = legitimize_pic_address (op1, reg);
19133 if (op0 == op1)
19134 return;
19135 op1 = convert_to_mode (mode, op1, 1);
19139 else
19141 if (MEM_P (op0)
19142 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
19143 || !push_operand (op0, mode))
19144 && MEM_P (op1))
19145 op1 = force_reg (mode, op1);
19147 if (push_operand (op0, mode)
19148 && ! general_no_elim_operand (op1, mode))
19149 op1 = copy_to_mode_reg (mode, op1);
19151 /* Force large constants in 64bit compilation into register
19152 to get them CSEed. */
19153 if (can_create_pseudo_p ()
19154 && (mode == DImode) && TARGET_64BIT
19155 && immediate_operand (op1, mode)
19156 && !x86_64_zext_immediate_operand (op1, VOIDmode)
19157 && !register_operand (op0, mode)
19158 && optimize)
19159 op1 = copy_to_mode_reg (mode, op1);
19161 if (can_create_pseudo_p ()
19162 && CONST_DOUBLE_P (op1))
19164 /* If we are loading a floating point constant to a register,
19165 force the value to memory now, since we'll get better code
19166 out the back end. */
19168 op1 = validize_mem (force_const_mem (mode, op1));
19169 if (!register_operand (op0, mode))
19171 rtx temp = gen_reg_rtx (mode);
19172 emit_insn (gen_rtx_SET (temp, op1));
19173 emit_move_insn (op0, temp);
19174 return;
19179 emit_insn (gen_rtx_SET (op0, op1));
19182 void
19183 ix86_expand_vector_move (machine_mode mode, rtx operands[])
19185 rtx op0 = operands[0], op1 = operands[1];
19186 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
19187 psABI, since the biggest alignment is 4 bytes for the IA MCU psABI.  */
19188 unsigned int align = (TARGET_IAMCU
19189 ? GET_MODE_BITSIZE (mode)
19190 : GET_MODE_ALIGNMENT (mode));
19192 if (push_operand (op0, VOIDmode))
19193 op0 = emit_move_resolve_push (mode, op0);
19195 /* Force constants other than zero into memory. We do not know how
19196 the instructions used to build constants modify the upper 64 bits
19197 of the register; once we have that information, we may be able
19198 to handle some of them more efficiently. */
19199 if (can_create_pseudo_p ()
19200 && (CONSTANT_P (op1)
19201 || (SUBREG_P (op1)
19202 && CONSTANT_P (SUBREG_REG (op1))))
19203 && ((register_operand (op0, mode)
19204 && !standard_sse_constant_p (op1, mode))
19205 /* ix86_expand_vector_move_misalign() does not like constants. */
19206 || (SSE_REG_MODE_P (mode)
19207 && MEM_P (op0)
19208 && MEM_ALIGN (op0) < align)))
19210 if (SUBREG_P (op1))
19212 machine_mode imode = GET_MODE (SUBREG_REG (op1));
19213 rtx r = force_const_mem (imode, SUBREG_REG (op1));
19214 if (r)
19215 r = validize_mem (r);
19216 else
19217 r = force_reg (imode, SUBREG_REG (op1));
19218 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
19220 else
19221 op1 = validize_mem (force_const_mem (mode, op1));
19224 /* We need to check memory alignment for SSE mode since an attribute
19225 can make operands unaligned. */
19226 if (can_create_pseudo_p ()
19227 && SSE_REG_MODE_P (mode)
19228 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
19229 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
19231 rtx tmp[2];
19233 /* ix86_expand_vector_move_misalign() does not like both
19234 arguments in memory. */
19235 if (!register_operand (op0, mode)
19236 && !register_operand (op1, mode))
19237 op1 = force_reg (mode, op1);
19239 tmp[0] = op0; tmp[1] = op1;
19240 ix86_expand_vector_move_misalign (mode, tmp);
19241 return;
19244 /* Make operand1 a register if it isn't already. */
19245 if (can_create_pseudo_p ()
19246 && !register_operand (op0, mode)
19247 && !register_operand (op1, mode))
19249 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
19250 return;
19253 emit_insn (gen_rtx_SET (op0, op1));
19256 /* Split 32-byte AVX unaligned load and store if needed. */
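/* Roughly: an unaligned 32-byte load is done as a 16-byte load of the low
   half followed by a VEC_CONCAT with the high half, and an unaligned store
   as two 16-byte vextractf128 stores, as implemented below.  */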
19258 static void
19259 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
19261 rtx m;
19262 rtx (*extract) (rtx, rtx, rtx);
19263 machine_mode mode;
19265 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
19266 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
19268 emit_insn (gen_rtx_SET (op0, op1));
19269 return;
19272 rtx orig_op0 = NULL_RTX;
19273 mode = GET_MODE (op0);
19274 switch (GET_MODE_CLASS (mode))
19276 case MODE_VECTOR_INT:
19277 case MODE_INT:
19278 if (mode != V32QImode)
19280 if (!MEM_P (op0))
19282 orig_op0 = op0;
19283 op0 = gen_reg_rtx (V32QImode);
19285 else
19286 op0 = gen_lowpart (V32QImode, op0);
19287 op1 = gen_lowpart (V32QImode, op1);
19288 mode = V32QImode;
19290 break;
19291 case MODE_VECTOR_FLOAT:
19292 break;
19293 default:
19294 gcc_unreachable ();
19297 switch (mode)
19299 default:
19300 gcc_unreachable ();
19301 case E_V32QImode:
19302 extract = gen_avx_vextractf128v32qi;
19303 mode = V16QImode;
19304 break;
19305 case E_V8SFmode:
19306 extract = gen_avx_vextractf128v8sf;
19307 mode = V4SFmode;
19308 break;
19309 case E_V4DFmode:
19310 extract = gen_avx_vextractf128v4df;
19311 mode = V2DFmode;
19312 break;
19315 if (MEM_P (op1))
19317 rtx r = gen_reg_rtx (mode);
19318 m = adjust_address (op1, mode, 0);
19319 emit_move_insn (r, m);
19320 m = adjust_address (op1, mode, 16);
19321 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
19322 emit_move_insn (op0, r);
19324 else if (MEM_P (op0))
19326 m = adjust_address (op0, mode, 0);
19327 emit_insn (extract (m, op1, const0_rtx));
19328 m = adjust_address (op0, mode, 16);
19329 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
19331 else
19332 gcc_unreachable ();
19334 if (orig_op0)
19335 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19338 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
19339 straight to ix86_expand_vector_move. */
19340 /* Code generation for scalar reg-reg moves of single and double precision data:
19341 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
19342 movaps reg, reg
19343 else
19344 movss reg, reg
19345 if (x86_sse_partial_reg_dependency == true)
19346 movapd reg, reg
19347 else
19348 movsd reg, reg
19350 Code generation for scalar loads of double precision data:
19351 if (x86_sse_split_regs == true)
19352 movlpd mem, reg (gas syntax)
19353 else
19354 movsd mem, reg
19356 Code generation for unaligned packed loads of single precision data
19357 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
19358 if (x86_sse_unaligned_move_optimal)
19359 movups mem, reg
19361 if (x86_sse_partial_reg_dependency == true)
19363 xorps reg, reg
19364 movlps mem, reg
19365 movhps mem+8, reg
19367 else
19369 movlps mem, reg
19370 movhps mem+8, reg
19373 Code generation for unaligned packed loads of double precision data
19374 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
19375 if (x86_sse_unaligned_move_optimal)
19376 movupd mem, reg
19378 if (x86_sse_split_regs == true)
19380 movlpd mem, reg
19381 movhpd mem+8, reg
19383 else
19385 movsd mem, reg
19386 movhpd mem+8, reg
19390 void
19391 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
19393 rtx op0, op1, m;
19395 op0 = operands[0];
19396 op1 = operands[1];
19398 /* Use unaligned load/store for AVX512 or when optimizing for size. */
19399 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
19401 emit_insn (gen_rtx_SET (op0, op1));
19402 return;
19405 if (TARGET_AVX)
19407 if (GET_MODE_SIZE (mode) == 32)
19408 ix86_avx256_split_vector_move_misalign (op0, op1);
19409 else
19410 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
19411 emit_insn (gen_rtx_SET (op0, op1));
19412 return;
19415 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19416 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
19418 emit_insn (gen_rtx_SET (op0, op1));
19419 return;
19422 /* ??? If we have typed data, then it would appear that using
19423 movdqu is the only way to get unaligned data loaded with
19424 integer type. */
19425 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19427 emit_insn (gen_rtx_SET (op0, op1));
19428 return;
19431 if (MEM_P (op1))
19433 if (TARGET_SSE2 && mode == V2DFmode)
19435 rtx zero;
19437 /* When SSE registers are split into halves, we can avoid
19438 writing to the top half twice. */
19439 if (TARGET_SSE_SPLIT_REGS)
19441 emit_clobber (op0);
19442 zero = op0;
19444 else
19446 /* ??? Not sure about the best option for the Intel chips.
19447 The following would seem to satisfy; the register is
19448 entirely cleared, breaking the dependency chain. We
19449 then store to the upper half, with a dependency depth
19450 of one. A rumor has it that Intel recommends two movsd
19451 followed by an unpacklpd, but this is unconfirmed. And
19452 given that the dependency depth of the unpacklpd would
19453 still be one, I'm not sure why this would be better. */
19454 zero = CONST0_RTX (V2DFmode);
19457 m = adjust_address (op1, DFmode, 0);
19458 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19459 m = adjust_address (op1, DFmode, 8);
19460 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19462 else
19464 rtx t;
19466 if (mode != V4SFmode)
19467 t = gen_reg_rtx (V4SFmode);
19468 else
19469 t = op0;
19471 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19472 emit_move_insn (t, CONST0_RTX (V4SFmode));
19473 else
19474 emit_clobber (t);
19476 m = adjust_address (op1, V2SFmode, 0);
19477 emit_insn (gen_sse_loadlps (t, t, m));
19478 m = adjust_address (op1, V2SFmode, 8);
19479 emit_insn (gen_sse_loadhps (t, t, m));
19480 if (mode != V4SFmode)
19481 emit_move_insn (op0, gen_lowpart (mode, t));
19484 else if (MEM_P (op0))
19486 if (TARGET_SSE2 && mode == V2DFmode)
19488 m = adjust_address (op0, DFmode, 0);
19489 emit_insn (gen_sse2_storelpd (m, op1));
19490 m = adjust_address (op0, DFmode, 8);
19491 emit_insn (gen_sse2_storehpd (m, op1));
19493 else
19495 if (mode != V4SFmode)
19496 op1 = gen_lowpart (V4SFmode, op1);
19498 m = adjust_address (op0, V2SFmode, 0);
19499 emit_insn (gen_sse_storelps (m, op1));
19500 m = adjust_address (op0, V2SFmode, 8);
19501 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
19504 else
19505 gcc_unreachable ();
19508 /* Helper function of ix86_fixup_binary_operands to canonicalize
19509 operand order. Returns true if the operands should be swapped. */
19511 static bool
19512 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19513 rtx operands[])
19515 rtx dst = operands[0];
19516 rtx src1 = operands[1];
19517 rtx src2 = operands[2];
19519 /* If the operation is not commutative, we can't do anything. */
19520 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19521 return false;
19523 /* Highest priority is that src1 should match dst. */
19524 if (rtx_equal_p (dst, src1))
19525 return false;
19526 if (rtx_equal_p (dst, src2))
19527 return true;
19529 /* Next highest priority is that immediate constants come second. */
19530 if (immediate_operand (src2, mode))
19531 return false;
19532 if (immediate_operand (src1, mode))
19533 return true;
19535 /* Lowest priority is that memory references should come second. */
19536 if (MEM_P (src2))
19537 return false;
19538 if (MEM_P (src1))
19539 return true;
19541 return false;
19545 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19546 destination to use for the operation. If different from the true
19547 destination in operands[0], a copy operation will be required. */
19550 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19551 rtx operands[])
19553 rtx dst = operands[0];
19554 rtx src1 = operands[1];
19555 rtx src2 = operands[2];
19557 /* Canonicalize operand order. */
19558 if (ix86_swap_binary_operands_p (code, mode, operands))
19560 /* It is invalid to swap operands of different modes. */
19561 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19563 std::swap (src1, src2);
19566 /* Both source operands cannot be in memory. */
19567 if (MEM_P (src1) && MEM_P (src2))
19569 /* Optimization: Only read from memory once. */
19570 if (rtx_equal_p (src1, src2))
19572 src2 = force_reg (mode, src2);
19573 src1 = src2;
19575 else if (rtx_equal_p (dst, src1))
19576 src2 = force_reg (mode, src2);
19577 else
19578 src1 = force_reg (mode, src1);
19581 /* If the destination is memory, and we do not have matching source
19582 operands, do things in registers. */
19583 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19584 dst = gen_reg_rtx (mode);
19586 /* Source 1 cannot be a constant. */
19587 if (CONSTANT_P (src1))
19588 src1 = force_reg (mode, src1);
19590 /* Source 1 cannot be a non-matching memory. */
19591 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19592 src1 = force_reg (mode, src1);
19594 /* Improve address combine. */
19595 if (code == PLUS
19596 && GET_MODE_CLASS (mode) == MODE_INT
19597 && MEM_P (src2))
19598 src2 = force_reg (mode, src2);
19600 operands[1] = src1;
19601 operands[2] = src2;
19602 return dst;
19605 /* Similarly, but assume that the destination has already been
19606 set up properly. */
19608 void
19609 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19610 machine_mode mode, rtx operands[])
19612 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19613 gcc_assert (dst == operands[0]);
19616 /* Attempt to expand a binary operator. Make the expansion closer to the
19617 actual machine than just general_operand, which will allow 3 separate
19618 memory references (one output, two inputs) in a single insn. */
19620 void
19621 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19622 rtx operands[])
19624 rtx src1, src2, dst, op, clob;
19626 dst = ix86_fixup_binary_operands (code, mode, operands);
19627 src1 = operands[1];
19628 src2 = operands[2];
19630 /* Emit the instruction. */
19632 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19634 if (reload_completed
19635 && code == PLUS
19636 && !rtx_equal_p (dst, src1))
19638 /* This is going to be an LEA; avoid splitting it later. */
19639 emit_insn (op);
19641 else
19643 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19644 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19647 /* Fix up the destination if needed. */
19648 if (dst != operands[0])
19649 emit_move_insn (operands[0], dst);
19652 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19653 the given OPERANDS. */
19655 void
19656 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19657 rtx operands[])
19659 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19660 if (SUBREG_P (operands[1]))
19662 op1 = operands[1];
19663 op2 = operands[2];
19665 else if (SUBREG_P (operands[2]))
19667 op1 = operands[2];
19668 op2 = operands[1];
19670 /* Optimize (__m128i) d | (__m128i) e and similar code
19671 when d and e are float vectors into float vector logical
19672 insn. In C/C++ without using intrinsics there is no other way
19673 to express vector logical operation on float vectors than
19674 to cast them temporarily to integer vectors. */
19675 if (op1
19676 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19677 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19678 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19679 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19680 && SUBREG_BYTE (op1) == 0
19681 && (GET_CODE (op2) == CONST_VECTOR
19682 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19683 && SUBREG_BYTE (op2) == 0))
19684 && can_create_pseudo_p ())
19686 rtx dst;
19687 switch (GET_MODE (SUBREG_REG (op1)))
19689 case E_V4SFmode:
19690 case E_V8SFmode:
19691 case E_V16SFmode:
19692 case E_V2DFmode:
19693 case E_V4DFmode:
19694 case E_V8DFmode:
19695 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19696 if (GET_CODE (op2) == CONST_VECTOR)
19698 op2 = gen_lowpart (GET_MODE (dst), op2);
19699 op2 = force_reg (GET_MODE (dst), op2);
19701 else
19703 op1 = operands[1];
19704 op2 = SUBREG_REG (operands[2]);
19705 if (!vector_operand (op2, GET_MODE (dst)))
19706 op2 = force_reg (GET_MODE (dst), op2);
19708 op1 = SUBREG_REG (op1);
19709 if (!vector_operand (op1, GET_MODE (dst)))
19710 op1 = force_reg (GET_MODE (dst), op1);
19711 emit_insn (gen_rtx_SET (dst,
19712 gen_rtx_fmt_ee (code, GET_MODE (dst),
19713 op1, op2)));
19714 emit_move_insn (operands[0], gen_lowpart (mode, dst));
19715 return;
19716 default:
19717 break;
19720 if (!vector_operand (operands[1], mode))
19721 operands[1] = force_reg (mode, operands[1]);
19722 if (!vector_operand (operands[2], mode))
19723 operands[2] = force_reg (mode, operands[2]);
19724 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19725 emit_insn (gen_rtx_SET (operands[0],
19726 gen_rtx_fmt_ee (code, mode, operands[1],
19727 operands[2])));
19730 /* Return TRUE or FALSE depending on whether the binary operator meets the
19731 appropriate constraints. */
19733 bool
19734 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19735 rtx operands[3])
19737 rtx dst = operands[0];
19738 rtx src1 = operands[1];
19739 rtx src2 = operands[2];
19741 /* Both source operands cannot be in memory. */
19742 if (MEM_P (src1) && MEM_P (src2))
19743 return false;
19745 /* Canonicalize operand order for commutative operators. */
19746 if (ix86_swap_binary_operands_p (code, mode, operands))
19747 std::swap (src1, src2);
19749 /* If the destination is memory, we must have a matching source operand. */
19750 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19751 return false;
19753 /* Source 1 cannot be a constant. */
19754 if (CONSTANT_P (src1))
19755 return false;
19757 /* Source 1 cannot be a non-matching memory. */
19758 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19759 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19760 return (code == AND
19761 && (mode == HImode
19762 || mode == SImode
19763 || (TARGET_64BIT && mode == DImode))
19764 && satisfies_constraint_L (src2));
19766 return true;
19769 /* Attempt to expand a unary operator. Make the expansion closer to the
19770 actual machine than just general_operand, which will allow 2 separate
19771 memory references (one output, one input) in a single insn. */
19773 void
19774 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19775 rtx operands[])
19777 bool matching_memory = false;
19778 rtx src, dst, op, clob;
19780 dst = operands[0];
19781 src = operands[1];
19783 /* If the destination is memory, and we do not have matching source
19784 operands, do things in registers. */
19785 if (MEM_P (dst))
19787 if (rtx_equal_p (dst, src))
19788 matching_memory = true;
19789 else
19790 dst = gen_reg_rtx (mode);
19793 /* When source operand is memory, destination must match. */
19794 if (MEM_P (src) && !matching_memory)
19795 src = force_reg (mode, src);
19797 /* Emit the instruction. */
19799 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
19801 if (code == NOT)
19802 emit_insn (op);
19803 else
19805 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19806 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19809 /* Fix up the destination if needed. */
19810 if (dst != operands[0])
19811 emit_move_insn (operands[0], dst);
19814 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19815 divisor are within the range [0-255]. */
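/* Added illustration (a rough sketch, not the exact RTL this function
   emits): for an unsigned 32-bit division with the dividend in %eax and
   the divisor in %ecx, the split corresponds approximately to

	movl	%ecx, %edx
	orl	%eax, %edx
	testl	$-0x100, %edx		# both values in [0, 255]?
	je	.Lqimode
	xorl	%edx, %edx
	divl	%ecx			# full 32-bit divide
	jmp	.Ldone
   .Lqimode:
	divb	%cl			# AX / CL: AL = quotient, AH = remainder
	movzbl	%ah, %edx		# extract remainder first, then quotient
	movzbl	%al, %eax
   .Ldone:
   Register choices here are illustrative; the code below uses a fresh
   scratch register and the actual operand registers.  */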
19817 void
19818 ix86_split_idivmod (machine_mode mode, rtx operands[],
19819 bool signed_p)
19821 rtx_code_label *end_label, *qimode_label;
19822 rtx div, mod;
19823 rtx_insn *insn;
19824 rtx scratch, tmp0, tmp1, tmp2;
19825 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19826 rtx (*gen_zero_extend) (rtx, rtx);
19827 rtx (*gen_test_ccno_1) (rtx, rtx);
19829 switch (mode)
19831 case E_SImode:
19832 if (GET_MODE (operands[0]) == SImode)
19834 if (GET_MODE (operands[1]) == SImode)
19835 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19836 else
19837 gen_divmod4_1
19838 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
19839 gen_zero_extend = gen_zero_extendqisi2;
19841 else
19843 gen_divmod4_1
19844 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
19845 gen_zero_extend = gen_zero_extendqidi2;
19847 gen_test_ccno_1 = gen_testsi_ccno_1;
19848 break;
19849 case E_DImode:
19850 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19851 gen_test_ccno_1 = gen_testdi_ccno_1;
19852 gen_zero_extend = gen_zero_extendqidi2;
19853 break;
19854 default:
19855 gcc_unreachable ();
19858 end_label = gen_label_rtx ();
19859 qimode_label = gen_label_rtx ();
19861 scratch = gen_reg_rtx (mode);
19863 /* Use 8bit unsigned divmod if dividend and divisor are within
19864 the range [0-255]. */
19865 emit_move_insn (scratch, operands[2]);
19866 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19867 scratch, 1, OPTAB_DIRECT);
19868 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19869 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19870 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19871 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19872 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19873 pc_rtx);
19874 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
19875 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19876 JUMP_LABEL (insn) = qimode_label;
19878 /* Generate original signed/unsigned divmod. */
19879 div = gen_divmod4_1 (operands[0], operands[1],
19880 operands[2], operands[3]);
19881 emit_insn (div);
19883 /* Branch to the end. */
19884 emit_jump_insn (gen_jump (end_label));
19885 emit_barrier ();
19887 /* Generate 8bit unsigned divide. */
19888 emit_label (qimode_label);
19889 /* Don't use operands[0] for result of 8bit divide since not all
19890 registers support QImode ZERO_EXTRACT. */
19891 tmp0 = lowpart_subreg (HImode, scratch, mode);
19892 tmp1 = lowpart_subreg (HImode, operands[2], mode);
19893 tmp2 = lowpart_subreg (QImode, operands[3], mode);
19894 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
19896 if (signed_p)
19898 div = gen_rtx_DIV (mode, operands[2], operands[3]);
19899 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
19901 else
19903 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
19904 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
19906 if (mode == SImode)
19908 if (GET_MODE (operands[0]) != SImode)
19909 div = gen_rtx_ZERO_EXTEND (DImode, div);
19910 if (GET_MODE (operands[1]) != SImode)
19911 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
19914 /* Extract remainder from AH. */
19915 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
19916 tmp0, GEN_INT (8), GEN_INT (8));
19917 if (REG_P (operands[1]))
19918 insn = emit_move_insn (operands[1], tmp1);
19919 else
19921 /* Need a new scratch register since the old one has result
19922 of 8bit divide. */
19923 scratch = gen_reg_rtx (GET_MODE (operands[1]));
19924 emit_move_insn (scratch, tmp1);
19925 insn = emit_move_insn (operands[1], scratch);
19927 set_unique_reg_note (insn, REG_EQUAL, mod);
19929 /* Zero extend quotient from AL. */
19930 tmp1 = gen_lowpart (QImode, tmp0);
19931 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19932 set_unique_reg_note (insn, REG_EQUAL, div);
19934 emit_label (end_label);
19937 #define LEA_MAX_STALL (3)
19938 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19940 /* Increase given DISTANCE in half-cycles according to
19941 dependencies between PREV and NEXT instructions.
19942 Add 1 half-cycle if there is no dependency and
19943 go to the next cycle if there is some dependency. */
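/* For illustration (using the defaults defined above): the search window
   LEA_SEARCH_THRESHOLD is 2 * LEA_MAX_STALL = 6 half-cycles.  An
   instruction independent of its predecessor advances the distance by one
   half-cycle; a dependent one rounds the distance up to an even number of
   half-cycles and then adds a full cycle (distance + (distance & 1) + 2).  */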
19945 static unsigned int
19946 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19948 df_ref def, use;
19950 if (!prev || !next)
19951 return distance + (distance & 1) + 2;
19953 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19954 return distance + 1;
19956 FOR_EACH_INSN_USE (use, next)
19957 FOR_EACH_INSN_DEF (def, prev)
19958 if (!DF_REF_IS_ARTIFICIAL (def)
19959 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19960 return distance + (distance & 1) + 2;
19962 return distance + 1;
19965 /* Function checks if instruction INSN defines register number
19966 REGNO1 or REGNO2. */
19968 static bool
19969 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19970 rtx_insn *insn)
19972 df_ref def;
19974 FOR_EACH_INSN_DEF (def, insn)
19975 if (DF_REF_REG_DEF_P (def)
19976 && !DF_REF_IS_ARTIFICIAL (def)
19977 && (regno1 == DF_REF_REGNO (def)
19978 || regno2 == DF_REF_REGNO (def)))
19979 return true;
19981 return false;
19984 /* Function checks if instruction INSN uses register number
19985 REGNO as a part of address expression. */
19987 static bool
19988 insn_uses_reg_mem (unsigned int regno, rtx insn)
19990 df_ref use;
19992 FOR_EACH_INSN_USE (use, insn)
19993 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19994 return true;
19996 return false;
19999 /* Search backward for non-agu definition of register number REGNO1
20000 or register number REGNO2 in basic block starting from instruction
20001 START up to head of basic block or instruction INSN.
20003 The function puts a true value into *FOUND if a definition was found
20004 and false otherwise.
20006 Distance in half-cycles between START and found instruction or head
20007 of BB is added to DISTANCE and returned. */
20009 static int
20010 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20011 rtx_insn *insn, int distance,
20012 rtx_insn *start, bool *found)
20014 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20015 rtx_insn *prev = start;
20016 rtx_insn *next = NULL;
20018 *found = false;
20020 while (prev
20021 && prev != insn
20022 && distance < LEA_SEARCH_THRESHOLD)
20024 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20026 distance = increase_distance (prev, next, distance);
20027 if (insn_defines_reg (regno1, regno2, prev))
20029 if (recog_memoized (prev) < 0
20030 || get_attr_type (prev) != TYPE_LEA)
20032 *found = true;
20033 return distance;
20037 next = prev;
20039 if (prev == BB_HEAD (bb))
20040 break;
20042 prev = PREV_INSN (prev);
20045 return distance;
20048 /* Search backward for non-agu definition of register number REGNO1
20049 or register number REGNO2 in INSN's basic block until
20050 1. Pass LEA_SEARCH_THRESHOLD instructions, or
20051 2. Reach neighbor BBs boundary, or
20052 3. Reach agu definition.
20053 Returns the distance between the non-agu definition point and INSN.
20054 If no definition point, returns -1. */
20056 static int
20057 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
20058 rtx_insn *insn)
20060 basic_block bb = BLOCK_FOR_INSN (insn);
20061 int distance = 0;
20062 bool found = false;
20064 if (insn != BB_HEAD (bb))
20065 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
20066 distance, PREV_INSN (insn),
20067 &found);
20069 if (!found && distance < LEA_SEARCH_THRESHOLD)
20071 edge e;
20072 edge_iterator ei;
20073 bool simple_loop = false;
20075 FOR_EACH_EDGE (e, ei, bb->preds)
20076 if (e->src == bb)
20078 simple_loop = true;
20079 break;
20082 if (simple_loop)
20083 distance = distance_non_agu_define_in_bb (regno1, regno2,
20084 insn, distance,
20085 BB_END (bb), &found);
20086 else
20088 int shortest_dist = -1;
20089 bool found_in_bb = false;
20091 FOR_EACH_EDGE (e, ei, bb->preds)
20093 int bb_dist
20094 = distance_non_agu_define_in_bb (regno1, regno2,
20095 insn, distance,
20096 BB_END (e->src),
20097 &found_in_bb);
20098 if (found_in_bb)
20100 if (shortest_dist < 0)
20101 shortest_dist = bb_dist;
20102 else if (bb_dist > 0)
20103 shortest_dist = MIN (bb_dist, shortest_dist);
20105 found = true;
20109 distance = shortest_dist;
20113 /* get_attr_type may modify recog data. We want to make sure
20114 that recog data is valid for instruction INSN, on which
20115 distance_non_agu_define is called. INSN is unchanged here. */
20116 extract_insn_cached (insn);
20118 if (!found)
20119 return -1;
20121 return distance >> 1;
20124 /* Return the distance in half-cycles between INSN and the next
20125 insn that uses register number REGNO in memory address added
20126 to DISTANCE. Return -1 if REGNO is set.
20128 Put true value into *FOUND if register usage was found and
20129 false otherwise.
20130 Put true value into *REDEFINED if register redefinition was
20131 found and false otherwise. */
20133 static int
20134 distance_agu_use_in_bb (unsigned int regno,
20135 rtx_insn *insn, int distance, rtx_insn *start,
20136 bool *found, bool *redefined)
20138 basic_block bb = NULL;
20139 rtx_insn *next = start;
20140 rtx_insn *prev = NULL;
20142 *found = false;
20143 *redefined = false;
20145 if (start != NULL_RTX)
20147 bb = BLOCK_FOR_INSN (start);
20148 if (start != BB_HEAD (bb))
20149 /* If insn and start belong to the same bb, set prev to insn,
20150 so the call to increase_distance will increase the distance
20151 between insns by 1. */
20152 prev = insn;
20155 while (next
20156 && next != insn
20157 && distance < LEA_SEARCH_THRESHOLD)
20159 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
20161 distance = increase_distance(prev, next, distance);
20162 if (insn_uses_reg_mem (regno, next))
20164 /* Return DISTANCE if OP0 is used in memory
20165 address in NEXT. */
20166 *found = true;
20167 return distance;
20170 if (insn_defines_reg (regno, INVALID_REGNUM, next))
20172 /* Return -1 if OP0 is set in NEXT. */
20173 *redefined = true;
20174 return -1;
20177 prev = next;
20180 if (next == BB_END (bb))
20181 break;
20183 next = NEXT_INSN (next);
20186 return distance;
20189 /* Return the distance between INSN and the next insn that uses
20190 register number REGNO0 in a memory address. Return -1 if no such
20191 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
20193 static int
20194 distance_agu_use (unsigned int regno0, rtx_insn *insn)
20196 basic_block bb = BLOCK_FOR_INSN (insn);
20197 int distance = 0;
20198 bool found = false;
20199 bool redefined = false;
20201 if (insn != BB_END (bb))
20202 distance = distance_agu_use_in_bb (regno0, insn, distance,
20203 NEXT_INSN (insn),
20204 &found, &redefined);
20206 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
20208 edge e;
20209 edge_iterator ei;
20210 bool simple_loop = false;
20212 FOR_EACH_EDGE (e, ei, bb->succs)
20213 if (e->dest == bb)
20215 simple_loop = true;
20216 break;
20219 if (simple_loop)
20220 distance = distance_agu_use_in_bb (regno0, insn,
20221 distance, BB_HEAD (bb),
20222 &found, &redefined);
20223 else
20225 int shortest_dist = -1;
20226 bool found_in_bb = false;
20227 bool redefined_in_bb = false;
20229 FOR_EACH_EDGE (e, ei, bb->succs)
20231 int bb_dist
20232 = distance_agu_use_in_bb (regno0, insn,
20233 distance, BB_HEAD (e->dest),
20234 &found_in_bb, &redefined_in_bb);
20235 if (found_in_bb)
20237 if (shortest_dist < 0)
20238 shortest_dist = bb_dist;
20239 else if (bb_dist > 0)
20240 shortest_dist = MIN (bb_dist, shortest_dist);
20242 found = true;
20246 distance = shortest_dist;
20250 if (!found || redefined)
20251 return -1;
20253 return distance >> 1;
20256 /* Define this macro to tune LEA priority vs ADD; it takes effect when
20257 there is a dilemma of choosing LEA or ADD.
20258 Negative value: ADD is preferred over LEA
20259 Zero: Neutral
20260 Positive value: LEA is preferred over ADD. */
20261 #define IX86_LEA_PRIORITY 0
20263 /* Return true if usage of lea INSN has performance advantage
20264 over a sequence of instructions. Instructions sequence has
20265 SPLIT_COST cycles higher latency than lea latency. */
20267 static bool
20268 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
20269 unsigned int regno2, int split_cost, bool has_scale)
20271 int dist_define, dist_use;
20273 /* For Silvermont, if using a 2-source or 3-source LEA for
20274 non-destructive destination purposes, or due to wanting the
20275 ability to use SCALE, the use of LEA is justified. */
20276 if (TARGET_SILVERMONT || TARGET_INTEL)
20278 if (has_scale)
20279 return true;
20280 if (split_cost < 1)
20281 return false;
20282 if (regno0 == regno1 || regno0 == regno2)
20283 return false;
20284 return true;
20287 dist_define = distance_non_agu_define (regno1, regno2, insn);
20288 dist_use = distance_agu_use (regno0, insn);
20290 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
20292 /* If there is no non AGU operand definition, no AGU
20293 operand usage and split cost is 0 then both lea
20294 and non lea variants have the same priority. Currently
20295 we prefer lea for 64 bit code and non lea on 32 bit
20296 code. */
20297 if (dist_use < 0 && split_cost == 0)
20298 return TARGET_64BIT || IX86_LEA_PRIORITY;
20299 else
20300 return true;
20303 /* With a longer distance to the definition, lea is more preferable.
20304 Here we change it to take into account splitting cost and
20305 lea priority. */
20306 dist_define += split_cost + IX86_LEA_PRIORITY;
20308 /* If there is no use in a memory address then we just check
20309 that split cost exceeds AGU stall. */
20310 if (dist_use < 0)
20311 return dist_define > LEA_MAX_STALL;
20313 /* If this insn has both backward non-agu dependence and forward
20314 agu dependence, the one with short distance takes effect. */
20315 return dist_define >= dist_use;
20318 /* Return true if it is legal to clobber flags by INSN and
20319 false otherwise. */
20321 static bool
20322 ix86_ok_to_clobber_flags (rtx_insn *insn)
20324 basic_block bb = BLOCK_FOR_INSN (insn);
20325 df_ref use;
20326 bitmap live;
20328 while (insn)
20330 if (NONDEBUG_INSN_P (insn))
20332 FOR_EACH_INSN_USE (use, insn)
20333 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20334 return false;
20336 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20337 return true;
20340 if (insn == BB_END (bb))
20341 break;
20343 insn = NEXT_INSN (insn);
20346 live = df_get_live_out(bb);
20347 return !REGNO_REG_SET_P (live, FLAGS_REG);
20350 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20351 move and add to avoid AGU stalls. */
20353 bool
20354 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20356 unsigned int regno0, regno1, regno2;
20358 /* Check if we need to optimize. */
20359 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20360 return false;
20362 /* Check it is correct to split here. */
20363 if (!ix86_ok_to_clobber_flags(insn))
20364 return false;
20366 regno0 = true_regnum (operands[0]);
20367 regno1 = true_regnum (operands[1]);
20368 regno2 = true_regnum (operands[2]);
20370 /* We need to split only adds with non destructive
20371 destination operand. */
20372 if (regno0 == regno1 || regno0 == regno2)
20373 return false;
20374 else
20375 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20378 /* Return true if we should emit lea instruction instead of mov
20379 instruction. */
20381 bool
20382 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20384 unsigned int regno0, regno1;
20386 /* Check if we need to optimize. */
20387 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20388 return false;
20390 /* Use lea for reg to reg moves only. */
20391 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20392 return false;
20394 regno0 = true_regnum (operands[0]);
20395 regno1 = true_regnum (operands[1]);
20397 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20400 /* Return true if we need to split lea into a sequence of
20401 instructions to avoid AGU stalls. */
20403 bool
20404 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20406 unsigned int regno0, regno1, regno2;
20407 int split_cost;
20408 struct ix86_address parts;
20409 int ok;
20411 /* Check we need to optimize. */
20412 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20413 return false;
20415 /* The "at least two components" test below might not catch simple
20416 move or zero extension insns if parts.base is non-NULL and parts.disp
20417 is const0_rtx as the only components in the address, e.g. if the
20418 register is %rbp or %r13. As this test is much cheaper and moves or
20419 zero extensions are the common case, do this check first. */
20420 if (REG_P (operands[1])
20421 || (SImode_address_operand (operands[1], VOIDmode)
20422 && REG_P (XEXP (operands[1], 0))))
20423 return false;
20425 /* Check if it is OK to split here. */
20426 if (!ix86_ok_to_clobber_flags (insn))
20427 return false;
20429 ok = ix86_decompose_address (operands[1], &parts);
20430 gcc_assert (ok);
20432 /* There should be at least two components in the address. */
20433 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20434 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20435 return false;
20437 /* We should not split into add if a non-legitimate PIC
20438 operand is used as the displacement. */
20439 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20440 return false;
20442 regno0 = true_regnum (operands[0]) ;
20443 regno1 = INVALID_REGNUM;
20444 regno2 = INVALID_REGNUM;
20446 if (parts.base)
20447 regno1 = true_regnum (parts.base);
20448 if (parts.index)
20449 regno2 = true_regnum (parts.index);
20451 split_cost = 0;
20453 /* Compute how many cycles we will add to execution time
20454 if we split the lea into a sequence of instructions. */
20455 if (parts.base || parts.index)
20457 /* Have to use a mov instruction if the non-destructive
20458 destination form is used. */
20459 if (regno1 != regno0 && regno2 != regno0)
20460 split_cost += 1;
20462 /* Have to add index to base if both exist. */
20463 if (parts.base && parts.index)
20464 split_cost += 1;
20466 /* Have to use shift and adds if scale is 2 or greater. */
20467 if (parts.scale > 1)
20469 if (regno0 != regno1)
20470 split_cost += 1;
20471 else if (regno2 == regno0)
20472 split_cost += 4;
20473 else
20474 split_cost += parts.scale;
20477 /* Have to use add instruction with immediate if
20478 disp is non zero. */
20479 if (parts.disp && parts.disp != const0_rtx)
20480 split_cost += 1;
20482 /* Subtract the price of lea. */
20483 split_cost -= 1;
20486 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20487 parts.scale > 1);
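/* Worked example (illustrative, assuming the cost model above): for
	lea	0x4(%rbx,%rcx,2), %rax
   with %rax distinct from both %rbx and %rcx, the split needs a mov of
   one source (+1), a shift or extra add for the scale (+1), an add of
   the base (+1) and an add of the displacement (+1), minus the lea
   itself (-1), so split_cost = 3 is passed to ix86_lea_outperforms.  */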
20490 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20491 matches destination. RTX includes clobber of FLAGS_REG. */
20493 static void
20494 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20495 rtx dst, rtx src)
20497 rtx op, clob;
20499 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20500 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20502 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20505 /* Return true if regno1 def is nearest to the insn. */
20507 static bool
20508 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20510 rtx_insn *prev = insn;
20511 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20513 if (insn == start)
20514 return false;
20515 while (prev && prev != start)
20517 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20519 prev = PREV_INSN (prev);
20520 continue;
20522 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20523 return true;
20524 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20525 return false;
20526 prev = PREV_INSN (prev);
20529 /* None of the regs is defined in the bb. */
20530 return false;
20533 /* Split lea instructions into a sequence of instructions
20534 which are executed on ALU to avoid AGU stalls.
20535 It is assumed that it is allowed to clobber flags register
20536 at lea position. */
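/* A minimal sketch (illustrative) of the kind of sequence this produces
   for  lea 0x4(%rbx,%rcx,2), %rax  when all three registers are distinct:

	movq	%rcx, %rax
	salq	$1, %rax
	addq	%rbx, %rax
	addq	$0x4, %rax

   The exact choice between a shift and repeated adds, and the operand
   ordering, depends on the register-overlap cases handled below.  */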
20538 void
20539 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20541 unsigned int regno0, regno1, regno2;
20542 struct ix86_address parts;
20543 rtx target, tmp;
20544 int ok, adds;
20546 ok = ix86_decompose_address (operands[1], &parts);
20547 gcc_assert (ok);
20549 target = gen_lowpart (mode, operands[0]);
20551 regno0 = true_regnum (target);
20552 regno1 = INVALID_REGNUM;
20553 regno2 = INVALID_REGNUM;
20555 if (parts.base)
20557 parts.base = gen_lowpart (mode, parts.base);
20558 regno1 = true_regnum (parts.base);
20561 if (parts.index)
20563 parts.index = gen_lowpart (mode, parts.index);
20564 regno2 = true_regnum (parts.index);
20567 if (parts.disp)
20568 parts.disp = gen_lowpart (mode, parts.disp);
20570 if (parts.scale > 1)
20572 /* Case r1 = r1 + ... */
20573 if (regno1 == regno0)
20575 /* If we have a case r1 = r1 + C * r2 then we
20576 should use multiplication which is very
20577 expensive. Assume cost model is wrong if we
20578 have such case here. */
20579 gcc_assert (regno2 != regno0);
20581 for (adds = parts.scale; adds > 0; adds--)
20582 ix86_emit_binop (PLUS, mode, target, parts.index);
20584 else
20586 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20587 if (regno0 != regno2)
20588 emit_insn (gen_rtx_SET (target, parts.index));
20590 /* Use shift for scaling. */
20591 ix86_emit_binop (ASHIFT, mode, target,
20592 GEN_INT (exact_log2 (parts.scale)));
20594 if (parts.base)
20595 ix86_emit_binop (PLUS, mode, target, parts.base);
20597 if (parts.disp && parts.disp != const0_rtx)
20598 ix86_emit_binop (PLUS, mode, target, parts.disp);
20601 else if (!parts.base && !parts.index)
20603 gcc_assert(parts.disp);
20604 emit_insn (gen_rtx_SET (target, parts.disp));
20606 else
20608 if (!parts.base)
20610 if (regno0 != regno2)
20611 emit_insn (gen_rtx_SET (target, parts.index));
20613 else if (!parts.index)
20615 if (regno0 != regno1)
20616 emit_insn (gen_rtx_SET (target, parts.base));
20618 else
20620 if (regno0 == regno1)
20621 tmp = parts.index;
20622 else if (regno0 == regno2)
20623 tmp = parts.base;
20624 else
20626 rtx tmp1;
20628 /* Find better operand for SET instruction, depending
20629 on which definition is farther from the insn. */
20630 if (find_nearest_reg_def (insn, regno1, regno2))
20631 tmp = parts.index, tmp1 = parts.base;
20632 else
20633 tmp = parts.base, tmp1 = parts.index;
20635 emit_insn (gen_rtx_SET (target, tmp));
20637 if (parts.disp && parts.disp != const0_rtx)
20638 ix86_emit_binop (PLUS, mode, target, parts.disp);
20640 ix86_emit_binop (PLUS, mode, target, tmp1);
20641 return;
20644 ix86_emit_binop (PLUS, mode, target, tmp);
20647 if (parts.disp && parts.disp != const0_rtx)
20648 ix86_emit_binop (PLUS, mode, target, parts.disp);
20652 /* Return true if it is ok to optimize an ADD operation to LEA
20653 operation to avoid flag register consumption. For most processors,
20654 ADD is faster than LEA. For processors like BONNELL, if the
20655 destination register of LEA holds an actual address which will be
20656 used soon, LEA is better; otherwise ADD is better. */
20658 bool
20659 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20661 unsigned int regno0 = true_regnum (operands[0]);
20662 unsigned int regno1 = true_regnum (operands[1]);
20663 unsigned int regno2 = true_regnum (operands[2]);
20665 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20666 if (regno0 != regno1 && regno0 != regno2)
20667 return true;
20669 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20670 return false;
20672 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20675 /* Return true if destination reg of SET_BODY is shift count of
20676 USE_BODY. */
20678 static bool
20679 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20681 rtx set_dest;
20682 rtx shift_rtx;
20683 int i;
20685 /* Retrieve destination of SET_BODY. */
20686 switch (GET_CODE (set_body))
20688 case SET:
20689 set_dest = SET_DEST (set_body);
20690 if (!set_dest || !REG_P (set_dest))
20691 return false;
20692 break;
20693 case PARALLEL:
20694 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20695 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20696 use_body))
20697 return true;
20698 /* FALLTHROUGH */
20699 default:
20700 return false;
20703 /* Retrieve shift count of USE_BODY. */
20704 switch (GET_CODE (use_body))
20706 case SET:
20707 shift_rtx = XEXP (use_body, 1);
20708 break;
20709 case PARALLEL:
20710 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20711 if (ix86_dep_by_shift_count_body (set_body,
20712 XVECEXP (use_body, 0, i)))
20713 return true;
20714 /* FALLTHROUGH */
20715 default:
20716 return false;
20719 if (shift_rtx
20720 && (GET_CODE (shift_rtx) == ASHIFT
20721 || GET_CODE (shift_rtx) == LSHIFTRT
20722 || GET_CODE (shift_rtx) == ASHIFTRT
20723 || GET_CODE (shift_rtx) == ROTATE
20724 || GET_CODE (shift_rtx) == ROTATERT))
20726 rtx shift_count = XEXP (shift_rtx, 1);
20728 /* Return true if shift count is dest of SET_BODY. */
20729 if (REG_P (shift_count))
20731 /* Add this check since it can be invoked before register
20732 allocation by the pre-reload scheduler. */
20733 if (reload_completed
20734 && true_regnum (set_dest) == true_regnum (shift_count))
20735 return true;
20736 else if (REGNO(set_dest) == REGNO(shift_count))
20737 return true;
20741 return false;
20744 /* Return true if destination reg of SET_INSN is shift count of
20745 USE_INSN. */
20747 bool
20748 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20750 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20751 PATTERN (use_insn));
20754 /* Return TRUE or FALSE depending on whether the unary operator meets the
20755 appropriate constraints. */
20757 bool
20758 ix86_unary_operator_ok (enum rtx_code,
20759 machine_mode,
20760 rtx operands[2])
20762 /* If one of operands is memory, source and destination must match. */
20763 if ((MEM_P (operands[0])
20764 || MEM_P (operands[1]))
20765 && ! rtx_equal_p (operands[0], operands[1]))
20766 return false;
20767 return true;
20770 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20771 are ok, keeping in mind the possible movddup alternative. */
20773 bool
20774 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
20776 if (MEM_P (operands[0]))
20777 return rtx_equal_p (operands[0], operands[1 + high]);
20778 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20779 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20780 return true;
20783 /* Post-reload splitter for converting an SF or DFmode value in an
20784 SSE register into an unsigned SImode. */
20786 void
20787 ix86_split_convert_uns_si_sse (rtx operands[])
20789 machine_mode vecmode;
20790 rtx value, large, zero_or_two31, input, two31, x;
20792 large = operands[1];
20793 zero_or_two31 = operands[2];
20794 input = operands[3];
20795 two31 = operands[4];
20796 vecmode = GET_MODE (large);
20797 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20799 /* Load up the value into the low element. We must ensure that the other
20800 elements are valid floats -- zero is the easiest such value. */
20801 if (MEM_P (input))
20803 if (vecmode == V4SFmode)
20804 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20805 else
20806 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
20808 else
20810 input = gen_rtx_REG (vecmode, REGNO (input));
20811 emit_move_insn (value, CONST0_RTX (vecmode));
20812 if (vecmode == V4SFmode)
20813 emit_insn (gen_sse_movss (value, value, input));
20814 else
20815 emit_insn (gen_sse2_movsd (value, value, input));
20818 emit_move_insn (large, two31);
20819 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
20821 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20822 emit_insn (gen_rtx_SET (large, x));
20824 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20825 emit_insn (gen_rtx_SET (zero_or_two31, x));
20827 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20828 emit_insn (gen_rtx_SET (value, x));
20830 large = gen_rtx_REG (V4SImode, REGNO (large));
20831 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20833 x = gen_rtx_REG (V4SImode, REGNO (value));
20834 if (vecmode == V4SFmode)
20835 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20836 else
20837 emit_insn (gen_sse2_cvttpd2dq (x, value));
20838 value = x;
20840 emit_insn (gen_xorv4si3 (value, value, large));
20843 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20844 Expects the 64-bit DImode to be supplied in a pair of integral
20845 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20846 -mfpmath=sse, !optimize_size only. */
20848 void
20849 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20851 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20852 rtx int_xmm, fp_xmm;
20853 rtx biases, exponents;
20854 rtx x;
20856 int_xmm = gen_reg_rtx (V4SImode);
20857 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20858 emit_insn (gen_movdi_to_sse (int_xmm, input));
20859 else if (TARGET_SSE_SPLIT_REGS)
20861 emit_clobber (int_xmm);
20862 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20864 else
20866 x = gen_reg_rtx (V2DImode);
20867 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20868 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
20871 x = gen_rtx_CONST_VECTOR (V4SImode,
20872 gen_rtvec (4, GEN_INT (0x43300000UL),
20873 GEN_INT (0x45300000UL),
20874 const0_rtx, const0_rtx));
20875 exponents = validize_mem (force_const_mem (V4SImode, x));
20877 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20878 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
20880 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20881 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20882 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20883 (0x1.0p84 + double(fp_value_hi_xmm)).
20884 Note these exponents differ by 32. */
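/* Worked example (added for illustration): for the input
   0x0000000500000007 the low and high words are 7 and 5, so the two
   doubles formed above are 0x1.0p52 + 7.0 and 0x1.0p84 + 5.0 * 0x1.0p32.
   Subtracting the 0x1.0p52 and 0x1.0p84 biases below leaves 7.0 and
   5.0 * 0x1.0p32 = 21474836480.0, whose sum 21474836487.0 is exactly
   (double) 0x0000000500000007.  */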
20886 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20888 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20889 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20890 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20891 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20892 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20893 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20894 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20895 biases = validize_mem (force_const_mem (V2DFmode, biases));
20896 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20898 /* Add the upper and lower DFmode values together. */
20899 if (TARGET_SSE3)
20900 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20901 else
20903 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20904 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20905 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20908 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20911 /* Not used, but eases macroization of patterns. */
20912 void
20913 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20915 gcc_unreachable ();
20918 /* Convert an unsigned SImode value into a DFmode. Only currently used
20919 for SSE, but applicable anywhere. */
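/* Sketch of the idea (illustrative): for an unsigned input u, the code
   computes s = (int32_t) (u + 0x80000000), i.e. u - 2^31 reinterpreted
   as signed, converts s to DFmode exactly, and then adds 0x1.0p31 back.
   E.g. u = 0xffffffff gives s = 0x7fffffff = 2147483647, and
   2147483647.0 + 2147483648.0 = 4294967295.0 = (double) u.  */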
20921 void
20922 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20924 REAL_VALUE_TYPE TWO31r;
20925 rtx x, fp;
20927 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20928 NULL, 1, OPTAB_DIRECT);
20930 fp = gen_reg_rtx (DFmode);
20931 emit_insn (gen_floatsidf2 (fp, x));
20933 real_ldexp (&TWO31r, &dconst1, 31);
20934 x = const_double_from_real_value (TWO31r, DFmode);
20936 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20937 if (x != target)
20938 emit_move_insn (target, x);
20941 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20942 32-bit mode; otherwise we have a direct convert instruction. */
20944 void
20945 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20947 REAL_VALUE_TYPE TWO32r;
20948 rtx fp_lo, fp_hi, x;
20950 fp_lo = gen_reg_rtx (DFmode);
20951 fp_hi = gen_reg_rtx (DFmode);
20953 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20955 real_ldexp (&TWO32r, &dconst1, 32);
20956 x = const_double_from_real_value (TWO32r, DFmode);
20957 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
20959 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20961 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20962 0, OPTAB_DIRECT);
20963 if (x != target)
20964 emit_move_insn (target, x);
20967 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20968 For x86_32, -mfpmath=sse, !optimize_size only. */
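/* Illustrative decomposition: u = hi * 2^16 + lo, with hi = u >> 16 and
   lo = u & 0xffff.  Both 16-bit halves convert to SFmode exactly, and the
   result is (float) hi * 0x1.0p16f + (float) lo.  E.g. u = 0xffffffff
   gives 65535.0f * 65536.0f + 65535.0f, which rounds to 4294967296.0f,
   the float nearest to 4294967295.  */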
20969 void
20970 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20972 REAL_VALUE_TYPE ONE16r;
20973 rtx fp_hi, fp_lo, int_hi, int_lo, x;
20975 real_ldexp (&ONE16r, &dconst1, 16);
20976 x = const_double_from_real_value (ONE16r, SFmode);
20977 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20978 NULL, 0, OPTAB_DIRECT);
20979 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20980 NULL, 0, OPTAB_DIRECT);
20981 fp_hi = gen_reg_rtx (SFmode);
20982 fp_lo = gen_reg_rtx (SFmode);
20983 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20984 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20985 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20986 0, OPTAB_DIRECT);
20987 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20988 0, OPTAB_DIRECT);
20989 if (!rtx_equal_p (target, fp_hi))
20990 emit_move_insn (target, fp_hi);
20993 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20994 a vector of unsigned ints VAL to vector of floats TARGET. */
20996 void
20997 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20999 rtx tmp[8];
21000 REAL_VALUE_TYPE TWO16r;
21001 machine_mode intmode = GET_MODE (val);
21002 machine_mode fltmode = GET_MODE (target);
21003 rtx (*cvt) (rtx, rtx);
21005 if (intmode == V4SImode)
21006 cvt = gen_floatv4siv4sf2;
21007 else
21008 cvt = gen_floatv8siv8sf2;
21009 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21010 tmp[0] = force_reg (intmode, tmp[0]);
21011 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21012 OPTAB_DIRECT);
21013 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21014 NULL_RTX, 1, OPTAB_DIRECT);
21015 tmp[3] = gen_reg_rtx (fltmode);
21016 emit_insn (cvt (tmp[3], tmp[1]));
21017 tmp[4] = gen_reg_rtx (fltmode);
21018 emit_insn (cvt (tmp[4], tmp[2]));
21019 real_ldexp (&TWO16r, &dconst1, 16);
21020 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21021 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21022 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21023 OPTAB_DIRECT);
21024 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21025 OPTAB_DIRECT);
21026 if (tmp[7] != target)
21027 emit_move_insn (target, tmp[7]);
21030 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21031 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21032 This is done by doing just signed conversion if < 0x1p31, and otherwise by
21033 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
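/* Scalar sketch of the trick (illustrative), for one element v:
     if (v < 0x1.0p31)  { adjusted = v;            xor_bit = 0;          }
     else               { adjusted = v - 0x1.0p31; xor_bit = 0x80000000; }
   and the caller xors the truncated result with *XORP.  E.g. v = 3.0e9:
   3000000000 - 2147483648 = 852516352, and
   852516352 ^ 0x80000000 = 3000000000.  */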
21036 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21038 REAL_VALUE_TYPE TWO31r;
21039 rtx two31r, tmp[4];
21040 machine_mode mode = GET_MODE (val);
21041 machine_mode scalarmode = GET_MODE_INNER (mode);
21042 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
21043 rtx (*cmp) (rtx, rtx, rtx, rtx);
21044 int i;
21046 for (i = 0; i < 3; i++)
21047 tmp[i] = gen_reg_rtx (mode);
21048 real_ldexp (&TWO31r, &dconst1, 31);
21049 two31r = const_double_from_real_value (TWO31r, scalarmode);
21050 two31r = ix86_build_const_vector (mode, 1, two31r);
21051 two31r = force_reg (mode, two31r);
21052 switch (mode)
21054 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
21055 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
21056 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
21057 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
21058 default: gcc_unreachable ();
21060 tmp[3] = gen_rtx_LE (mode, two31r, val);
21061 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
21062 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
21063 0, OPTAB_DIRECT);
21064 if (intmode == V4SImode || TARGET_AVX2)
21065 *xorp = expand_simple_binop (intmode, ASHIFT,
21066 gen_lowpart (intmode, tmp[0]),
21067 GEN_INT (31), NULL_RTX, 0,
21068 OPTAB_DIRECT);
21069 else
21071 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
21072 two31 = ix86_build_const_vector (intmode, 1, two31);
21073 *xorp = expand_simple_binop (intmode, AND,
21074 gen_lowpart (intmode, tmp[0]),
21075 two31, NULL_RTX, 0,
21076 OPTAB_DIRECT);
21078 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
21079 0, OPTAB_DIRECT);
21082 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
21083 then replicate the value for all elements of the vector
21084 register. */
21087 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
21089 int i, n_elt;
21090 rtvec v;
21091 machine_mode scalar_mode;
21093 switch (mode)
21095 case E_V64QImode:
21096 case E_V32QImode:
21097 case E_V16QImode:
21098 case E_V32HImode:
21099 case E_V16HImode:
21100 case E_V8HImode:
21101 case E_V16SImode:
21102 case E_V8SImode:
21103 case E_V4SImode:
21104 case E_V8DImode:
21105 case E_V4DImode:
21106 case E_V2DImode:
21107 gcc_assert (vect);
21108 /* FALLTHRU */
21109 case E_V16SFmode:
21110 case E_V8SFmode:
21111 case E_V4SFmode:
21112 case E_V8DFmode:
21113 case E_V4DFmode:
21114 case E_V2DFmode:
21115 n_elt = GET_MODE_NUNITS (mode);
21116 v = rtvec_alloc (n_elt);
21117 scalar_mode = GET_MODE_INNER (mode);
21119 RTVEC_ELT (v, 0) = value;
21121 for (i = 1; i < n_elt; ++i)
21122 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
21124 return gen_rtx_CONST_VECTOR (mode, v);
21126 default:
21127 gcc_unreachable ();
21131 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
21132 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
21133 for an SSE register. If VECT is true, then replicate the mask for
21134 all elements of the vector register. If INVERT is true, then create
21135 a mask excluding the sign bit. */
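/* For example (illustrative): ix86_build_signbit_mask (V4SFmode, true, false)
   builds the V4SF constant whose four elements all have only bit 31 set
   (0x80000000), suitable as an ANDPS/XORPS/ANDNPS mask, while INVERT
   instead sets the low 31 bits (0x7fffffff) for use in ABS.  */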
21138 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
21140 machine_mode vec_mode, imode;
21141 wide_int w;
21142 rtx mask, v;
21144 switch (mode)
21146 case E_V16SImode:
21147 case E_V16SFmode:
21148 case E_V8SImode:
21149 case E_V4SImode:
21150 case E_V8SFmode:
21151 case E_V4SFmode:
21152 vec_mode = mode;
21153 imode = SImode;
21154 break;
21156 case E_V8DImode:
21157 case E_V4DImode:
21158 case E_V2DImode:
21159 case E_V8DFmode:
21160 case E_V4DFmode:
21161 case E_V2DFmode:
21162 vec_mode = mode;
21163 imode = DImode;
21164 break;
21166 case E_TImode:
21167 case E_TFmode:
21168 vec_mode = VOIDmode;
21169 imode = TImode;
21170 break;
21172 default:
21173 gcc_unreachable ();
21176 machine_mode inner_mode = GET_MODE_INNER (mode);
21177 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
21178 GET_MODE_BITSIZE (inner_mode));
21179 if (invert)
21180 w = wi::bit_not (w);
21182 /* Force this value into the low part of a fp vector constant. */
21183 mask = immed_wide_int_const (w, imode);
21184 mask = gen_lowpart (inner_mode, mask);
21186 if (vec_mode == VOIDmode)
21187 return force_reg (inner_mode, mask);
21189 v = ix86_build_const_vector (vec_mode, vect, mask);
21190 return force_reg (vec_mode, v);
21193 /* Generate code for floating point ABS or NEG. */
21195 void
21196 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
21197 rtx operands[])
21199 rtx mask, set, dst, src;
21200 bool use_sse = false;
21201 bool vector_mode = VECTOR_MODE_P (mode);
21202 machine_mode vmode = mode;
21204 if (vector_mode)
21205 use_sse = true;
21206 else if (mode == TFmode)
21207 use_sse = true;
21208 else if (TARGET_SSE_MATH)
21210 use_sse = SSE_FLOAT_MODE_P (mode);
21211 if (mode == SFmode)
21212 vmode = V4SFmode;
21213 else if (mode == DFmode)
21214 vmode = V2DFmode;
21217 /* NEG and ABS performed with SSE use bitwise mask operations.
21218 Create the appropriate mask now. */
21219 if (use_sse)
21220 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
21221 else
21222 mask = NULL_RTX;
21224 dst = operands[0];
21225 src = operands[1];
21227 set = gen_rtx_fmt_e (code, mode, src);
21228 set = gen_rtx_SET (dst, set);
21230 if (mask)
21232 rtx use, clob;
21233 rtvec par;
21235 use = gen_rtx_USE (VOIDmode, mask);
21236 if (vector_mode)
21237 par = gen_rtvec (2, set, use);
21238 else
21240 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21241 par = gen_rtvec (3, set, use, clob);
21243 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
21245 else
21246 emit_insn (set);
21249 /* Expand a copysign operation. Special case operand 0 being a constant. */
21251 void
21252 ix86_expand_copysign (rtx operands[])
21254 machine_mode mode, vmode;
21255 rtx dest, op0, op1, mask, nmask;
21257 dest = operands[0];
21258 op0 = operands[1];
21259 op1 = operands[2];
21261 mode = GET_MODE (dest);
21263 if (mode == SFmode)
21264 vmode = V4SFmode;
21265 else if (mode == DFmode)
21266 vmode = V2DFmode;
21267 else
21268 vmode = mode;
21270 if (CONST_DOUBLE_P (op0))
21272 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
21274 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
21275 op0 = simplify_unary_operation (ABS, mode, op0, mode);
21277 if (mode == SFmode || mode == DFmode)
21279 if (op0 == CONST0_RTX (mode))
21280 op0 = CONST0_RTX (vmode);
21281 else
21283 rtx v = ix86_build_const_vector (vmode, false, op0);
21285 op0 = force_reg (vmode, v);
21288 else if (op0 != CONST0_RTX (mode))
21289 op0 = force_reg (mode, op0);
21291 mask = ix86_build_signbit_mask (vmode, 0, 0);
21293 if (mode == SFmode)
21294 copysign_insn = gen_copysignsf3_const;
21295 else if (mode == DFmode)
21296 copysign_insn = gen_copysigndf3_const;
21297 else
21298 copysign_insn = gen_copysigntf3_const;
21300 emit_insn (copysign_insn (dest, op0, op1, mask));
21302 else
21304 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
21306 nmask = ix86_build_signbit_mask (vmode, 0, 1);
21307 mask = ix86_build_signbit_mask (vmode, 0, 0);
21309 if (mode == SFmode)
21310 copysign_insn = gen_copysignsf3_var;
21311 else if (mode == DFmode)
21312 copysign_insn = gen_copysigndf3_var;
21313 else
21314 copysign_insn = gen_copysigntf3_var;
21316 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
21320 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21321 be a constant, and so has already been expanded into a vector constant. */
21323 void
21324 ix86_split_copysign_const (rtx operands[])
21326 machine_mode mode, vmode;
21327 rtx dest, op0, mask, x;
21329 dest = operands[0];
21330 op0 = operands[1];
21331 mask = operands[3];
21333 mode = GET_MODE (dest);
21334 vmode = GET_MODE (mask);
21336 dest = lowpart_subreg (vmode, dest, mode);
21337 x = gen_rtx_AND (vmode, dest, mask);
21338 emit_insn (gen_rtx_SET (dest, x));
21340 if (op0 != CONST0_RTX (vmode))
21342 x = gen_rtx_IOR (vmode, dest, op0);
21343 emit_insn (gen_rtx_SET (dest, x));
21347 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21348 so we have to do two masks. */
21350 void
21351 ix86_split_copysign_var (rtx operands[])
21353 machine_mode mode, vmode;
21354 rtx dest, scratch, op0, op1, mask, nmask, x;
21356 dest = operands[0];
21357 scratch = operands[1];
21358 op0 = operands[2];
21359 op1 = operands[3];
21360 nmask = operands[4];
21361 mask = operands[5];
21363 mode = GET_MODE (dest);
21364 vmode = GET_MODE (mask);
21366 if (rtx_equal_p (op0, op1))
21368 /* Shouldn't happen often (it's useless, obviously), but when it does
21369 we'd generate incorrect code if we continue below. */
21370 emit_move_insn (dest, op0);
21371 return;
21374 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21376 gcc_assert (REGNO (op1) == REGNO (scratch));
21378 x = gen_rtx_AND (vmode, scratch, mask);
21379 emit_insn (gen_rtx_SET (scratch, x));
21381 dest = mask;
21382 op0 = lowpart_subreg (vmode, op0, mode);
21383 x = gen_rtx_NOT (vmode, dest);
21384 x = gen_rtx_AND (vmode, x, op0);
21385 emit_insn (gen_rtx_SET (dest, x));
21387 else
21389 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21391 x = gen_rtx_AND (vmode, scratch, mask);
21393 else /* alternative 2,4 */
21395 gcc_assert (REGNO (mask) == REGNO (scratch));
21396 op1 = lowpart_subreg (vmode, op1, mode);
21397 x = gen_rtx_AND (vmode, scratch, op1);
21399 emit_insn (gen_rtx_SET (scratch, x));
21401 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21403 dest = lowpart_subreg (vmode, op0, mode);
21404 x = gen_rtx_AND (vmode, dest, nmask);
21406 else /* alternative 3,4 */
21408 gcc_assert (REGNO (nmask) == REGNO (dest));
21409 dest = nmask;
21410 op0 = lowpart_subreg (vmode, op0, mode);
21411 x = gen_rtx_AND (vmode, dest, op0);
21413 emit_insn (gen_rtx_SET (dest, x));
21416 x = gen_rtx_IOR (vmode, dest, scratch);
21417 emit_insn (gen_rtx_SET (dest, x));
21420 /* Return TRUE or FALSE depending on whether the first SET in INSN
21421 has source and destination with matching CC modes, and that the
21422 CC mode is at least as constrained as REQ_MODE. */
21424 bool
21425 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21427 rtx set;
21428 machine_mode set_mode;
21430 set = PATTERN (insn);
21431 if (GET_CODE (set) == PARALLEL)
21432 set = XVECEXP (set, 0, 0);
21433 gcc_assert (GET_CODE (set) == SET);
21434 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21436 set_mode = GET_MODE (SET_DEST (set));
21437 switch (set_mode)
21439 case E_CCNOmode:
21440 if (req_mode != CCNOmode
21441 && (req_mode != CCmode
21442 || XEXP (SET_SRC (set), 1) != const0_rtx))
21443 return false;
21444 break;
21445 case E_CCmode:
21446 if (req_mode == CCGCmode)
21447 return false;
21448 /* FALLTHRU */
21449 case E_CCGCmode:
21450 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21451 return false;
21452 /* FALLTHRU */
21453 case E_CCGOCmode:
21454 if (req_mode == CCZmode)
21455 return false;
21456 /* FALLTHRU */
21457 case E_CCZmode:
21458 break;
21460 case E_CCGZmode:
21462 case E_CCAmode:
21463 case E_CCCmode:
21464 case E_CCOmode:
21465 case E_CCPmode:
21466 case E_CCSmode:
21467 if (set_mode != req_mode)
21468 return false;
21469 break;
21471 default:
21472 gcc_unreachable ();
21475 return GET_MODE (SET_SRC (set)) == set_mode;
21478 /* Generate insn patterns to do an integer compare of OPERANDS. */
21480 static rtx
21481 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21483 machine_mode cmpmode;
21484 rtx tmp, flags;
21486 cmpmode = SELECT_CC_MODE (code, op0, op1);
21487 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21489 /* This is very simple, but making the interface the same as in the
21490 FP case makes the rest of the code easier. */
21491 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21492 emit_insn (gen_rtx_SET (flags, tmp));
21494 /* Return the test that should be put into the flags user, i.e.
21495 the bcc, scc, or cmov instruction. */
21496 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21499 /* Figure out whether to use ordered or unordered fp comparisons.
21500 Return the appropriate mode to use. */
21502 machine_mode
21503 ix86_fp_compare_mode (enum rtx_code)
21505 /* ??? In order to make all comparisons reversible, we do all comparisons
21506 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21507 all forms of trapping and nontrapping comparisons, we can make inequality
21508 comparisons trapping again, since it results in better code when using
21509 FCOM based compares. */
21510 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
21513 machine_mode
21514 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21516 machine_mode mode = GET_MODE (op0);
21518 if (SCALAR_FLOAT_MODE_P (mode))
21520 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21521 return ix86_fp_compare_mode (code);
21524 switch (code)
21526 /* Only zero flag is needed. */
21527 case EQ: /* ZF=0 */
21528 case NE: /* ZF!=0 */
21529 return CCZmode;
21530 /* Codes needing carry flag. */
21531 case GEU: /* CF=0 */
21532 case LTU: /* CF=1 */
21533 /* Detect overflow checks. They need just the carry flag. */
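/* E.g. the unsigned addition overflow check "c = a + b; if (c < a) ..."
reaches here as (ltu (plus a b) a), for which the carry flag alone is
sufficient. */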
21534 if (GET_CODE (op0) == PLUS
21535 && (rtx_equal_p (op1, XEXP (op0, 0))
21536 || rtx_equal_p (op1, XEXP (op0, 1))))
21537 return CCCmode;
21538 else
21539 return CCmode;
21540 case GTU: /* CF=0 & ZF=0 */
21541 case LEU: /* CF=1 | ZF=1 */
21542 return CCmode;
21543 /* Codes possibly doable only with sign flag when
21544 comparing against zero. */
21545 case GE: /* SF=OF or SF=0 */
21546 case LT: /* SF<>OF or SF=1 */
21547 if (op1 == const0_rtx)
21548 return CCGOCmode;
21549 else
21550 /* For other cases Carry flag is not required. */
21551 return CCGCmode;
21552 /* Codes doable only with the sign flag when comparing
21553 against zero, but for which we lack a jump instruction,
21554 so we need to use relational tests against overflow,
21555 which therefore needs to be zero. */
21556 case GT: /* ZF=0 & SF=OF */
21557 case LE: /* ZF=1 | SF<>OF */
21558 if (op1 == const0_rtx)
21559 return CCNOmode;
21560 else
21561 return CCGCmode;
21562 /* The strcmp pattern contains a (use flags), and combine may ask us for
21563 the proper mode. */
21564 case USE:
21565 return CCmode;
21566 default:
21567 gcc_unreachable ();
21571 /* Return the fixed registers used for condition codes. */
21573 static bool
21574 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21576 *p1 = FLAGS_REG;
21577 *p2 = FPSR_REG;
21578 return true;
21581 /* If two condition code modes are compatible, return a condition code
21582 mode which is compatible with both. Otherwise, return
21583 VOIDmode. */
21585 static machine_mode
21586 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21588 if (m1 == m2)
21589 return m1;
21591 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21592 return VOIDmode;
21594 if ((m1 == CCGCmode && m2 == CCGOCmode)
21595 || (m1 == CCGOCmode && m2 == CCGCmode))
21596 return CCGCmode;
21598 if ((m1 == CCNOmode && m2 == CCGOCmode)
21599 || (m1 == CCGOCmode && m2 == CCNOmode))
21600 return CCNOmode;
21602 if (m1 == CCZmode
21603 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
21604 return m2;
21605 else if (m2 == CCZmode
21606 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
21607 return m1;
21609 switch (m1)
21611 default:
21612 gcc_unreachable ();
21614 case E_CCmode:
21615 case E_CCGCmode:
21616 case E_CCGOCmode:
21617 case E_CCNOmode:
21618 case E_CCAmode:
21619 case E_CCCmode:
21620 case E_CCOmode:
21621 case E_CCPmode:
21622 case E_CCSmode:
21623 case E_CCZmode:
21624 switch (m2)
21626 default:
21627 return VOIDmode;
21629 case E_CCmode:
21630 case E_CCGCmode:
21631 case E_CCGOCmode:
21632 case E_CCNOmode:
21633 case E_CCAmode:
21634 case E_CCCmode:
21635 case E_CCOmode:
21636 case E_CCPmode:
21637 case E_CCSmode:
21638 case E_CCZmode:
21639 return CCmode;
21642 case E_CCFPmode:
21643 case E_CCFPUmode:
21644 /* These are only compatible with themselves, which we already
21645 checked above. */
21646 return VOIDmode;
21651 /* Return a comparison we can do and that it is equivalent to
21652 swap_condition (code) apart possibly from orderedness.
21653 But, never change orderedness if TARGET_IEEE_FP, returning
21654 UNKNOWN in that case if necessary. */
21656 static enum rtx_code
21657 ix86_fp_swap_condition (enum rtx_code code)
21659 switch (code)
21661 case GT: /* GTU - CF=0 & ZF=0 */
21662 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21663 case GE: /* GEU - CF=0 */
21664 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21665 case UNLT: /* LTU - CF=1 */
21666 return TARGET_IEEE_FP ? UNKNOWN : GT;
21667 case UNLE: /* LEU - CF=1 | ZF=1 */
21668 return TARGET_IEEE_FP ? UNKNOWN : GE;
21669 default:
21670 return swap_condition (code);
21674 /* Return the cost of comparison CODE using the best strategy for performance.
21675 All of the following functions use the number of instructions as the cost metric.
21676 In the future this should be tweaked to compute bytes for optimize_size and
21677 take into account the performance of various instructions on various CPUs. */
21679 static int
21680 ix86_fp_comparison_cost (enum rtx_code code)
21682 int arith_cost;
21684 /* The cost of code using bit-twiddling on %ah. */
21685 switch (code)
21687 case UNLE:
21688 case UNLT:
21689 case LTGT:
21690 case GT:
21691 case GE:
21692 case UNORDERED:
21693 case ORDERED:
21694 case UNEQ:
21695 arith_cost = 4;
21696 break;
21697 case LT:
21698 case NE:
21699 case EQ:
21700 case UNGE:
21701 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21702 break;
21703 case LE:
21704 case UNGT:
21705 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21706 break;
21707 default:
21708 gcc_unreachable ();
21711 switch (ix86_fp_comparison_strategy (code))
21713 case IX86_FPCMP_COMI:
21714 return arith_cost > 4 ? 3 : 2;
21715 case IX86_FPCMP_SAHF:
21716 return arith_cost > 4 ? 4 : 3;
21717 default:
21718 return arith_cost;
21722 /* Return strategy to use for floating-point. We assume that fcomi is always
21723 preferable where available, since that is also true when looking at size
21724 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
21726 enum ix86_fpcmp_strategy
21727 ix86_fp_comparison_strategy (enum rtx_code)
21729 /* Do fcomi/sahf based test when profitable. */
21731 if (TARGET_CMOVE)
21732 return IX86_FPCMP_COMI;
21734 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21735 return IX86_FPCMP_SAHF;
21737 return IX86_FPCMP_ARITH;
21740 /* Swap, force into registers, or otherwise massage the two operands
21741 to a fp comparison. The operands are updated in place; the new
21742 comparison code is returned. */
21744 static enum rtx_code
21745 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21747 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21748 rtx op0 = *pop0, op1 = *pop1;
21749 machine_mode op_mode = GET_MODE (op0);
21750 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21752 /* All of the unordered compare instructions only work on registers.
21753 The same is true of the fcomi compare instructions. The XFmode
21754 compare instructions require registers except when comparing
21755 against zero or when converting operand 1 from fixed point to
21756 floating point. */
21758 if (!is_sse
21759 && (fpcmp_mode == CCFPUmode
21760 || (op_mode == XFmode
21761 && ! (standard_80387_constant_p (op0) == 1
21762 || standard_80387_constant_p (op1) == 1)
21763 && GET_CODE (op1) != FLOAT)
21764 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21766 op0 = force_reg (op_mode, op0);
21767 op1 = force_reg (op_mode, op1);
21769 else
21771 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21772 things around if they appear profitable, otherwise force op0
21773 into a register. */
21775 if (standard_80387_constant_p (op0) == 0
21776 || (MEM_P (op0)
21777 && ! (standard_80387_constant_p (op1) == 0
21778 || MEM_P (op1))))
21780 enum rtx_code new_code = ix86_fp_swap_condition (code);
21781 if (new_code != UNKNOWN)
21783 std::swap (op0, op1);
21784 code = new_code;
21788 if (!REG_P (op0))
21789 op0 = force_reg (op_mode, op0);
21791 if (CONSTANT_P (op1))
21793 int tmp = standard_80387_constant_p (op1);
21794 if (tmp == 0)
21795 op1 = validize_mem (force_const_mem (op_mode, op1));
21796 else if (tmp == 1)
21798 if (TARGET_CMOVE)
21799 op1 = force_reg (op_mode, op1);
21801 else
21802 op1 = force_reg (op_mode, op1);
21806 /* Try to rearrange the comparison to make it cheaper. */
21807 if (ix86_fp_comparison_cost (code)
21808 > ix86_fp_comparison_cost (swap_condition (code))
21809 && (REG_P (op1) || can_create_pseudo_p ()))
21811 std::swap (op0, op1);
21812 code = swap_condition (code);
21813 if (!REG_P (op0))
21814 op0 = force_reg (op_mode, op0);
21817 *pop0 = op0;
21818 *pop1 = op1;
21819 return code;
21822 /* Convert comparison codes we use to represent FP comparison to integer
21823 code that will result in proper branch. Return UNKNOWN if no such code
21824 is available. */
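/* The mapping below relies on fcomi/comiss setting ZF, PF and CF the same
way an unsigned integer compare would, so e.g. a floating-point GT is
tested as the unsigned GTU condition ("ja") on those flags
(illustrative). */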
21826 enum rtx_code
21827 ix86_fp_compare_code_to_integer (enum rtx_code code)
21829 switch (code)
21831 case GT:
21832 return GTU;
21833 case GE:
21834 return GEU;
21835 case ORDERED:
21836 case UNORDERED:
21837 return code;
21838 case UNEQ:
21839 return EQ;
21840 case UNLT:
21841 return LTU;
21842 case UNLE:
21843 return LEU;
21844 case LTGT:
21845 return NE;
21846 default:
21847 return UNKNOWN;
21851 /* Generate insn patterns to do a floating point compare of OPERANDS. */
21853 static rtx
21854 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21856 machine_mode fpcmp_mode, intcmp_mode;
21857 rtx tmp, tmp2;
21859 fpcmp_mode = ix86_fp_compare_mode (code);
21860 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21862 /* Do fcomi/sahf based test when profitable. */
21863 switch (ix86_fp_comparison_strategy (code))
21865 case IX86_FPCMP_COMI:
21866 intcmp_mode = fpcmp_mode;
21867 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21868 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21869 emit_insn (tmp);
21870 break;
21872 case IX86_FPCMP_SAHF:
21873 intcmp_mode = fpcmp_mode;
21874 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21875 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21877 if (!scratch)
21878 scratch = gen_reg_rtx (HImode);
21879 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21880 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21881 break;
21883 case IX86_FPCMP_ARITH:
21884 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21885 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21886 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21887 if (!scratch)
21888 scratch = gen_reg_rtx (HImode);
21889 emit_insn (gen_rtx_SET (scratch, tmp2));
21891 /* In the unordered case, we have to check C2 for NaN's, which
21892 doesn't happen to work out to anything nice combination-wise.
21893 So do some bit twiddling on the value we've got in AH to come
21894 up with an appropriate set of condition codes. */
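/* After the fnstsw above, AH holds the FPU condition bits C0, C2 and C3
in bits 0, 2 and 6 respectively, so the constants below are masks over
those bits: 0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C0|C2|C3
(illustrative). */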
21896 intcmp_mode = CCNOmode;
21897 switch (code)
21899 case GT:
21900 case UNGT:
21901 if (code == GT || !TARGET_IEEE_FP)
21903 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
21904 code = EQ;
21906 else
21908 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
21909 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21910 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21911 intcmp_mode = CCmode;
21912 code = GEU;
21914 break;
21915 case LT:
21916 case UNLT:
21917 if (code == LT && TARGET_IEEE_FP)
21919 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
21920 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21921 intcmp_mode = CCmode;
21922 code = EQ;
21924 else
21926 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
21927 code = NE;
21929 break;
21930 case GE:
21931 case UNGE:
21932 if (code == GE || !TARGET_IEEE_FP)
21934 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
21935 code = EQ;
21937 else
21939 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
21940 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
21941 code = NE;
21943 break;
21944 case LE:
21945 case UNLE:
21946 if (code == LE && TARGET_IEEE_FP)
21948 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
21949 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21950 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21951 intcmp_mode = CCmode;
21952 code = LTU;
21954 else
21956 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
21957 code = NE;
21959 break;
21960 case EQ:
21961 case UNEQ:
21962 if (code == EQ && TARGET_IEEE_FP)
21964 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
21965 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21966 intcmp_mode = CCmode;
21967 code = EQ;
21969 else
21971 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
21972 code = NE;
21974 break;
21975 case NE:
21976 case LTGT:
21977 if (code == NE && TARGET_IEEE_FP)
21979 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
21980 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
21981 GEN_INT (0x40)));
21982 code = NE;
21984 else
21986 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
21987 code = EQ;
21989 break;
21991 case UNORDERED:
21992 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
21993 code = NE;
21994 break;
21995 case ORDERED:
21996 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
21997 code = EQ;
21998 break;
22000 default:
22001 gcc_unreachable ();
22003 break;
22005 default:
22006 gcc_unreachable();
22009 /* Return the test that should be put into the flags user, i.e.
22010 the bcc, scc, or cmov instruction. */
22011 return gen_rtx_fmt_ee (code, VOIDmode,
22012 gen_rtx_REG (intcmp_mode, FLAGS_REG),
22013 const0_rtx);
22016 static rtx
22017 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
22019 rtx ret;
22021 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
22022 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
22024 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
22026 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
22027 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22029 else
22030 ret = ix86_expand_int_compare (code, op0, op1);
22032 return ret;
22035 void
22036 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
22038 machine_mode mode = GET_MODE (op0);
22039 rtx tmp;
22041 /* Handle the special case of a vector comparison with a boolean result;
22042 transform it using the ptest instruction. */
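/* E.g. a branch on "v0 == v1" in V4SImode becomes, roughly:
t = v0 ^ v1; ptest t, t; je <label>
since ptest sets ZF exactly when the AND of its operands is all zeros
(illustrative only). */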
22043 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22045 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
22046 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
22048 gcc_assert (code == EQ || code == NE);
22049 /* Generate XOR since we can't check that one operand is zero vector. */
22050 tmp = gen_reg_rtx (mode);
22051 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
22052 tmp = gen_lowpart (p_mode, tmp);
22053 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
22054 gen_rtx_UNSPEC (CCmode,
22055 gen_rtvec (2, tmp, tmp),
22056 UNSPEC_PTEST)));
22057 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
22058 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22059 gen_rtx_LABEL_REF (VOIDmode, label),
22060 pc_rtx);
22061 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22062 return;
22065 switch (mode)
22067 case E_SFmode:
22068 case E_DFmode:
22069 case E_XFmode:
22070 case E_QImode:
22071 case E_HImode:
22072 case E_SImode:
22073 simple:
22074 tmp = ix86_expand_compare (code, op0, op1);
22075 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22076 gen_rtx_LABEL_REF (VOIDmode, label),
22077 pc_rtx);
22078 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22079 return;
22081 case E_DImode:
22082 if (TARGET_64BIT)
22083 goto simple;
22084 /* For a 32-bit target, a DImode comparison may be performed in
22085 SSE registers. To allow this we must avoid the split into
22086 SImode, which is achieved by doing the xor in DImode and
22087 then comparing against zero (which the STV pass recognizes).
22088 We don't do the xor comparison when optimizing
22089 for size. */
22090 if (!optimize_insn_for_size_p ()
22091 && TARGET_STV
22092 && (code == EQ || code == NE))
22094 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
22095 op1 = const0_rtx;
22097 /* FALLTHRU */
22098 case E_TImode:
22099 /* Expand DImode branch into multiple compare+branch. */
22101 rtx lo[2], hi[2];
22102 rtx_code_label *label2;
22103 enum rtx_code code1, code2, code3;
22104 machine_mode submode;
22106 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
22108 std::swap (op0, op1);
22109 code = swap_condition (code);
22112 split_double_mode (mode, &op0, 1, lo+0, hi+0);
22113 split_double_mode (mode, &op1, 1, lo+1, hi+1);
22115 submode = mode == DImode ? SImode : DImode;
22117 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
22118 avoid two branches. This costs one extra insn, so disable when
22119 optimizing for size. */
22121 if ((code == EQ || code == NE)
22122 && (!optimize_insn_for_size_p ()
22123 || hi[1] == const0_rtx || lo[1] == const0_rtx))
22125 rtx xor0, xor1;
22127 xor1 = hi[0];
22128 if (hi[1] != const0_rtx)
22129 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
22130 NULL_RTX, 0, OPTAB_WIDEN);
22132 xor0 = lo[0];
22133 if (lo[1] != const0_rtx)
22134 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
22135 NULL_RTX, 0, OPTAB_WIDEN);
22137 tmp = expand_binop (submode, ior_optab, xor1, xor0,
22138 NULL_RTX, 0, OPTAB_WIDEN);
22140 ix86_expand_branch (code, tmp, const0_rtx, label);
22141 return;
22144 /* Otherwise, if we are doing a less-than or greater-than-or-equal comparison,
22145 op1 is a constant and its low word is zero, then we can just
22146 examine the high word. Similarly for a low word of -1 and
22147 less-than-or-equal or greater-than. */
22149 if (CONST_INT_P (hi[1]))
22150 switch (code)
22152 case LT: case LTU: case GE: case GEU:
22153 if (lo[1] == const0_rtx)
22155 ix86_expand_branch (code, hi[0], hi[1], label);
22156 return;
22158 break;
22159 case LE: case LEU: case GT: case GTU:
22160 if (lo[1] == constm1_rtx)
22162 ix86_expand_branch (code, hi[0], hi[1], label);
22163 return;
22165 break;
22166 default:
22167 break;
22170 /* Emulate comparisons that do not depend on Zero flag with
22171 double-word subtraction. Note that only Overflow, Sign
22172 and Carry flags are valid, so swap arguments and condition
22173 of comparisons that would otherwise test Zero flag. */
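/* E.g. a signed DImode "a < b" on a 32-bit target becomes, roughly
(illustrative AT&T syntax):
cmpl lo(b), lo(a)
sbbl hi(b), hi(a) ; result discarded, only the flags are kept
jl <label> */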
22175 switch (code)
22177 case LE: case LEU: case GT: case GTU:
22178 std::swap (lo[0], lo[1]);
22179 std::swap (hi[0], hi[1]);
22180 code = swap_condition (code);
22181 /* FALLTHRU */
22183 case LT: case LTU: case GE: case GEU:
22185 rtx (*cmp_insn) (rtx, rtx);
22186 rtx (*sbb_insn) (rtx, rtx, rtx);
22188 if (TARGET_64BIT)
22189 cmp_insn = gen_cmpdi_1, sbb_insn = gen_subdi3_carry_ccgz;
22190 else
22191 cmp_insn = gen_cmpsi_1, sbb_insn = gen_subsi3_carry_ccgz;
22193 if (!nonimmediate_operand (lo[0], submode))
22194 lo[0] = force_reg (submode, lo[0]);
22195 if (!x86_64_general_operand (lo[1], submode))
22196 lo[1] = force_reg (submode, lo[1]);
22198 if (!register_operand (hi[0], submode))
22199 hi[0] = force_reg (submode, hi[0]);
22200 if (!x86_64_general_operand (hi[1], submode))
22201 hi[1] = force_reg (submode, hi[1]);
22203 emit_insn (cmp_insn (lo[0], lo[1]));
22204 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
22206 tmp = gen_rtx_REG (CCGZmode, FLAGS_REG);
22208 ix86_expand_branch (code, tmp, const0_rtx, label);
22209 return;
22212 default:
22213 break;
22216 /* Otherwise, we need two or three jumps. */
22218 label2 = gen_label_rtx ();
22220 code1 = code;
22221 code2 = swap_condition (code);
22222 code3 = unsigned_condition (code);
22224 switch (code)
22226 case LT: case GT: case LTU: case GTU:
22227 break;
22229 case LE: code1 = LT; code2 = GT; break;
22230 case GE: code1 = GT; code2 = LT; break;
22231 case LEU: code1 = LTU; code2 = GTU; break;
22232 case GEU: code1 = GTU; code2 = LTU; break;
22234 case EQ: code1 = UNKNOWN; code2 = NE; break;
22235 case NE: code2 = UNKNOWN; break;
22237 default:
22238 gcc_unreachable ();
22242 * a < b =>
22243 * if (hi(a) < hi(b)) goto true;
22244 * if (hi(a) > hi(b)) goto false;
22245 * if (lo(a) < lo(b)) goto true;
22246 * false:
22249 if (code1 != UNKNOWN)
22250 ix86_expand_branch (code1, hi[0], hi[1], label);
22251 if (code2 != UNKNOWN)
22252 ix86_expand_branch (code2, hi[0], hi[1], label2);
22254 ix86_expand_branch (code3, lo[0], lo[1], label);
22256 if (code2 != UNKNOWN)
22257 emit_label (label2);
22258 return;
22261 default:
22262 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
22263 goto simple;
22267 void
22268 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22270 rtx ret;
22272 gcc_assert (GET_MODE (dest) == QImode);
22274 ret = ix86_expand_compare (code, op0, op1);
22275 PUT_MODE (ret, QImode);
22276 emit_insn (gen_rtx_SET (dest, ret));
22279 /* Expand comparison setting or clearing carry flag. Return true when
22280 successful and set pop for the operation. */
22281 static bool
22282 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
22284 machine_mode mode =
22285 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
22287 /* Do not handle double-mode compares that go through special path. */
22288 if (mode == (TARGET_64BIT ? TImode : DImode))
22289 return false;
22291 if (SCALAR_FLOAT_MODE_P (mode))
22293 rtx compare_op;
22294 rtx_insn *compare_seq;
22296 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22298 /* Shortcut: following common codes never translate
22299 into carry flag compares. */
22300 if (code == EQ || code == NE || code == UNEQ || code == LTGT
22301 || code == ORDERED || code == UNORDERED)
22302 return false;
22304 /* These comparisons require zero flag; swap operands so they won't. */
22305 if ((code == GT || code == UNLE || code == LE || code == UNGT)
22306 && !TARGET_IEEE_FP)
22308 std::swap (op0, op1);
22309 code = swap_condition (code);
22312 /* Try to expand the comparison and verify that we end up with
22313 a carry-flag-based comparison. This fails to be true only when
22314 we decide to expand the comparison using arithmetic, which is not
22315 a common scenario. */
22316 start_sequence ();
22317 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22318 compare_seq = get_insns ();
22319 end_sequence ();
22321 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
22322 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
22323 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
22324 else
22325 code = GET_CODE (compare_op);
22327 if (code != LTU && code != GEU)
22328 return false;
22330 emit_insn (compare_seq);
22331 *pop = compare_op;
22332 return true;
22335 if (!INTEGRAL_MODE_P (mode))
22336 return false;
22338 switch (code)
22340 case LTU:
22341 case GEU:
22342 break;
22344 /* Convert a==0 into (unsigned)a<1. */
22345 case EQ:
22346 case NE:
22347 if (op1 != const0_rtx)
22348 return false;
22349 op1 = const1_rtx;
22350 code = (code == EQ ? LTU : GEU);
22351 break;
22353 /* Convert a>b into b<a or a>=b+1. */
22354 case GTU:
22355 case LEU:
22356 if (CONST_INT_P (op1))
22358 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22359 /* Bail out on overflow. We still can swap operands but that
22360 would force loading of the constant into register. */
22361 if (op1 == const0_rtx
22362 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22363 return false;
22364 code = (code == GTU ? GEU : LTU);
22366 else
22368 std::swap (op0, op1);
22369 code = (code == GTU ? LTU : GEU);
22371 break;
22373 /* Convert a>=0 into (unsigned)a<0x80000000. */
22374 case LT:
22375 case GE:
22376 if (mode == DImode || op1 != const0_rtx)
22377 return false;
22378 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22379 code = (code == LT ? GEU : LTU);
22380 break;
22381 case LE:
22382 case GT:
22383 if (mode == DImode || op1 != constm1_rtx)
22384 return false;
22385 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22386 code = (code == LE ? GEU : LTU);
22387 break;
22389 default:
22390 return false;
22392 /* Swapping operands may cause constant to appear as first operand. */
22393 if (!nonimmediate_operand (op0, VOIDmode))
22395 if (!can_create_pseudo_p ())
22396 return false;
22397 op0 = force_reg (mode, op0);
22399 *pop = ix86_expand_compare (code, op0, op1);
22400 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
22401 return true;
22404 bool
22405 ix86_expand_int_movcc (rtx operands[])
22407 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22408 rtx_insn *compare_seq;
22409 rtx compare_op;
22410 machine_mode mode = GET_MODE (operands[0]);
22411 bool sign_bit_compare_p = false;
22412 rtx op0 = XEXP (operands[1], 0);
22413 rtx op1 = XEXP (operands[1], 1);
22415 if (GET_MODE (op0) == TImode
22416 || (GET_MODE (op0) == DImode
22417 && !TARGET_64BIT))
22418 return false;
22420 start_sequence ();
22421 compare_op = ix86_expand_compare (code, op0, op1);
22422 compare_seq = get_insns ();
22423 end_sequence ();
22425 compare_code = GET_CODE (compare_op);
22427 if ((op1 == const0_rtx && (code == GE || code == LT))
22428 || (op1 == constm1_rtx && (code == GT || code == LE)))
22429 sign_bit_compare_p = true;
22431 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22432 HImode insns, we'd be swallowed in word prefix ops. */
22434 if ((mode != HImode || TARGET_FAST_PREFIX)
22435 && (mode != (TARGET_64BIT ? TImode : DImode))
22436 && CONST_INT_P (operands[2])
22437 && CONST_INT_P (operands[3]))
22439 rtx out = operands[0];
22440 HOST_WIDE_INT ct = INTVAL (operands[2]);
22441 HOST_WIDE_INT cf = INTVAL (operands[3]);
22442 HOST_WIDE_INT diff;
22444 diff = ct - cf;
22445 /* Sign bit compares are better done using shifts than by using
22446 sbb. */
22447 if (sign_bit_compare_p
22448 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22450 /* Detect overlap between destination and compare sources. */
22451 rtx tmp = out;
22453 if (!sign_bit_compare_p)
22455 rtx flags;
22456 bool fpcmp = false;
22458 compare_code = GET_CODE (compare_op);
22460 flags = XEXP (compare_op, 0);
22462 if (GET_MODE (flags) == CCFPmode
22463 || GET_MODE (flags) == CCFPUmode)
22465 fpcmp = true;
22466 compare_code
22467 = ix86_fp_compare_code_to_integer (compare_code);
22470 /* To simplify rest of code, restrict to the GEU case. */
22471 if (compare_code == LTU)
22473 std::swap (ct, cf);
22474 compare_code = reverse_condition (compare_code);
22475 code = reverse_condition (code);
22477 else
22479 if (fpcmp)
22480 PUT_CODE (compare_op,
22481 reverse_condition_maybe_unordered
22482 (GET_CODE (compare_op)));
22483 else
22484 PUT_CODE (compare_op,
22485 reverse_condition (GET_CODE (compare_op)));
22487 diff = ct - cf;
22489 if (reg_overlap_mentioned_p (out, op0)
22490 || reg_overlap_mentioned_p (out, op1))
22491 tmp = gen_reg_rtx (mode);
22493 if (mode == DImode)
22494 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22495 else
22496 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22497 flags, compare_op));
22499 else
22501 if (code == GT || code == GE)
22502 code = reverse_condition (code);
22503 else
22505 std::swap (ct, cf);
22506 diff = ct - cf;
22508 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22511 if (diff == 1)
22514 * cmpl op0,op1
22515 * sbbl dest,dest
22516 * [addl dest, ct]
22518 * Size 5 - 8.
22520 if (ct)
22521 tmp = expand_simple_binop (mode, PLUS,
22522 tmp, GEN_INT (ct),
22523 copy_rtx (tmp), 1, OPTAB_DIRECT);
22525 else if (cf == -1)
22528 * cmpl op0,op1
22529 * sbbl dest,dest
22530 * orl $ct, dest
22532 * Size 8.
22534 tmp = expand_simple_binop (mode, IOR,
22535 tmp, GEN_INT (ct),
22536 copy_rtx (tmp), 1, OPTAB_DIRECT);
22538 else if (diff == -1 && ct)
22541 * cmpl op0,op1
22542 * sbbl dest,dest
22543 * notl dest
22544 * [addl dest, cf]
22546 * Size 8 - 11.
22548 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22549 if (cf)
22550 tmp = expand_simple_binop (mode, PLUS,
22551 copy_rtx (tmp), GEN_INT (cf),
22552 copy_rtx (tmp), 1, OPTAB_DIRECT);
22554 else
22557 * cmpl op0,op1
22558 * sbbl dest,dest
22559 * [notl dest]
22560 * andl cf - ct, dest
22561 * [addl dest, ct]
22563 * Size 8 - 11.
22566 if (cf == 0)
22568 cf = ct;
22569 ct = 0;
22570 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22573 tmp = expand_simple_binop (mode, AND,
22574 copy_rtx (tmp),
22575 gen_int_mode (cf - ct, mode),
22576 copy_rtx (tmp), 1, OPTAB_DIRECT);
22577 if (ct)
22578 tmp = expand_simple_binop (mode, PLUS,
22579 copy_rtx (tmp), GEN_INT (ct),
22580 copy_rtx (tmp), 1, OPTAB_DIRECT);
22583 if (!rtx_equal_p (tmp, out))
22584 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22586 return true;
22589 if (diff < 0)
22591 machine_mode cmp_mode = GET_MODE (op0);
22592 enum rtx_code new_code;
22594 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22596 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22598 /* We may be reversing an unordered compare to a normal compare, which
22599 is not valid in general (we may convert a non-trapping condition
22600 into a trapping one); however, on i386 we currently emit all
22601 comparisons unordered. */
22602 new_code = reverse_condition_maybe_unordered (code);
22604 else
22605 new_code = ix86_reverse_condition (code, cmp_mode);
22606 if (new_code != UNKNOWN)
22608 std::swap (ct, cf);
22609 diff = -diff;
22610 code = new_code;
22614 compare_code = UNKNOWN;
22615 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22616 && CONST_INT_P (op1))
22618 if (op1 == const0_rtx
22619 && (code == LT || code == GE))
22620 compare_code = code;
22621 else if (op1 == constm1_rtx)
22623 if (code == LE)
22624 compare_code = LT;
22625 else if (code == GT)
22626 compare_code = GE;
22630 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22631 if (compare_code != UNKNOWN
22632 && GET_MODE (op0) == GET_MODE (out)
22633 && (cf == -1 || ct == -1))
22635 /* If lea code below could be used, only optimize
22636 if it results in a 2 insn sequence. */
22638 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22639 || diff == 3 || diff == 5 || diff == 9)
22640 || (compare_code == LT && ct == -1)
22641 || (compare_code == GE && cf == -1))
22644 * notl op1 (if necessary)
22645 * sarl $31, op1
22646 * orl cf, op1
22648 if (ct != -1)
22650 cf = ct;
22651 ct = -1;
22652 code = reverse_condition (code);
22655 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22657 out = expand_simple_binop (mode, IOR,
22658 out, GEN_INT (cf),
22659 out, 1, OPTAB_DIRECT);
22660 if (out != operands[0])
22661 emit_move_insn (operands[0], out);
22663 return true;
22668 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22669 || diff == 3 || diff == 5 || diff == 9)
22670 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22671 && (mode != DImode
22672 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22675 * xorl dest,dest
22676 * cmpl op1,op2
22677 * setcc dest
22678 * lea cf(dest*(ct-cf)),dest
22680 * Size 14.
22682 * This also catches the degenerate setcc-only case.
22685 rtx tmp;
22686 int nops;
22688 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22690 nops = 0;
22691 /* On x86_64 the lea instruction operates on Pmode, so we need
22692 to get the arithmetic done in the proper mode to match. */
22693 if (diff == 1)
22694 tmp = copy_rtx (out);
22695 else
22697 rtx out1;
22698 out1 = copy_rtx (out);
22699 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22700 nops++;
22701 if (diff & 1)
22703 tmp = gen_rtx_PLUS (mode, tmp, out1);
22704 nops++;
22707 if (cf != 0)
22709 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22710 nops++;
22712 if (!rtx_equal_p (tmp, out))
22714 if (nops == 1)
22715 out = force_operand (tmp, copy_rtx (out));
22716 else
22717 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22719 if (!rtx_equal_p (out, operands[0]))
22720 emit_move_insn (operands[0], copy_rtx (out));
22722 return true;
22726 * General case: Jumpful:
22727 * xorl dest,dest cmpl op1, op2
22728 * cmpl op1, op2 movl ct, dest
22729 * setcc dest jcc 1f
22730 * decl dest movl cf, dest
22731 * andl (cf-ct),dest 1:
22732 * addl ct,dest
22734 * Size 20. Size 14.
22736 * This is reasonably steep, but branch mispredict costs are
22737 * high on modern cpus, so consider failing only if optimizing
22738 * for space.
22741 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22742 && BRANCH_COST (optimize_insn_for_speed_p (),
22743 false) >= 2)
22745 if (cf == 0)
22747 machine_mode cmp_mode = GET_MODE (op0);
22748 enum rtx_code new_code;
22750 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22752 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22754 /* We may be reversing an unordered compare to a normal compare,
22755 which is not valid in general (we may convert a non-trapping
22756 condition into a trapping one); however, on i386 we currently
22757 emit all comparisons unordered. */
22758 new_code = reverse_condition_maybe_unordered (code);
22760 else
22762 new_code = ix86_reverse_condition (code, cmp_mode);
22763 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22764 compare_code = reverse_condition (compare_code);
22767 if (new_code != UNKNOWN)
22769 cf = ct;
22770 ct = 0;
22771 code = new_code;
22775 if (compare_code != UNKNOWN)
22777 /* notl op1 (if needed)
22778 sarl $31, op1
22779 andl (cf-ct), op1
22780 addl ct, op1
22782 For x < 0 (resp. x <= -1) there will be no notl,
22783 so if possible swap the constants to get rid of the
22784 complement.
22785 True/false will be -1/0 while code below (store flag
22786 followed by decrement) is 0/-1, so the constants need
22787 to be exchanged once more. */
22789 if (compare_code == GE || !cf)
22791 code = reverse_condition (code);
22792 compare_code = LT;
22794 else
22795 std::swap (ct, cf);
22797 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22799 else
22801 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22803 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22804 constm1_rtx,
22805 copy_rtx (out), 1, OPTAB_DIRECT);
22808 out = expand_simple_binop (mode, AND, copy_rtx (out),
22809 gen_int_mode (cf - ct, mode),
22810 copy_rtx (out), 1, OPTAB_DIRECT);
22811 if (ct)
22812 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22813 copy_rtx (out), 1, OPTAB_DIRECT);
22814 if (!rtx_equal_p (out, operands[0]))
22815 emit_move_insn (operands[0], copy_rtx (out));
22817 return true;
22821 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22823 /* Try a few things more with specific constants and a variable. */
22825 optab op;
22826 rtx var, orig_out, out, tmp;
22828 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22829 return false;
22831 /* If one of the two operands is an interesting constant, replace the other
22832 arm with 0 or -1, recurse to load the constant, and mask the variable in with a logical operation. */
22834 if (CONST_INT_P (operands[2]))
22836 var = operands[3];
22837 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22838 operands[3] = constm1_rtx, op = and_optab;
22839 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22840 operands[3] = const0_rtx, op = ior_optab;
22841 else
22842 return false;
22844 else if (CONST_INT_P (operands[3]))
22846 var = operands[2];
22847 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22848 operands[2] = constm1_rtx, op = and_optab;
22849 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
22850 operands[2] = const0_rtx, op = ior_optab;
22851 else
22852 return false;
22854 else
22855 return false;
22857 orig_out = operands[0];
22858 tmp = gen_reg_rtx (mode);
22859 operands[0] = tmp;
22861 /* Recurse to get the constant loaded. */
22862 if (!ix86_expand_int_movcc (operands))
22863 return false;
22865 /* Mask in the interesting variable. */
22866 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22867 OPTAB_WIDEN);
22868 if (!rtx_equal_p (out, orig_out))
22869 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22871 return true;
22875 * For comparison with above,
22877 * movl cf,dest
22878 * movl ct,tmp
22879 * cmpl op1,op2
22880 * cmovcc tmp,dest
22882 * Size 15.
22885 if (! nonimmediate_operand (operands[2], mode))
22886 operands[2] = force_reg (mode, operands[2]);
22887 if (! nonimmediate_operand (operands[3], mode))
22888 operands[3] = force_reg (mode, operands[3]);
22890 if (! register_operand (operands[2], VOIDmode)
22891 && (mode == QImode
22892 || ! register_operand (operands[3], VOIDmode)))
22893 operands[2] = force_reg (mode, operands[2]);
22895 if (mode == QImode
22896 && ! register_operand (operands[3], VOIDmode))
22897 operands[3] = force_reg (mode, operands[3]);
22899 emit_insn (compare_seq);
22900 emit_insn (gen_rtx_SET (operands[0],
22901 gen_rtx_IF_THEN_ELSE (mode,
22902 compare_op, operands[2],
22903 operands[3])));
22904 return true;
22907 /* Swap, force into registers, or otherwise massage the two operands
22908 to an sse comparison with a mask result. Thus we differ a bit from
22909 ix86_prepare_fp_compare_args which expects to produce a flags result.
22911 The DEST operand exists to help determine whether to commute commutative
22912 operators. The POP0/POP1 operands are updated in place. The new
22913 comparison code is returned, or UNKNOWN if not implementable. */
22915 static enum rtx_code
22916 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22917 rtx *pop0, rtx *pop1)
22919 switch (code)
22921 case LTGT:
22922 case UNEQ:
22923 /* AVX supports all the needed comparisons. */
22924 if (TARGET_AVX)
22925 break;
22926 /* We have no LTGT as an operator. We could implement it with
22927 NE & ORDERED, but this requires an extra temporary. It's
22928 not clear that it's worth it. */
22929 return UNKNOWN;
22931 case LT:
22932 case LE:
22933 case UNGT:
22934 case UNGE:
22935 /* These are supported directly. */
22936 break;
22938 case EQ:
22939 case NE:
22940 case UNORDERED:
22941 case ORDERED:
22942 /* AVX has 3 operand comparisons, no need to swap anything. */
22943 if (TARGET_AVX)
22944 break;
22945 /* For commutative operators, try to canonicalize the destination
22946 operand to be first in the comparison - this helps reload to
22947 avoid extra moves. */
22948 if (!dest || !rtx_equal_p (dest, *pop1))
22949 break;
22950 /* FALLTHRU */
22952 case GE:
22953 case GT:
22954 case UNLE:
22955 case UNLT:
22956 /* These are not supported directly before AVX, and furthermore
22957 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22958 comparison operands to transform into something that is
22959 supported. */
22960 std::swap (*pop0, *pop1);
22961 code = swap_condition (code);
22962 break;
22964 default:
22965 gcc_unreachable ();
22968 return code;
22971 /* Detect conditional moves that exactly match min/max operational
22972 semantics. Note that this is IEEE safe, as long as we don't
22973 interchange the operands.
22975 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22976 and TRUE if the operation is successful and instructions are emitted. */
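/* E.g. "x < y ? x : y" maps onto minss/minsd; like the C expression, the
instruction returns its second operand when the comparison is unordered,
which is why the operand order must not be changed (illustrative). */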
22978 static bool
22979 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22980 rtx cmp_op1, rtx if_true, rtx if_false)
22982 machine_mode mode;
22983 bool is_min;
22984 rtx tmp;
22986 if (code == LT)
22988 else if (code == UNGE)
22989 std::swap (if_true, if_false);
22990 else
22991 return false;
22993 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22994 is_min = true;
22995 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22996 is_min = false;
22997 else
22998 return false;
23000 mode = GET_MODE (dest);
23002 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23003 but MODE may be a vector mode and thus not appropriate. */
23004 if (!flag_finite_math_only || flag_signed_zeros)
23006 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23007 rtvec v;
23009 if_true = force_reg (mode, if_true);
23010 v = gen_rtvec (2, if_true, if_false);
23011 tmp = gen_rtx_UNSPEC (mode, v, u);
23013 else
23015 code = is_min ? SMIN : SMAX;
23016 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
23019 emit_insn (gen_rtx_SET (dest, tmp));
23020 return true;
23023 /* Expand an sse vector comparison. Return the register with the result. */
23025 static rtx
23026 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
23027 rtx op_true, rtx op_false)
23029 machine_mode mode = GET_MODE (dest);
23030 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
23032 /* In general case result of comparison can differ from operands' type. */
23033 machine_mode cmp_mode;
23035 /* In AVX512F the result of comparison is an integer mask. */
23036 bool maskcmp = false;
23037 rtx x;
23039 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
23041 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
23042 cmp_mode = int_mode_for_size (nbits, 0).require ();
23043 maskcmp = true;
23045 else
23046 cmp_mode = cmp_ops_mode;
23049 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
23050 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
23051 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
23053 if (optimize
23054 || (maskcmp && cmp_mode != mode)
23055 || (op_true && reg_overlap_mentioned_p (dest, op_true))
23056 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
23057 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
23059 /* Compare patterns for int modes are unspec in AVX512F only. */
23060 if (maskcmp && (code == GT || code == EQ))
23062 rtx (*gen)(rtx, rtx, rtx);
23064 switch (cmp_ops_mode)
23066 case E_V64QImode:
23067 gcc_assert (TARGET_AVX512BW);
23068 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
23069 break;
23070 case E_V32HImode:
23071 gcc_assert (TARGET_AVX512BW);
23072 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
23073 break;
23074 case E_V16SImode:
23075 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
23076 break;
23077 case E_V8DImode:
23078 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
23079 break;
23080 default:
23081 gen = NULL;
23084 if (gen)
23086 emit_insn (gen (dest, cmp_op0, cmp_op1));
23087 return dest;
23090 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
23092 if (cmp_mode != mode && !maskcmp)
23094 x = force_reg (cmp_ops_mode, x);
23095 convert_move (dest, x, false);
23097 else
23098 emit_insn (gen_rtx_SET (dest, x));
23100 return dest;
23103 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
23104 operations. This is used for both scalar and vector conditional moves. */
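/* When no blend instruction applies, the generic fallback at the bottom
computes, in effect (illustrative):
dest = (cmp & op_true) | (~cmp & op_false) */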
23106 void
23107 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
23109 machine_mode mode = GET_MODE (dest);
23110 machine_mode cmpmode = GET_MODE (cmp);
23112 /* In AVX512F the result of comparison is an integer mask. */
23113 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
23115 rtx t2, t3, x;
23117 /* If we have an integer mask and FP value then we need
23118 to cast mask to FP mode. */
23119 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
23121 cmp = force_reg (cmpmode, cmp);
23122 cmp = gen_rtx_SUBREG (mode, cmp, 0);
23125 if (vector_all_ones_operand (op_true, mode)
23126 && rtx_equal_p (op_false, CONST0_RTX (mode))
23127 && !maskcmp)
23129 emit_insn (gen_rtx_SET (dest, cmp));
23131 else if (op_false == CONST0_RTX (mode)
23132 && !maskcmp)
23134 op_true = force_reg (mode, op_true);
23135 x = gen_rtx_AND (mode, cmp, op_true);
23136 emit_insn (gen_rtx_SET (dest, x));
23138 else if (op_true == CONST0_RTX (mode)
23139 && !maskcmp)
23141 op_false = force_reg (mode, op_false);
23142 x = gen_rtx_NOT (mode, cmp);
23143 x = gen_rtx_AND (mode, x, op_false);
23144 emit_insn (gen_rtx_SET (dest, x));
23146 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
23147 && !maskcmp)
23149 op_false = force_reg (mode, op_false);
23150 x = gen_rtx_IOR (mode, cmp, op_false);
23151 emit_insn (gen_rtx_SET (dest, x));
23153 else if (TARGET_XOP
23154 && !maskcmp)
23156 op_true = force_reg (mode, op_true);
23158 if (!nonimmediate_operand (op_false, mode))
23159 op_false = force_reg (mode, op_false);
23161 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
23162 op_true,
23163 op_false)));
23165 else
23167 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23168 rtx d = dest;
23170 if (!nonimmediate_operand (op_true, mode))
23171 op_true = force_reg (mode, op_true);
23173 op_false = force_reg (mode, op_false);
23175 switch (mode)
23177 case E_V4SFmode:
23178 if (TARGET_SSE4_1)
23179 gen = gen_sse4_1_blendvps;
23180 break;
23181 case E_V2DFmode:
23182 if (TARGET_SSE4_1)
23183 gen = gen_sse4_1_blendvpd;
23184 break;
23185 case E_V16QImode:
23186 case E_V8HImode:
23187 case E_V4SImode:
23188 case E_V2DImode:
23189 if (TARGET_SSE4_1)
23191 gen = gen_sse4_1_pblendvb;
23192 if (mode != V16QImode)
23193 d = gen_reg_rtx (V16QImode);
23194 op_false = gen_lowpart (V16QImode, op_false);
23195 op_true = gen_lowpart (V16QImode, op_true);
23196 cmp = gen_lowpart (V16QImode, cmp);
23198 break;
23199 case E_V8SFmode:
23200 if (TARGET_AVX)
23201 gen = gen_avx_blendvps256;
23202 break;
23203 case E_V4DFmode:
23204 if (TARGET_AVX)
23205 gen = gen_avx_blendvpd256;
23206 break;
23207 case E_V32QImode:
23208 case E_V16HImode:
23209 case E_V8SImode:
23210 case E_V4DImode:
23211 if (TARGET_AVX2)
23213 gen = gen_avx2_pblendvb;
23214 if (mode != V32QImode)
23215 d = gen_reg_rtx (V32QImode);
23216 op_false = gen_lowpart (V32QImode, op_false);
23217 op_true = gen_lowpart (V32QImode, op_true);
23218 cmp = gen_lowpart (V32QImode, cmp);
23220 break;
23222 case E_V64QImode:
23223 gen = gen_avx512bw_blendmv64qi;
23224 break;
23225 case E_V32HImode:
23226 gen = gen_avx512bw_blendmv32hi;
23227 break;
23228 case E_V16SImode:
23229 gen = gen_avx512f_blendmv16si;
23230 break;
23231 case E_V8DImode:
23232 gen = gen_avx512f_blendmv8di;
23233 break;
23234 case E_V8DFmode:
23235 gen = gen_avx512f_blendmv8df;
23236 break;
23237 case E_V16SFmode:
23238 gen = gen_avx512f_blendmv16sf;
23239 break;
23241 default:
23242 break;
23245 if (gen != NULL)
23247 emit_insn (gen (d, op_false, op_true, cmp));
23248 if (d != dest)
23249 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
23251 else
23253 op_true = force_reg (mode, op_true);
23255 t2 = gen_reg_rtx (mode);
23256 if (optimize)
23257 t3 = gen_reg_rtx (mode);
23258 else
23259 t3 = dest;
23261 x = gen_rtx_AND (mode, op_true, cmp);
23262 emit_insn (gen_rtx_SET (t2, x));
23264 x = gen_rtx_NOT (mode, cmp);
23265 x = gen_rtx_AND (mode, x, op_false);
23266 emit_insn (gen_rtx_SET (t3, x));
23268 x = gen_rtx_IOR (mode, t3, t2);
23269 emit_insn (gen_rtx_SET (dest, x));
23274 /* Expand a floating-point conditional move. Return true if successful. */
23276 bool
23277 ix86_expand_fp_movcc (rtx operands[])
23279 machine_mode mode = GET_MODE (operands[0]);
23280 enum rtx_code code = GET_CODE (operands[1]);
23281 rtx tmp, compare_op;
23282 rtx op0 = XEXP (operands[1], 0);
23283 rtx op1 = XEXP (operands[1], 1);
23285 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23287 machine_mode cmode;
23289 /* Since we've no cmove for sse registers, don't force bad register
23290 allocation just to gain access to it. Deny movcc when the
23291 comparison mode doesn't match the move mode. */
23292 cmode = GET_MODE (op0);
23293 if (cmode == VOIDmode)
23294 cmode = GET_MODE (op1);
23295 if (cmode != mode)
23296 return false;
23298 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
23299 if (code == UNKNOWN)
23300 return false;
23302 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
23303 operands[2], operands[3]))
23304 return true;
23306 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
23307 operands[2], operands[3]);
23308 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
23309 return true;
23312 if (GET_MODE (op0) == TImode
23313 || (GET_MODE (op0) == DImode
23314 && !TARGET_64BIT))
23315 return false;
23317 /* The floating point conditional move instructions don't directly
23318 support conditions resulting from a signed integer comparison. */
23320 compare_op = ix86_expand_compare (code, op0, op1);
23321 if (!fcmov_comparison_operator (compare_op, VOIDmode))
23323 tmp = gen_reg_rtx (QImode);
23324 ix86_expand_setcc (tmp, code, op0, op1);
23326 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
23329 emit_insn (gen_rtx_SET (operands[0],
23330 gen_rtx_IF_THEN_ELSE (mode, compare_op,
23331 operands[2], operands[3])));
23333 return true;
23336 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
23338 static int
23339 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23341 switch (code)
23343 case EQ:
23344 return 0;
23345 case LT:
23346 case LTU:
23347 return 1;
23348 case LE:
23349 case LEU:
23350 return 2;
23351 case NE:
23352 return 4;
23353 case GE:
23354 case GEU:
23355 return 5;
23356 case GT:
23357 case GTU:
23358 return 6;
23359 default:
23360 gcc_unreachable ();
23364 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
23366 static int
23367 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23369 switch (code)
23371 case EQ:
23372 return 0x00;
23373 case NE:
23374 return 0x04;
23375 case GT:
23376 return 0x0e;
23377 case LE:
23378 return 0x02;
23379 case GE:
23380 return 0x0d;
23381 case LT:
23382 return 0x01;
23383 case UNLE:
23384 return 0x0a;
23385 case UNLT:
23386 return 0x09;
23387 case UNGE:
23388 return 0x05;
23389 case UNGT:
23390 return 0x06;
23391 case UNEQ:
23392 return 0x18;
23393 case LTGT:
23394 return 0x0c;
23395 case ORDERED:
23396 return 0x07;
23397 case UNORDERED:
23398 return 0x03;
23399 default:
23400 gcc_unreachable ();
23404 /* Return immediate value to be used in UNSPEC_PCMP
23405 for comparison CODE in MODE. */
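/* E.g. an integer-vector GT yields 6, the "not less-or-equal" predicate
encoding expected by the AVX-512 vpcmp instructions (illustrative). */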
23407 static int
23408 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
23410 if (FLOAT_MODE_P (mode))
23411 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23412 return ix86_int_cmp_code_to_pcmp_immediate (code);
23415 /* Expand AVX-512 vector comparison. */
23417 bool
23418 ix86_expand_mask_vec_cmp (rtx operands[])
23420 machine_mode mask_mode = GET_MODE (operands[0]);
23421 machine_mode cmp_mode = GET_MODE (operands[2]);
23422 enum rtx_code code = GET_CODE (operands[1]);
23423 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
23424 int unspec_code;
23425 rtx unspec;
23427 switch (code)
23429 case LEU:
23430 case GTU:
23431 case GEU:
23432 case LTU:
23433 unspec_code = UNSPEC_UNSIGNED_PCMP;
23434 break;
23436 default:
23437 unspec_code = UNSPEC_PCMP;
23440 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23441 operands[3], imm),
23442 unspec_code);
23443 emit_insn (gen_rtx_SET (operands[0], unspec));
23445 return true;
23448 /* Expand fp vector comparison. */
23450 bool
23451 ix86_expand_fp_vec_cmp (rtx operands[])
23453 enum rtx_code code = GET_CODE (operands[1]);
23454 rtx cmp;
23456 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23457 &operands[2], &operands[3]);
23458 if (code == UNKNOWN)
23460 rtx temp;
23461 switch (GET_CODE (operands[1]))
23463 case LTGT:
23464 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23465 operands[3], NULL, NULL);
23466 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23467 operands[3], NULL, NULL);
23468 code = AND;
23469 break;
23470 case UNEQ:
23471 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23472 operands[3], NULL, NULL);
23473 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23474 operands[3], NULL, NULL);
23475 code = IOR;
23476 break;
23477 default:
23478 gcc_unreachable ();
23480 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23481 OPTAB_DIRECT);
23483 else
23484 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23485 operands[1], operands[2]);
23487 if (operands[0] != cmp)
23488 emit_move_insn (operands[0], cmp);
23490 return true;
23493 static rtx
23494 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23495 rtx op_true, rtx op_false, bool *negate)
23497 machine_mode data_mode = GET_MODE (dest);
23498 machine_mode mode = GET_MODE (cop0);
23499 rtx x;
23501 *negate = false;
23503 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23504 if (TARGET_XOP
23505 && (mode == V16QImode || mode == V8HImode
23506 || mode == V4SImode || mode == V2DImode))
23508 else
23510 /* Canonicalize the comparison to EQ, GT, GTU. */
23511 switch (code)
23513 case EQ:
23514 case GT:
23515 case GTU:
23516 break;
23518 case NE:
23519 case LE:
23520 case LEU:
23521 code = reverse_condition (code);
23522 *negate = true;
23523 break;
23525 case GE:
23526 case GEU:
23527 code = reverse_condition (code);
23528 *negate = true;
23529 /* FALLTHRU */
23531 case LT:
23532 case LTU:
23533 std::swap (cop0, cop1);
23534 code = swap_condition (code);
23535 break;
23537 default:
23538 gcc_unreachable ();
23541 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23542 if (mode == V2DImode)
23544 switch (code)
23546 case EQ:
23547 /* SSE4.1 supports EQ. */
23548 if (!TARGET_SSE4_1)
23549 return NULL;
23550 break;
23552 case GT:
23553 case GTU:
23554 /* SSE4.2 supports GT/GTU. */
23555 if (!TARGET_SSE4_2)
23556 return NULL;
23557 break;
23559 default:
23560 gcc_unreachable ();
23564 /* Unsigned parallel compare is not supported by the hardware.
23565 Play some tricks to turn this into a signed comparison
23566 or an equality comparison against zero. */
23567 if (code == GTU)
23569 cop0 = force_reg (mode, cop0);
23571 switch (mode)
23573 case E_V16SImode:
23574 case E_V8DImode:
23575 case E_V8SImode:
23576 case E_V4DImode:
23577 case E_V4SImode:
23578 case E_V2DImode:
23580 rtx t1, t2, mask;
23581 rtx (*gen_sub3) (rtx, rtx, rtx);
23583 switch (mode)
23585 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
23586 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
23587 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
23588 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
23589 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
23590 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
23591 default:
23592 gcc_unreachable ();
23594 /* Subtract (-(INT MAX) - 1) from both operands to make
23595 them signed. */
23596 mask = ix86_build_signbit_mask (mode, true, false);
23597 t1 = gen_reg_rtx (mode);
23598 emit_insn (gen_sub3 (t1, cop0, mask));
23600 t2 = gen_reg_rtx (mode);
23601 emit_insn (gen_sub3 (t2, cop1, mask));
23603 cop0 = t1;
23604 cop1 = t2;
23605 code = GT;
23607 break;
23609 case E_V64QImode:
23610 case E_V32HImode:
23611 case E_V32QImode:
23612 case E_V16HImode:
23613 case E_V16QImode:
23614 case E_V8HImode:
23615 /* Perform a parallel unsigned saturating subtraction. */
23616 x = gen_reg_rtx (mode);
23617 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
23618 cop1)));
23620 cop0 = x;
23621 cop1 = CONST0_RTX (mode);
23622 code = EQ;
23623 *negate = !*negate;
23624 break;
23626 default:
23627 gcc_unreachable ();
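/* For example, a V4SImode GTU comparison is emitted as a signed GT compare
   of (a - 0x80000000) against (b - 0x80000000); a V16QImode GTU comparison
   instead tests whether the unsigned saturating subtraction a - b is zero
   and negates the result. */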
23632 if (*negate)
23633 std::swap (op_true, op_false);
23635 /* Allow the comparison to be done in one mode, but the movcc to
23636 happen in another mode. */
23637 if (data_mode == mode)
23639 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23640 op_true, op_false);
23642 else
23644 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23645 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23646 op_true, op_false);
23647 if (GET_MODE (x) == mode)
23648 x = gen_lowpart (data_mode, x);
23651 return x;
23654 /* Expand integer vector comparison. */
23656 bool
23657 ix86_expand_int_vec_cmp (rtx operands[])
23659 rtx_code code = GET_CODE (operands[1]);
23660 bool negate = false;
23661 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23662 operands[3], NULL, NULL, &negate);
23664 if (!cmp)
23665 return false;
23667 if (negate)
23668 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23669 CONST0_RTX (GET_MODE (cmp)),
23670 NULL, NULL, &negate);
23672 gcc_assert (!negate);
23674 if (operands[0] != cmp)
23675 emit_move_insn (operands[0], cmp);
23677 return true;
23680 /* Expand a floating-point vector conditional move; a vcond operation
23681 rather than a movcc operation. */
23683 bool
23684 ix86_expand_fp_vcond (rtx operands[])
23686 enum rtx_code code = GET_CODE (operands[3]);
23687 rtx cmp;
23689 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23690 &operands[4], &operands[5]);
23691 if (code == UNKNOWN)
23693 rtx temp;
23694 switch (GET_CODE (operands[3]))
23696 case LTGT:
23697 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23698 operands[5], operands[0], operands[0]);
23699 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23700 operands[5], operands[1], operands[2]);
23701 code = AND;
23702 break;
23703 case UNEQ:
23704 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23705 operands[5], operands[0], operands[0]);
23706 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23707 operands[5], operands[1], operands[2]);
23708 code = IOR;
23709 break;
23710 default:
23711 gcc_unreachable ();
23713 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23714 OPTAB_DIRECT);
23715 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23716 return true;
23719 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23720 operands[5], operands[1], operands[2]))
23721 return true;
23723 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23724 operands[1], operands[2]);
23725 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23726 return true;
23729 /* Expand a signed/unsigned integral vector conditional move. */
23731 bool
23732 ix86_expand_int_vcond (rtx operands[])
23734 machine_mode data_mode = GET_MODE (operands[0]);
23735 machine_mode mode = GET_MODE (operands[4]);
23736 enum rtx_code code = GET_CODE (operands[3]);
23737 bool negate = false;
23738 rtx x, cop0, cop1;
23740 cop0 = operands[4];
23741 cop1 = operands[5];
23743 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23744 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
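/* E.g. for V4SImode, "x < 0 ? -1 : 0" becomes an arithmetic right shift
   by 31 and "x < 0 ? 1 : 0" becomes a logical right shift by 31. */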
23745 if ((code == LT || code == GE)
23746 && data_mode == mode
23747 && cop1 == CONST0_RTX (mode)
23748 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23749 && GET_MODE_UNIT_SIZE (data_mode) > 1
23750 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23751 && (GET_MODE_SIZE (data_mode) == 16
23752 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
23754 rtx negop = operands[2 - (code == LT)];
23755 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23756 if (negop == CONST1_RTX (data_mode))
23758 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23759 operands[0], 1, OPTAB_DIRECT);
23760 if (res != operands[0])
23761 emit_move_insn (operands[0], res);
23762 return true;
23764 else if (GET_MODE_INNER (data_mode) != DImode
23765 && vector_all_ones_operand (negop, data_mode))
23767 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23768 operands[0], 0, OPTAB_DIRECT);
23769 if (res != operands[0])
23770 emit_move_insn (operands[0], res);
23771 return true;
23775 if (!nonimmediate_operand (cop1, mode))
23776 cop1 = force_reg (mode, cop1);
23777 if (!general_operand (operands[1], data_mode))
23778 operands[1] = force_reg (data_mode, operands[1]);
23779 if (!general_operand (operands[2], data_mode))
23780 operands[2] = force_reg (data_mode, operands[2]);
23782 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23783 operands[1], operands[2], &negate);
23785 if (!x)
23786 return false;
23788 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23789 operands[2-negate]);
23790 return true;
23793 /* AVX512F does support 64-byte integer vector operations,
23794 thus the longest vector we are faced with is V64QImode. */
23795 #define MAX_VECT_LEN 64
23797 struct expand_vec_perm_d
23799 rtx target, op0, op1;
23800 unsigned char perm[MAX_VECT_LEN];
23801 machine_mode vmode;
23802 unsigned char nelt;
23803 bool one_operand_p;
23804 bool testing_p;
23807 static bool
23808 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23809 struct expand_vec_perm_d *d)
23811 /* ix86_expand_vec_perm_vpermi2 is called from both the const and the non-const
23812 expanders, so the arguments are either in D, or in OP0, OP1 etc. */
23813 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23814 machine_mode maskmode = mode;
23815 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23817 switch (mode)
23819 case E_V8HImode:
23820 if (TARGET_AVX512VL && TARGET_AVX512BW)
23821 gen = gen_avx512vl_vpermi2varv8hi3;
23822 break;
23823 case E_V16HImode:
23824 if (TARGET_AVX512VL && TARGET_AVX512BW)
23825 gen = gen_avx512vl_vpermi2varv16hi3;
23826 break;
23827 case E_V64QImode:
23828 if (TARGET_AVX512VBMI)
23829 gen = gen_avx512bw_vpermi2varv64qi3;
23830 break;
23831 case E_V32HImode:
23832 if (TARGET_AVX512BW)
23833 gen = gen_avx512bw_vpermi2varv32hi3;
23834 break;
23835 case E_V4SImode:
23836 if (TARGET_AVX512VL)
23837 gen = gen_avx512vl_vpermi2varv4si3;
23838 break;
23839 case E_V8SImode:
23840 if (TARGET_AVX512VL)
23841 gen = gen_avx512vl_vpermi2varv8si3;
23842 break;
23843 case E_V16SImode:
23844 if (TARGET_AVX512F)
23845 gen = gen_avx512f_vpermi2varv16si3;
23846 break;
23847 case E_V4SFmode:
23848 if (TARGET_AVX512VL)
23850 gen = gen_avx512vl_vpermi2varv4sf3;
23851 maskmode = V4SImode;
23853 break;
23854 case E_V8SFmode:
23855 if (TARGET_AVX512VL)
23857 gen = gen_avx512vl_vpermi2varv8sf3;
23858 maskmode = V8SImode;
23860 break;
23861 case E_V16SFmode:
23862 if (TARGET_AVX512F)
23864 gen = gen_avx512f_vpermi2varv16sf3;
23865 maskmode = V16SImode;
23867 break;
23868 case E_V2DImode:
23869 if (TARGET_AVX512VL)
23870 gen = gen_avx512vl_vpermi2varv2di3;
23871 break;
23872 case E_V4DImode:
23873 if (TARGET_AVX512VL)
23874 gen = gen_avx512vl_vpermi2varv4di3;
23875 break;
23876 case E_V8DImode:
23877 if (TARGET_AVX512F)
23878 gen = gen_avx512f_vpermi2varv8di3;
23879 break;
23880 case E_V2DFmode:
23881 if (TARGET_AVX512VL)
23883 gen = gen_avx512vl_vpermi2varv2df3;
23884 maskmode = V2DImode;
23886 break;
23887 case E_V4DFmode:
23888 if (TARGET_AVX512VL)
23890 gen = gen_avx512vl_vpermi2varv4df3;
23891 maskmode = V4DImode;
23893 break;
23894 case E_V8DFmode:
23895 if (TARGET_AVX512F)
23897 gen = gen_avx512f_vpermi2varv8df3;
23898 maskmode = V8DImode;
23900 break;
23901 default:
23902 break;
23905 if (gen == NULL)
23906 return false;
23908 /* ix86_expand_vec_perm_vpermi2 is called from both the const and the non-const
23909 expanders, so the arguments are either in D, or in OP0, OP1 etc. */
23910 if (d)
23912 rtx vec[64];
23913 target = d->target;
23914 op0 = d->op0;
23915 op1 = d->op1;
23916 for (int i = 0; i < d->nelt; ++i)
23917 vec[i] = GEN_INT (d->perm[i]);
23918 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23921 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23922 return true;
23925 /* Expand a variable vector permutation. */
23927 void
23928 ix86_expand_vec_perm (rtx operands[])
23930 rtx target = operands[0];
23931 rtx op0 = operands[1];
23932 rtx op1 = operands[2];
23933 rtx mask = operands[3];
23934 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23935 machine_mode mode = GET_MODE (op0);
23936 machine_mode maskmode = GET_MODE (mask);
23937 int w, e, i;
23938 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23940 /* Number of elements in the vector. */
23941 w = GET_MODE_NUNITS (mode);
23942 e = GET_MODE_UNIT_SIZE (mode);
23943 gcc_assert (w <= 64);
23945 if (TARGET_AVX512F && one_operand_shuffle)
23947 rtx (*gen) (rtx, rtx, rtx) = NULL;
23948 switch (mode)
23950 case E_V16SImode:
23951 gen = gen_avx512f_permvarv16si;
23952 break;
23953 case E_V16SFmode:
23954 gen = gen_avx512f_permvarv16sf;
23955 break;
23956 case E_V8DImode:
23957 gen = gen_avx512f_permvarv8di;
23958 break;
23959 case E_V8DFmode:
23960 gen = gen_avx512f_permvarv8df;
23961 break;
23962 default:
23963 break;
23965 if (gen != NULL)
23967 emit_insn (gen (target, op0, mask));
23968 return;
23972 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23973 return;
23975 if (TARGET_AVX2)
23977 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23979 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23980 a constant shuffle operand. With a tiny bit of effort we can
23981 use VPERMD instead. A re-interpretation stall for V4DFmode is
23982 unfortunate but there's no avoiding it.
23983 Similarly, for V16HImode we don't have instructions for variable
23984 shuffling, while for V32QImode we can, after preparing suitable
23985 masks, use vpshufb; vpshufb; vpermq; vpor. */
23987 if (mode == V16HImode)
23989 maskmode = mode = V32QImode;
23990 w = 32;
23991 e = 1;
23993 else
23995 maskmode = mode = V8SImode;
23996 w = 8;
23997 e = 4;
23999 t1 = gen_reg_rtx (maskmode);
24001 /* Replicate the low bits of the V4DImode mask into V8SImode:
24002 mask = { A B C D }
24003 t1 = { A A B B C C D D }. */
24004 for (i = 0; i < w / 2; ++i)
24005 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24006 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24007 vt = force_reg (maskmode, vt);
24008 mask = gen_lowpart (maskmode, mask);
24009 if (maskmode == V8SImode)
24010 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
24011 else
24012 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
24014 /* Multiply the shuffle indices by two. */
24015 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
24016 OPTAB_DIRECT);
24018 /* Add one to the odd shuffle indices:
24019 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
24020 for (i = 0; i < w / 2; ++i)
24022 vec[i * 2] = const0_rtx;
24023 vec[i * 2 + 1] = const1_rtx;
24025 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24026 vt = validize_mem (force_const_mem (maskmode, vt));
24027 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
24028 OPTAB_DIRECT);
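/* At this point an original V4DImode mask of, say, { 3 0 2 1 } has become
   the V8SImode dword mask { 6 7 0 1 4 5 2 3 }. */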
24030 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
24031 operands[3] = mask = t1;
24032 target = gen_reg_rtx (mode);
24033 op0 = gen_lowpart (mode, op0);
24034 op1 = gen_lowpart (mode, op1);
24037 switch (mode)
24039 case E_V8SImode:
24040 /* The VPERMD and VPERMPS instructions already properly ignore
24041 the high bits of the shuffle elements. No need for us to
24042 perform an AND ourselves. */
24043 if (one_operand_shuffle)
24045 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
24046 if (target != operands[0])
24047 emit_move_insn (operands[0],
24048 gen_lowpart (GET_MODE (operands[0]), target));
24050 else
24052 t1 = gen_reg_rtx (V8SImode);
24053 t2 = gen_reg_rtx (V8SImode);
24054 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
24055 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
24056 goto merge_two;
24058 return;
24060 case E_V8SFmode:
24061 mask = gen_lowpart (V8SImode, mask);
24062 if (one_operand_shuffle)
24063 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
24064 else
24066 t1 = gen_reg_rtx (V8SFmode);
24067 t2 = gen_reg_rtx (V8SFmode);
24068 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
24069 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
24070 goto merge_two;
24072 return;
24074 case E_V4SImode:
24075 /* By combining the two 128-bit input vectors into one 256-bit
24076 input vector, we can use VPERMD and VPERMPS for the full
24077 two-operand shuffle. */
24078 t1 = gen_reg_rtx (V8SImode);
24079 t2 = gen_reg_rtx (V8SImode);
24080 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
24081 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24082 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
24083 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
24084 return;
24086 case E_V4SFmode:
24087 t1 = gen_reg_rtx (V8SFmode);
24088 t2 = gen_reg_rtx (V8SImode);
24089 mask = gen_lowpart (V4SImode, mask);
24090 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
24091 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24092 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
24093 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
24094 return;
24096 case E_V32QImode:
24097 t1 = gen_reg_rtx (V32QImode);
24098 t2 = gen_reg_rtx (V32QImode);
24099 t3 = gen_reg_rtx (V32QImode);
24100 vt2 = GEN_INT (-128);
24101 for (i = 0; i < 32; i++)
24102 vec[i] = vt2;
24103 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24104 vt = force_reg (V32QImode, vt);
24105 for (i = 0; i < 32; i++)
24106 vec[i] = i < 16 ? vt2 : const0_rtx;
24107 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24108 vt2 = force_reg (V32QImode, vt2);
24109 /* From mask create two adjusted masks, which contain the same
24110 bits as mask in the low 7 bits of each vector element.
24111 The first mask will have the most significant bit clear
24112 if it requests element from the same 128-bit lane
24113 and MSB set if it requests element from the other 128-bit lane.
24114 The second mask will have the opposite values of the MSB,
24115 and additionally will have its 128-bit lanes swapped.
24116 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
24117 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
24118 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
24119 stands for other 12 bytes. */
24120 /* The bit that tells whether an element comes from the same lane or
24121 from the other lane is bit 4, so shift it up by 3 to the MSB position. */
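/* (VPSHUFB writes a zero byte whenever the MSB of the control byte is set,
   which is what makes this cross-lane trick work.) */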
24122 t5 = gen_reg_rtx (V4DImode);
24123 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
24124 GEN_INT (3)));
24125 /* Clear MSB bits from the mask just in case it had them set. */
24126 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
24127 /* After this t1 will have MSB set for elements from other lane. */
24128 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
24129 /* Clear bits other than MSB. */
24130 emit_insn (gen_andv32qi3 (t1, t1, vt));
24131 /* Or in the lower bits from mask into t3. */
24132 emit_insn (gen_iorv32qi3 (t3, t1, t2));
24133 /* And invert MSB bits in t1, so MSB is set for elements from the same
24134 lane. */
24135 emit_insn (gen_xorv32qi3 (t1, t1, vt));
24136 /* Swap 128-bit lanes in t3. */
24137 t6 = gen_reg_rtx (V4DImode);
24138 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
24139 const2_rtx, GEN_INT (3),
24140 const0_rtx, const1_rtx));
24141 /* And or in the lower bits from mask into t1. */
24142 emit_insn (gen_iorv32qi3 (t1, t1, t2));
24143 if (one_operand_shuffle)
24145 /* Each of these shuffles will put 0s in places where an
24146 element from the other 128-bit lane is needed; otherwise
24147 it will shuffle in the requested value. */
24148 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
24149 gen_lowpart (V32QImode, t6)));
24150 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
24151 /* For t3 the 128-bit lanes are swapped again. */
24152 t7 = gen_reg_rtx (V4DImode);
24153 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
24154 const2_rtx, GEN_INT (3),
24155 const0_rtx, const1_rtx));
24156 /* ORing both together produces the result. */
24157 emit_insn (gen_iorv32qi3 (target, t1,
24158 gen_lowpart (V32QImode, t7)));
24159 if (target != operands[0])
24160 emit_move_insn (operands[0],
24161 gen_lowpart (GET_MODE (operands[0]), target));
24162 return;
24165 t4 = gen_reg_rtx (V32QImode);
24166 /* Similar to the one_operand_shuffle code above,
24167 just repeated twice, once for each operand. The merge_two:
24168 code will merge the two results together. */
24169 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
24170 gen_lowpart (V32QImode, t6)));
24171 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
24172 gen_lowpart (V32QImode, t6)));
24173 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
24174 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
24175 t7 = gen_reg_rtx (V4DImode);
24176 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
24177 const2_rtx, GEN_INT (3),
24178 const0_rtx, const1_rtx));
24179 t8 = gen_reg_rtx (V4DImode);
24180 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
24181 const2_rtx, GEN_INT (3),
24182 const0_rtx, const1_rtx));
24183 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
24184 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
24185 t1 = t4;
24186 t2 = t3;
24187 goto merge_two;
24189 default:
24190 gcc_assert (GET_MODE_SIZE (mode) <= 16);
24191 break;
24195 if (TARGET_XOP)
24197 /* The XOP VPPERM insn supports three inputs. By ignoring the
24198 one_operand_shuffle special case, we avoid creating another
24199 set of constant vectors in memory. */
24200 one_operand_shuffle = false;
24202 /* mask = mask & {2*w-1, ...} */
24203 vt = GEN_INT (2*w - 1);
24205 else
24207 /* mask = mask & {w-1, ...} */
24208 vt = GEN_INT (w - 1);
24211 for (i = 0; i < w; i++)
24212 vec[i] = vt;
24213 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24214 mask = expand_simple_binop (maskmode, AND, mask, vt,
24215 NULL_RTX, 0, OPTAB_DIRECT);
24217 /* For non-QImode operations, convert the word permutation control
24218 into a byte permutation control. */
24219 if (mode != V16QImode)
24221 mask = expand_simple_binop (maskmode, ASHIFT, mask,
24222 GEN_INT (exact_log2 (e)),
24223 NULL_RTX, 0, OPTAB_DIRECT);
24225 /* Convert mask to vector of chars. */
24226 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
24228 /* Replicate each of the input bytes into byte positions:
24229 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
24230 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
24231 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
24232 for (i = 0; i < 16; ++i)
24233 vec[i] = GEN_INT (i/e * e);
24234 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24235 vt = validize_mem (force_const_mem (V16QImode, vt));
24236 if (TARGET_XOP)
24237 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
24238 else
24239 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
24241 /* Convert it into the byte positions by doing
24242 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
24243 for (i = 0; i < 16; ++i)
24244 vec[i] = GEN_INT (i % e);
24245 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24246 vt = validize_mem (force_const_mem (V16QImode, vt));
24247 emit_insn (gen_addv16qi3 (mask, mask, vt));
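/* E.g. for a V4SImode shuffle, a word index of 2 has by now been expanded
   into the byte control { 8 9 10 11 } for that element's four bytes. */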
24250 /* The actual shuffle operations all operate on V16QImode. */
24251 op0 = gen_lowpart (V16QImode, op0);
24252 op1 = gen_lowpart (V16QImode, op1);
24254 if (TARGET_XOP)
24256 if (GET_MODE (target) != V16QImode)
24257 target = gen_reg_rtx (V16QImode);
24258 emit_insn (gen_xop_pperm (target, op0, op1, mask));
24259 if (target != operands[0])
24260 emit_move_insn (operands[0],
24261 gen_lowpart (GET_MODE (operands[0]), target));
24263 else if (one_operand_shuffle)
24265 if (GET_MODE (target) != V16QImode)
24266 target = gen_reg_rtx (V16QImode);
24267 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
24268 if (target != operands[0])
24269 emit_move_insn (operands[0],
24270 gen_lowpart (GET_MODE (operands[0]), target));
24272 else
24274 rtx xops[6];
24275 bool ok;
24277 /* Shuffle the two input vectors independently. */
24278 t1 = gen_reg_rtx (V16QImode);
24279 t2 = gen_reg_rtx (V16QImode);
24280 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
24281 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
24283 merge_two:
24284 /* Then merge them together. The key is whether any given control
24285 element contained a bit set that indicates the second word. */
24286 mask = operands[3];
24287 vt = GEN_INT (w);
24288 if (maskmode == V2DImode && !TARGET_SSE4_1)
24290 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
24291 more shuffle to convert the V2DI input mask into a V4SI
24292 input mask. At that point the masking that ix86_expand_int_vcond
24293 performs will work as desired. */
24294 rtx t3 = gen_reg_rtx (V4SImode);
24295 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
24296 const0_rtx, const0_rtx,
24297 const2_rtx, const2_rtx));
24298 mask = t3;
24299 maskmode = V4SImode;
24300 e = w = 4;
24303 for (i = 0; i < w; i++)
24304 vec[i] = vt;
24305 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24306 vt = force_reg (maskmode, vt);
24307 mask = expand_simple_binop (maskmode, AND, mask, vt,
24308 NULL_RTX, 0, OPTAB_DIRECT);
24310 if (GET_MODE (target) != mode)
24311 target = gen_reg_rtx (mode);
24312 xops[0] = target;
24313 xops[1] = gen_lowpart (mode, t2);
24314 xops[2] = gen_lowpart (mode, t1);
24315 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
24316 xops[4] = mask;
24317 xops[5] = vt;
24318 ok = ix86_expand_int_vcond (xops);
24319 gcc_assert (ok);
24320 if (target != operands[0])
24321 emit_move_insn (operands[0],
24322 gen_lowpart (GET_MODE (operands[0]), target));
24326 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
24327 true if we should do zero extension, else sign extension. HIGH_P is
24328 true if we want the N/2 high elements, else the low elements. */
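/* For example, unpacking the high half of a V8HImode SRC with UNSIGNED_P
   set produces a V4SImode DEST holding elements 4..7 zero-extended. */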
24330 void
24331 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
24333 machine_mode imode = GET_MODE (src);
24334 rtx tmp;
24336 if (TARGET_SSE4_1)
24338 rtx (*unpack)(rtx, rtx);
24339 rtx (*extract)(rtx, rtx) = NULL;
24340 machine_mode halfmode = BLKmode;
24342 switch (imode)
24344 case E_V64QImode:
24345 if (unsigned_p)
24346 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
24347 else
24348 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
24349 halfmode = V32QImode;
24350 extract
24351 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
24352 break;
24353 case E_V32QImode:
24354 if (unsigned_p)
24355 unpack = gen_avx2_zero_extendv16qiv16hi2;
24356 else
24357 unpack = gen_avx2_sign_extendv16qiv16hi2;
24358 halfmode = V16QImode;
24359 extract
24360 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
24361 break;
24362 case E_V32HImode:
24363 if (unsigned_p)
24364 unpack = gen_avx512f_zero_extendv16hiv16si2;
24365 else
24366 unpack = gen_avx512f_sign_extendv16hiv16si2;
24367 halfmode = V16HImode;
24368 extract
24369 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
24370 break;
24371 case E_V16HImode:
24372 if (unsigned_p)
24373 unpack = gen_avx2_zero_extendv8hiv8si2;
24374 else
24375 unpack = gen_avx2_sign_extendv8hiv8si2;
24376 halfmode = V8HImode;
24377 extract
24378 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
24379 break;
24380 case E_V16SImode:
24381 if (unsigned_p)
24382 unpack = gen_avx512f_zero_extendv8siv8di2;
24383 else
24384 unpack = gen_avx512f_sign_extendv8siv8di2;
24385 halfmode = V8SImode;
24386 extract
24387 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
24388 break;
24389 case E_V8SImode:
24390 if (unsigned_p)
24391 unpack = gen_avx2_zero_extendv4siv4di2;
24392 else
24393 unpack = gen_avx2_sign_extendv4siv4di2;
24394 halfmode = V4SImode;
24395 extract
24396 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
24397 break;
24398 case E_V16QImode:
24399 if (unsigned_p)
24400 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
24401 else
24402 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24403 break;
24404 case E_V8HImode:
24405 if (unsigned_p)
24406 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24407 else
24408 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24409 break;
24410 case E_V4SImode:
24411 if (unsigned_p)
24412 unpack = gen_sse4_1_zero_extendv2siv2di2;
24413 else
24414 unpack = gen_sse4_1_sign_extendv2siv2di2;
24415 break;
24416 default:
24417 gcc_unreachable ();
24420 if (GET_MODE_SIZE (imode) >= 32)
24422 tmp = gen_reg_rtx (halfmode);
24423 emit_insn (extract (tmp, src));
24425 else if (high_p)
24427 /* Shift higher 8 bytes to lower 8 bytes. */
24428 tmp = gen_reg_rtx (V1TImode);
24429 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24430 GEN_INT (64)));
24431 tmp = gen_lowpart (imode, tmp);
24433 else
24434 tmp = src;
24436 emit_insn (unpack (dest, tmp));
24438 else
24440 rtx (*unpack)(rtx, rtx, rtx);
24442 switch (imode)
24444 case E_V16QImode:
24445 if (high_p)
24446 unpack = gen_vec_interleave_highv16qi;
24447 else
24448 unpack = gen_vec_interleave_lowv16qi;
24449 break;
24450 case E_V8HImode:
24451 if (high_p)
24452 unpack = gen_vec_interleave_highv8hi;
24453 else
24454 unpack = gen_vec_interleave_lowv8hi;
24455 break;
24456 case E_V4SImode:
24457 if (high_p)
24458 unpack = gen_vec_interleave_highv4si;
24459 else
24460 unpack = gen_vec_interleave_lowv4si;
24461 break;
24462 default:
24463 gcc_unreachable ();
24466 if (unsigned_p)
24467 tmp = force_reg (imode, CONST0_RTX (imode));
24468 else
24469 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24470 src, pc_rtx, pc_rtx);
24472 rtx tmp2 = gen_reg_rtx (imode);
24473 emit_insn (unpack (tmp2, src, tmp));
24474 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24478 /* Expand conditional increment or decrement using adc/sbb instructions.
24479 The default case using setcc followed by the conditional move can be
24480 done by generic code. */
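/* E.g. "x += (a < b)" for unsigned operands becomes, roughly, "cmp a, b"
   followed by "adc x, 0": the carry flag produced by the compare feeds
   the addition. */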
24481 bool
24482 ix86_expand_int_addcc (rtx operands[])
24484 enum rtx_code code = GET_CODE (operands[1]);
24485 rtx flags;
24486 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24487 rtx compare_op;
24488 rtx val = const0_rtx;
24489 bool fpcmp = false;
24490 machine_mode mode;
24491 rtx op0 = XEXP (operands[1], 0);
24492 rtx op1 = XEXP (operands[1], 1);
24494 if (operands[3] != const1_rtx
24495 && operands[3] != constm1_rtx)
24496 return false;
24497 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24498 return false;
24499 code = GET_CODE (compare_op);
24501 flags = XEXP (compare_op, 0);
24503 if (GET_MODE (flags) == CCFPmode
24504 || GET_MODE (flags) == CCFPUmode)
24506 fpcmp = true;
24507 code = ix86_fp_compare_code_to_integer (code);
24510 if (code != LTU)
24512 val = constm1_rtx;
24513 if (fpcmp)
24514 PUT_CODE (compare_op,
24515 reverse_condition_maybe_unordered
24516 (GET_CODE (compare_op)));
24517 else
24518 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24521 mode = GET_MODE (operands[0]);
24523 /* Construct either adc or sbb insn. */
24524 if ((code == LTU) == (operands[3] == constm1_rtx))
24526 switch (mode)
24528 case E_QImode:
24529 insn = gen_subqi3_carry;
24530 break;
24531 case E_HImode:
24532 insn = gen_subhi3_carry;
24533 break;
24534 case E_SImode:
24535 insn = gen_subsi3_carry;
24536 break;
24537 case E_DImode:
24538 insn = gen_subdi3_carry;
24539 break;
24540 default:
24541 gcc_unreachable ();
24544 else
24546 switch (mode)
24548 case E_QImode:
24549 insn = gen_addqi3_carry;
24550 break;
24551 case E_HImode:
24552 insn = gen_addhi3_carry;
24553 break;
24554 case E_SImode:
24555 insn = gen_addsi3_carry;
24556 break;
24557 case E_DImode:
24558 insn = gen_adddi3_carry;
24559 break;
24560 default:
24561 gcc_unreachable ();
24564 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24566 return true;
24570 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24571 but works for floating point parameters and non-offsettable memories.
24572 For pushes, it returns just stack offsets; the values will be saved
24573 in the right order. At most four parts are generated. */
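/* For example, on a 32-bit target a DFmode value is split into two SImode
   parts and an XFmode value into three, while a TFmode value needs four. */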
24575 static int
24576 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24578 int size;
24580 if (!TARGET_64BIT)
24581 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24582 else
24583 size = (GET_MODE_SIZE (mode) + 4) / 8;
24585 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24586 gcc_assert (size >= 2 && size <= 4);
24588 /* Optimize constant pool reference to immediates. This is used by fp
24589 moves, that force all constants to memory to allow combining. */
24590 if (MEM_P (operand) && MEM_READONLY_P (operand))
24592 rtx tmp = maybe_get_pool_constant (operand);
24593 if (tmp)
24594 operand = tmp;
24597 if (MEM_P (operand) && !offsettable_memref_p (operand))
24599 /* The only non-offsettable memories we handle are pushes. */
24600 int ok = push_operand (operand, VOIDmode);
24602 gcc_assert (ok);
24604 operand = copy_rtx (operand);
24605 PUT_MODE (operand, word_mode);
24606 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24607 return size;
24610 if (GET_CODE (operand) == CONST_VECTOR)
24612 scalar_int_mode imode = int_mode_for_mode (mode).require ();
24613 /* Caution: if we looked through a constant pool memory above,
24614 the operand may actually have a different mode now. That's
24615 ok, since we want to pun this all the way back to an integer. */
24616 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24617 gcc_assert (operand != NULL);
24618 mode = imode;
24621 if (!TARGET_64BIT)
24623 if (mode == DImode)
24624 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24625 else
24627 int i;
24629 if (REG_P (operand))
24631 gcc_assert (reload_completed);
24632 for (i = 0; i < size; i++)
24633 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24635 else if (offsettable_memref_p (operand))
24637 operand = adjust_address (operand, SImode, 0);
24638 parts[0] = operand;
24639 for (i = 1; i < size; i++)
24640 parts[i] = adjust_address (operand, SImode, 4 * i);
24642 else if (CONST_DOUBLE_P (operand))
24644 const REAL_VALUE_TYPE *r;
24645 long l[4];
24647 r = CONST_DOUBLE_REAL_VALUE (operand);
24648 switch (mode)
24650 case E_TFmode:
24651 real_to_target (l, r, mode);
24652 parts[3] = gen_int_mode (l[3], SImode);
24653 parts[2] = gen_int_mode (l[2], SImode);
24654 break;
24655 case E_XFmode:
24656 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24657 long double may not be 80-bit. */
24658 real_to_target (l, r, mode);
24659 parts[2] = gen_int_mode (l[2], SImode);
24660 break;
24661 case E_DFmode:
24662 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24663 break;
24664 default:
24665 gcc_unreachable ();
24667 parts[1] = gen_int_mode (l[1], SImode);
24668 parts[0] = gen_int_mode (l[0], SImode);
24670 else
24671 gcc_unreachable ();
24674 else
24676 if (mode == TImode)
24677 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24678 if (mode == XFmode || mode == TFmode)
24680 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24681 if (REG_P (operand))
24683 gcc_assert (reload_completed);
24684 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24685 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24687 else if (offsettable_memref_p (operand))
24689 operand = adjust_address (operand, DImode, 0);
24690 parts[0] = operand;
24691 parts[1] = adjust_address (operand, upper_mode, 8);
24693 else if (CONST_DOUBLE_P (operand))
24695 long l[4];
24697 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24699 /* real_to_target puts 32-bit pieces in each long. */
24700 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
24701 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
24702 << 32), DImode);
24704 if (upper_mode == SImode)
24705 parts[1] = gen_int_mode (l[2], SImode);
24706 else
24707 parts[1]
24708 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
24709 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
24710 << 32), DImode);
24712 else
24713 gcc_unreachable ();
24717 return size;
24720 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24721 The value is split into at most four half-mode parts; operands 2-5
24722 receive the destination parts and operands 6-9 the source parts, and
24723 the moves are emitted in an order that avoids clobbering sources. */
24725 void
24726 ix86_split_long_move (rtx operands[])
24728 rtx part[2][4];
24729 int nparts, i, j;
24730 int push = 0;
24731 int collisions = 0;
24732 machine_mode mode = GET_MODE (operands[0]);
24733 bool collisionparts[4];
24735 /* The DFmode expanders may ask us to move a double.
24736 For a 64-bit target this is a single move. By hiding that fact
24737 here we simplify the i386.md splitters. */
24738 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24740 /* Optimize constant pool reference to immediates. This is used by
24741 fp moves, that force all constants to memory to allow combining. */
24743 if (MEM_P (operands[1])
24744 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24745 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24746 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24747 if (push_operand (operands[0], VOIDmode))
24749 operands[0] = copy_rtx (operands[0]);
24750 PUT_MODE (operands[0], word_mode);
24752 else
24753 operands[0] = gen_lowpart (DImode, operands[0]);
24754 operands[1] = gen_lowpart (DImode, operands[1]);
24755 emit_move_insn (operands[0], operands[1]);
24756 return;
24759 /* The only non-offsettable memory we handle is push. */
24760 if (push_operand (operands[0], VOIDmode))
24761 push = 1;
24762 else
24763 gcc_assert (!MEM_P (operands[0])
24764 || offsettable_memref_p (operands[0]));
24766 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24767 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24769 /* When emitting push, take care for source operands on the stack. */
24770 if (push && MEM_P (operands[1])
24771 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24773 rtx src_base = XEXP (part[1][nparts - 1], 0);
24775 /* Compensate for the stack decrement by 4. */
24776 if (!TARGET_64BIT && nparts == 3
24777 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24778 src_base = plus_constant (Pmode, src_base, 4);
24780 /* src_base refers to the stack pointer and is
24781 automatically decreased by emitted push. */
24782 for (i = 0; i < nparts; i++)
24783 part[1][i] = change_address (part[1][i],
24784 GET_MODE (part[1][i]), src_base);
24787 /* We need to do the copy in the right order in case an address register
24788 of the source overlaps the destination. */
24789 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24791 rtx tmp;
24793 for (i = 0; i < nparts; i++)
24795 collisionparts[i]
24796 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24797 if (collisionparts[i])
24798 collisions++;
24801 /* Collision in the middle part can be handled by reordering. */
24802 if (collisions == 1 && nparts == 3 && collisionparts [1])
24804 std::swap (part[0][1], part[0][2]);
24805 std::swap (part[1][1], part[1][2]);
24807 else if (collisions == 1
24808 && nparts == 4
24809 && (collisionparts [1] || collisionparts [2]))
24811 if (collisionparts [1])
24813 std::swap (part[0][1], part[0][2]);
24814 std::swap (part[1][1], part[1][2]);
24816 else
24818 std::swap (part[0][2], part[0][3]);
24819 std::swap (part[1][2], part[1][3]);
24823 /* If there are more collisions, we can't handle it by reordering.
24824 Do an lea to the last part and use only one colliding move. */
24825 else if (collisions > 1)
24827 rtx base, addr;
24829 collisions = 1;
24831 base = part[0][nparts - 1];
24833 /* Handle the case when the last part isn't valid for lea.
24834 Happens in 64-bit mode storing the 12-byte XFmode. */
24835 if (GET_MODE (base) != Pmode)
24836 base = gen_rtx_REG (Pmode, REGNO (base));
24838 addr = XEXP (part[1][0], 0);
24839 if (TARGET_TLS_DIRECT_SEG_REFS)
24841 struct ix86_address parts;
24842 int ok = ix86_decompose_address (addr, &parts);
24843 gcc_assert (ok);
24844 /* It is not valid to use %gs: or %fs: in lea. */
24845 gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
24847 emit_insn (gen_rtx_SET (base, addr));
24848 part[1][0] = replace_equiv_address (part[1][0], base);
24849 for (i = 1; i < nparts; i++)
24851 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24852 part[1][i] = replace_equiv_address (part[1][i], tmp);
24857 if (push)
24859 if (!TARGET_64BIT)
24861 if (nparts == 3)
24863 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24864 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24865 stack_pointer_rtx, GEN_INT (-4)));
24866 emit_move_insn (part[0][2], part[1][2]);
24868 else if (nparts == 4)
24870 emit_move_insn (part[0][3], part[1][3]);
24871 emit_move_insn (part[0][2], part[1][2]);
24874 else
24876 /* In 64-bit mode we don't have a 32-bit push available. If this is a
24877 register, that is OK - we will just use the larger counterpart. We also
24878 retype memory - this comes from an attempt to avoid the REX prefix on
24879 moving the second half of a TFmode value. */
24880 if (GET_MODE (part[1][1]) == SImode)
24882 switch (GET_CODE (part[1][1]))
24884 case MEM:
24885 part[1][1] = adjust_address (part[1][1], DImode, 0);
24886 break;
24888 case REG:
24889 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24890 break;
24892 default:
24893 gcc_unreachable ();
24896 if (GET_MODE (part[1][0]) == SImode)
24897 part[1][0] = part[1][1];
24900 emit_move_insn (part[0][1], part[1][1]);
24901 emit_move_insn (part[0][0], part[1][0]);
24902 return;
24905 /* Choose correct order to not overwrite the source before it is copied. */
24906 if ((REG_P (part[0][0])
24907 && REG_P (part[1][1])
24908 && (REGNO (part[0][0]) == REGNO (part[1][1])
24909 || (nparts == 3
24910 && REGNO (part[0][0]) == REGNO (part[1][2]))
24911 || (nparts == 4
24912 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24913 || (collisions > 0
24914 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
24916 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24918 operands[2 + i] = part[0][j];
24919 operands[6 + i] = part[1][j];
24922 else
24924 for (i = 0; i < nparts; i++)
24926 operands[2 + i] = part[0][i];
24927 operands[6 + i] = part[1][i];
24931 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24932 if (optimize_insn_for_size_p ())
24934 for (j = 0; j < nparts - 1; j++)
24935 if (CONST_INT_P (operands[6 + j])
24936 && operands[6 + j] != const0_rtx
24937 && REG_P (operands[2 + j]))
24938 for (i = j; i < nparts - 1; i++)
24939 if (CONST_INT_P (operands[7 + i])
24940 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24941 operands[7 + i] = operands[2 + j];
24944 for (i = 0; i < nparts; i++)
24945 emit_move_insn (operands[2 + i], operands[6 + i]);
24947 return;
24950 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24951 left shift by a constant, either using a single shift or
24952 a sequence of add instructions. */
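/* E.g. a shift count of 2 is emitted as two self-additions when two adds
   are cheaper than one constant shift (and we are not optimizing for size);
   otherwise a single shift instruction is used. */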
24954 static void
24955 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24957 rtx (*insn)(rtx, rtx, rtx);
24959 if (count == 1
24960 || (count * ix86_cost->add <= ix86_cost->shift_const
24961 && !optimize_insn_for_size_p ()))
24963 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24964 while (count-- > 0)
24965 emit_insn (insn (operand, operand, operand));
24967 else
24969 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24970 emit_insn (insn (operand, operand, GEN_INT (count)));
24974 void
24975 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24977 rtx (*gen_ashl3)(rtx, rtx, rtx);
24978 rtx (*gen_shld)(rtx, rtx, rtx);
24979 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24981 rtx low[2], high[2];
24982 int count;
24984 if (CONST_INT_P (operands[2]))
24986 split_double_mode (mode, operands, 2, low, high);
24987 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24989 if (count >= half_width)
24991 emit_move_insn (high[0], low[1]);
24992 emit_move_insn (low[0], const0_rtx);
24994 if (count > half_width)
24995 ix86_expand_ashl_const (high[0], count - half_width, mode);
24997 else
24999 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25001 if (!rtx_equal_p (operands[0], operands[1]))
25002 emit_move_insn (operands[0], operands[1]);
25004 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
25005 ix86_expand_ashl_const (low[0], count, mode);
25007 return;
25010 split_double_mode (mode, operands, 1, low, high);
25012 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25014 if (operands[1] == const1_rtx)
25016 /* Assuming we've chosen QImode-capable registers, then 1 << N
25017 can be done with two 32/64-bit shifts, no branches, no cmoves. */
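/* Concretely, for DImode on a 32-bit target the sequence below clears both
   halves, tests bit 5 of the shift count, sets the low half when that bit
   is clear and the high half when it is set, and then shifts both halves
   left by the count (mod 32). */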
25018 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
25020 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25022 ix86_expand_clear (low[0]);
25023 ix86_expand_clear (high[0]);
25024 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
25026 d = gen_lowpart (QImode, low[0]);
25027 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25028 s = gen_rtx_EQ (QImode, flags, const0_rtx);
25029 emit_insn (gen_rtx_SET (d, s));
25031 d = gen_lowpart (QImode, high[0]);
25032 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25033 s = gen_rtx_NE (QImode, flags, const0_rtx);
25034 emit_insn (gen_rtx_SET (d, s));
25037 /* Otherwise, we can get the same results by manually performing
25038 a bit extract operation on bit 5/6, and then performing the two
25039 shifts. The two methods of getting 0/1 into low/high are exactly
25040 the same size. Avoiding the shift in the bit extract case helps
25041 pentium4 a bit; no one else seems to care much either way. */
25042 else
25044 machine_mode half_mode;
25045 rtx (*gen_lshr3)(rtx, rtx, rtx);
25046 rtx (*gen_and3)(rtx, rtx, rtx);
25047 rtx (*gen_xor3)(rtx, rtx, rtx);
25048 HOST_WIDE_INT bits;
25049 rtx x;
25051 if (mode == DImode)
25053 half_mode = SImode;
25054 gen_lshr3 = gen_lshrsi3;
25055 gen_and3 = gen_andsi3;
25056 gen_xor3 = gen_xorsi3;
25057 bits = 5;
25059 else
25061 half_mode = DImode;
25062 gen_lshr3 = gen_lshrdi3;
25063 gen_and3 = gen_anddi3;
25064 gen_xor3 = gen_xordi3;
25065 bits = 6;
25068 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
25069 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
25070 else
25071 x = gen_lowpart (half_mode, operands[2]);
25072 emit_insn (gen_rtx_SET (high[0], x));
25074 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
25075 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
25076 emit_move_insn (low[0], high[0]);
25077 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
25080 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25081 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
25082 return;
25085 if (operands[1] == constm1_rtx)
25087 /* For -1 << N, we can avoid the shld instruction, because we
25088 know that we're shifting 0...31/63 ones into a -1. */
25089 emit_move_insn (low[0], constm1_rtx);
25090 if (optimize_insn_for_size_p ())
25091 emit_move_insn (high[0], low[0]);
25092 else
25093 emit_move_insn (high[0], constm1_rtx);
25095 else
25097 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25099 if (!rtx_equal_p (operands[0], operands[1]))
25100 emit_move_insn (operands[0], operands[1]);
25102 split_double_mode (mode, operands, 1, low, high);
25103 emit_insn (gen_shld (high[0], low[0], operands[2]));
25106 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25108 if (TARGET_CMOVE && scratch)
25110 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25111 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25113 ix86_expand_clear (scratch);
25114 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
25116 else
25118 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25119 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25121 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
25125 void
25126 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
25128 rtx (*gen_ashr3)(rtx, rtx, rtx)
25129 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
25130 rtx (*gen_shrd)(rtx, rtx, rtx);
25131 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25133 rtx low[2], high[2];
25134 int count;
25136 if (CONST_INT_P (operands[2]))
25138 split_double_mode (mode, operands, 2, low, high);
25139 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25141 if (count == GET_MODE_BITSIZE (mode) - 1)
25143 emit_move_insn (high[0], high[1]);
25144 emit_insn (gen_ashr3 (high[0], high[0],
25145 GEN_INT (half_width - 1)));
25146 emit_move_insn (low[0], high[0]);
25149 else if (count >= half_width)
25151 emit_move_insn (low[0], high[1]);
25152 emit_move_insn (high[0], low[0]);
25153 emit_insn (gen_ashr3 (high[0], high[0],
25154 GEN_INT (half_width - 1)));
25156 if (count > half_width)
25157 emit_insn (gen_ashr3 (low[0], low[0],
25158 GEN_INT (count - half_width)));
25160 else
25162 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25164 if (!rtx_equal_p (operands[0], operands[1]))
25165 emit_move_insn (operands[0], operands[1]);
25167 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25168 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
25171 else
25173 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25175 if (!rtx_equal_p (operands[0], operands[1]))
25176 emit_move_insn (operands[0], operands[1]);
25178 split_double_mode (mode, operands, 1, low, high);
25180 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25181 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
25183 if (TARGET_CMOVE && scratch)
25185 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25186 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25188 emit_move_insn (scratch, high[0]);
25189 emit_insn (gen_ashr3 (scratch, scratch,
25190 GEN_INT (half_width - 1)));
25191 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25192 scratch));
25194 else
25196 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
25197 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
25199 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
25204 void
25205 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
25207 rtx (*gen_lshr3)(rtx, rtx, rtx)
25208 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
25209 rtx (*gen_shrd)(rtx, rtx, rtx);
25210 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25212 rtx low[2], high[2];
25213 int count;
25215 if (CONST_INT_P (operands[2]))
25217 split_double_mode (mode, operands, 2, low, high);
25218 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25220 if (count >= half_width)
25222 emit_move_insn (low[0], high[1]);
25223 ix86_expand_clear (high[0]);
25225 if (count > half_width)
25226 emit_insn (gen_lshr3 (low[0], low[0],
25227 GEN_INT (count - half_width)));
25229 else
25231 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25233 if (!rtx_equal_p (operands[0], operands[1]))
25234 emit_move_insn (operands[0], operands[1]);
25236 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25237 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
25240 else
25242 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25244 if (!rtx_equal_p (operands[0], operands[1]))
25245 emit_move_insn (operands[0], operands[1]);
25247 split_double_mode (mode, operands, 1, low, high);
25249 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25250 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
25252 if (TARGET_CMOVE && scratch)
25254 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25255 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25257 ix86_expand_clear (scratch);
25258 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25259 scratch));
25261 else
25263 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25264 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25266 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
25271 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
25272 static void
25273 predict_jump (int prob)
25275 rtx_insn *insn = get_last_insn ();
25276 gcc_assert (JUMP_P (insn));
25277 add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
25280 /* Helper function for the string operations below. Test whether VARIABLE
25281 is aligned to VALUE bytes. If it is, jump to the returned label. */
25282 static rtx_code_label *
25283 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
25285 rtx_code_label *label = gen_label_rtx ();
25286 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
25287 if (GET_MODE (variable) == DImode)
25288 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
25289 else
25290 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
25291 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
25292 1, label);
25293 if (epilogue)
25294 predict_jump (REG_BR_PROB_BASE * 50 / 100);
25295 else
25296 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25297 return label;
25300 /* Decrease COUNTREG by VALUE. */
25301 static void
25302 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
25304 rtx (*gen_add)(rtx, rtx, rtx)
25305 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
25307 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
25310 /* Zero extend a possibly SImode EXP to a Pmode register. */
25311 rtx
25312 ix86_zero_extend_to_Pmode (rtx exp)
25314 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
25317 /* Divide COUNTREG by SCALE. */
25318 static rtx
25319 scale_counter (rtx countreg, int scale)
25321 rtx sc;
25323 if (scale == 1)
25324 return countreg;
25325 if (CONST_INT_P (countreg))
25326 return GEN_INT (INTVAL (countreg) / scale);
25327 gcc_assert (REG_P (countreg));
25329 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
25330 GEN_INT (exact_log2 (scale)),
25331 NULL, 1, OPTAB_DIRECT);
25332 return sc;
25335 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
25336 DImode for constant loop counts. */
25338 static machine_mode
25339 counter_mode (rtx count_exp)
25341 if (GET_MODE (count_exp) != VOIDmode)
25342 return GET_MODE (count_exp);
25343 if (!CONST_INT_P (count_exp))
25344 return Pmode;
25345 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
25346 return DImode;
25347 return SImode;
25350 /* Copy the address to a Pmode register. This is used for x32 to
25351 truncate DImode TLS address to a SImode register. */
25353 static rtx
25354 ix86_copy_addr_to_reg (rtx addr)
25356 rtx reg;
25357 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
25359 reg = copy_addr_to_reg (addr);
25360 REG_POINTER (reg) = 1;
25361 return reg;
25363 else
25365 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
25366 reg = copy_to_mode_reg (DImode, addr);
25367 REG_POINTER (reg) = 1;
25368 return gen_rtx_SUBREG (SImode, reg, 0);
25372 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
25373 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
25374 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
25375 equivalent loop to set memory to VALUE (expected to be in MODE).
25377 The size is rounded down to a whole number of chunks moved at once.
25378 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
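/* A sketch of the generated code (the size == 0 test is only emitted for
   single-byte chunks):
     size = count & -(UNROLL * MODE size);
     if (size == 0) goto out;
     iter = 0;
   top:
     copy (or set) UNROLL chunks of MODE at dest + iter (and src + iter);
     iter += UNROLL * MODE size;
     if (iter < size) goto top;
     dest += iter;  if (!ISSETMEM) src += iter;
   out: */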
25381 static void
25382 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25383 rtx destptr, rtx srcptr, rtx value,
25384 rtx count, machine_mode mode, int unroll,
25385 int expected_size, bool issetmem)
25387 rtx_code_label *out_label, *top_label;
25388 rtx iter, tmp;
25389 machine_mode iter_mode = counter_mode (count);
25390 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25391 rtx piece_size = GEN_INT (piece_size_n);
25392 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25393 rtx size;
25394 int i;
25396 top_label = gen_label_rtx ();
25397 out_label = gen_label_rtx ();
25398 iter = gen_reg_rtx (iter_mode);
25400 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25401 NULL, 1, OPTAB_DIRECT);
25402 /* Those two should combine. */
25403 if (piece_size == const1_rtx)
25405 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25406 true, out_label);
25407 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25409 emit_move_insn (iter, const0_rtx);
25411 emit_label (top_label);
25413 tmp = convert_modes (Pmode, iter_mode, iter, true);
25415 /* This assert could be relaxed - in that case we'll need to compute
25416 the smallest power of two containing PIECE_SIZE_N and pass it to
25417 offset_address. */
25418 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25419 destmem = offset_address (destmem, tmp, piece_size_n);
25420 destmem = adjust_address (destmem, mode, 0);
25422 if (!issetmem)
25424 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25425 srcmem = adjust_address (srcmem, mode, 0);
25427 /* When unrolling for chips that reorder memory reads and writes,
25428 we can save registers by using a single temporary.
25429 Also, using 4 temporaries is overkill in 32-bit mode. */
25430 if (!TARGET_64BIT && 0)
25432 for (i = 0; i < unroll; i++)
25434 if (i)
25436 destmem =
25437 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25438 srcmem =
25439 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25441 emit_move_insn (destmem, srcmem);
25444 else
25446 rtx tmpreg[4];
25447 gcc_assert (unroll <= 4);
25448 for (i = 0; i < unroll; i++)
25450 tmpreg[i] = gen_reg_rtx (mode);
25451 if (i)
25453 srcmem =
25454 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25456 emit_move_insn (tmpreg[i], srcmem);
25458 for (i = 0; i < unroll; i++)
25460 if (i)
25462 destmem =
25463 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25465 emit_move_insn (destmem, tmpreg[i]);
25469 else
25470 for (i = 0; i < unroll; i++)
25472 if (i)
25473 destmem =
25474 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25475 emit_move_insn (destmem, value);
25478 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25479 true, OPTAB_LIB_WIDEN);
25480 if (tmp != iter)
25481 emit_move_insn (iter, tmp);
25483 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
25484 true, top_label);
25485 if (expected_size != -1)
25487 expected_size /= GET_MODE_SIZE (mode) * unroll;
25488 if (expected_size == 0)
25489 predict_jump (0);
25490 else if (expected_size > REG_BR_PROB_BASE)
25491 predict_jump (REG_BR_PROB_BASE - 1);
25492 else
25493 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25495 else
25496 predict_jump (REG_BR_PROB_BASE * 80 / 100);
25497 iter = ix86_zero_extend_to_Pmode (iter);
25498 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25499 true, OPTAB_LIB_WIDEN);
25500 if (tmp != destptr)
25501 emit_move_insn (destptr, tmp);
25502 if (!issetmem)
25504 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25505 true, OPTAB_LIB_WIDEN);
25506 if (tmp != srcptr)
25507 emit_move_insn (srcptr, tmp);
25509 emit_label (out_label);
25512 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25513 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25514 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25515 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25516 ORIG_VALUE is the original value passed to memset to fill the memory with.
25517 Other arguments have same meaning as for previous function. */
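/* As a sketch, for a zeroing memset whose count is a multiple of 4 the
   expansion produced below amounts to (register names illustrative):

     ecx = count >> 2;        // count scaled by GET_MODE_SIZE (SImode)
     eax = 0;                 // promoted VALUE
     rep stosl                // advances edi by 4 * ecx

   and analogously "rep movs{b,l,q}" for the memcpy case, with esi, edi and
   the scaled count wired up from SRCPTR, DESTPTR and COUNT.  */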
25519 static void
25520 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25521 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25522 rtx count,
25523 machine_mode mode, bool issetmem)
25525 rtx destexp;
25526 rtx srcexp;
25527 rtx countreg;
25528 HOST_WIDE_INT rounded_count;
25530 /* If possible, it is shorter to use rep movs.
25531 TODO: Maybe it is better to move this logic to decide_alg. */
25532 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25533 && (!issetmem || orig_value == const0_rtx))
25534 mode = SImode;
25536 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25537 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
25539 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25540 GET_MODE_SIZE (mode)));
25541 if (mode != QImode)
25543 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25544 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25545 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25547 else
25548 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25549 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25551 rounded_count
25552 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25553 destmem = shallow_copy_rtx (destmem);
25554 set_mem_size (destmem, rounded_count);
25556 else if (MEM_SIZE_KNOWN_P (destmem))
25557 clear_mem_size (destmem);
25559 if (issetmem)
25561 value = force_reg (mode, gen_lowpart (mode, value));
25562 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25564 else
25566 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25567 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25568 if (mode != QImode)
25570 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25571 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25572 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25574 else
25575 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25576 if (CONST_INT_P (count))
25578 rounded_count
25579 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25580 srcmem = shallow_copy_rtx (srcmem);
25581 set_mem_size (srcmem, rounded_count);
25583 else
25585 if (MEM_SIZE_KNOWN_P (srcmem))
25586 clear_mem_size (srcmem);
25588 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25589 destexp, srcexp));
25593 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25594 DESTMEM.
25595 SRCMEM is passed by pointer so it can be updated on return.
25596 The return value is the updated DESTMEM. */
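/* For example (illustrative only): with SIZE_TO_MOVE == 16 on a 64-bit SSE
   target the code below picks TImode, switches to the corresponding
   V2DImode vector move, and emits one load/store pair through a temporary
   register; with SIZE_TO_MOVE == 4 it emits a single SImode load/store
   pair.  DESTPTR and SRCPTR are advanced by the piece size after each
   move.  */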
25597 static rtx
25598 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25599 HOST_WIDE_INT size_to_move)
25601 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25602 enum insn_code code;
25603 machine_mode move_mode;
25604 int piece_size, i;
25606 /* Find the widest mode in which we could perform moves.
25607 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
25608 it until a move of that size is supported. */
25609 piece_size = 1 << floor_log2 (size_to_move);
25610 while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
25611 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25613 gcc_assert (piece_size > 1);
25614 piece_size >>= 1;
25617 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25618 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25619 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25621 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25622 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
25623 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25625 move_mode = word_mode;
25626 piece_size = GET_MODE_SIZE (move_mode);
25627 code = optab_handler (mov_optab, move_mode);
25630 gcc_assert (code != CODE_FOR_nothing);
25632 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25633 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25635 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25636 gcc_assert (size_to_move % piece_size == 0);
25637 adjust = GEN_INT (piece_size);
25638 for (i = 0; i < size_to_move; i += piece_size)
25640 /* We move from memory to memory, so we'll need to do it via
25641 a temporary register. */
25642 tempreg = gen_reg_rtx (move_mode);
25643 emit_insn (GEN_FCN (code) (tempreg, src));
25644 emit_insn (GEN_FCN (code) (dst, tempreg));
25646 emit_move_insn (destptr,
25647 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25648 emit_move_insn (srcptr,
25649 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25651 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25652 piece_size);
25653 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25654 piece_size);
25657 /* Update DST and SRC rtx. */
25658 *srcmem = src;
25659 return dst;
25662 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
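/* For a constant COUNT the epilogue below is a straight-line bit
   decomposition: e.g. (illustrative) COUNT == 23 with MAX_SIZE == 16 leaves
   23 % 16 == 7 bytes, handled by a 4-byte, a 2-byte and a 1-byte move via
   emit_memmov.  For a variable COUNT, MAX_SIZE > 8 falls back to a byte
   loop, while smaller MAX_SIZE values use a jump tree testing the
   individual count bits.  */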
25663 static void
25664 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25665 rtx destptr, rtx srcptr, rtx count, int max_size)
25667 rtx src, dest;
25668 if (CONST_INT_P (count))
25670 HOST_WIDE_INT countval = INTVAL (count);
25671 HOST_WIDE_INT epilogue_size = countval % max_size;
25672 int i;
25674 /* For now MAX_SIZE should be a power of 2. This assert could be
25675 relaxed, but it'll require a bit more complicated epilogue
25676 expansion. */
25677 gcc_assert ((max_size & (max_size - 1)) == 0);
25678 for (i = max_size; i >= 1; i >>= 1)
25680 if (epilogue_size & i)
25681 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25683 return;
25685 if (max_size > 8)
25687 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25688 count, 1, OPTAB_DIRECT);
25689 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25690 count, QImode, 1, 4, false);
25691 return;
25694 /* When single string operations are available, we can cheaply advance the
25695 dest and src pointers. Otherwise we save code size by maintaining an offset
25696 (zero is readily available from the preceding rep operation) and using x86 addressing modes.
25698 if (TARGET_SINGLE_STRINGOP)
25700 if (max_size > 4)
25702 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25703 src = change_address (srcmem, SImode, srcptr);
25704 dest = change_address (destmem, SImode, destptr);
25705 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25706 emit_label (label);
25707 LABEL_NUSES (label) = 1;
25709 if (max_size > 2)
25711 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25712 src = change_address (srcmem, HImode, srcptr);
25713 dest = change_address (destmem, HImode, destptr);
25714 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25715 emit_label (label);
25716 LABEL_NUSES (label) = 1;
25718 if (max_size > 1)
25720 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25721 src = change_address (srcmem, QImode, srcptr);
25722 dest = change_address (destmem, QImode, destptr);
25723 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25724 emit_label (label);
25725 LABEL_NUSES (label) = 1;
25728 else
25730 rtx offset = force_reg (Pmode, const0_rtx);
25731 rtx tmp;
25733 if (max_size > 4)
25735 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25736 src = change_address (srcmem, SImode, srcptr);
25737 dest = change_address (destmem, SImode, destptr);
25738 emit_move_insn (dest, src);
25739 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25740 true, OPTAB_LIB_WIDEN);
25741 if (tmp != offset)
25742 emit_move_insn (offset, tmp);
25743 emit_label (label);
25744 LABEL_NUSES (label) = 1;
25746 if (max_size > 2)
25748 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25749 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25750 src = change_address (srcmem, HImode, tmp);
25751 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25752 dest = change_address (destmem, HImode, tmp);
25753 emit_move_insn (dest, src);
25754 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25755 true, OPTAB_LIB_WIDEN);
25756 if (tmp != offset)
25757 emit_move_insn (offset, tmp);
25758 emit_label (label);
25759 LABEL_NUSES (label) = 1;
25761 if (max_size > 1)
25763 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25764 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25765 src = change_address (srcmem, QImode, tmp);
25766 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25767 dest = change_address (destmem, QImode, tmp);
25768 emit_move_insn (dest, src);
25769 emit_label (label);
25770 LABEL_NUSES (label) = 1;
25775 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
25776 with the value PROMOTED_VAL.
25778 The return value is the updated DESTMEM. */
25779 static rtx
25780 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25781 HOST_WIDE_INT size_to_move)
25783 rtx dst = destmem, adjust;
25784 enum insn_code code;
25785 machine_mode move_mode;
25786 int piece_size, i;
25788 /* Find the widest mode in which we could perform moves.
25789 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
25790 it until a move of that size is supported. */
25791 move_mode = GET_MODE (promoted_val);
25792 if (move_mode == VOIDmode)
25793 move_mode = QImode;
25794 if (size_to_move < GET_MODE_SIZE (move_mode))
25796 unsigned int move_bits = size_to_move * BITS_PER_UNIT;
25797 move_mode = int_mode_for_size (move_bits, 0).require ();
25798 promoted_val = gen_lowpart (move_mode, promoted_val);
25800 piece_size = GET_MODE_SIZE (move_mode);
25801 code = optab_handler (mov_optab, move_mode);
25802 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25804 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25806 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25807 gcc_assert (size_to_move % piece_size == 0);
25808 adjust = GEN_INT (piece_size);
25809 for (i = 0; i < size_to_move; i += piece_size)
25811 if (piece_size <= GET_MODE_SIZE (word_mode))
25813 emit_insn (gen_strset (destptr, dst, promoted_val));
25814 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25815 piece_size);
25816 continue;
25819 emit_insn (GEN_FCN (code) (dst, promoted_val));
25821 emit_move_insn (destptr,
25822 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25824 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25825 piece_size);
25828 /* Update DST rtx. */
25829 return dst;
25831 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
25832 static void
25833 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25834 rtx count, int max_size)
25836 count =
25837 expand_simple_binop (counter_mode (count), AND, count,
25838 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25839 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25840 gen_lowpart (QImode, value), count, QImode,
25841 1, max_size / 2, true);
25844 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
25845 static void
25846 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25847 rtx count, int max_size)
25849 rtx dest;
25851 if (CONST_INT_P (count))
25853 HOST_WIDE_INT countval = INTVAL (count);
25854 HOST_WIDE_INT epilogue_size = countval % max_size;
25855 int i;
25857 /* For now MAX_SIZE should be a power of 2. This assert could be
25858 relaxed, but it'll require a bit more complicated epilogue
25859 expansion. */
25860 gcc_assert ((max_size & (max_size - 1)) == 0);
25861 for (i = max_size; i >= 1; i >>= 1)
25863 if (epilogue_size & i)
25865 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25866 destmem = emit_memset (destmem, destptr, vec_value, i);
25867 else
25868 destmem = emit_memset (destmem, destptr, value, i);
25871 return;
25873 if (max_size > 32)
25875 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
25876 return;
25878 if (max_size > 16)
25880 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25881 if (TARGET_64BIT)
25883 dest = change_address (destmem, DImode, destptr);
25884 emit_insn (gen_strset (destptr, dest, value));
25885 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25886 emit_insn (gen_strset (destptr, dest, value));
25888 else
25890 dest = change_address (destmem, SImode, destptr);
25891 emit_insn (gen_strset (destptr, dest, value));
25892 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25893 emit_insn (gen_strset (destptr, dest, value));
25894 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25895 emit_insn (gen_strset (destptr, dest, value));
25896 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25897 emit_insn (gen_strset (destptr, dest, value));
25899 emit_label (label);
25900 LABEL_NUSES (label) = 1;
25902 if (max_size > 8)
25904 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25905 if (TARGET_64BIT)
25907 dest = change_address (destmem, DImode, destptr);
25908 emit_insn (gen_strset (destptr, dest, value));
25910 else
25912 dest = change_address (destmem, SImode, destptr);
25913 emit_insn (gen_strset (destptr, dest, value));
25914 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25915 emit_insn (gen_strset (destptr, dest, value));
25917 emit_label (label);
25918 LABEL_NUSES (label) = 1;
25920 if (max_size > 4)
25922 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25923 dest = change_address (destmem, SImode, destptr);
25924 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25925 emit_label (label);
25926 LABEL_NUSES (label) = 1;
25928 if (max_size > 2)
25930 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25931 dest = change_address (destmem, HImode, destptr);
25932 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25933 emit_label (label);
25934 LABEL_NUSES (label) = 1;
25936 if (max_size > 1)
25938 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25939 dest = change_address (destmem, QImode, destptr);
25940 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25941 emit_label (label);
25942 LABEL_NUSES (label) = 1;
25946 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
25947 enough bytes to DESTMEM, to align it to DESIRED_ALIGNMENT.  The original
25948 alignment is ALIGN.  Depending on ISSETMEM, either the arguments
25949 SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
25950 The return value is the updated DESTMEM. */
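/* Illustration: with ALIGN == 1 and DESIRED_ALIGNMENT == 16 the loop below
   emits four conditional blocks testing bits 0, 1, 2 and 3 of DESTPTR and
   copying (or storing) 1, 2, 4 and 8 bytes respectively, so that DESTPTR is
   16-byte aligned when the main loop is reached; COUNT is decreased
   accordingly via ix86_adjust_counter.  */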
25951 static rtx
25952 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25953 rtx destptr, rtx srcptr, rtx value,
25954 rtx vec_value, rtx count, int align,
25955 int desired_alignment, bool issetmem)
25957 int i;
25958 for (i = 1; i < desired_alignment; i <<= 1)
25960 if (align <= i)
25962 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
25963 if (issetmem)
25965 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25966 destmem = emit_memset (destmem, destptr, vec_value, i);
25967 else
25968 destmem = emit_memset (destmem, destptr, value, i);
25970 else
25971 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25972 ix86_adjust_counter (count, i);
25973 emit_label (label);
25974 LABEL_NUSES (label) = 1;
25975 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25978 return destmem;
25981 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
25982 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25983 and jump to DONE_LABEL. */
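/* Illustrative example: for SIZE == 4 and a COUNT in the range 4..7 the
   code below emits one 4-byte move from the start of the block and a
   second, possibly overlapping, 4-byte move ending exactly at
   DESTPTR + COUNT, covering every length in the range without a loop.  */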
25984 static void
25985 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25986 rtx destptr, rtx srcptr,
25987 rtx value, rtx vec_value,
25988 rtx count, int size,
25989 rtx done_label, bool issetmem)
25991 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25992 machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
25993 rtx modesize;
25994 int n;
25996 /* If we do not have vector value to copy, we must reduce size. */
25997 if (issetmem)
25999 if (!vec_value)
26001 if (GET_MODE (value) == VOIDmode && size > 8)
26002 mode = Pmode;
26003 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
26004 mode = GET_MODE (value);
26006 else
26007 mode = GET_MODE (vec_value), value = vec_value;
26009 else
26011 /* Choose appropriate vector mode. */
26012 if (size >= 32)
26013 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
26014 else if (size >= 16)
26015 mode = TARGET_SSE ? V16QImode : DImode;
26016 srcmem = change_address (srcmem, mode, srcptr);
26018 destmem = change_address (destmem, mode, destptr);
26019 modesize = GEN_INT (GET_MODE_SIZE (mode));
26020 gcc_assert (GET_MODE_SIZE (mode) <= size);
26021 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26023 if (issetmem)
26024 emit_move_insn (destmem, gen_lowpart (mode, value));
26025 else
26027 emit_move_insn (destmem, srcmem);
26028 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26030 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26033 destmem = offset_address (destmem, count, 1);
26034 destmem = offset_address (destmem, GEN_INT (-2 * size),
26035 GET_MODE_SIZE (mode));
26036 if (!issetmem)
26038 srcmem = offset_address (srcmem, count, 1);
26039 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
26040 GET_MODE_SIZE (mode));
26042 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26044 if (issetmem)
26045 emit_move_insn (destmem, gen_lowpart (mode, value));
26046 else
26048 emit_move_insn (destmem, srcmem);
26049 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26051 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26053 emit_jump_insn (gen_jump (done_label));
26054 emit_barrier ();
26056 emit_label (label);
26057 LABEL_NUSES (label) = 1;
26060 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
26061 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
26062 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
26063 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
26064 DONE_LABEL is a label after the whole copying sequence. The label is created
26065 on demand if *DONE_LABEL is NULL.
26066 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
26067 bounds after the initial copies.
26069 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
26070 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
26071 we will dispatch to a library call for large blocks.
26073 In pseudocode we do:
26075 if (COUNT < SIZE)
26077 Assume that SIZE is 4. Bigger sizes are handled analogously
26078 if (COUNT & 4)
26080 copy 4 bytes from SRCPTR to DESTPTR
26081 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
26082 goto done_label
26084 if (!COUNT)
26085 goto done_label;
26086 copy 1 byte from SRCPTR to DESTPTR
26087 if (COUNT & 2)
26089 copy 2 bytes from SRCPTR to DESTPTR
26090 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
26093 else
26095 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
26096 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
26098 OLD_DESTPTR = DESTPTR;
26099 Align DESTPTR up to DESIRED_ALIGN
26100 SRCPTR += DESTPTR - OLD_DESTPTR
26101 COUNT -= DEST_PTR - OLD_DESTPTR
26102 if (DYNAMIC_CHECK)
26103 Round COUNT down to multiple of SIZE
26104 << optional caller supplied zero size guard is here >>
26105 << optional caller supplied dynamic check is here >>
26106 << caller supplied main copy loop is here >>
26108 done_label:
26110 static void
26111 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
26112 rtx *destptr, rtx *srcptr,
26113 machine_mode mode,
26114 rtx value, rtx vec_value,
26115 rtx *count,
26116 rtx_code_label **done_label,
26117 int size,
26118 int desired_align,
26119 int align,
26120 unsigned HOST_WIDE_INT *min_size,
26121 bool dynamic_check,
26122 bool issetmem)
26124 rtx_code_label *loop_label = NULL, *label;
26125 int n;
26126 rtx modesize;
26127 int prolog_size = 0;
26128 rtx mode_value;
26130 /* Choose the proper value to copy. */
26131 if (issetmem && VECTOR_MODE_P (mode))
26132 mode_value = vec_value;
26133 else
26134 mode_value = value;
26135 gcc_assert (GET_MODE_SIZE (mode) <= size);
26137 /* See if block is big or small, handle small blocks. */
26138 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
26140 int size2 = size;
26141 loop_label = gen_label_rtx ();
26143 if (!*done_label)
26144 *done_label = gen_label_rtx ();
26146 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
26147 1, loop_label);
26148 size2 >>= 1;
26150 /* Handle sizes > 3. */
26151 for (;size2 > 2; size2 >>= 1)
26152 expand_small_movmem_or_setmem (destmem, srcmem,
26153 *destptr, *srcptr,
26154 value, vec_value,
26155 *count,
26156 size2, *done_label, issetmem);
26157 /* Nothing to copy? Jump to DONE_LABEL if so */
26158 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
26159 1, *done_label);
26161 /* Do a byte copy. */
26162 destmem = change_address (destmem, QImode, *destptr);
26163 if (issetmem)
26164 emit_move_insn (destmem, gen_lowpart (QImode, value));
26165 else
26167 srcmem = change_address (srcmem, QImode, *srcptr);
26168 emit_move_insn (destmem, srcmem);
26171 /* Handle sizes 2 and 3. */
26172 label = ix86_expand_aligntest (*count, 2, false);
26173 destmem = change_address (destmem, HImode, *destptr);
26174 destmem = offset_address (destmem, *count, 1);
26175 destmem = offset_address (destmem, GEN_INT (-2), 2);
26176 if (issetmem)
26177 emit_move_insn (destmem, gen_lowpart (HImode, value));
26178 else
26180 srcmem = change_address (srcmem, HImode, *srcptr);
26181 srcmem = offset_address (srcmem, *count, 1);
26182 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
26183 emit_move_insn (destmem, srcmem);
26186 emit_label (label);
26187 LABEL_NUSES (label) = 1;
26188 emit_jump_insn (gen_jump (*done_label));
26189 emit_barrier ();
26191 else
26192 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
26193 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
26195 /* Start memcpy for COUNT >= SIZE. */
26196 if (loop_label)
26198 emit_label (loop_label);
26199 LABEL_NUSES (loop_label) = 1;
26202 /* Copy first desired_align bytes. */
26203 if (!issetmem)
26204 srcmem = change_address (srcmem, mode, *srcptr);
26205 destmem = change_address (destmem, mode, *destptr);
26206 modesize = GEN_INT (GET_MODE_SIZE (mode));
26207 for (n = 0; prolog_size < desired_align - align; n++)
26209 if (issetmem)
26210 emit_move_insn (destmem, mode_value);
26211 else
26213 emit_move_insn (destmem, srcmem);
26214 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26216 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26217 prolog_size += GET_MODE_SIZE (mode);
26221 /* Copy last SIZE bytes. */
26222 destmem = offset_address (destmem, *count, 1);
26223 destmem = offset_address (destmem,
26224 GEN_INT (-size - prolog_size),
26226 if (issetmem)
26227 emit_move_insn (destmem, mode_value);
26228 else
26230 srcmem = offset_address (srcmem, *count, 1);
26231 srcmem = offset_address (srcmem,
26232 GEN_INT (-size - prolog_size),
26234 emit_move_insn (destmem, srcmem);
26236 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
26238 destmem = offset_address (destmem, modesize, 1);
26239 if (issetmem)
26240 emit_move_insn (destmem, mode_value);
26241 else
26243 srcmem = offset_address (srcmem, modesize, 1);
26244 emit_move_insn (destmem, srcmem);
26248 /* Align destination. */
26249 if (desired_align > 1 && desired_align > align)
26251 rtx saveddest = *destptr;
26253 gcc_assert (desired_align <= size);
26254 /* Align destptr up, placing it in a new register. */
26255 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
26256 GEN_INT (prolog_size),
26257 NULL_RTX, 1, OPTAB_DIRECT);
26258 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
26259 REG_POINTER (*destptr) = 1;
26260 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
26261 GEN_INT (-desired_align),
26262 *destptr, 1, OPTAB_DIRECT);
26263 /* See how many bytes we skipped. */
26264 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
26265 *destptr,
26266 saveddest, 1, OPTAB_DIRECT);
26267 /* Adjust srcptr and count. */
26268 if (!issetmem)
26269 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
26270 saveddest, *srcptr, 1, OPTAB_DIRECT);
26271 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26272 saveddest, *count, 1, OPTAB_DIRECT);
26273 /* We copied at most size + prolog_size. */
26274 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
26275 *min_size
26276 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
26277 else
26278 *min_size = 0;
26280 /* Our loops always round down the block size, but for dispatch to
26281 library we need precise value. */
26282 if (dynamic_check)
26283 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
26284 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
26286 else
26288 gcc_assert (prolog_size == 0);
26289 /* Decrease count, so we won't end up copying last word twice. */
26290 if (!CONST_INT_P (*count))
26291 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26292 constm1_rtx, *count, 1, OPTAB_DIRECT);
26293 else
26294 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
26295 (unsigned HOST_WIDE_INT)size));
26296 if (*min_size)
26297 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
26302 /* This function is like the previous one, except here we know how many bytes
26303 need to be copied. That allows us to update alignment not only of DST, which
26304 is returned, but also of SRC, which is passed as a pointer for that
26305 reason. */
26306 static rtx
26307 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
26308 rtx srcreg, rtx value, rtx vec_value,
26309 int desired_align, int align_bytes,
26310 bool issetmem)
26312 rtx src = NULL;
26313 rtx orig_dst = dst;
26314 rtx orig_src = NULL;
26315 int piece_size = 1;
26316 int copied_bytes = 0;
26318 if (!issetmem)
26320 gcc_assert (srcp != NULL);
26321 src = *srcp;
26322 orig_src = src;
26325 for (piece_size = 1;
26326 piece_size <= desired_align && copied_bytes < align_bytes;
26327 piece_size <<= 1)
26329 if (align_bytes & piece_size)
26331 if (issetmem)
26333 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
26334 dst = emit_memset (dst, destreg, vec_value, piece_size);
26335 else
26336 dst = emit_memset (dst, destreg, value, piece_size);
26338 else
26339 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
26340 copied_bytes += piece_size;
26343 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
26344 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26345 if (MEM_SIZE_KNOWN_P (orig_dst))
26346 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
26348 if (!issetmem)
26350 int src_align_bytes = get_mem_align_offset (src, desired_align
26351 * BITS_PER_UNIT);
26352 if (src_align_bytes >= 0)
26353 src_align_bytes = desired_align - src_align_bytes;
26354 if (src_align_bytes >= 0)
26356 unsigned int src_align;
26357 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
26359 if ((src_align_bytes & (src_align - 1))
26360 == (align_bytes & (src_align - 1)))
26361 break;
26363 if (src_align > (unsigned int) desired_align)
26364 src_align = desired_align;
26365 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
26366 set_mem_align (src, src_align * BITS_PER_UNIT);
26368 if (MEM_SIZE_KNOWN_P (orig_src))
26369 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26370 *srcp = src;
26373 return dst;
26376 /* Return true if ALG can be used in current context.
26377 Assume we expand memset if MEMSET is true. */
26378 static bool
26379 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
26381 if (alg == no_stringop)
26382 return false;
26383 if (alg == vector_loop)
26384 return TARGET_SSE || TARGET_AVX;
26385 /* Algorithms using the rep prefix want at least edi and ecx;
26386 additionally, memset wants eax and memcpy wants esi. Don't
26387 consider such algorithms if the user has appropriated those
26388 registers for their own purposes, or if we have a non-default
26389 address space, since some string insns cannot override the segment. */
26390 if (alg == rep_prefix_1_byte
26391 || alg == rep_prefix_4_byte
26392 || alg == rep_prefix_8_byte)
26394 if (have_as)
26395 return false;
26396 if (fixed_regs[CX_REG]
26397 || fixed_regs[DI_REG]
26398 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26399 return false;
26401 return true;
26404 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
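/* For instance (entries illustrative), given a memcpy cost table of
   {{256, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
    {-1, libcall, false}} and EXPECTED_SIZE == 1024, the size walk below
   selects rep_prefix_8_byte, provided the rep prefix is usable (no fixed
   cx/si/di registers and no non-default address space).  */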
26405 static enum stringop_alg
26406 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26407 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26408 bool memset, bool zero_memset, bool have_as,
26409 int *dynamic_check, bool *noalign, bool recur)
26411 const struct stringop_algs *algs;
26412 bool optimize_for_speed;
26413 int max = 0;
26414 const struct processor_costs *cost;
26415 int i;
26416 bool any_alg_usable_p = false;
26418 *noalign = false;
26419 *dynamic_check = -1;
26421 /* Even if the string operation call is cold, we still might spend a lot
26422 of time processing large blocks. */
26423 if (optimize_function_for_size_p (cfun)
26424 || (optimize_insn_for_size_p ()
26425 && (max_size < 256
26426 || (expected_size != -1 && expected_size < 256))))
26427 optimize_for_speed = false;
26428 else
26429 optimize_for_speed = true;
26431 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26432 if (memset)
26433 algs = &cost->memset[TARGET_64BIT != 0];
26434 else
26435 algs = &cost->memcpy[TARGET_64BIT != 0];
26437 /* See maximal size for user defined algorithm. */
26438 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26440 enum stringop_alg candidate = algs->size[i].alg;
26441 bool usable = alg_usable_p (candidate, memset, have_as);
26442 any_alg_usable_p |= usable;
26444 if (candidate != libcall && candidate && usable)
26445 max = algs->size[i].max;
26448 /* If the expected size is not known but the max size is small enough
26449 that the inline version is a win, set the expected size into
26450 the range. */
26451 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26452 && expected_size == -1)
26453 expected_size = min_size / 2 + max_size / 2;
26455 /* If user specified the algorithm, honor it if possible. */
26456 if (ix86_stringop_alg != no_stringop
26457 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26458 return ix86_stringop_alg;
26459 /* rep; movq or rep; movl is the smallest variant. */
26460 else if (!optimize_for_speed)
26462 *noalign = true;
26463 if (!count || (count & 3) || (memset && !zero_memset))
26464 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26465 ? rep_prefix_1_byte : loop_1_byte;
26466 else
26467 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26468 ? rep_prefix_4_byte : loop;
26470 /* Very tiny blocks are best handled via the loop, REP is expensive to
26471 set up. */
26472 else if (expected_size != -1 && expected_size < 4)
26473 return loop_1_byte;
26474 else if (expected_size != -1)
26476 enum stringop_alg alg = libcall;
26477 bool alg_noalign = false;
26478 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26480 /* We get here if the algorithms that were not libcall-based
26481 were rep-prefix based and we are unable to use rep prefixes
26482 based on global register usage. Break out of the loop and
26483 use the heuristic below. */
26484 if (algs->size[i].max == 0)
26485 break;
26486 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26488 enum stringop_alg candidate = algs->size[i].alg;
26490 if (candidate != libcall
26491 && alg_usable_p (candidate, memset, have_as))
26493 alg = candidate;
26494 alg_noalign = algs->size[i].noalign;
26496 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26497 last non-libcall inline algorithm. */
26498 if (TARGET_INLINE_ALL_STRINGOPS)
26500 /* When the current size is best copied by a libcall,
26501 but we are still forced to inline, run the heuristic below
26502 that picks code for medium sized blocks. */
26503 if (alg != libcall)
26505 *noalign = alg_noalign;
26506 return alg;
26508 else if (!any_alg_usable_p)
26509 break;
26511 else if (alg_usable_p (candidate, memset, have_as))
26513 *noalign = algs->size[i].noalign;
26514 return candidate;
26519 /* When asked to inline the call anyway, try to pick a meaningful choice.
26520 We look for the maximal size of block that is faster to copy by hand and
26521 take blocks of at most that size, guessing that the average size will
26522 be roughly half of the block.
26524 If this turns out to be bad, we might simply specify the preferred
26525 choice in ix86_costs. */
26526 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26527 && (algs->unknown_size == libcall
26528 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26530 enum stringop_alg alg;
26531 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26533 /* If there aren't any usable algorithms or if recursing already,
26534 then recursing on smaller sizes or same size isn't going to
26535 find anything. Just return the simple byte-at-a-time copy loop. */
26536 if (!any_alg_usable_p || recur)
26538 /* Pick something reasonable. */
26539 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26540 *dynamic_check = 128;
26541 return loop_1_byte;
26543 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26544 zero_memset, have_as, dynamic_check, noalign, true);
26545 gcc_assert (*dynamic_check == -1);
26546 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26547 *dynamic_check = max;
26548 else
26549 gcc_assert (alg != libcall);
26550 return alg;
26552 return (alg_usable_p (algs->unknown_size, memset, have_as)
26553 ? algs->unknown_size : libcall);
26556 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26557 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26558 static int
26559 decide_alignment (int align,
26560 enum stringop_alg alg,
26561 int expected_size,
26562 machine_mode move_mode)
26564 int desired_align = 0;
26566 gcc_assert (alg != no_stringop);
26568 if (alg == libcall)
26569 return 0;
26570 if (move_mode == VOIDmode)
26571 return 0;
26573 desired_align = GET_MODE_SIZE (move_mode);
26574 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
26575 copying a whole cacheline at once. */
26576 if (TARGET_PENTIUMPRO
26577 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26578 desired_align = 8;
26580 if (optimize_size)
26581 desired_align = 1;
26582 if (desired_align < align)
26583 desired_align = align;
26584 if (expected_size != -1 && expected_size < 4)
26585 desired_align = align;
26587 return desired_align;
26591 /* Helper function for memset.  For the QImode value 0xXY produce
26592 0xXYXYXYXY of the width specified by MODE.  This is essentially
26593 a multiplication by 0x01010101, but we can do slightly better than
26594 synth_mult by unwinding the sequence by hand on CPUs with
26595 slow multiply. */
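/* Worked example (illustrative): for VAL == 0xAB and MODE == SImode the
   constant path simply materializes 0xABABABAB.  For a non-constant VAL on
   a CPU with slow multiply, the emitted sequence is equivalent to

     reg  = (uint32_t) val;   // zero-extended QImode value
     reg |= reg << 8;         // 0x0000ABAB
     reg |= reg << 16;        // 0xABABABAB
                              // plus reg |= reg << 32 for DImode

   while CPUs with a cheap multiplier use reg * 0x01010101 instead.  */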
26596 static rtx
26597 promote_duplicated_reg (machine_mode mode, rtx val)
26599 machine_mode valmode = GET_MODE (val);
26600 rtx tmp;
26601 int nops = mode == DImode ? 3 : 2;
26603 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26604 if (val == const0_rtx)
26605 return copy_to_mode_reg (mode, CONST0_RTX (mode));
26606 if (CONST_INT_P (val))
26608 HOST_WIDE_INT v = INTVAL (val) & 255;
26610 v |= v << 8;
26611 v |= v << 16;
26612 if (mode == DImode)
26613 v |= (v << 16) << 16;
26614 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26617 if (valmode == VOIDmode)
26618 valmode = QImode;
26619 if (valmode != QImode)
26620 val = gen_lowpart (QImode, val);
26621 if (mode == QImode)
26622 return val;
26623 if (!TARGET_PARTIAL_REG_STALL)
26624 nops--;
26625 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26626 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26627 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26628 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26630 rtx reg = convert_modes (mode, QImode, val, true);
26631 tmp = promote_duplicated_reg (mode, const1_rtx);
26632 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26633 OPTAB_DIRECT);
26635 else
26637 rtx reg = convert_modes (mode, QImode, val, true);
26639 if (!TARGET_PARTIAL_REG_STALL)
26640 if (mode == SImode)
26641 emit_insn (gen_insvsi_1 (reg, reg));
26642 else
26643 emit_insn (gen_insvdi_1 (reg, reg));
26644 else
26646 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26647 NULL, 1, OPTAB_DIRECT);
26648 reg =
26649 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26651 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26652 NULL, 1, OPTAB_DIRECT);
26653 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26654 if (mode == SImode)
26655 return reg;
26656 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26657 NULL, 1, OPTAB_DIRECT);
26658 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26659 return reg;
26663 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that
26664 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
26665 raising the alignment from ALIGN to DESIRED_ALIGN. */
26666 static rtx
26667 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
26668 int align)
26670 rtx promoted_val;
26672 if (TARGET_64BIT
26673 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26674 promoted_val = promote_duplicated_reg (DImode, val);
26675 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26676 promoted_val = promote_duplicated_reg (SImode, val);
26677 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26678 promoted_val = promote_duplicated_reg (HImode, val);
26679 else
26680 promoted_val = val;
26682 return promoted_val;
26685 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
26686 operations when profitable. The code depends upon architecture, block size
26687 and alignment, but always has one of the following overall structures:
26689 Aligned move sequence:
26691 1) Prologue guard: Conditional that jumps up to epilogues for small
26692 blocks that can be handled by epilogue alone. This is faster
26693 but also needed for correctness, since the prologue assumes the block
26694 is larger than the desired alignment.
26696 Optional dynamic check for size and libcall for large
26697 blocks is emitted here too, with -minline-stringops-dynamically.
26699 2) Prologue: copy first few bytes in order to get destination
26700 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26701 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26702 copied. We emit either a jump tree on power of two sized
26703 blocks, or a byte loop.
26705 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26706 with specified algorithm.
26708 4) Epilogue: code copying tail of the block that is too small to be
26709 handled by main body (or up to size guarded by prologue guard).
26711 Misaligned move sequence
26713 1) misaligned move prologue/epilogue containing:
26714 a) Prologue handling small memory blocks and jumping to done_label
26715 (skipped if blocks are known to be large enough)
26716 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
26717 needed by single possibly misaligned move
26718 (skipped if alignment is not needed)
26719 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26721 2) Zero size guard dispatching to done_label, if needed
26723 3) dispatch to library call, if needed,
26725 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26726 with specified algorithm. */
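/* Concrete (illustrative) instance of the aligned sequence: a memcpy of
   unknown size with ALG == rep_prefix_8_byte and DESIRED_ALIGN == 8 emits a
   guard that jumps to the epilogue when COUNT < 8, a prologue copying up to
   7 bytes to 8-align the destination, a "rep movsq" main body, and an
   epilogue handling the remaining COUNT % 8 bytes.  */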
26727 bool
26728 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26729 rtx align_exp, rtx expected_align_exp,
26730 rtx expected_size_exp, rtx min_size_exp,
26731 rtx max_size_exp, rtx probable_max_size_exp,
26732 bool issetmem)
26734 rtx destreg;
26735 rtx srcreg = NULL;
26736 rtx_code_label *label = NULL;
26737 rtx tmp;
26738 rtx_code_label *jump_around_label = NULL;
26739 HOST_WIDE_INT align = 1;
26740 unsigned HOST_WIDE_INT count = 0;
26741 HOST_WIDE_INT expected_size = -1;
26742 int size_needed = 0, epilogue_size_needed;
26743 int desired_align = 0, align_bytes = 0;
26744 enum stringop_alg alg;
26745 rtx promoted_val = NULL;
26746 rtx vec_promoted_val = NULL;
26747 bool force_loopy_epilogue = false;
26748 int dynamic_check;
26749 bool need_zero_guard = false;
26750 bool noalign;
26751 machine_mode move_mode = VOIDmode;
26752 machine_mode wider_mode;
26753 int unroll_factor = 1;
26754 /* TODO: Once value ranges are available, fill in proper data. */
26755 unsigned HOST_WIDE_INT min_size = 0;
26756 unsigned HOST_WIDE_INT max_size = -1;
26757 unsigned HOST_WIDE_INT probable_max_size = -1;
26758 bool misaligned_prologue_used = false;
26759 bool have_as;
26761 if (CONST_INT_P (align_exp))
26762 align = INTVAL (align_exp);
26763 /* i386 can do misaligned accesses at a reasonably increased cost. */
26764 if (CONST_INT_P (expected_align_exp)
26765 && INTVAL (expected_align_exp) > align)
26766 align = INTVAL (expected_align_exp);
26767 /* ALIGN is the minimum of destination and source alignment, but we care here
26768 just about destination alignment. */
26769 else if (!issetmem
26770 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26771 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26773 if (CONST_INT_P (count_exp))
26775 min_size = max_size = probable_max_size = count = expected_size
26776 = INTVAL (count_exp);
26777 /* When COUNT is 0, there is nothing to do. */
26778 if (!count)
26779 return true;
26781 else
26783 if (min_size_exp)
26784 min_size = INTVAL (min_size_exp);
26785 if (max_size_exp)
26786 max_size = INTVAL (max_size_exp);
26787 if (probable_max_size_exp)
26788 probable_max_size = INTVAL (probable_max_size_exp);
26789 if (CONST_INT_P (expected_size_exp))
26790 expected_size = INTVAL (expected_size_exp);
26793 /* Make sure we don't need to care about overflow later on. */
26794 if (count > (HOST_WIDE_INT_1U << 30))
26795 return false;
26797 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26798 if (!issetmem)
26799 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26801 /* Step 0: Decide on preferred algorithm, desired alignment and
26802 size of chunks to be copied by main loop. */
26803 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26804 issetmem,
26805 issetmem && val_exp == const0_rtx, have_as,
26806 &dynamic_check, &noalign, false);
26807 if (alg == libcall)
26808 return false;
26809 gcc_assert (alg != no_stringop);
26811 /* For now the vector version of memset is generated only for memory zeroing, as
26812 creating the promoted vector value is very cheap in this case. */
26813 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26814 alg = unrolled_loop;
26816 if (!count)
26817 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26818 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26819 if (!issetmem)
26820 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26822 unroll_factor = 1;
26823 move_mode = word_mode;
26824 switch (alg)
26826 case libcall:
26827 case no_stringop:
26828 case last_alg:
26829 gcc_unreachable ();
26830 case loop_1_byte:
26831 need_zero_guard = true;
26832 move_mode = QImode;
26833 break;
26834 case loop:
26835 need_zero_guard = true;
26836 break;
26837 case unrolled_loop:
26838 need_zero_guard = true;
26839 unroll_factor = (TARGET_64BIT ? 4 : 2);
26840 break;
26841 case vector_loop:
26842 need_zero_guard = true;
26843 unroll_factor = 4;
26844 /* Find the widest supported mode. */
26845 move_mode = word_mode;
26846 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
26847 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
26848 move_mode = wider_mode;
26850 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
26851 move_mode = TImode;
26853 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26854 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26855 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26857 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26858 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
26859 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26860 move_mode = word_mode;
26862 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26863 break;
26864 case rep_prefix_8_byte:
26865 move_mode = DImode;
26866 break;
26867 case rep_prefix_4_byte:
26868 move_mode = SImode;
26869 break;
26870 case rep_prefix_1_byte:
26871 move_mode = QImode;
26872 break;
26874 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26875 epilogue_size_needed = size_needed;
26877 /* If we are going to emit any library calls conditionally, make sure any
26878 pending stack adjustments happen before the first conditional branch;
26879 otherwise they will be emitted only before the library call and won't
26880 happen on the other branches. */
26881 if (dynamic_check != -1)
26882 do_pending_stack_adjust ();
26884 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26885 if (!TARGET_ALIGN_STRINGOPS || noalign)
26886 align = desired_align;
26888 /* Step 1: Prologue guard. */
26890 /* Alignment code needs count to be in register. */
26891 if (CONST_INT_P (count_exp) && desired_align > align)
26893 if (INTVAL (count_exp) > desired_align
26894 && INTVAL (count_exp) > size_needed)
26896 align_bytes
26897 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26898 if (align_bytes <= 0)
26899 align_bytes = 0;
26900 else
26901 align_bytes = desired_align - align_bytes;
26903 if (align_bytes == 0)
26904 count_exp = force_reg (counter_mode (count_exp), count_exp);
26906 gcc_assert (desired_align >= 1 && align >= 1);
26908 /* Misaligned move sequences handle both prologue and epilogue at once.
26909 Default code generation results in smaller code for large alignments
26910 and also avoids redundant work when sizes are known precisely.
26911 misaligned_prologue_used
26912 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26913 && MAX (desired_align, epilogue_size_needed) <= 32
26914 && desired_align <= epilogue_size_needed
26915 && ((desired_align > align && !align_bytes)
26916 || (!count && epilogue_size_needed > 1)));
26918 /* Do the cheap promotion to allow better CSE across the
26919 main loop and epilogue (i.e. one load of the big constant in
26920 front of all the code).
26921 For now the misaligned move sequences do not have a fast path
26922 without broadcasting. */
26923 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26925 if (alg == vector_loop)
26927 gcc_assert (val_exp == const0_rtx);
26928 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26929 promoted_val = promote_duplicated_reg_to_size (val_exp,
26930 GET_MODE_SIZE (word_mode),
26931 desired_align, align);
26933 else
26935 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26936 desired_align, align);
26939 /* Misaligned move sequences handle both prologue and epilogue at once.
26940 Default code generation results in smaller code for large alignments and
26941 also avoids redundant work when sizes are known precisely. */
26942 if (misaligned_prologue_used)
26944 /* The misaligned move prologue handles small blocks by itself. */
26945 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26946 (dst, src, &destreg, &srcreg,
26947 move_mode, promoted_val, vec_promoted_val,
26948 &count_exp,
26949 &jump_around_label,
26950 desired_align < align
26951 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26952 desired_align, align, &min_size, dynamic_check, issetmem);
26953 if (!issetmem)
26954 src = change_address (src, BLKmode, srcreg);
26955 dst = change_address (dst, BLKmode, destreg);
26956 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26957 epilogue_size_needed = 0;
26958 if (need_zero_guard
26959 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26961 /* It is possible that we copied enough so the main loop will not
26962 execute. */
26963 gcc_assert (size_needed > 1);
26964 if (jump_around_label == NULL_RTX)
26965 jump_around_label = gen_label_rtx ();
26966 emit_cmp_and_jump_insns (count_exp,
26967 GEN_INT (size_needed),
26968 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26969 if (expected_size == -1
26970 || expected_size < (desired_align - align) / 2 + size_needed)
26971 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26972 else
26973 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26976 /* Ensure that alignment prologue won't copy past end of block. */
26977 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26979 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26980 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26981 Make sure it is power of 2. */
26982 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26984 /* To improve performance of small blocks, we jump around the VAL
26985 promoting code. This means that if the promoted VAL is not constant,
26986 we might not use it in the epilogue and have to use the byte
26987 loop variant. */
26988 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26989 force_loopy_epilogue = true;
26990 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26991 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26993 /* If main algorithm works on QImode, no epilogue is needed.
26994 For small sizes just don't align anything. */
26995 if (size_needed == 1)
26996 desired_align = align;
26997 else
26998 goto epilogue;
27000 else if (!count
27001 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27003 label = gen_label_rtx ();
27004 emit_cmp_and_jump_insns (count_exp,
27005 GEN_INT (epilogue_size_needed),
27006 LTU, 0, counter_mode (count_exp), 1, label);
27007 if (expected_size == -1 || expected_size < epilogue_size_needed)
27008 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27009 else
27010 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27014 /* Emit code to decide at runtime whether a library call or inline code should
27015 be used. */
27016 if (dynamic_check != -1)
27018 if (!issetmem && CONST_INT_P (count_exp))
27020 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
27022 emit_block_copy_via_libcall (dst, src, count_exp);
27023 count_exp = const0_rtx;
27024 goto epilogue;
27027 else
27029 rtx_code_label *hot_label = gen_label_rtx ();
27030 if (jump_around_label == NULL_RTX)
27031 jump_around_label = gen_label_rtx ();
27032 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
27033 LEU, 0, counter_mode (count_exp),
27034 1, hot_label);
27035 predict_jump (REG_BR_PROB_BASE * 90 / 100);
27036 if (issetmem)
27037 set_storage_via_libcall (dst, count_exp, val_exp);
27038 else
27039 emit_block_copy_via_libcall (dst, src, count_exp);
27040 emit_jump (jump_around_label);
27041 emit_label (hot_label);
27045 /* Step 2: Alignment prologue. */
27046 /* Do the expensive promotion once we branched off the small blocks. */
27047 if (issetmem && !promoted_val)
27048 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27049 desired_align, align);
27051 if (desired_align > align && !misaligned_prologue_used)
27053 if (align_bytes == 0)
27055 /* Except for the first move in the prologue, we no longer know
27056 the constant offset in aliasing info. It doesn't seem worth
27057 the pain to maintain it for the first move, so throw away
27058 the info early. */
27059 dst = change_address (dst, BLKmode, destreg);
27060 if (!issetmem)
27061 src = change_address (src, BLKmode, srcreg);
27062 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
27063 promoted_val, vec_promoted_val,
27064 count_exp, align, desired_align,
27065 issetmem);
27066 /* At most desired_align - align bytes are copied. */
27067 if (min_size < (unsigned)(desired_align - align))
27068 min_size = 0;
27069 else
27070 min_size -= desired_align - align;
27072 else
27074 /* If we know how many bytes need to be stored before dst is
27075 sufficiently aligned, maintain aliasing info accurately. */
27076 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
27077 srcreg,
27078 promoted_val,
27079 vec_promoted_val,
27080 desired_align,
27081 align_bytes,
27082 issetmem);
27084 count_exp = plus_constant (counter_mode (count_exp),
27085 count_exp, -align_bytes);
27086 count -= align_bytes;
27087 min_size -= align_bytes;
27088 max_size -= align_bytes;
27090 if (need_zero_guard
27091 && min_size < (unsigned HOST_WIDE_INT) size_needed
27092 && (count < (unsigned HOST_WIDE_INT) size_needed
27093 || (align_bytes == 0
27094 && count < ((unsigned HOST_WIDE_INT) size_needed
27095 + desired_align - align))))
27097 /* It is possible that we copied enough so the main loop will not
27098 execute. */
27099 gcc_assert (size_needed > 1);
27100 if (label == NULL_RTX)
27101 label = gen_label_rtx ();
27102 emit_cmp_and_jump_insns (count_exp,
27103 GEN_INT (size_needed),
27104 LTU, 0, counter_mode (count_exp), 1, label);
27105 if (expected_size == -1
27106 || expected_size < (desired_align - align) / 2 + size_needed)
27107 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27108 else
27109 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27112 if (label && size_needed == 1)
27114 emit_label (label);
27115 LABEL_NUSES (label) = 1;
27116 label = NULL;
27117 epilogue_size_needed = 1;
27118 if (issetmem)
27119 promoted_val = val_exp;
27121 else if (label == NULL_RTX && !misaligned_prologue_used)
27122 epilogue_size_needed = size_needed;
27124 /* Step 3: Main loop. */
27126 switch (alg)
27128 case libcall:
27129 case no_stringop:
27130 case last_alg:
27131 gcc_unreachable ();
27132 case loop_1_byte:
27133 case loop:
27134 case unrolled_loop:
27135 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
27136 count_exp, move_mode, unroll_factor,
27137 expected_size, issetmem);
27138 break;
27139 case vector_loop:
27140 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
27141 vec_promoted_val, count_exp, move_mode,
27142 unroll_factor, expected_size, issetmem);
27143 break;
27144 case rep_prefix_8_byte:
27145 case rep_prefix_4_byte:
27146 case rep_prefix_1_byte:
27147 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
27148 val_exp, count_exp, move_mode, issetmem);
27149 break;
27151 /* Properly adjust the offsets of src and dest memory for aliasing. */
27152 if (CONST_INT_P (count_exp))
27154 if (!issetmem)
27155 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
27156 (count / size_needed) * size_needed);
27157 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
27158 (count / size_needed) * size_needed);
27160 else
27162 if (!issetmem)
27163 src = change_address (src, BLKmode, srcreg);
27164 dst = change_address (dst, BLKmode, destreg);
27167 /* Step 4: Epilogue to copy the remaining bytes. */
27168 epilogue:
27169 if (label)
27171 /* When the main loop is done, COUNT_EXP might hold the original count,
27172 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
27173 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
27174 bytes.  Compensate if needed. */
27176 if (size_needed < epilogue_size_needed)
27178 tmp =
27179 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
27180 GEN_INT (size_needed - 1), count_exp, 1,
27181 OPTAB_DIRECT);
27182 if (tmp != count_exp)
27183 emit_move_insn (count_exp, tmp);
27185 emit_label (label);
27186 LABEL_NUSES (label) = 1;
27189 if (count_exp != const0_rtx && epilogue_size_needed > 1)
27191 if (force_loopy_epilogue)
27192 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
27193 epilogue_size_needed);
27194 else
27196 if (issetmem)
27197 expand_setmem_epilogue (dst, destreg, promoted_val,
27198 vec_promoted_val, count_exp,
27199 epilogue_size_needed);
27200 else
27201 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
27202 epilogue_size_needed);
27205 if (jump_around_label)
27206 emit_label (jump_around_label);
27207 return true;
27211 /* Expand the appropriate insns for doing strlen if not just doing
27212 repnz; scasb
27214 out = result, initialized with the start address
27215 align_rtx = alignment of the address.
27216 scratch = scratch register, initialized with the start address when
27217 not aligned, otherwise undefined
27219 This is just the body. It needs the initializations mentioned above and
27220 some address computing at the end. These things are done in i386.md. */
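/* A rough sketch of the code this emits (illustrative only, not the
   literal insn sequence):

     top:   check up to 3 leading bytes one at a time until OUT is
            4-byte aligned; jump to end_0 when a zero byte is found
     loop:  load 4 bytes at OUT, OUT += 4
            if no byte of the word is zero, jump back to loop
            otherwise step OUT back to the exact zero byte (using
            cmov when available, a short branch otherwise)
     end_0: OUT points at the terminating zero; the caller (i386.md)
            subtracts the start address to form the length.  */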
27222 static void
27223 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
27225 int align;
27226 rtx tmp;
27227 rtx_code_label *align_2_label = NULL;
27228 rtx_code_label *align_3_label = NULL;
27229 rtx_code_label *align_4_label = gen_label_rtx ();
27230 rtx_code_label *end_0_label = gen_label_rtx ();
27231 rtx mem;
27232 rtx tmpreg = gen_reg_rtx (SImode);
27233 rtx scratch = gen_reg_rtx (SImode);
27234 rtx cmp;
27236 align = 0;
27237 if (CONST_INT_P (align_rtx))
27238 align = INTVAL (align_rtx);
27240 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
27242 /* Is there a known alignment and is it less than 4? */
27243 if (align < 4)
27245 rtx scratch1 = gen_reg_rtx (Pmode);
27246 emit_move_insn (scratch1, out);
27247 /* Is there a known alignment and is it not 2? */
27248 if (align != 2)
27250 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
27251 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
27253 /* Leave just the 3 lower bits. */
27254 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
27255 NULL_RTX, 0, OPTAB_WIDEN);
27257 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27258 Pmode, 1, align_4_label);
27259 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
27260 Pmode, 1, align_2_label);
27261 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
27262 Pmode, 1, align_3_label);
27264 else
27266 /* Since the alignment is 2, we have to check 2 or 0 bytes;
27267 check whether the pointer is already 4-byte aligned. */
27269 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
27270 NULL_RTX, 0, OPTAB_WIDEN);
27272 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27273 Pmode, 1, align_4_label);
27276 mem = change_address (src, QImode, out);
27278 /* Now compare the bytes. */
27280 /* Compare the first n unaligned bytes one byte at a time. */
27281 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
27282 QImode, 1, end_0_label);
27284 /* Increment the address. */
27285 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27287 /* Not needed with an alignment of 2 */
27288 if (align != 2)
27290 emit_label (align_2_label);
27292 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27293 end_0_label);
27295 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27297 emit_label (align_3_label);
27300 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27301 end_0_label);
27303 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27306 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
27307 align this loop; that only bloats the program and does not help to
27308 speed it up. */
27309 emit_label (align_4_label);
27311 mem = change_address (src, SImode, out);
27312 emit_move_insn (scratch, mem);
27313 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
27315 /* This formula yields a nonzero result iff one of the bytes is zero.
27316 This saves three branches inside the loop and many cycles. */
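/* For illustration: with x = 0x41420043 (one zero byte),
   (x - 0x01010101) & ~x = 0x4040ff42 & 0xbebdffbc = 0x0000ff00,
   and masking with 0x80808080 leaves 0x00008000, which is nonzero.
   With x = 0x41424344 (no zero byte) the same computation yields 0.  */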
27318 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
27319 emit_insn (gen_one_cmplsi2 (scratch, scratch));
27320 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
27321 emit_insn (gen_andsi3 (tmpreg, tmpreg,
27322 gen_int_mode (0x80808080, SImode)));
27323 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
27324 align_4_label);
27326 if (TARGET_CMOVE)
27328 rtx reg = gen_reg_rtx (SImode);
27329 rtx reg2 = gen_reg_rtx (Pmode);
27330 emit_move_insn (reg, tmpreg);
27331 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
27333 /* If zero is not in the first two bytes, move two bytes forward. */
27334 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27335 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27336 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27337 emit_insn (gen_rtx_SET (tmpreg,
27338 gen_rtx_IF_THEN_ELSE (SImode, tmp,
27339 reg,
27340 tmpreg)));
27341 /* Emit lea manually to avoid clobbering of flags. */
27342 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
27344 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27345 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27346 emit_insn (gen_rtx_SET (out,
27347 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
27348 reg2,
27349 out)));
27351 else
27353 rtx_code_label *end_2_label = gen_label_rtx ();
27354 /* Is zero in the first two bytes? */
27356 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27357 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27358 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
27359 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27360 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
27361 pc_rtx);
27362 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
27363 JUMP_LABEL (tmp) = end_2_label;
27365 /* Not in the first two. Move two bytes forward. */
27366 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
27367 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
27369 emit_label (end_2_label);
27373 /* Avoid branch in fixing the byte. */
27374 tmpreg = gen_lowpart (QImode, tmpreg);
27375 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27376 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27377 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27378 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27380 emit_label (end_0_label);
27383 /* Expand strlen. */
27385 bool
27386 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27388 rtx addr, scratch1, scratch2, scratch3, scratch4;
27390 /* The generic case of the strlen expander is long.  Avoid expanding
27391 it unless TARGET_INLINE_ALL_STRINGOPS. */
27393 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27394 && !TARGET_INLINE_ALL_STRINGOPS
27395 && !optimize_insn_for_size_p ()
27396 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27397 return false;
27399 addr = force_reg (Pmode, XEXP (src, 0));
27400 scratch1 = gen_reg_rtx (Pmode);
27402 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27403 && !optimize_insn_for_size_p ())
27405 /* It seems that the optimizer does not combine a call like
27406 foo (strlen (bar), strlen (bar));
27407 when the move and the subtraction are done here.  It does compute
27408 the length just once when these instructions are emitted inside
27409 output_strlen_unroll ().  But since &bar[strlen (bar)] is
27410 often used, and this uses one fewer register for the lifetime of
27411 output_strlen_unroll (), this is better. */
27413 emit_move_insn (out, addr);
27415 ix86_expand_strlensi_unroll_1 (out, src, align);
27417 /* strlensi_unroll_1 returns the address of the zero at the end of
27418 the string, like memchr(), so compute the length by subtracting
27419 the start address. */
27420 emit_insn (ix86_gen_sub3 (out, out, addr));
27422 else
27424 rtx unspec;
27426 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27427 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27428 return false;
27429 /* Can't use this for non-default address spaces. */
27430 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27431 return false;
27433 scratch2 = gen_reg_rtx (Pmode);
27434 scratch3 = gen_reg_rtx (Pmode);
27435 scratch4 = force_reg (Pmode, constm1_rtx);
27437 emit_move_insn (scratch3, addr);
27438 eoschar = force_reg (QImode, eoschar);
27440 src = replace_equiv_address_nv (src, scratch3);
27442 /* If .md starts supporting :P, this can be done in .md. */
27443 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27444 scratch4), UNSPEC_SCAS);
27445 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
27446 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27447 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27449 return true;
27452 /* For a given symbol (function), construct code to compute the address of
27453 its PLT entry in the large x86-64 PIC model. */
27454 static rtx
27455 construct_plt_address (rtx symbol)
27457 rtx tmp, unspec;
27459 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27460 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27461 gcc_assert (Pmode == DImode);
27463 tmp = gen_reg_rtx (Pmode);
27464 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27466 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27467 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
27468 return tmp;
27472 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27473 rtx callarg2,
27474 rtx pop, bool sibcall)
27476 rtx vec[3];
27477 rtx use = NULL, call;
27478 unsigned int vec_len = 0;
27479 tree fndecl;
27481 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27483 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
27484 if (fndecl
27485 && (lookup_attribute ("interrupt",
27486 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
27487 error ("interrupt service routine can't be called directly");
27489 else
27490 fndecl = NULL_TREE;
27492 if (pop == const0_rtx)
27493 pop = NULL;
27494 gcc_assert (!TARGET_64BIT || !pop);
27496 if (TARGET_MACHO && !TARGET_64BIT)
27498 #if TARGET_MACHO
27499 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27500 fnaddr = machopic_indirect_call_target (fnaddr);
27501 #endif
27503 else
27505 /* Static functions and indirect calls don't need the pic register.  Also,
27506 check whether the PLT was explicitly avoided via -fno-plt or the "noplt"
27507 attribute, making it an indirect call. */
27508 rtx addr = XEXP (fnaddr, 0);
27509 if (flag_pic
27510 && GET_CODE (addr) == SYMBOL_REF
27511 && !SYMBOL_REF_LOCAL_P (addr))
27513 if (flag_plt
27514 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27515 || !lookup_attribute ("noplt",
27516 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27518 if (!TARGET_64BIT
27519 || (ix86_cmodel == CM_LARGE_PIC
27520 && DEFAULT_ABI != MS_ABI))
27522 use_reg (&use, gen_rtx_REG (Pmode,
27523 REAL_PIC_OFFSET_TABLE_REGNUM));
27524 if (ix86_use_pseudo_pic_reg ())
27525 emit_move_insn (gen_rtx_REG (Pmode,
27526 REAL_PIC_OFFSET_TABLE_REGNUM),
27527 pic_offset_table_rtx);
27530 else if (!TARGET_PECOFF && !TARGET_MACHO)
27532 if (TARGET_64BIT)
27534 fnaddr = gen_rtx_UNSPEC (Pmode,
27535 gen_rtvec (1, addr),
27536 UNSPEC_GOTPCREL);
27537 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27539 else
27541 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27542 UNSPEC_GOT);
27543 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27544 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27545 fnaddr);
27547 fnaddr = gen_const_mem (Pmode, fnaddr);
27548 /* Pmode may not be the same as word_mode for x32, which
27549 doesn't support indirect branch via 32-bit memory slot.
27550 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27551 indirect branch via x32 GOT slot is OK. */
27552 if (GET_MODE (fnaddr) != word_mode)
27553 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27554 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27559 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27560 parameters passed in vector registers. */
27561 if (TARGET_64BIT
27562 && (INTVAL (callarg2) > 0
27563 || (INTVAL (callarg2) == 0
27564 && (TARGET_SSE || !flag_skip_rax_setup))))
27566 rtx al = gen_rtx_REG (QImode, AX_REG);
27567 emit_move_insn (al, callarg2);
27568 use_reg (&use, al);
27571 if (ix86_cmodel == CM_LARGE_PIC
27572 && !TARGET_PECOFF
27573 && MEM_P (fnaddr)
27574 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27575 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27576 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27577 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27578 branch via x32 GOT slot is OK. */
27579 else if (!(TARGET_X32
27580 && MEM_P (fnaddr)
27581 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27582 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27583 && (sibcall
27584 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27585 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
27587 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27588 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27591 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27593 if (retval)
27595 /* We should add bounds as a destination register in case
27596 a pointer with bounds may be returned. */
27597 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27599 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27600 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27601 if (GET_CODE (retval) == PARALLEL)
27603 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27604 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27605 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27606 retval = chkp_join_splitted_slot (retval, par);
27608 else
27610 retval = gen_rtx_PARALLEL (VOIDmode,
27611 gen_rtvec (3, retval, b0, b1));
27612 chkp_put_regs_to_expr_list (retval);
27616 call = gen_rtx_SET (retval, call);
27618 vec[vec_len++] = call;
27620 if (pop)
27622 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27623 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27624 vec[vec_len++] = pop;
27627 if (cfun->machine->no_caller_saved_registers
27628 && (!fndecl
27629 || (!TREE_THIS_VOLATILE (fndecl)
27630 && !lookup_attribute ("no_caller_saved_registers",
27631 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
27633 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
27634 bool is_64bit_ms_abi = (TARGET_64BIT
27635 && ix86_function_abi (fndecl) == MS_ABI);
27636 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
27638 /* If there are no caller-saved registers, add all registers
27639 that are clobbered by the call which returns. */
27640 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27641 if (!fixed_regs[i]
27642 && (ix86_call_used_regs[i] == 1
27643 || (ix86_call_used_regs[i] & c_mask))
27644 && !STACK_REGNO_P (i)
27645 && !MMX_REGNO_P (i))
27646 clobber_reg (&use,
27647 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
27649 else if (TARGET_64BIT_MS_ABI
27650 && (!callarg2 || INTVAL (callarg2) != -2))
27652 unsigned i;
27654 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
27656 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27657 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27659 clobber_reg (&use, gen_rtx_REG (mode, regno));
27662 /* Set here, but it may get cleared later. */
27663 if (TARGET_CALL_MS2SYSV_XLOGUES)
27665 if (!TARGET_SSE)
27668 /* Don't break hot-patched functions. */
27669 else if (ix86_function_ms_hook_prologue (current_function_decl))
27672 /* TODO: Cases not yet examined. */
27673 else if (flag_split_stack)
27674 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
27676 else
27678 gcc_assert (!reload_completed);
27679 cfun->machine->call_ms2sysv = true;
27684 if (vec_len > 1)
27685 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27686 call = emit_call_insn (call);
27687 if (use)
27688 CALL_INSN_FUNCTION_USAGE (call) = use;
27690 return call;
27693 /* Return true if the function being called was marked with attribute
27694 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
27695 to handle the non-PIC case in the backend because there is no easy
27696 interface for the front-end to force non-PLT calls to use the GOT.
27697 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
27698 to call the function marked "noplt" indirectly. */
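/* For example, on x86-64 with -fno-plt and without -fpic, a call to an
   external function foo is emitted as an indirect call through the GOT,
     call *foo@GOTPCREL(%rip)
   instead of a direct "call foo" that the linker would route through
   the PLT.  */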
27700 static bool
27701 ix86_nopic_noplt_attribute_p (rtx call_op)
27703 if (flag_pic || ix86_cmodel == CM_LARGE
27704 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
27705 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27706 || SYMBOL_REF_LOCAL_P (call_op))
27707 return false;
27709 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27711 if (!flag_plt
27712 || (symbol_decl != NULL_TREE
27713 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27714 return true;
27716 return false;
27719 /* Output the assembly for a call instruction. */
27721 const char *
27722 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27724 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27725 bool seh_nop_p = false;
27726 const char *xasm;
27728 if (SIBLING_CALL_P (insn))
27730 if (direct_p)
27732 if (ix86_nopic_noplt_attribute_p (call_op))
27734 if (TARGET_64BIT)
27735 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27736 else
27737 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
27739 else
27740 xasm = "%!jmp\t%P0";
27742 /* SEH epilogue detection requires the indirect branch case
27743 to include REX.W. */
27744 else if (TARGET_SEH)
27745 xasm = "%!rex.W jmp\t%A0";
27746 else
27747 xasm = "%!jmp\t%A0";
27749 output_asm_insn (xasm, &call_op);
27750 return "";
27753 /* SEH unwinding can require an extra nop to be emitted in several
27754 circumstances. Determine if we have one of those. */
27755 if (TARGET_SEH)
27757 rtx_insn *i;
27759 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27761 /* If we get to another real insn, we don't need the nop. */
27762 if (INSN_P (i))
27763 break;
27765 /* If we get to the epilogue note, prevent a catch region from
27766 being adjacent to the standard epilogue sequence. If non-
27767 call-exceptions, we'll have done this during epilogue emission. */
27768 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27769 && !flag_non_call_exceptions
27770 && !can_throw_internal (insn))
27772 seh_nop_p = true;
27773 break;
27777 /* If we didn't find a real insn following the call, prevent the
27778 unwinder from looking into the next function. */
27779 if (i == NULL)
27780 seh_nop_p = true;
27783 if (direct_p)
27785 if (ix86_nopic_noplt_attribute_p (call_op))
27787 if (TARGET_64BIT)
27788 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27789 else
27790 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
27792 else
27793 xasm = "%!call\t%P0";
27795 else
27796 xasm = "%!call\t%A0";
27798 output_asm_insn (xasm, &call_op);
27800 if (seh_nop_p)
27801 return "nop";
27803 return "";
27806 /* Clear stack slot assignments remembered from previous functions.
27807 This is called from INIT_EXPANDERS once before RTL is emitted for each
27808 function. */
27810 static struct machine_function *
27811 ix86_init_machine_status (void)
27813 struct machine_function *f;
27815 f = ggc_cleared_alloc<machine_function> ();
27816 f->call_abi = ix86_abi;
27818 return f;
27821 /* Return a MEM corresponding to a stack slot with mode MODE.
27822 Allocate a new slot if necessary.
27824 The RTL for a function can have several slots available: N is
27825 which slot to use. */
27828 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27830 struct stack_local_entry *s;
27832 gcc_assert (n < MAX_386_STACK_LOCALS);
27834 for (s = ix86_stack_locals; s; s = s->next)
27835 if (s->mode == mode && s->n == n)
27836 return validize_mem (copy_rtx (s->rtl));
27838 s = ggc_alloc<stack_local_entry> ();
27839 s->n = n;
27840 s->mode = mode;
27841 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
27843 s->next = ix86_stack_locals;
27844 ix86_stack_locals = s;
27845 return validize_mem (copy_rtx (s->rtl));
27848 static void
27849 ix86_instantiate_decls (void)
27851 struct stack_local_entry *s;
27853 for (s = ix86_stack_locals; s; s = s->next)
27854 if (s->rtl != NULL_RTX)
27855 instantiate_decl_rtl (s->rtl);
27858 /* Return the number used for encoding REG, in the range 0..7. */
27860 static int
27861 reg_encoded_number (rtx reg)
27863 unsigned regno = REGNO (reg);
27864 switch (regno)
27866 case AX_REG:
27867 return 0;
27868 case CX_REG:
27869 return 1;
27870 case DX_REG:
27871 return 2;
27872 case BX_REG:
27873 return 3;
27874 case SP_REG:
27875 return 4;
27876 case BP_REG:
27877 return 5;
27878 case SI_REG:
27879 return 6;
27880 case DI_REG:
27881 return 7;
27882 default:
27883 break;
27885 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27886 return regno - FIRST_STACK_REG;
27887 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27888 return regno - FIRST_SSE_REG;
27889 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27890 return regno - FIRST_MMX_REG;
27891 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27892 return regno - FIRST_REX_SSE_REG;
27893 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27894 return regno - FIRST_REX_INT_REG;
27895 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27896 return regno - FIRST_MASK_REG;
27897 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27898 return regno - FIRST_BND_REG;
27899 return -1;
27902 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
27903 in its encoding if it could be relevant for ROP mitigation, otherwise
27904 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27905 used for calculating it into them. */
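/* For example, for a hypothetical two-operand insn of class OP01 whose
   operands are (%eax, %ecx), the encoded numbers are 0 and 1, and the
   returned modr/m byte is 0xc0 + (1 << 3) + 0 = 0xc8.  */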
27907 static int
27908 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27909 int *popno0 = 0, int *popno1 = 0)
27911 if (asm_noperands (PATTERN (insn)) >= 0)
27912 return -1;
27913 int has_modrm = get_attr_modrm (insn);
27914 if (!has_modrm)
27915 return -1;
27916 enum attr_modrm_class cls = get_attr_modrm_class (insn);
27917 rtx op0, op1;
27918 switch (cls)
27920 case MODRM_CLASS_OP02:
27921 gcc_assert (noperands >= 3);
27922 if (popno0)
27924 *popno0 = 0;
27925 *popno1 = 2;
27927 op0 = operands[0];
27928 op1 = operands[2];
27929 break;
27930 case MODRM_CLASS_OP01:
27931 gcc_assert (noperands >= 2);
27932 if (popno0)
27934 *popno0 = 0;
27935 *popno1 = 1;
27937 op0 = operands[0];
27938 op1 = operands[1];
27939 break;
27940 default:
27941 return -1;
27943 if (REG_P (op0) && REG_P (op1))
27945 int enc0 = reg_encoded_number (op0);
27946 int enc1 = reg_encoded_number (op1);
27947 return 0xc0 + (enc1 << 3) + enc0;
27949 return -1;
27952 /* Check whether x86 address PARTS is a pc-relative address. */
27954 bool
27955 ix86_rip_relative_addr_p (struct ix86_address *parts)
27957 rtx base, index, disp;
27959 base = parts->base;
27960 index = parts->index;
27961 disp = parts->disp;
27963 if (disp && !base && !index)
27965 if (TARGET_64BIT)
27967 rtx symbol = disp;
27969 if (GET_CODE (disp) == CONST)
27970 symbol = XEXP (disp, 0);
27971 if (GET_CODE (symbol) == PLUS
27972 && CONST_INT_P (XEXP (symbol, 1)))
27973 symbol = XEXP (symbol, 0);
27975 if (GET_CODE (symbol) == LABEL_REF
27976 || (GET_CODE (symbol) == SYMBOL_REF
27977 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27978 || (GET_CODE (symbol) == UNSPEC
27979 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27980 || XINT (symbol, 1) == UNSPEC_PCREL
27981 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27982 return true;
27985 return false;
27988 /* Calculate the length of the memory address in the instruction encoding.
27989 This includes the addr32 prefix but not the one-byte modrm, opcode,
27990 or other prefixes.  We never generate the addr32 prefix for the LEA insn. */
27993 memory_address_length (rtx addr, bool lea)
27995 struct ix86_address parts;
27996 rtx base, index, disp;
27997 int len;
27998 int ok;
28000 if (GET_CODE (addr) == PRE_DEC
28001 || GET_CODE (addr) == POST_INC
28002 || GET_CODE (addr) == PRE_MODIFY
28003 || GET_CODE (addr) == POST_MODIFY)
28004 return 0;
28006 ok = ix86_decompose_address (addr, &parts);
28007 gcc_assert (ok);
28009 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
28011 /* If this is not LEA instruction, add the length of addr32 prefix. */
28012 if (TARGET_64BIT && !lea
28013 && (SImode_address_operand (addr, VOIDmode)
28014 || (parts.base && GET_MODE (parts.base) == SImode)
28015 || (parts.index && GET_MODE (parts.index) == SImode)))
28016 len++;
28018 base = parts.base;
28019 index = parts.index;
28020 disp = parts.disp;
28022 if (base && SUBREG_P (base))
28023 base = SUBREG_REG (base);
28024 if (index && SUBREG_P (index))
28025 index = SUBREG_REG (index);
28027 gcc_assert (base == NULL_RTX || REG_P (base));
28028 gcc_assert (index == NULL_RTX || REG_P (index));
28030 /* Rule of thumb:
28031 - esp as the base always wants an index,
28032 - ebp as the base always wants a displacement,
28033 - r12 as the base always wants an index,
28034 - r13 as the base always wants a displacement. */
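/* For illustration, the extra bytes counted here (beyond the opcode and
   the one-byte modrm) are:
     (%eax)          -> 0
     (%esp)          -> 1   (SIB byte)
     (%ebp)          -> 1   (disp8 of zero)
     16(%eax)        -> 1   (disp8)
     16(%esp)        -> 2   (SIB byte + disp8)
     (%eax,%ebx,4)   -> 1   (SIB byte)  */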
28036 /* Register Indirect. */
28037 if (base && !index && !disp)
28039 /* esp (for its index) and ebp (for its displacement) need
28040 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
28041 code. */
28042 if (base == arg_pointer_rtx
28043 || base == frame_pointer_rtx
28044 || REGNO (base) == SP_REG
28045 || REGNO (base) == BP_REG
28046 || REGNO (base) == R12_REG
28047 || REGNO (base) == R13_REG)
28048 len++;
28051 /* Direct Addressing.  In 64-bit mode, mod 00 r/m 5
28052 is not disp32 but disp32(%rip), so for disp32 a
28053 SIB byte is needed, unless print_operand_address
28054 optimizes it into disp32(%rip) or (%rip) is implied
28055 by an UNSPEC. */
28056 else if (disp && !base && !index)
28058 len += 4;
28059 if (!ix86_rip_relative_addr_p (&parts))
28060 len++;
28062 else
28064 /* Find the length of the displacement constant. */
28065 if (disp)
28067 if (base && satisfies_constraint_K (disp))
28068 len += 1;
28069 else
28070 len += 4;
28072 /* ebp always wants a displacement. Similarly r13. */
28073 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
28074 len++;
28076 /* An index requires the two-byte modrm form.... */
28077 if (index
28078 /* ...like esp (or r12), which always wants an index. */
28079 || base == arg_pointer_rtx
28080 || base == frame_pointer_rtx
28081 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
28082 len++;
28085 return len;
28088 /* Compute default value for "length_immediate" attribute. When SHORTFORM
28089 is set, expect that the insn has an 8-bit immediate alternative. */
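/* For illustration: with SHORTFORM set, "addl $42, %eax" counts 1 byte for
   the immediate, while "addl $1000, %eax" counts 4; DImode immediates are
   always counted as 4 since they are encoded as sign-extended 32-bit
   values.  */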
28091 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
28093 int len = 0;
28094 int i;
28095 extract_insn_cached (insn);
28096 for (i = recog_data.n_operands - 1; i >= 0; --i)
28097 if (CONSTANT_P (recog_data.operand[i]))
28099 enum attr_mode mode = get_attr_mode (insn);
28101 gcc_assert (!len);
28102 if (shortform && CONST_INT_P (recog_data.operand[i]))
28104 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
28105 switch (mode)
28107 case MODE_QI:
28108 len = 1;
28109 continue;
28110 case MODE_HI:
28111 ival = trunc_int_for_mode (ival, HImode);
28112 break;
28113 case MODE_SI:
28114 ival = trunc_int_for_mode (ival, SImode);
28115 break;
28116 default:
28117 break;
28119 if (IN_RANGE (ival, -128, 127))
28121 len = 1;
28122 continue;
28125 switch (mode)
28127 case MODE_QI:
28128 len = 1;
28129 break;
28130 case MODE_HI:
28131 len = 2;
28132 break;
28133 case MODE_SI:
28134 len = 4;
28135 break;
28136 /* Immediates for DImode instructions are encoded
28137 as 32-bit sign-extended values. */
28138 case MODE_DI:
28139 len = 4;
28140 break;
28141 default:
28142 fatal_insn ("unknown insn mode", insn);
28145 return len;
28148 /* Compute default value for "length_address" attribute. */
28150 ix86_attr_length_address_default (rtx_insn *insn)
28152 int i;
28154 if (get_attr_type (insn) == TYPE_LEA)
28156 rtx set = PATTERN (insn), addr;
28158 if (GET_CODE (set) == PARALLEL)
28159 set = XVECEXP (set, 0, 0);
28161 gcc_assert (GET_CODE (set) == SET);
28163 addr = SET_SRC (set);
28165 return memory_address_length (addr, true);
28168 extract_insn_cached (insn);
28169 for (i = recog_data.n_operands - 1; i >= 0; --i)
28171 rtx op = recog_data.operand[i];
28172 if (MEM_P (op))
28174 constrain_operands_cached (insn, reload_completed);
28175 if (which_alternative != -1)
28177 const char *constraints = recog_data.constraints[i];
28178 int alt = which_alternative;
28180 while (*constraints == '=' || *constraints == '+')
28181 constraints++;
28182 while (alt-- > 0)
28183 while (*constraints++ != ',')
28185 /* Skip ignored operands. */
28186 if (*constraints == 'X')
28187 continue;
28190 int len = memory_address_length (XEXP (op, 0), false);
28192 /* Account for segment prefix for non-default addr spaces. */
28193 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
28194 len++;
28196 return len;
28199 return 0;
28202 /* Compute default value for "length_vex" attribute. It includes
28203 2 or 3 byte VEX prefix and 1 opcode byte. */
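/* For illustration: an insn such as vaddps %xmm1, %xmm2, %xmm0 can use the
   2-byte VEX prefix, so the value is 2 + 1 = 3; an insn that needs VEX.W,
   a non-0f opcode map, a DImode general register operand, or extended
   registers (%r8-%r15) in a memory operand needs the 3-byte prefix,
   so 3 + 1 = 4.  */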
28206 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
28207 bool has_vex_w)
28209 int i;
28211 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
28212 requires the 3-byte VEX prefix. */
28213 if (!has_0f_opcode || has_vex_w)
28214 return 3 + 1;
28216 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
28217 if (!TARGET_64BIT)
28218 return 2 + 1;
28220 extract_insn_cached (insn);
28222 for (i = recog_data.n_operands - 1; i >= 0; --i)
28223 if (REG_P (recog_data.operand[i]))
28225 /* REX.W bit uses 3 byte VEX prefix. */
28226 if (GET_MODE (recog_data.operand[i]) == DImode
28227 && GENERAL_REG_P (recog_data.operand[i]))
28228 return 3 + 1;
28230 else
28232 /* REX.X or REX.B bits use 3 byte VEX prefix. */
28233 if (MEM_P (recog_data.operand[i])
28234 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
28235 return 3 + 1;
28238 return 2 + 1;
28242 static bool
28243 ix86_class_likely_spilled_p (reg_class_t);
28245 /* Return true if the lhs of INSN is a HW function argument register; set
28246 *IS_SPILLED to true if it is a likely-spilled HW register. */
28247 static bool
28248 insn_is_function_arg (rtx insn, bool* is_spilled)
28250 rtx dst;
28252 if (!NONDEBUG_INSN_P (insn))
28253 return false;
28254 /* Call instructions are not movable; ignore them. */
28255 if (CALL_P (insn))
28256 return false;
28257 insn = PATTERN (insn);
28258 if (GET_CODE (insn) == PARALLEL)
28259 insn = XVECEXP (insn, 0, 0);
28260 if (GET_CODE (insn) != SET)
28261 return false;
28262 dst = SET_DEST (insn);
28263 if (REG_P (dst) && HARD_REGISTER_P (dst)
28264 && ix86_function_arg_regno_p (REGNO (dst)))
28266 /* Is it likely spilled HW register? */
28267 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28268 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28269 *is_spilled = true;
28270 return true;
28272 return false;
28275 /* Add output dependencies for a chain of adjacent function arguments, but
28276 only if there is a move to a likely-spilled HW register.  Return the first
28277 argument if at least one dependence was added, or NULL otherwise. */
28278 static rtx_insn *
28279 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28281 rtx_insn *insn;
28282 rtx_insn *last = call;
28283 rtx_insn *first_arg = NULL;
28284 bool is_spilled = false;
28286 head = PREV_INSN (head);
28288 /* Find the argument-passing instruction nearest to the call. */
28289 while (true)
28291 last = PREV_INSN (last);
28292 if (last == head)
28293 return NULL;
28294 if (!NONDEBUG_INSN_P (last))
28295 continue;
28296 if (insn_is_function_arg (last, &is_spilled))
28297 break;
28298 return NULL;
28301 first_arg = last;
28302 while (true)
28304 insn = PREV_INSN (last);
28305 if (!INSN_P (insn))
28306 break;
28307 if (insn == head)
28308 break;
28309 if (!NONDEBUG_INSN_P (insn))
28311 last = insn;
28312 continue;
28314 if (insn_is_function_arg (insn, &is_spilled))
28316 /* Add an output dependence between two function arguments if the chain
28317 of output arguments contains likely-spilled HW registers. */
28318 if (is_spilled)
28319 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28320 first_arg = last = insn;
28322 else
28323 break;
28325 if (!is_spilled)
28326 return NULL;
28327 return first_arg;
28330 /* Add output or anti dependency from insn to first_arg to restrict its code
28331 motion. */
28332 static void
28333 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28335 rtx set;
28336 rtx tmp;
28338 /* Add anti dependencies for bounds stores. */
28339 if (INSN_P (insn)
28340 && GET_CODE (PATTERN (insn)) == PARALLEL
28341 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28342 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28344 add_dependence (first_arg, insn, REG_DEP_ANTI);
28345 return;
28348 set = single_set (insn);
28349 if (!set)
28350 return;
28351 tmp = SET_DEST (set);
28352 if (REG_P (tmp))
28354 /* Add output dependency to the first function argument. */
28355 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28356 return;
28358 /* Add anti dependency. */
28359 add_dependence (first_arg, insn, REG_DEP_ANTI);
28362 /* Avoid cross-block motion of a function argument by adding a dependency
28363 from the first non-jump instruction in BB. */
28364 static void
28365 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28367 rtx_insn *insn = BB_END (bb);
28369 while (insn)
28371 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28373 rtx set = single_set (insn);
28374 if (set)
28376 avoid_func_arg_motion (arg, insn);
28377 return;
28380 if (insn == BB_HEAD (bb))
28381 return;
28382 insn = PREV_INSN (insn);
28386 /* Hook for pre-reload schedule - avoid motion of function arguments
28387 passed in likely spilled HW registers. */
28388 static void
28389 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28391 rtx_insn *insn;
28392 rtx_insn *first_arg = NULL;
28393 if (reload_completed)
28394 return;
28395 while (head != tail && DEBUG_INSN_P (head))
28396 head = NEXT_INSN (head);
28397 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28398 if (INSN_P (insn) && CALL_P (insn))
28400 first_arg = add_parameter_dependencies (insn, head);
28401 if (first_arg)
28403 /* Add a dependee for the first argument to predecessors, but only
28404 if the region contains more than one block. */
28405 basic_block bb = BLOCK_FOR_INSN (insn);
28406 int rgn = CONTAINING_RGN (bb->index);
28407 int nr_blks = RGN_NR_BLOCKS (rgn);
28408 /* Skip trivial regions and region head blocks that can have
28409 predecessors outside of region. */
28410 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28412 edge e;
28413 edge_iterator ei;
28415 /* Regions are SCCs with the exception of selective
28416 scheduling with pipelining of outer blocks enabled.
28417 So also check that immediate predecessors of a non-head
28418 block are in the same region. */
28419 FOR_EACH_EDGE (e, ei, bb->preds)
28421 /* Avoid creating loop-carried dependencies by using the
28422 topological ordering in the region. */
28423 if (rgn == CONTAINING_RGN (e->src->index)
28424 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28425 add_dependee_for_func_arg (first_arg, e->src);
28428 insn = first_arg;
28429 if (insn == head)
28430 break;
28433 else if (first_arg)
28434 avoid_func_arg_motion (first_arg, insn);
28437 /* Hook for the pre-reload schedule - set the priority of moves from
28438 likely-spilled HW registers to the maximum, to schedule them as soon as
28439 possible.  These are moves from function argument registers at the top of
28440 the function entry and moves from function return value registers after a call. */
28441 static int
28442 ix86_adjust_priority (rtx_insn *insn, int priority)
28444 rtx set;
28446 if (reload_completed)
28447 return priority;
28449 if (!NONDEBUG_INSN_P (insn))
28450 return priority;
28452 set = single_set (insn);
28453 if (set)
28455 rtx tmp = SET_SRC (set);
28456 if (REG_P (tmp)
28457 && HARD_REGISTER_P (tmp)
28458 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28459 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28460 return current_sched_info->sched_max_insns_priority;
28463 return priority;
28466 /* Prepare for scheduling pass. */
28467 static void
28468 ix86_sched_init_global (FILE *, int, int)
28470 /* Install scheduling hooks for current CPU. Some of these hooks are used
28471 in time-critical parts of the scheduler, so we only set them up when
28472 they are actually used. */
28473 switch (ix86_tune)
28475 case PROCESSOR_CORE2:
28476 case PROCESSOR_NEHALEM:
28477 case PROCESSOR_SANDYBRIDGE:
28478 case PROCESSOR_HASWELL:
28479 /* Do not perform multipass scheduling for pre-reload schedule
28480 to save compile time. */
28481 if (reload_completed)
28483 ix86_core2i7_init_hooks ();
28484 break;
28486 /* Fall through. */
28487 default:
28488 targetm.sched.dfa_post_advance_cycle = NULL;
28489 targetm.sched.first_cycle_multipass_init = NULL;
28490 targetm.sched.first_cycle_multipass_begin = NULL;
28491 targetm.sched.first_cycle_multipass_issue = NULL;
28492 targetm.sched.first_cycle_multipass_backtrack = NULL;
28493 targetm.sched.first_cycle_multipass_end = NULL;
28494 targetm.sched.first_cycle_multipass_fini = NULL;
28495 break;
28500 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28502 static HOST_WIDE_INT
28503 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28505 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28506 || TREE_CODE (exp) == INTEGER_CST)
28508 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
28509 return 64;
28510 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
28511 return 128;
28513 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28514 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28515 return BITS_PER_WORD;
28517 return align;
28520 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28521 the data type, and ALIGN is the alignment that the object would
28522 ordinarily have. */
28524 static int
28525 iamcu_alignment (tree type, int align)
28527 machine_mode mode;
28529 if (align < 32 || TYPE_USER_ALIGN (type))
28530 return align;
28532 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
28533 bytes. */
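/* For example, a "double" or "long long" (normally 64-bit aligned) is
   given only 32-bit alignment here; types whose alignment is already
   32 bits or less, and user-aligned types, were returned unchanged above.  */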
28534 mode = TYPE_MODE (strip_array_types (type));
28535 switch (GET_MODE_CLASS (mode))
28537 case MODE_INT:
28538 case MODE_COMPLEX_INT:
28539 case MODE_COMPLEX_FLOAT:
28540 case MODE_FLOAT:
28541 case MODE_DECIMAL_FLOAT:
28542 return 32;
28543 default:
28544 return align;
28548 /* Compute the alignment for a static variable.
28549 TYPE is the data type, and ALIGN is the alignment that
28550 the object would ordinarily have. The value of this function is used
28551 instead of that alignment to align the object. */
28554 ix86_data_alignment (tree type, int align, bool opt)
28556 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
28557 for symbols from other compilation units or symbols that don't need
28558 to bind locally. In order to preserve some ABI compatibility with
28559 those compilers, ensure we don't decrease alignment from what we
28560 used to assume. */
28562 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
28564 /* A data structure equal to or greater than the size of a cache line
28565 (64 bytes in the Pentium 4 and other recent Intel processors, including
28566 processors based on the Intel Core microarchitecture) should be aligned
28567 so that its base address is a multiple of the cache line size. */
28569 int max_align
28570 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
28572 if (max_align < BITS_PER_WORD)
28573 max_align = BITS_PER_WORD;
28575 switch (ix86_align_data_type)
28577 case ix86_align_data_type_abi: opt = false; break;
28578 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
28579 case ix86_align_data_type_cacheline: break;
28582 if (TARGET_IAMCU)
28583 align = iamcu_alignment (type, align);
28585 if (opt
28586 && AGGREGATE_TYPE_P (type)
28587 && TYPE_SIZE (type)
28588 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
28590 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
28591 && align < max_align_compat)
28592 align = max_align_compat;
28593 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
28594 && align < max_align)
28595 align = max_align;
28598 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
28599 to a 16-byte boundary. */
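/* For example, a static "char buf[24]" gets 128-bit (16-byte) alignment
   on x86-64 under this rule.  */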
28600 if (TARGET_64BIT)
28602 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
28603 && TYPE_SIZE (type)
28604 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28605 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
28606 && align < 128)
28607 return 128;
28610 if (!opt)
28611 return align;
28613 if (TREE_CODE (type) == ARRAY_TYPE)
28615 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28616 return 64;
28617 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28618 return 128;
28620 else if (TREE_CODE (type) == COMPLEX_TYPE)
28623 if (TYPE_MODE (type) == DCmode && align < 64)
28624 return 64;
28625 if ((TYPE_MODE (type) == XCmode
28626 || TYPE_MODE (type) == TCmode) && align < 128)
28627 return 128;
28629 else if ((TREE_CODE (type) == RECORD_TYPE
28630 || TREE_CODE (type) == UNION_TYPE
28631 || TREE_CODE (type) == QUAL_UNION_TYPE)
28632 && TYPE_FIELDS (type))
28634 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28635 return 64;
28636 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28637 return 128;
28639 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28640 || TREE_CODE (type) == INTEGER_TYPE)
28642 if (TYPE_MODE (type) == DFmode && align < 64)
28643 return 64;
28644 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28645 return 128;
28648 return align;
28651 /* Compute the alignment for a local variable or a stack slot. EXP is
28652 the data type or decl itself, MODE is the widest mode available and
28653 ALIGN is the alignment that the object would ordinarily have. The
28654 value of this macro is used instead of that alignment to align the
28655 object. */
28657 unsigned int
28658 ix86_local_alignment (tree exp, machine_mode mode,
28659 unsigned int align)
28661 tree type, decl;
28663 if (exp && DECL_P (exp))
28665 type = TREE_TYPE (exp);
28666 decl = exp;
28668 else
28670 type = exp;
28671 decl = NULL;
28674 /* Don't do dynamic stack realignment for long long objects with
28675 -mpreferred-stack-boundary=2. */
28676 if (!TARGET_64BIT
28677 && align == 64
28678 && ix86_preferred_stack_boundary < 64
28679 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
28680 && (!type || !TYPE_USER_ALIGN (type))
28681 && (!decl || !DECL_USER_ALIGN (decl)))
28682 align = 32;
28684 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
28685 register in MODE.  We will return the largest alignment of XF
28686 and DF. */
28687 if (!type)
28689 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
28690 align = GET_MODE_ALIGNMENT (DFmode);
28691 return align;
28694 /* Don't increase alignment for Intel MCU psABI. */
28695 if (TARGET_IAMCU)
28696 return align;
28698 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
28699 to a 16-byte boundary.  The exact wording is:
28701 An array uses the same alignment as its elements, except that a local or
28702 global array variable of length at least 16 bytes or
28703 a C99 variable-length array variable always has alignment of at least 16 bytes.
28705 This was added to allow use of aligned SSE instructions on arrays.  This
28706 rule is meant for static storage (where the compiler cannot do the analysis
28707 by itself).  We follow it for automatic variables only when convenient:
28708 we fully control everything in the function being compiled, and functions
28709 from other units cannot rely on the alignment.
28711 Exclude the va_list type.  It is the common case of a local array where
28712 we cannot benefit from the alignment.
28714 TODO: Probably one should optimize for size only when the variable does not escape. */
28715 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
28716 && TARGET_SSE)
28718 if (AGGREGATE_TYPE_P (type)
28719 && (va_list_type_node == NULL_TREE
28720 || (TYPE_MAIN_VARIANT (type)
28721 != TYPE_MAIN_VARIANT (va_list_type_node)))
28722 && TYPE_SIZE (type)
28723 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28724 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
28725 && align < 128)
28726 return 128;
28728 if (TREE_CODE (type) == ARRAY_TYPE)
28730 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28731 return 64;
28732 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28733 return 128;
28735 else if (TREE_CODE (type) == COMPLEX_TYPE)
28737 if (TYPE_MODE (type) == DCmode && align < 64)
28738 return 64;
28739 if ((TYPE_MODE (type) == XCmode
28740 || TYPE_MODE (type) == TCmode) && align < 128)
28741 return 128;
28743 else if ((TREE_CODE (type) == RECORD_TYPE
28744 || TREE_CODE (type) == UNION_TYPE
28745 || TREE_CODE (type) == QUAL_UNION_TYPE)
28746 && TYPE_FIELDS (type))
28748 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28749 return 64;
28750 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28751 return 128;
28753 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28754 || TREE_CODE (type) == INTEGER_TYPE)
28757 if (TYPE_MODE (type) == DFmode && align < 64)
28758 return 64;
28759 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28760 return 128;
28762 return align;
28765 /* Compute the minimum required alignment for dynamic stack realignment
28766 purposes for a local variable, parameter or a stack slot. EXP is
28767 the data type or decl itself, MODE is its mode and ALIGN is the
28768 alignment that the object would ordinarily have. */
28770 unsigned int
28771 ix86_minimum_alignment (tree exp, machine_mode mode,
28772 unsigned int align)
28774 tree type, decl;
28776 if (exp && DECL_P (exp))
28778 type = TREE_TYPE (exp);
28779 decl = exp;
28781 else
28783 type = exp;
28784 decl = NULL;
28787 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
28788 return align;
28790 /* Don't do dynamic stack realignment for long long objects with
28791 -mpreferred-stack-boundary=2. */
28792 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
28793 && (!type || !TYPE_USER_ALIGN (type))
28794 && (!decl || !DECL_USER_ALIGN (decl)))
28796 gcc_checking_assert (!TARGET_STV);
28797 return 32;
28800 return align;
28803 /* Find a location for the static chain incoming to a nested function.
28804 This is a register, unless all free registers are used by arguments. */
28806 static rtx
28807 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
28809 unsigned regno;
28811 /* While this function won't be called by the middle-end when a static
28812 chain isn't needed, it's also used throughout the backend so it's
28813 easiest to keep this check centralized. */
28814 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
28815 return NULL;
28817 if (TARGET_64BIT)
28819 /* We always use R10 in 64-bit mode. */
28820 regno = R10_REG;
28822 else
28824 const_tree fntype, fndecl;
28825 unsigned int ccvt;
28827 /* By default in 32-bit mode we use ECX to pass the static chain. */
28828 regno = CX_REG;
28830 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
28832 fntype = TREE_TYPE (fndecl_or_type);
28833 fndecl = fndecl_or_type;
28835 else
28837 fntype = fndecl_or_type;
28838 fndecl = NULL;
28841 ccvt = ix86_get_callcvt (fntype);
28842 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
28844 /* Fastcall functions use ecx/edx for arguments, which leaves
28845 us with EAX for the static chain.
28846 Thiscall functions use ecx for arguments, which also
28847 leaves us with EAX for the static chain. */
28848 regno = AX_REG;
28850 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
28852 /* Thiscall functions use ecx for arguments, which leaves
28853 us with EAX and EDX for the static chain.
28854 For ABI compatibility we use EAX. */
28855 regno = AX_REG;
28857 else if (ix86_function_regparm (fntype, fndecl) == 3)
28859 /* For regparm 3, we have no free call-clobbered registers in
28860 which to store the static chain. In order to implement this,
28861 we have the trampoline push the static chain to the stack.
28862 However, we can't push a value below the return address when
28863 we call the nested function directly, so we have to use an
28864 alternate entry point. For this we use ESI, and have the
28865 alternate entry point push ESI, so that things appear the
28866 same once we're executing the nested function. */
28867 if (incoming_p)
28869 if (fndecl == current_function_decl
28870 && !ix86_static_chain_on_stack)
28872 gcc_assert (!reload_completed);
28873 ix86_static_chain_on_stack = true;
28875 return gen_frame_mem (SImode,
28876 plus_constant (Pmode,
28877 arg_pointer_rtx, -8));
28879 regno = SI_REG;
28883 return gen_rtx_REG (Pmode, regno);
28886 /* Emit RTL insns to initialize the variable parts of a trampoline.
28887 FNDECL is the decl of the target address; M_TRAMP is a MEM for
28888 the trampoline, and CHAIN_VALUE is an RTX for the static chain
28889 to be passed to the target function. */
28891 static void
28892 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
28894 rtx mem, fnaddr;
28895 int opcode;
28896 int offset = 0;
28898 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28900 if (TARGET_64BIT)
28902 int size;
28904 /* Load the function address into r11.  Try to load the address using
28905 the shorter movl instead of movabs.  We may want to support
28906 movq for kernel mode, but the kernel does not use trampolines at
28907 the moment.  FNADDR is a 32-bit address and may not be in
28908 DImode when ptr_mode == SImode.  Always use movl in this
28909 case. */
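/* The HImode constants below are stored little-endian, so 0xbb41
   becomes the bytes 41 bb (REX.B; movl imm32 into r11d) and 0xbb49
   becomes 49 bb (REX.W+B; movabs imm64 into r11).  */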
28910 if (ptr_mode == SImode
28911 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
28913 fnaddr = copy_addr_to_reg (fnaddr);
28915 mem = adjust_address (m_tramp, HImode, offset);
28916 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
28918 mem = adjust_address (m_tramp, SImode, offset + 2);
28919 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
28920 offset += 6;
28922 else
28924 mem = adjust_address (m_tramp, HImode, offset);
28925 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
28927 mem = adjust_address (m_tramp, DImode, offset + 2);
28928 emit_move_insn (mem, fnaddr);
28929 offset += 10;
28932 /* Load static chain using movabs to r10. Use the shorter movl
28933 instead of movabs when ptr_mode == SImode. */
28934 if (ptr_mode == SImode)
28936 opcode = 0xba41;
28937 size = 6;
28939 else
28941 opcode = 0xba49;
28942 size = 10;
28945 mem = adjust_address (m_tramp, HImode, offset);
28946 emit_move_insn (mem, gen_int_mode (opcode, HImode));
28948 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
28949 emit_move_insn (mem, chain_value);
28950 offset += size;
28952 /* Jump to r11; the last (unused) byte is a nop, only there to
28953 pad the write out to a single 32-bit store. */
28954 mem = adjust_address (m_tramp, SImode, offset);
28955 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
28956 offset += 4;
28958 else
28960 rtx disp, chain;
28962 /* Depending on the static chain location, either load a register
28963 with a constant, or push the constant to the stack. All of the
28964 instructions are the same size. */
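/* The resulting 10-byte trampoline is, in bytes:
     b8/b9/68  <imm32 chain_value>    mov into %eax/%ecx, or push
     e9        <rel32>                jmp to the target function
   (in the push case the jmp displacement is adjusted by one byte below
   so that the target's initial 1-byte push is skipped).  */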
28965 chain = ix86_static_chain (fndecl, true);
28966 if (REG_P (chain))
28968 switch (REGNO (chain))
28970 case AX_REG:
28971 opcode = 0xb8; break;
28972 case CX_REG:
28973 opcode = 0xb9; break;
28974 default:
28975 gcc_unreachable ();
28978 else
28979 opcode = 0x68;
28981 mem = adjust_address (m_tramp, QImode, offset);
28982 emit_move_insn (mem, gen_int_mode (opcode, QImode));
28984 mem = adjust_address (m_tramp, SImode, offset + 1);
28985 emit_move_insn (mem, chain_value);
28986 offset += 5;
28988 mem = adjust_address (m_tramp, QImode, offset);
28989 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
28991 mem = adjust_address (m_tramp, SImode, offset + 1);
28993 /* Compute the offset from the end of the jmp to the target function.
28994 In the case in which the trampoline stores the static chain on
28995 the stack, we need to skip the first insn, which pushes the
28996 (call-saved) static chain register; this push is 1 byte. */
28997 offset += 5;
28998 disp = expand_binop (SImode, sub_optab, fnaddr,
28999 plus_constant (Pmode, XEXP (m_tramp, 0),
29000 offset - (MEM_P (chain) ? 1 : 0)),
29001 NULL_RTX, 1, OPTAB_DIRECT);
29002 emit_move_insn (mem, disp);
29005 gcc_assert (offset <= TRAMPOLINE_SIZE);
29007 #ifdef HAVE_ENABLE_EXECUTE_STACK
29008 #ifdef CHECK_EXECUTE_STACK_ENABLED
29009 if (CHECK_EXECUTE_STACK_ENABLED)
29010 #endif
29011 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29012 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
29013 #endif
29016 static bool
29017 ix86_allocate_stack_slots_for_args (void)
29019 /* Naked functions should not allocate stack slots for arguments. */
29020 return !ix86_function_naked (current_function_decl);
29023 static bool
29024 ix86_warn_func_return (tree decl)
29026 /* Naked functions are implemented entirely in assembly, including the
29027 return sequence, so suppress warnings about this. */
29028 return !ix86_function_naked (decl);
29031 /* The following file contains several enumerations and data structures
29032 built from the definitions in i386-builtin-types.def. */
29034 #include "i386-builtin-types.inc"
29036 /* Table for the ix86 builtin non-function types. */
29037 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29039 /* Retrieve an element from the above table, building some of
29040 the types lazily. */
29042 static tree
29043 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29045 unsigned int index;
29046 tree type, itype;
29048 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29050 type = ix86_builtin_type_tab[(int) tcode];
29051 if (type != NULL)
29052 return type;
29054 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29055 if (tcode <= IX86_BT_LAST_VECT)
29057 machine_mode mode;
29059 index = tcode - IX86_BT_LAST_PRIM - 1;
29060 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29061 mode = ix86_builtin_type_vect_mode[index];
29063 type = build_vector_type_for_mode (itype, mode);
29065 else
29067 int quals;
29069 index = tcode - IX86_BT_LAST_VECT - 1;
29070 if (tcode <= IX86_BT_LAST_PTR)
29071 quals = TYPE_UNQUALIFIED;
29072 else
29073 quals = TYPE_QUAL_CONST;
29075 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29076 if (quals != TYPE_UNQUALIFIED)
29077 itype = build_qualified_type (itype, quals);
29079 type = build_pointer_type (itype);
29082 ix86_builtin_type_tab[(int) tcode] = type;
29083 return type;
29086 /* Table for the ix86 builtin function types. */
29087 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29089 /* Retrieve an element from the above table, building some of
29090 the types lazily. */
29092 static tree
29093 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29095 tree type;
29097 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29099 type = ix86_builtin_func_type_tab[(int) tcode];
29100 if (type != NULL)
29101 return type;
29103 if (tcode <= IX86_BT_LAST_FUNC)
29105 unsigned start = ix86_builtin_func_start[(int) tcode];
29106 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29107 tree rtype, atype, args = void_list_node;
29108 unsigned i;
29110 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29111 for (i = after - 1; i > start; --i)
29113 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29114 args = tree_cons (NULL, atype, args);
29117 type = build_function_type (rtype, args);
29119 else
29121 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29122 enum ix86_builtin_func_type icode;
29124 icode = ix86_builtin_func_alias_base[index];
29125 type = ix86_get_builtin_func_type (icode);
29128 ix86_builtin_func_type_tab[(int) tcode] = type;
29129 return type;
29133 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
29134 bdesc_* arrays below should come first, then builtins for each bdesc_*
29135 array in ascending order, so that we can use direct array accesses. */
29136 enum ix86_builtins
29138 IX86_BUILTIN_MASKMOVQ,
29139 IX86_BUILTIN_LDMXCSR,
29140 IX86_BUILTIN_STMXCSR,
29141 IX86_BUILTIN_MASKMOVDQU,
29142 IX86_BUILTIN_PSLLDQ128,
29143 IX86_BUILTIN_CLFLUSH,
29144 IX86_BUILTIN_MONITOR,
29145 IX86_BUILTIN_MWAIT,
29146 IX86_BUILTIN_CLZERO,
29147 IX86_BUILTIN_VEC_INIT_V2SI,
29148 IX86_BUILTIN_VEC_INIT_V4HI,
29149 IX86_BUILTIN_VEC_INIT_V8QI,
29150 IX86_BUILTIN_VEC_EXT_V2DF,
29151 IX86_BUILTIN_VEC_EXT_V2DI,
29152 IX86_BUILTIN_VEC_EXT_V4SF,
29153 IX86_BUILTIN_VEC_EXT_V4SI,
29154 IX86_BUILTIN_VEC_EXT_V8HI,
29155 IX86_BUILTIN_VEC_EXT_V2SI,
29156 IX86_BUILTIN_VEC_EXT_V4HI,
29157 IX86_BUILTIN_VEC_EXT_V16QI,
29158 IX86_BUILTIN_VEC_SET_V2DI,
29159 IX86_BUILTIN_VEC_SET_V4SF,
29160 IX86_BUILTIN_VEC_SET_V4SI,
29161 IX86_BUILTIN_VEC_SET_V8HI,
29162 IX86_BUILTIN_VEC_SET_V4HI,
29163 IX86_BUILTIN_VEC_SET_V16QI,
29164 IX86_BUILTIN_GATHERSIV2DF,
29165 IX86_BUILTIN_GATHERSIV4DF,
29166 IX86_BUILTIN_GATHERDIV2DF,
29167 IX86_BUILTIN_GATHERDIV4DF,
29168 IX86_BUILTIN_GATHERSIV4SF,
29169 IX86_BUILTIN_GATHERSIV8SF,
29170 IX86_BUILTIN_GATHERDIV4SF,
29171 IX86_BUILTIN_GATHERDIV8SF,
29172 IX86_BUILTIN_GATHERSIV2DI,
29173 IX86_BUILTIN_GATHERSIV4DI,
29174 IX86_BUILTIN_GATHERDIV2DI,
29175 IX86_BUILTIN_GATHERDIV4DI,
29176 IX86_BUILTIN_GATHERSIV4SI,
29177 IX86_BUILTIN_GATHERSIV8SI,
29178 IX86_BUILTIN_GATHERDIV4SI,
29179 IX86_BUILTIN_GATHERDIV8SI,
29180 IX86_BUILTIN_VFMSUBSD3_MASK3,
29181 IX86_BUILTIN_VFMSUBSS3_MASK3,
29182 IX86_BUILTIN_GATHER3SIV8SF,
29183 IX86_BUILTIN_GATHER3SIV4SF,
29184 IX86_BUILTIN_GATHER3SIV4DF,
29185 IX86_BUILTIN_GATHER3SIV2DF,
29186 IX86_BUILTIN_GATHER3DIV8SF,
29187 IX86_BUILTIN_GATHER3DIV4SF,
29188 IX86_BUILTIN_GATHER3DIV4DF,
29189 IX86_BUILTIN_GATHER3DIV2DF,
29190 IX86_BUILTIN_GATHER3SIV8SI,
29191 IX86_BUILTIN_GATHER3SIV4SI,
29192 IX86_BUILTIN_GATHER3SIV4DI,
29193 IX86_BUILTIN_GATHER3SIV2DI,
29194 IX86_BUILTIN_GATHER3DIV8SI,
29195 IX86_BUILTIN_GATHER3DIV4SI,
29196 IX86_BUILTIN_GATHER3DIV4DI,
29197 IX86_BUILTIN_GATHER3DIV2DI,
29198 IX86_BUILTIN_SCATTERSIV8SF,
29199 IX86_BUILTIN_SCATTERSIV4SF,
29200 IX86_BUILTIN_SCATTERSIV4DF,
29201 IX86_BUILTIN_SCATTERSIV2DF,
29202 IX86_BUILTIN_SCATTERDIV8SF,
29203 IX86_BUILTIN_SCATTERDIV4SF,
29204 IX86_BUILTIN_SCATTERDIV4DF,
29205 IX86_BUILTIN_SCATTERDIV2DF,
29206 IX86_BUILTIN_SCATTERSIV8SI,
29207 IX86_BUILTIN_SCATTERSIV4SI,
29208 IX86_BUILTIN_SCATTERSIV4DI,
29209 IX86_BUILTIN_SCATTERSIV2DI,
29210 IX86_BUILTIN_SCATTERDIV8SI,
29211 IX86_BUILTIN_SCATTERDIV4SI,
29212 IX86_BUILTIN_SCATTERDIV4DI,
29213 IX86_BUILTIN_SCATTERDIV2DI,
29214 /* Alternate 4 and 8 element gather/scatter for the vectorizer
29215 where all operands are 32-byte or 64-byte wide respectively. */
29216 IX86_BUILTIN_GATHERALTSIV4DF,
29217 IX86_BUILTIN_GATHERALTDIV8SF,
29218 IX86_BUILTIN_GATHERALTSIV4DI,
29219 IX86_BUILTIN_GATHERALTDIV8SI,
29220 IX86_BUILTIN_GATHER3ALTDIV16SF,
29221 IX86_BUILTIN_GATHER3ALTDIV16SI,
29222 IX86_BUILTIN_GATHER3ALTSIV4DF,
29223 IX86_BUILTIN_GATHER3ALTDIV8SF,
29224 IX86_BUILTIN_GATHER3ALTSIV4DI,
29225 IX86_BUILTIN_GATHER3ALTDIV8SI,
29226 IX86_BUILTIN_GATHER3ALTSIV8DF,
29227 IX86_BUILTIN_GATHER3ALTSIV8DI,
29228 IX86_BUILTIN_GATHER3DIV16SF,
29229 IX86_BUILTIN_GATHER3DIV16SI,
29230 IX86_BUILTIN_GATHER3DIV8DF,
29231 IX86_BUILTIN_GATHER3DIV8DI,
29232 IX86_BUILTIN_GATHER3SIV16SF,
29233 IX86_BUILTIN_GATHER3SIV16SI,
29234 IX86_BUILTIN_GATHER3SIV8DF,
29235 IX86_BUILTIN_GATHER3SIV8DI,
29236 IX86_BUILTIN_SCATTERALTSIV8DF,
29237 IX86_BUILTIN_SCATTERALTDIV16SF,
29238 IX86_BUILTIN_SCATTERALTSIV8DI,
29239 IX86_BUILTIN_SCATTERALTDIV16SI,
29240 IX86_BUILTIN_SCATTERDIV16SF,
29241 IX86_BUILTIN_SCATTERDIV16SI,
29242 IX86_BUILTIN_SCATTERDIV8DF,
29243 IX86_BUILTIN_SCATTERDIV8DI,
29244 IX86_BUILTIN_SCATTERSIV16SF,
29245 IX86_BUILTIN_SCATTERSIV16SI,
29246 IX86_BUILTIN_SCATTERSIV8DF,
29247 IX86_BUILTIN_SCATTERSIV8DI,
29248 IX86_BUILTIN_GATHERPFQPD,
29249 IX86_BUILTIN_GATHERPFDPS,
29250 IX86_BUILTIN_GATHERPFDPD,
29251 IX86_BUILTIN_GATHERPFQPS,
29252 IX86_BUILTIN_SCATTERPFDPD,
29253 IX86_BUILTIN_SCATTERPFDPS,
29254 IX86_BUILTIN_SCATTERPFQPD,
29255 IX86_BUILTIN_SCATTERPFQPS,
29256 IX86_BUILTIN_CLWB,
29257 IX86_BUILTIN_CLFLUSHOPT,
29258 IX86_BUILTIN_INFQ,
29259 IX86_BUILTIN_HUGE_VALQ,
29260 IX86_BUILTIN_NANQ,
29261 IX86_BUILTIN_NANSQ,
29262 IX86_BUILTIN_XABORT,
29263 IX86_BUILTIN_ADDCARRYX32,
29264 IX86_BUILTIN_ADDCARRYX64,
29265 IX86_BUILTIN_SBB32,
29266 IX86_BUILTIN_SBB64,
29267 IX86_BUILTIN_RDRAND16_STEP,
29268 IX86_BUILTIN_RDRAND32_STEP,
29269 IX86_BUILTIN_RDRAND64_STEP,
29270 IX86_BUILTIN_RDSEED16_STEP,
29271 IX86_BUILTIN_RDSEED32_STEP,
29272 IX86_BUILTIN_RDSEED64_STEP,
29273 IX86_BUILTIN_MONITORX,
29274 IX86_BUILTIN_MWAITX,
29275 IX86_BUILTIN_CFSTRING,
29276 IX86_BUILTIN_CPU_INIT,
29277 IX86_BUILTIN_CPU_IS,
29278 IX86_BUILTIN_CPU_SUPPORTS,
29279 IX86_BUILTIN_READ_FLAGS,
29280 IX86_BUILTIN_WRITE_FLAGS,
29282 /* All the remaining builtins are tracked in bdesc_* arrays in
29283 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
29284 this point. */
29285 #define BDESC(mask, icode, name, code, comparison, flag) \
29286 code,
29287 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29288 code, \
29289 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
29290 #define BDESC_END(kind, next_kind)
29292 #include "i386-builtin.def"
29294 #undef BDESC
29295 #undef BDESC_FIRST
29296 #undef BDESC_END
29298 IX86_BUILTIN_MAX,
29300 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
29302 /* Now just the aliases for bdesc_* start/end. */
29303 #define BDESC(mask, icode, name, code, comparison, flag)
29304 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
29305 #define BDESC_END(kind, next_kind) \
29306 IX86_BUILTIN__BDESC_##kind##_LAST \
29307 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
29309 #include "i386-builtin.def"
29311 #undef BDESC
29312 #undef BDESC_FIRST
29313 #undef BDESC_END
29315 /* Just to make sure there is no comma after the last enumerator. */
29316 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
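/* To illustrate the two expansions above with a hypothetical entry:
   a BDESC_FIRST (comi, COMI, ...) line in i386-builtin.def first
   contributes the enumerator itself plus
   IX86_BUILTIN__BDESC_COMI_FIRST, and on the second inclusion the
   matching BDESC_END expansion yields

     IX86_BUILTIN__BDESC_COMI_LAST
       = IX86_BUILTIN__BDESC_<next kind>_FIRST - 1,

   so each bdesc_* section gets a contiguous [FIRST, LAST] range of
   enumerators.  */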
29319 /* Table for the ix86 builtin decls. */
29320 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29322 /* Table of all of the builtin functions that are possible with different ISAs
29323 but are waiting to be built until a function is declared to use that
29324 ISA. */
29325 struct builtin_isa {
29326 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29327 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
29328 const char *name; /* function name */
29329 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29330 unsigned char const_p:1; /* true if the declaration is constant */
29331 unsigned char pure_p:1; /* true if the declaration has pure attribute */
29332 bool leaf_p; /* true if the declaration has leaf attribute */
29333 bool nothrow_p; /* true if the declaration has nothrow attribute */
29334 bool set_and_not_built_p; /* true if the builtin was recorded but its decl has not been built yet */
29337 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29339 /* Bits that can still enable any inclusion of a builtin. */
29340 static HOST_WIDE_INT deferred_isa_values = 0;
29341 static HOST_WIDE_INT deferred_isa_values2 = 0;
29343 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
29344 of isa_flags to use in the ix86_builtins_isa array. Store the
29345 function decl in the ix86_builtins array. Return the function decl or
29346 NULL_TREE if the builtin was not added.
29348 If the front end has a special hook for builtin functions, delay adding
29349 builtin functions that aren't in the current ISA until the ISA is changed
29350 with function specific optimization. Doing so can save about 300K for the
29351 default compiler. When the builtin is expanded, check at that time whether
29352 it is valid.
29354 If the front end doesn't have a special hook, record all builtins, even
29355 ones that aren't in the current ISA, in case the user uses function
29356 specific options for a different ISA, so that we don't get scope
29357 errors if a builtin is added in the middle of a function scope. */
29359 static inline tree
29360 def_builtin (HOST_WIDE_INT mask, const char *name,
29361 enum ix86_builtin_func_type tcode,
29362 enum ix86_builtins code)
29364 tree decl = NULL_TREE;
29366 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29368 ix86_builtins_isa[(int) code].isa = mask;
29370 /* OPTION_MASK_ISA_AVX512VL has a special meaning. Unlike the generic case,
29371 where any bit set means that the built-in is enabled, this bit must be
29372 *and-ed* with another one. E.g.: OPTION_MASK_ISA_AVX512DQ
29373 | OPTION_MASK_ISA_AVX512VL means that *both* cpuid bits must be set for
29374 the built-in to be available. Handle this here. */
29375 if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
29376 mask &= ~OPTION_MASK_ISA_AVX512VL;
29378 mask &= ~OPTION_MASK_ISA_64BIT;
29379 if (mask == 0
29380 || (mask & ix86_isa_flags) != 0
29381 || (lang_hooks.builtin_function
29382 == lang_hooks.builtin_function_ext_scope))
29385 tree type = ix86_get_builtin_func_type (tcode);
29386 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29387 NULL, NULL_TREE);
29388 ix86_builtins[(int) code] = decl;
29389 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29391 else
29393 /* Remember MASK: only bits recorded with set_and_not_built_p == true
29394 can still enable this deferred builtin later. */
29395 deferred_isa_values |= mask;
29396 ix86_builtins[(int) code] = NULL_TREE;
29397 ix86_builtins_isa[(int) code].tcode = tcode;
29398 ix86_builtins_isa[(int) code].name = name;
29399 ix86_builtins_isa[(int) code].leaf_p = false;
29400 ix86_builtins_isa[(int) code].nothrow_p = false;
29401 ix86_builtins_isa[(int) code].const_p = false;
29402 ix86_builtins_isa[(int) code].pure_p = false;
29403 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29407 return decl;
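/* A typical use, taken from ix86_init_mmx_sse_builtins below, is

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   which either creates the decl right away (SSE enabled, or the front
   end supports extended-scope builtins) or records the request in
   ix86_builtins_isa and deferred_isa_values for ix86_add_new_builtins
   to complete later.  */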
29410 /* Like def_builtin, but also marks the function decl "const". */
29412 static inline tree
29413 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29414 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29416 tree decl = def_builtin (mask, name, tcode, code);
29417 if (decl)
29418 TREE_READONLY (decl) = 1;
29419 else
29420 ix86_builtins_isa[(int) code].const_p = true;
29422 return decl;
29425 /* Like def_builtin, but also marks the function decl "pure". */
29427 static inline tree
29428 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
29429 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29431 tree decl = def_builtin (mask, name, tcode, code);
29432 if (decl)
29433 DECL_PURE_P (decl) = 1;
29434 else
29435 ix86_builtins_isa[(int) code].pure_p = true;
29437 return decl;
29440 /* Like def_builtin, but for additional isa2 flags. */
29442 static inline tree
29443 def_builtin2 (HOST_WIDE_INT mask, const char *name,
29444 enum ix86_builtin_func_type tcode,
29445 enum ix86_builtins code)
29447 tree decl = NULL_TREE;
29449 ix86_builtins_isa[(int) code].isa2 = mask;
29451 if (mask == 0
29452 || (mask & ix86_isa_flags2) != 0
29453 || (lang_hooks.builtin_function
29454 == lang_hooks.builtin_function_ext_scope))
29457 tree type = ix86_get_builtin_func_type (tcode);
29458 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29459 NULL, NULL_TREE);
29460 ix86_builtins[(int) code] = decl;
29461 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29463 else
29465 /* Remember MASK: only bits recorded with set_and_not_built_p == true
29466 can still enable this deferred builtin later. */
29467 deferred_isa_values2 |= mask;
29468 ix86_builtins[(int) code] = NULL_TREE;
29469 ix86_builtins_isa[(int) code].tcode = tcode;
29470 ix86_builtins_isa[(int) code].name = name;
29471 ix86_builtins_isa[(int) code].leaf_p = false;
29472 ix86_builtins_isa[(int) code].nothrow_p = false;
29473 ix86_builtins_isa[(int) code].const_p = false;
29474 ix86_builtins_isa[(int) code].pure_p = false;
29475 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29478 return decl;
29481 /* Like def_builtin2, but also marks the function decl "const". */
29483 static inline tree
29484 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
29485 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29487 tree decl = def_builtin2 (mask, name, tcode, code);
29488 if (decl)
29489 TREE_READONLY (decl) = 1;
29490 else
29491 ix86_builtins_isa[(int) code].const_p = true;
29493 return decl;
29496 /* Like def_builtin2, but also marks the function decl "pure". */
29498 static inline tree
29499 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
29500 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29502 tree decl = def_builtin2 (mask, name, tcode, code);
29503 if (decl)
29504 DECL_PURE_P (decl) = 1;
29505 else
29506 ix86_builtins_isa[(int) code].pure_p = true;
29508 return decl;
29511 /* Add any new builtin functions for a given ISA that may not have been
29512 declared. This saves a bit of space compared to adding all of the
29513 declarations to the tree, even if we didn't use them. */
29515 static void
29516 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
29518 if ((isa & deferred_isa_values) == 0
29519 && (isa2 & deferred_isa_values2) == 0)
29520 return;
29522 /* Bits in ISA and ISA2 can now be removed from the deferred isa value sets. */
29523 deferred_isa_values &= ~isa;
29524 deferred_isa_values2 &= ~isa2;
29526 int i;
29527 tree saved_current_target_pragma = current_target_pragma;
29528 current_target_pragma = NULL_TREE;
29530 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29532 if (((ix86_builtins_isa[i].isa & isa) != 0
29533 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
29534 && ix86_builtins_isa[i].set_and_not_built_p)
29536 tree decl, type;
29538 /* Don't define the builtin again. */
29539 ix86_builtins_isa[i].set_and_not_built_p = false;
29541 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29542 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29543 type, i, BUILT_IN_MD, NULL,
29544 NULL_TREE);
29546 ix86_builtins[i] = decl;
29547 if (ix86_builtins_isa[i].const_p)
29548 TREE_READONLY (decl) = 1;
29549 if (ix86_builtins_isa[i].pure_p)
29550 DECL_PURE_P (decl) = 1;
29551 if (ix86_builtins_isa[i].leaf_p)
29552 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
29553 NULL_TREE);
29554 if (ix86_builtins_isa[i].nothrow_p)
29555 TREE_NOTHROW (decl) = 1;
29559 current_target_pragma = saved_current_target_pragma;
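/* Deferring builtins and then calling this when the ISA set changes is
   what lets, for example,

     __attribute__ ((target ("avx2")))
     void f (void) { ... }

   use the AVX2 builtins in a unit compiled without -mavx2: switching
   the function's ISA brings in the deferred AVX2 builtins at that
   point instead of declaring them all up front.  */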
29562 /* Bits for builtin_description.flag. */
29564 /* Set when we don't support the comparison natively, and should
29565 swap_comparison in order to support it. */
29566 #define BUILTIN_DESC_SWAP_OPERANDS 1
29568 struct builtin_description
29570 const HOST_WIDE_INT mask;
29571 const enum insn_code icode;
29572 const char *const name;
29573 const enum ix86_builtins code;
29574 const enum rtx_code comparison;
29575 const int flag;
29578 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
29579 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
29580 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
29581 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
29582 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
29583 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
29584 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
29585 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
29586 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
29587 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
29588 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
29589 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
29590 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
29591 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
29592 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
29593 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
29594 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
29595 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
29596 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
29597 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
29598 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
29599 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
29600 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
29601 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
29602 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
29603 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
29604 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
29605 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
29606 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
29607 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
29608 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
29609 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
29610 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
29611 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
29612 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
29613 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
29614 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
29615 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
29616 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
29617 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
29618 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
29619 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
29620 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
29621 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
29622 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
29623 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
29624 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
29625 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
29626 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
29627 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
29628 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
29629 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
29631 #define BDESC(mask, icode, name, code, comparison, flag) \
29632 { mask, icode, name, code, comparison, flag },
29633 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29634 static const struct builtin_description bdesc_##kind[] = \
29636 BDESC (mask, icode, name, code, comparison, flag)
29637 #define BDESC_END(kind, next_kind) \
29640 #include "i386-builtin.def"
29642 #undef BDESC
29643 #undef BDESC_FIRST
29644 #undef BDESC_END
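/* With these definitions the second inclusion of i386-builtin.def
   expands each section into an initialized table, roughly

     static const struct builtin_description bdesc_comi[] =
     {
       { mask, icode, name, code, comparison, flag },
       ...
     };

   one table per bdesc_* kind, in the same order as the enumerator
   ranges defined earlier, so d->code can be checked against the array
   index below.  */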
29646 /* TM vector builtins. */
29648 /* Reuse the existing x86-specific `struct builtin_description' because
29649 we're lazy. Add casts to make them fit. */
29650 static const struct builtin_description bdesc_tm[] =
29652 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29653 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29654 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29655 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29656 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29657 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29658 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29660 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29661 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29662 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29663 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29664 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29665 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29666 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29676 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
29677 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
29681 /* Initialize the transactional memory vector load/store builtins. */
29683 static void
29684 ix86_init_tm_builtins (void)
29686 enum ix86_builtin_func_type ftype;
29687 const struct builtin_description *d;
29688 size_t i;
29689 tree decl;
29690 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
29691 tree attrs_log, attrs_type_log;
29693 if (!flag_tm)
29694 return;
29696 /* If there are no builtins defined, we must be compiling in a
29697 language without trans-mem support. */
29698 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
29699 return;
29701 /* Use whatever attributes a normal TM load has. */
29702 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
29703 attrs_load = DECL_ATTRIBUTES (decl);
29704 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29705 /* Use whatever attributes a normal TM store has. */
29706 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
29707 attrs_store = DECL_ATTRIBUTES (decl);
29708 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29709 /* Use whatever attributes a normal TM log has. */
29710 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
29711 attrs_log = DECL_ATTRIBUTES (decl);
29712 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29714 for (i = 0, d = bdesc_tm;
29715 i < ARRAY_SIZE (bdesc_tm);
29716 i++, d++)
29718 if ((d->mask & ix86_isa_flags) != 0
29719 || (lang_hooks.builtin_function
29720 == lang_hooks.builtin_function_ext_scope))
29722 tree type, attrs, attrs_type;
29723 enum built_in_function code = (enum built_in_function) d->code;
29725 ftype = (enum ix86_builtin_func_type) d->flag;
29726 type = ix86_get_builtin_func_type (ftype);
29728 if (BUILTIN_TM_LOAD_P (code))
29730 attrs = attrs_load;
29731 attrs_type = attrs_type_load;
29733 else if (BUILTIN_TM_STORE_P (code))
29735 attrs = attrs_store;
29736 attrs_type = attrs_type_store;
29738 else
29740 attrs = attrs_log;
29741 attrs_type = attrs_type_log;
29743 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
29744 /* The builtin without the prefix for
29745 calling it directly. */
29746 d->name + strlen ("__builtin_"),
29747 attrs);
29748 /* add_builtin_function () has set the DECL_ATTRIBUTES; now
29749 set the TYPE_ATTRIBUTES. */
29750 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
29752 set_builtin_decl (code, decl, false);
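/* For instance, the "__builtin__ITM_WM64" entry in bdesc_tm is also
   made directly callable as "_ITM_WM64", since d->name
   + strlen ("__builtin_") skips the "__builtin_" prefix when the
   function is registered.  */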
29757 /* Macros for verification of enum ix86_builtins order. */
29758 #define BDESC_VERIFY(x, y, z) \
29759 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
29760 #define BDESC_VERIFYS(x, y, z) \
29761 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
29763 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
29764 IX86_BUILTIN__BDESC_COMI_LAST, 1);
29765 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
29766 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
29767 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
29768 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
29769 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
29770 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
29771 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
29772 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
29773 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
29774 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
29775 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
29776 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
29777 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
29778 IX86_BUILTIN__BDESC_MPX_LAST, 1);
29779 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
29780 IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
29781 BDESC_VERIFYS (IX86_BUILTIN_MAX,
29782 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
29784 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
29785 in the current target ISA, to allow the user to compile particular modules
29786 with target specific options that differ from the command line
29787 options. */
29788 static void
29789 ix86_init_mmx_sse_builtins (void)
29791 const struct builtin_description * d;
29792 enum ix86_builtin_func_type ftype;
29793 size_t i;
29795 /* Add all special builtins with variable number of operands. */
29796 for (i = 0, d = bdesc_special_args;
29797 i < ARRAY_SIZE (bdesc_special_args);
29798 i++, d++)
29800 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
29801 if (d->name == 0)
29802 continue;
29804 ftype = (enum ix86_builtin_func_type) d->flag;
29805 def_builtin (d->mask, d->name, ftype, d->code);
29807 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
29808 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
29809 ARRAY_SIZE (bdesc_special_args) - 1);
29811 /* Add all builtins with variable number of operands. */
29812 for (i = 0, d = bdesc_args;
29813 i < ARRAY_SIZE (bdesc_args);
29814 i++, d++)
29816 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
29817 if (d->name == 0)
29818 continue;
29820 ftype = (enum ix86_builtin_func_type) d->flag;
29821 def_builtin_const (d->mask, d->name, ftype, d->code);
29823 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
29824 IX86_BUILTIN__BDESC_ARGS_FIRST,
29825 ARRAY_SIZE (bdesc_args) - 1);
29827 /* Add all builtins with variable number of operands. */
29828 for (i = 0, d = bdesc_args2;
29829 i < ARRAY_SIZE (bdesc_args2);
29830 i++, d++)
29832 if (d->name == 0)
29833 continue;
29835 ftype = (enum ix86_builtin_func_type) d->flag;
29836 def_builtin_const2 (d->mask, d->name, ftype, d->code);
29839 /* Add all builtins with rounding. */
29840 for (i = 0, d = bdesc_round_args;
29841 i < ARRAY_SIZE (bdesc_round_args);
29842 i++, d++)
29844 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
29845 if (d->name == 0)
29846 continue;
29848 ftype = (enum ix86_builtin_func_type) d->flag;
29849 def_builtin_const (d->mask, d->name, ftype, d->code);
29851 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
29852 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
29853 ARRAY_SIZE (bdesc_round_args) - 1);
29855 /* pcmpestr[im] insns. */
29856 for (i = 0, d = bdesc_pcmpestr;
29857 i < ARRAY_SIZE (bdesc_pcmpestr);
29858 i++, d++)
29860 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
29861 if (d->code == IX86_BUILTIN_PCMPESTRM128)
29862 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
29863 else
29864 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
29865 def_builtin_const (d->mask, d->name, ftype, d->code);
29867 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
29868 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
29869 ARRAY_SIZE (bdesc_pcmpestr) - 1);
29871 /* pcmpistr[im] insns. */
29872 for (i = 0, d = bdesc_pcmpistr;
29873 i < ARRAY_SIZE (bdesc_pcmpistr);
29874 i++, d++)
29876 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
29877 if (d->code == IX86_BUILTIN_PCMPISTRM128)
29878 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
29879 else
29880 ftype = INT_FTYPE_V16QI_V16QI_INT;
29881 def_builtin_const (d->mask, d->name, ftype, d->code);
29883 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
29884 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
29885 ARRAY_SIZE (bdesc_pcmpistr) - 1);
29887 /* comi/ucomi insns. */
29888 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29890 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
29891 if (d->mask == OPTION_MASK_ISA_SSE2)
29892 ftype = INT_FTYPE_V2DF_V2DF;
29893 else
29894 ftype = INT_FTYPE_V4SF_V4SF;
29895 def_builtin_const (d->mask, d->name, ftype, d->code);
29897 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
29898 IX86_BUILTIN__BDESC_COMI_FIRST,
29899 ARRAY_SIZE (bdesc_comi) - 1);
29901 /* SSE */
29902 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
29903 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
29904 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
29905 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
29907 /* SSE or 3DNow!A */
29908 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
29909 /* As it uses V4HImode, we have to require -mmmx too. */
29910 | OPTION_MASK_ISA_MMX,
29911 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
29912 IX86_BUILTIN_MASKMOVQ);
29914 /* SSE2 */
29915 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
29916 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
29918 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
29919 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
29920 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
29921 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
29923 /* SSE3. */
29924 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
29925 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
29926 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
29927 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
29929 /* AES */
29930 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
29931 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
29932 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
29933 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
29934 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
29935 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
29936 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
29937 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
29938 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
29939 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
29940 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
29941 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
29943 /* PCLMUL */
29944 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
29945 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
29947 /* RDRND */
29948 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
29949 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
29950 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
29951 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
29952 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
29953 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
29954 IX86_BUILTIN_RDRAND64_STEP);
29956 /* AVX2 */
29957 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
29958 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
29959 IX86_BUILTIN_GATHERSIV2DF);
29961 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
29962 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
29963 IX86_BUILTIN_GATHERSIV4DF);
29965 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
29966 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
29967 IX86_BUILTIN_GATHERDIV2DF);
29969 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
29970 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
29971 IX86_BUILTIN_GATHERDIV4DF);
29973 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
29974 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
29975 IX86_BUILTIN_GATHERSIV4SF);
29977 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
29978 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
29979 IX86_BUILTIN_GATHERSIV8SF);
29981 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
29982 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
29983 IX86_BUILTIN_GATHERDIV4SF);
29985 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
29986 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
29987 IX86_BUILTIN_GATHERDIV8SF);
29989 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
29990 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
29991 IX86_BUILTIN_GATHERSIV2DI);
29993 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
29994 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
29995 IX86_BUILTIN_GATHERSIV4DI);
29997 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
29998 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
29999 IX86_BUILTIN_GATHERDIV2DI);
30001 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30002 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30003 IX86_BUILTIN_GATHERDIV4DI);
30005 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30006 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30007 IX86_BUILTIN_GATHERSIV4SI);
30009 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30010 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30011 IX86_BUILTIN_GATHERSIV8SI);
30013 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30014 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30015 IX86_BUILTIN_GATHERDIV4SI);
30017 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30018 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30019 IX86_BUILTIN_GATHERDIV8SI);
30021 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30022 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30023 IX86_BUILTIN_GATHERALTSIV4DF);
30025 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30026 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30027 IX86_BUILTIN_GATHERALTDIV8SF);
30029 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30030 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30031 IX86_BUILTIN_GATHERALTSIV4DI);
30033 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30034 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30035 IX86_BUILTIN_GATHERALTDIV8SI);
30037 /* AVX512F */
30038 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30039 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
30040 IX86_BUILTIN_GATHER3SIV16SF);
30042 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30043 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
30044 IX86_BUILTIN_GATHER3SIV8DF);
30046 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30047 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
30048 IX86_BUILTIN_GATHER3DIV16SF);
30050 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30051 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
30052 IX86_BUILTIN_GATHER3DIV8DF);
30054 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30055 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
30056 IX86_BUILTIN_GATHER3SIV16SI);
30058 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30059 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
30060 IX86_BUILTIN_GATHER3SIV8DI);
30062 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30063 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
30064 IX86_BUILTIN_GATHER3DIV16SI);
30066 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30067 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
30068 IX86_BUILTIN_GATHER3DIV8DI);
30070 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30071 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30072 IX86_BUILTIN_GATHER3ALTSIV8DF);
30074 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30075 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30076 IX86_BUILTIN_GATHER3ALTDIV16SF);
30078 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30079 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30080 IX86_BUILTIN_GATHER3ALTSIV8DI);
30082 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30083 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30084 IX86_BUILTIN_GATHER3ALTDIV16SI);
30086 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30087 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
30088 IX86_BUILTIN_SCATTERSIV16SF);
30090 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30091 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
30092 IX86_BUILTIN_SCATTERSIV8DF);
30094 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30095 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
30096 IX86_BUILTIN_SCATTERDIV16SF);
30098 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30099 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
30100 IX86_BUILTIN_SCATTERDIV8DF);
30102 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30103 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
30104 IX86_BUILTIN_SCATTERSIV16SI);
30106 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30107 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
30108 IX86_BUILTIN_SCATTERSIV8DI);
30110 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30111 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
30112 IX86_BUILTIN_SCATTERDIV16SI);
30114 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30115 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
30116 IX86_BUILTIN_SCATTERDIV8DI);
30118 /* AVX512VL */
30119 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
30120 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
30121 IX86_BUILTIN_GATHER3SIV2DF);
30123 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
30124 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
30125 IX86_BUILTIN_GATHER3SIV4DF);
30127 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
30128 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
30129 IX86_BUILTIN_GATHER3DIV2DF);
30131 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
30132 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
30133 IX86_BUILTIN_GATHER3DIV4DF);
30135 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
30136 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
30137 IX86_BUILTIN_GATHER3SIV4SF);
30139 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
30140 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
30141 IX86_BUILTIN_GATHER3SIV8SF);
30143 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
30144 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
30145 IX86_BUILTIN_GATHER3DIV4SF);
30147 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
30148 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
30149 IX86_BUILTIN_GATHER3DIV8SF);
30151 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
30152 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
30153 IX86_BUILTIN_GATHER3SIV2DI);
30155 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
30156 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
30157 IX86_BUILTIN_GATHER3SIV4DI);
30159 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
30160 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
30161 IX86_BUILTIN_GATHER3DIV2DI);
30163 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
30164 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
30165 IX86_BUILTIN_GATHER3DIV4DI);
30167 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
30168 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
30169 IX86_BUILTIN_GATHER3SIV4SI);
30171 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
30172 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
30173 IX86_BUILTIN_GATHER3SIV8SI);
30175 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
30176 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
30177 IX86_BUILTIN_GATHER3DIV4SI);
30179 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
30180 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
30181 IX86_BUILTIN_GATHER3DIV8SI);
30183 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
30184 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
30185 IX86_BUILTIN_GATHER3ALTSIV4DF);
30187 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
30188 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
30189 IX86_BUILTIN_GATHER3ALTDIV8SF);
30191 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
30192 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
30193 IX86_BUILTIN_GATHER3ALTSIV4DI);
30195 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
30196 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
30197 IX86_BUILTIN_GATHER3ALTDIV8SI);
30199 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
30200 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
30201 IX86_BUILTIN_SCATTERSIV8SF);
30203 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
30204 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
30205 IX86_BUILTIN_SCATTERSIV4SF);
30207 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
30208 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
30209 IX86_BUILTIN_SCATTERSIV4DF);
30211 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
30212 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
30213 IX86_BUILTIN_SCATTERSIV2DF);
30215 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
30216 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
30217 IX86_BUILTIN_SCATTERDIV8SF);
30219 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
30220 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
30221 IX86_BUILTIN_SCATTERDIV4SF);
30223 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
30224 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
30225 IX86_BUILTIN_SCATTERDIV4DF);
30227 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
30228 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
30229 IX86_BUILTIN_SCATTERDIV2DF);
30231 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
30232 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
30233 IX86_BUILTIN_SCATTERSIV8SI);
30235 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
30236 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
30237 IX86_BUILTIN_SCATTERSIV4SI);
30239 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
30240 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
30241 IX86_BUILTIN_SCATTERSIV4DI);
30243 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
30244 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
30245 IX86_BUILTIN_SCATTERSIV2DI);
30247 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
30248 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
30249 IX86_BUILTIN_SCATTERDIV8SI);
30251 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
30252 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
30253 IX86_BUILTIN_SCATTERDIV4SI);
30255 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
30256 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
30257 IX86_BUILTIN_SCATTERDIV4DI);
30259 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
30260 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
30261 IX86_BUILTIN_SCATTERDIV2DI);
30262 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
30263 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
30264 IX86_BUILTIN_SCATTERALTSIV8DF);
30266 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
30267 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
30268 IX86_BUILTIN_SCATTERALTDIV16SF);
30270 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
30271 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
30272 IX86_BUILTIN_SCATTERALTSIV8DI);
30274 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
30275 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
30276 IX86_BUILTIN_SCATTERALTDIV16SI);
30278 /* AVX512PF */
30279 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30280 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30281 IX86_BUILTIN_GATHERPFDPD);
30282 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30283 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30284 IX86_BUILTIN_GATHERPFDPS);
30285 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
30286 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30287 IX86_BUILTIN_GATHERPFQPD);
30288 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
30289 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30290 IX86_BUILTIN_GATHERPFQPS);
30291 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
30292 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30293 IX86_BUILTIN_SCATTERPFDPD);
30294 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
30295 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30296 IX86_BUILTIN_SCATTERPFDPS);
30297 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
30298 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30299 IX86_BUILTIN_SCATTERPFQPD);
30300 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
30301 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30302 IX86_BUILTIN_SCATTERPFQPS);
30304 /* SHA */
30305 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
30306 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
30307 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
30308 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
30309 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
30310 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
30311 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
30312 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
30313 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
30314 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
30315 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
30316 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
30317 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
30318 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
30320 /* RTM. */
30321 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
30322 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
30324 /* MMX access to the vec_init patterns. */
30325 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
30326 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
30328 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
30329 V4HI_FTYPE_HI_HI_HI_HI,
30330 IX86_BUILTIN_VEC_INIT_V4HI);
30332 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
30333 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
30334 IX86_BUILTIN_VEC_INIT_V8QI);
30336 /* Access to the vec_extract patterns. */
30337 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
30338 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
30339 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
30340 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
30341 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
30342 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
30343 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
30344 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
30345 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
30346 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
30348 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30349 /* As it uses V4HImode, we have to require -mmmx too. */
30350 | OPTION_MASK_ISA_MMX,
30351 "__builtin_ia32_vec_ext_v4hi",
30352 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
30354 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
30355 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
30357 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
30358 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
30360 /* Access to the vec_set patterns. */
30361 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
30362 "__builtin_ia32_vec_set_v2di",
30363 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
30365 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
30366 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
30368 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
30369 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
30371 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
30372 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
30374 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30375 /* As it uses V4HImode, we have to require -mmmx too. */
30376 | OPTION_MASK_ISA_MMX,
30377 "__builtin_ia32_vec_set_v4hi",
30378 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
30380 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
30381 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
30383 /* RDSEED */
30384 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
30385 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
30386 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
30387 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
30388 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
30389 "__builtin_ia32_rdseed_di_step",
30390 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
30392 /* ADCX */
30393 def_builtin (0, "__builtin_ia32_addcarryx_u32",
30394 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
30395 def_builtin (OPTION_MASK_ISA_64BIT,
30396 "__builtin_ia32_addcarryx_u64",
30397 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30398 IX86_BUILTIN_ADDCARRYX64);
30400 /* SBB */
30401 def_builtin (0, "__builtin_ia32_sbb_u32",
30402 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
30403 def_builtin (OPTION_MASK_ISA_64BIT,
30404 "__builtin_ia32_sbb_u64",
30405 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30406 IX86_BUILTIN_SBB64);
30408 /* Read/write FLAGS. */
30409 def_builtin (0, "__builtin_ia32_readeflags_u32",
30410 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30411 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
30412 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30413 def_builtin (0, "__builtin_ia32_writeeflags_u32",
30414 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
30415 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
30416 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
30418 /* CLFLUSHOPT. */
30419 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
30420 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
30422 /* CLWB. */
30423 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
30424 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
30426 /* MONITORX and MWAITX. */
30427 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
30428 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
30429 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
30430 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
30432 /* CLZERO. */
30433 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
30434 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
30436 /* Add FMA4 multi-arg instructions. */
30437 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
30439 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
30440 if (d->name == 0)
30441 continue;
30443 ftype = (enum ix86_builtin_func_type) d->flag;
30444 def_builtin_const (d->mask, d->name, ftype, d->code);
30446 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
30447 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30448 ARRAY_SIZE (bdesc_multi_arg) - 1);
30451 static void
30452 ix86_init_mpx_builtins ()
30454 const struct builtin_description * d;
30455 enum ix86_builtin_func_type ftype;
30456 tree decl;
30457 size_t i;
30459 for (i = 0, d = bdesc_mpx;
30460 i < ARRAY_SIZE (bdesc_mpx);
30461 i++, d++)
30463 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
30464 if (d->name == 0)
30465 continue;
30467 ftype = (enum ix86_builtin_func_type) d->flag;
30468 decl = def_builtin (d->mask, d->name, ftype, d->code);
30470 /* Without the leaf and nothrow flags, abnormal edges
30471 may follow calls to MPX builtins when setjmp is
30472 present in the function. Since we may have a lot
30473 of MPX builtin calls, this causes lots of useless
30474 edges and enormous PHI nodes. To avoid this we mark
30475 MPX builtins as leaf and nothrow. */
30476 if (decl)
30478 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30479 NULL_TREE);
30480 TREE_NOTHROW (decl) = 1;
30482 else
30484 ix86_builtins_isa[(int)d->code].leaf_p = true;
30485 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30488 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
30489 IX86_BUILTIN__BDESC_MPX_FIRST,
30490 ARRAY_SIZE (bdesc_mpx) - 1);
30492 for (i = 0, d = bdesc_mpx_const;
30493 i < ARRAY_SIZE (bdesc_mpx_const);
30494 i++, d++)
30496 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
30497 if (d->name == 0)
30498 continue;
30500 ftype = (enum ix86_builtin_func_type) d->flag;
30501 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
30503 if (decl)
30505 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30506 NULL_TREE);
30507 TREE_NOTHROW (decl) = 1;
30509 else
30511 ix86_builtins_isa[(int)d->code].leaf_p = true;
30512 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30515 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
30516 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30517 ARRAY_SIZE (bdesc_mpx_const) - 1);
30519 #undef BDESC_VERIFY
30520 #undef BDESC_VERIFYS
30522 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
30523 to return a pointer to VERSION_DECL if the outcome of the expression
30524 formed by PREDICATE_CHAIN is true. This function will be called during
30525 version dispatch to decide which function version to execute. It returns
30526 the basic block at the end, to which more conditions can be added. */
30528 static basic_block
30529 add_condition_to_bb (tree function_decl, tree version_decl,
30530 tree predicate_chain, basic_block new_bb)
30532 gimple *return_stmt;
30533 tree convert_expr, result_var;
30534 gimple *convert_stmt;
30535 gimple *call_cond_stmt;
30536 gimple *if_else_stmt;
30538 basic_block bb1, bb2, bb3;
30539 edge e12, e23;
30541 tree cond_var, and_expr_var = NULL_TREE;
30542 gimple_seq gseq;
30544 tree predicate_decl, predicate_arg;
30546 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
30548 gcc_assert (new_bb != NULL);
30549 gseq = bb_seq (new_bb);
30552 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
30553 build_fold_addr_expr (version_decl));
30554 result_var = create_tmp_var (ptr_type_node);
30555 convert_stmt = gimple_build_assign (result_var, convert_expr);
30556 return_stmt = gimple_build_return (result_var);
30558 if (predicate_chain == NULL_TREE)
30560 gimple_seq_add_stmt (&gseq, convert_stmt);
30561 gimple_seq_add_stmt (&gseq, return_stmt);
30562 set_bb_seq (new_bb, gseq);
30563 gimple_set_bb (convert_stmt, new_bb);
30564 gimple_set_bb (return_stmt, new_bb);
30565 pop_cfun ();
30566 return new_bb;
30569 while (predicate_chain != NULL)
30571 cond_var = create_tmp_var (integer_type_node);
30572 predicate_decl = TREE_PURPOSE (predicate_chain);
30573 predicate_arg = TREE_VALUE (predicate_chain);
30574 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
30575 gimple_call_set_lhs (call_cond_stmt, cond_var);
30577 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
30578 gimple_set_bb (call_cond_stmt, new_bb);
30579 gimple_seq_add_stmt (&gseq, call_cond_stmt);
30581 predicate_chain = TREE_CHAIN (predicate_chain);
30583 if (and_expr_var == NULL)
30584 and_expr_var = cond_var;
30585 else
30587 gimple *assign_stmt;
30588 /* Use MIN_EXPR to check whether any integer is zero:
30589 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
30590 assign_stmt = gimple_build_assign (and_expr_var,
30591 build2 (MIN_EXPR, integer_type_node,
30592 cond_var, and_expr_var));
30594 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
30595 gimple_set_bb (assign_stmt, new_bb);
30596 gimple_seq_add_stmt (&gseq, assign_stmt);
30600 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
30601 integer_zero_node,
30602 NULL_TREE, NULL_TREE);
30603 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
30604 gimple_set_bb (if_else_stmt, new_bb);
30605 gimple_seq_add_stmt (&gseq, if_else_stmt);
30607 gimple_seq_add_stmt (&gseq, convert_stmt);
30608 gimple_seq_add_stmt (&gseq, return_stmt);
30609 set_bb_seq (new_bb, gseq);
30611 bb1 = new_bb;
30612 e12 = split_block (bb1, if_else_stmt);
30613 bb2 = e12->dest;
30614 e12->flags &= ~EDGE_FALLTHRU;
30615 e12->flags |= EDGE_TRUE_VALUE;
30617 e23 = split_block (bb2, return_stmt);
30619 gimple_set_bb (convert_stmt, bb2);
30620 gimple_set_bb (return_stmt, bb2);
30622 bb3 = e23->dest;
30623 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
30625 remove_edge (e23);
30626 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
30628 pop_cfun ();
30630 return bb3;
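/* A minimal C sketch (with hypothetical names foo_avx2/foo_default) of the
   control flow that add_condition_to_bb emits in GIMPLE for one non-default
   version: predicate results are combined with MIN_EXPR as a logical AND of
   0/1 values, and a positive result returns that version's address;
   otherwise control falls through to the next condition block.  */
static int foo_avx2 (void) { return 2; }
static int foo_default (void) { return 0; }

static void *
condition_block_sketch (void)
{
  int c1 = __builtin_cpu_supports ("avx2");
  int c2 = __builtin_cpu_supports ("bmi2");
  int and_tmp = c1 < c2 ? c1 : c2;   /* MIN_EXPR acting as a logical AND.  */
  if (and_tmp > 0)
    return (void *) foo_avx2;
  return (void *) foo_default;       /* next condition block / default.  */
}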
30633 /* This parses the attribute arguments to target in DECL and determines
30634 the right builtin to use to match the platform specification.
30635 It returns the priority value for this version decl. If PREDICATE_LIST
30636 is not NULL, it stores the list of cpu features that need to be checked
30637 before dispatching this function. */
30639 static unsigned int
30640 get_builtin_code_for_version (tree decl, tree *predicate_list)
30642 tree attrs;
30643 struct cl_target_option cur_target;
30644 tree target_node;
30645 struct cl_target_option *new_target;
30646 const char *arg_str = NULL;
30647 const char *attrs_str = NULL;
30648 char *tok_str = NULL;
30649 char *token;
30651 /* Priority of i386 features, greater value is higher priority. This is
30652 used to decide the order in which function dispatch must happen. For
30653 instance, a version specialized for SSE4.2 should be checked for dispatch
30654 before a version for SSE3, as SSE4.2 implies SSE3. */
30655 enum feature_priority
30657 P_ZERO = 0,
30658 P_MMX,
30659 P_SSE,
30660 P_SSE2,
30661 P_SSE3,
30662 P_SSSE3,
30663 P_PROC_SSSE3,
30664 P_SSE4_A,
30665 P_PROC_SSE4_A,
30666 P_SSE4_1,
30667 P_SSE4_2,
30668 P_PROC_SSE4_2,
30669 P_POPCNT,
30670 P_AES,
30671 P_PCLMUL,
30672 P_AVX,
30673 P_PROC_AVX,
30674 P_BMI,
30675 P_PROC_BMI,
30676 P_FMA4,
30677 P_XOP,
30678 P_PROC_XOP,
30679 P_FMA,
30680 P_PROC_FMA,
30681 P_BMI2,
30682 P_AVX2,
30683 P_PROC_AVX2,
30684 P_AVX512F,
30685 P_PROC_AVX512F
30688 enum feature_priority priority = P_ZERO;
30690 /* These are the target attribute strings for which a dispatcher is
30691 available, from fold_builtin_cpu. */
30693 static struct _feature_list
30695 const char *const name;
30696 const enum feature_priority priority;
30698 const feature_list[] =
30700 {"mmx", P_MMX},
30701 {"sse", P_SSE},
30702 {"sse2", P_SSE2},
30703 {"sse3", P_SSE3},
30704 {"sse4a", P_SSE4_A},
30705 {"ssse3", P_SSSE3},
30706 {"sse4.1", P_SSE4_1},
30707 {"sse4.2", P_SSE4_2},
30708 {"popcnt", P_POPCNT},
30709 {"aes", P_AES},
30710 {"pclmul", P_PCLMUL},
30711 {"avx", P_AVX},
30712 {"bmi", P_BMI},
30713 {"fma4", P_FMA4},
30714 {"xop", P_XOP},
30715 {"fma", P_FMA},
30716 {"bmi2", P_BMI2},
30717 {"avx2", P_AVX2},
30718 {"avx512f", P_AVX512F}
30722 static unsigned int NUM_FEATURES
30723 = sizeof (feature_list) / sizeof (struct _feature_list);
30725 unsigned int i;
30727 tree predicate_chain = NULL_TREE;
30728 tree predicate_decl, predicate_arg;
30730 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
30731 gcc_assert (attrs != NULL);
30733 attrs = TREE_VALUE (TREE_VALUE (attrs));
30735 gcc_assert (TREE_CODE (attrs) == STRING_CST);
30736 attrs_str = TREE_STRING_POINTER (attrs);
30738 /* Return priority zero for default function. */
30739 if (strcmp (attrs_str, "default") == 0)
30740 return 0;
30742 /* Handle arch= if specified. For priority, set it to be 1 more than
30743 the best instruction set the processor can handle. For instance, if
30744 there is a version for atom and a version for ssse3 (the highest ISA
30745 priority for atom), the atom version must be checked for dispatch
30746 before the ssse3 version. */
30747 if (strstr (attrs_str, "arch=") != NULL)
30749 cl_target_option_save (&cur_target, &global_options);
30750 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
30751 &global_options_set);
30753 gcc_assert (target_node);
30754 new_target = TREE_TARGET_OPTION (target_node);
30755 gcc_assert (new_target);
30757 if (new_target->arch_specified && new_target->arch > 0)
30759 switch (new_target->arch)
30761 case PROCESSOR_CORE2:
30762 arg_str = "core2";
30763 priority = P_PROC_SSSE3;
30764 break;
30765 case PROCESSOR_NEHALEM:
30766 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
30768 arg_str = "westmere";
30769 priority = P_AES;
30771 else
30773 /* We translate "arch=corei7" and "arch=nehalem" to
30774 "corei7" so that it will be mapped to M_INTEL_COREI7
30775 as cpu type to cover all M_INTEL_COREI7_XXXs. */
30776 arg_str = "corei7";
30777 priority = P_PROC_SSE4_2;
30779 break;
30780 case PROCESSOR_SANDYBRIDGE:
30781 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
30782 arg_str = "ivybridge";
30783 else
30784 arg_str = "sandybridge";
30785 priority = P_PROC_AVX;
30786 break;
30787 case PROCESSOR_HASWELL:
30788 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30789 arg_str = "skylake-avx512";
30790 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
30791 arg_str = "skylake";
30792 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
30793 arg_str = "broadwell";
30794 else
30795 arg_str = "haswell";
30796 priority = P_PROC_AVX2;
30797 break;
30798 case PROCESSOR_BONNELL:
30799 arg_str = "bonnell";
30800 priority = P_PROC_SSSE3;
30801 break;
30802 case PROCESSOR_KNL:
30803 arg_str = "knl";
30804 priority = P_PROC_AVX512F;
30805 break;
30806 case PROCESSOR_KNM:
30807 arg_str = "knm";
30808 priority = P_PROC_AVX512F;
30809 break;
30810 case PROCESSOR_SILVERMONT:
30811 arg_str = "silvermont";
30812 priority = P_PROC_SSE4_2;
30813 break;
30814 case PROCESSOR_AMDFAM10:
30815 arg_str = "amdfam10h";
30816 priority = P_PROC_SSE4_A;
30817 break;
30818 case PROCESSOR_BTVER1:
30819 arg_str = "btver1";
30820 priority = P_PROC_SSE4_A;
30821 break;
30822 case PROCESSOR_BTVER2:
30823 arg_str = "btver2";
30824 priority = P_PROC_BMI;
30825 break;
30826 case PROCESSOR_BDVER1:
30827 arg_str = "bdver1";
30828 priority = P_PROC_XOP;
30829 break;
30830 case PROCESSOR_BDVER2:
30831 arg_str = "bdver2";
30832 priority = P_PROC_FMA;
30833 break;
30834 case PROCESSOR_BDVER3:
30835 arg_str = "bdver3";
30836 priority = P_PROC_FMA;
30837 break;
30838 case PROCESSOR_BDVER4:
30839 arg_str = "bdver4";
30840 priority = P_PROC_AVX2;
30841 break;
30842 case PROCESSOR_ZNVER1:
30843 arg_str = "znver1";
30844 priority = P_PROC_AVX2;
30845 break;
30849 cl_target_option_restore (&global_options, &cur_target);
30851 if (predicate_list && arg_str == NULL)
30853 error_at (DECL_SOURCE_LOCATION (decl),
30854 "No dispatcher found for the versioning attributes");
30855 return 0;
30858 if (predicate_list)
30860 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
30861 /* For a C string literal the length includes the trailing NULL. */
30862 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
30863 predicate_chain = tree_cons (predicate_decl, predicate_arg,
30864 predicate_chain);
30868 /* Process feature name. */
30869 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
30870 strcpy (tok_str, attrs_str);
30871 token = strtok (tok_str, ",");
30872 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
30874 while (token != NULL)
30876 /* Do not process "arch=" */
30877 if (strncmp (token, "arch=", 5) == 0)
30879 token = strtok (NULL, ",");
30880 continue;
30882 for (i = 0; i < NUM_FEATURES; ++i)
30884 if (strcmp (token, feature_list[i].name) == 0)
30886 if (predicate_list)
30888 predicate_arg = build_string_literal (
30889 strlen (feature_list[i].name) + 1,
30890 feature_list[i].name);
30891 predicate_chain = tree_cons (predicate_decl, predicate_arg,
30892 predicate_chain);
30894 /* Find the maximum priority feature. */
30895 if (feature_list[i].priority > priority)
30896 priority = feature_list[i].priority;
30898 break;
30901 if (predicate_list && i == NUM_FEATURES)
30903 error_at (DECL_SOURCE_LOCATION (decl),
30904 "No dispatcher found for %s", token);
30905 return 0;
30907 token = strtok (NULL, ",");
30909 free (tok_str);
30911 if (predicate_list && predicate_chain == NULL_TREE)
30913 error_at (DECL_SOURCE_LOCATION (decl),
30914 "No dispatcher found for the versioning attributes : %s",
30915 attrs_str);
30916 return 0;
30918 else if (predicate_list)
30920 predicate_chain = nreverse (predicate_chain);
30921 *predicate_list = predicate_chain;
30924 return priority;
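/* Hedged usage sketch (C++ source, where GCC supports function
   multiversioning via the "target" attribute; the function name f is
   hypothetical).  The attribute strings below are what this routine parses:
   "arch=haswell" produces a __builtin_cpu_is ("haswell") predicate at
   priority P_PROC_AVX2, while "avx2,popcnt" produces two
   __builtin_cpu_supports predicates at the priority of the strongest
   feature, P_AVX2.  */
__attribute__ ((target ("default")))       int f (void) { return 0; }
__attribute__ ((target ("arch=haswell")))  int f (void) { return 1; }
__attribute__ ((target ("avx2,popcnt")))   int f (void) { return 2; }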
30927 /* This compares the priority of target features in function DECL1
30928 and DECL2. It returns positive value if DECL1 is higher priority,
30929 negative value if DECL2 is higher priority and 0 if they are the
30930 same. */
30932 static int
30933 ix86_compare_version_priority (tree decl1, tree decl2)
30935 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
30936 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
30938 return (int)priority1 - (int)priority2;
30941 /* V1 and V2 point to function versions with different priorities
30942 based on the target ISA. This function compares their priorities. */
30944 static int
30945 feature_compare (const void *v1, const void *v2)
30947 typedef struct _function_version_info
30949 tree version_decl;
30950 tree predicate_chain;
30951 unsigned int dispatch_priority;
30952 } function_version_info;
30954 const function_version_info c1 = *(const function_version_info *)v1;
30955 const function_version_info c2 = *(const function_version_info *)v2;
30956 return (c2.dispatch_priority - c1.dispatch_priority);
30959 /* This function generates the dispatch function for
30960 multi-versioned functions. DISPATCH_DECL is the function which will
30961 contain the dispatch logic. FNDECLS are the function choices for
30962 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
30963 in DISPATCH_DECL in which the dispatch code is generated. */
30965 static int
30966 dispatch_function_versions (tree dispatch_decl,
30967 void *fndecls_p,
30968 basic_block *empty_bb)
30970 tree default_decl;
30971 gimple *ifunc_cpu_init_stmt;
30972 gimple_seq gseq;
30973 int ix;
30974 tree ele;
30975 vec<tree> *fndecls;
30976 unsigned int num_versions = 0;
30977 unsigned int actual_versions = 0;
30978 unsigned int i;
30980 struct _function_version_info
30982 tree version_decl;
30983 tree predicate_chain;
30984 unsigned int dispatch_priority;
30985 }*function_version_info;
30987 gcc_assert (dispatch_decl != NULL
30988 && fndecls_p != NULL
30989 && empty_bb != NULL);
30992 /* fndecls_p is actually a vector.  */
30992 fndecls = static_cast<vec<tree> *> (fndecls_p);
30994 /* At least one more version other than the default. */
30995 num_versions = fndecls->length ();
30996 gcc_assert (num_versions >= 2);
30998 function_version_info = (struct _function_version_info *)
30999 XNEWVEC (struct _function_version_info, (num_versions - 1));
31001 /* The first version in the vector is the default decl. */
31002 default_decl = (*fndecls)[0];
31004 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31006 gseq = bb_seq (*empty_bb);
31007 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31008 constructors, so explicitly call __builtin_cpu_init here. */
31009 ifunc_cpu_init_stmt = gimple_build_call_vec (
31010 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31011 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31012 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31013 set_bb_seq (*empty_bb, gseq);
31015 pop_cfun ();
31018 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31020 tree version_decl = ele;
31021 tree predicate_chain = NULL_TREE;
31022 unsigned int priority;
31023 /* Get attribute string, parse it and find the right predicate decl.
31024 The predicate function could be a lengthy combination of many
31025 features, like arch-type and various isa-variants. */
31026 priority = get_builtin_code_for_version (version_decl,
31027 &predicate_chain);
31029 if (predicate_chain == NULL_TREE)
31030 continue;
31032 function_version_info [actual_versions].version_decl = version_decl;
31033 function_version_info [actual_versions].predicate_chain
31034 = predicate_chain;
31035 function_version_info [actual_versions].dispatch_priority = priority;
31036 actual_versions++;
31039 /* Sort the versions according to descending order of dispatch priority. The
31040 priority is based on the ISA. This is not a perfect solution. There
31041 could still be ambiguity. If more than one function version is suitable
31042 to execute, which one should be dispatched? In future, allow the user
31043 to specify a dispatch priority next to the version. */
31044 qsort (function_version_info, actual_versions,
31045 sizeof (struct _function_version_info), feature_compare);
31047 for (i = 0; i < actual_versions; ++i)
31048 *empty_bb = add_condition_to_bb (dispatch_decl,
31049 function_version_info[i].version_decl,
31050 function_version_info[i].predicate_chain,
31051 *empty_bb);
31053 /* Dispatch the default version at the end.  */
31054 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31055 NULL, *empty_bb);
31057 free (function_version_info);
31058 return 0;
31061 /* This function changes the assembler name for functions that are
31062 versions. If DECL is a function version and has a "target"
31063 attribute, it appends the attribute string to its assembler name. */
31065 static tree
31066 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31068 tree version_attr;
31069 const char *orig_name, *version_string;
31070 char *attr_str, *assembler_name;
31072 if (DECL_DECLARED_INLINE_P (decl)
31073 && lookup_attribute ("gnu_inline",
31074 DECL_ATTRIBUTES (decl)))
31075 error_at (DECL_SOURCE_LOCATION (decl),
31076 "Function versions cannot be marked as gnu_inline,"
31077 " bodies have to be generated");
31079 if (DECL_VIRTUAL_P (decl)
31080 || DECL_VINDEX (decl))
31081 sorry ("Virtual function multiversioning not supported");
31083 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31085 /* target attribute string cannot be NULL. */
31086 gcc_assert (version_attr != NULL_TREE);
31088 orig_name = IDENTIFIER_POINTER (id);
31089 version_string
31090 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31092 if (strcmp (version_string, "default") == 0)
31093 return id;
31095 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31096 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31098 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31100 /* Allow assembler name to be modified if already set. */
31101 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31102 SET_DECL_RTL (decl, NULL);
31104 tree ret = get_identifier (assembler_name);
31105 XDELETEVEC (attr_str);
31106 XDELETEVEC (assembler_name);
31107 return ret;
31111 static tree
31112 ix86_mangle_decl_assembler_name (tree decl, tree id)
31114 /* For function version, add the target suffix to the assembler name. */
31115 if (TREE_CODE (decl) == FUNCTION_DECL
31116 && DECL_FUNCTION_VERSIONED (decl))
31117 id = ix86_mangle_function_version_assembler_name (decl, id);
31118 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31119 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31120 #endif
31122 return id;
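/* A standalone sketch of the renaming done above, using only the C library
   (the real code uses XNEWVEC and GCC identifier nodes); for instance
   "_Z1fv" with attribute string "avx2" becomes "_Z1fv.avx2", while the
   "default" version keeps its original assembler name.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
versioned_asm_name (const char *orig_name, const char *attr_str)
{
  char *buf = (char *) malloc (strlen (orig_name) + strlen (attr_str) + 2);
  sprintf (buf, "%s.%s", orig_name, attr_str);  /* "<original>.<attrs>" */
  return buf;
}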
31125 /* Make a dispatcher declaration for the multi-versioned function DECL.
31126 Calls to DECL function will be replaced with calls to the dispatcher
31127 by the front-end. Returns the decl of the dispatcher function. */
31129 static tree
31130 ix86_get_function_versions_dispatcher (void *decl)
31132 tree fn = (tree) decl;
31133 struct cgraph_node *node = NULL;
31134 struct cgraph_node *default_node = NULL;
31135 struct cgraph_function_version_info *node_v = NULL;
31136 struct cgraph_function_version_info *first_v = NULL;
31138 tree dispatch_decl = NULL;
31140 struct cgraph_function_version_info *default_version_info = NULL;
31142 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31144 node = cgraph_node::get (fn);
31145 gcc_assert (node != NULL);
31147 node_v = node->function_version ();
31148 gcc_assert (node_v != NULL);
31150 if (node_v->dispatcher_resolver != NULL)
31151 return node_v->dispatcher_resolver;
31153 /* Find the default version and make it the first node. */
31154 first_v = node_v;
31155 /* Go to the beginning of the chain. */
31156 while (first_v->prev != NULL)
31157 first_v = first_v->prev;
31158 default_version_info = first_v;
31159 while (default_version_info != NULL)
31161 if (is_function_default_version
31162 (default_version_info->this_node->decl))
31163 break;
31164 default_version_info = default_version_info->next;
31167 /* If there is no default node, just return NULL. */
31168 if (default_version_info == NULL)
31169 return NULL;
31171 /* Make default info the first node. */
31172 if (first_v != default_version_info)
31174 default_version_info->prev->next = default_version_info->next;
31175 if (default_version_info->next)
31176 default_version_info->next->prev = default_version_info->prev;
31177 first_v->prev = default_version_info;
31178 default_version_info->next = first_v;
31179 default_version_info->prev = NULL;
31182 default_node = default_version_info->this_node;
31184 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31185 if (targetm.has_ifunc_p ())
31187 struct cgraph_function_version_info *it_v = NULL;
31188 struct cgraph_node *dispatcher_node = NULL;
31189 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31191 /* Right now, the dispatching is done via ifunc. */
31192 dispatch_decl = make_dispatcher_decl (default_node->decl);
31194 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31195 gcc_assert (dispatcher_node != NULL);
31196 dispatcher_node->dispatcher_function = 1;
31197 dispatcher_version_info
31198 = dispatcher_node->insert_new_function_version ();
31199 dispatcher_version_info->next = default_version_info;
31200 dispatcher_node->definition = 1;
31202 /* Set the dispatcher for all the versions. */
31203 it_v = default_version_info;
31204 while (it_v != NULL)
31206 it_v->dispatcher_resolver = dispatch_decl;
31207 it_v = it_v->next;
31210 else
31211 #endif
31213 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31214 "multiversioning needs ifunc which is not supported "
31215 "on this target");
31218 return dispatch_decl;
31221 /* Make the resolver function decl to dispatch the versions of
31222 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
31223 the ifunc alias that will point to the created resolver. Create an
31224 empty basic block in the resolver and store the pointer in
31225 EMPTY_BB. Return the decl of the resolver function. */
31227 static tree
31228 make_resolver_func (const tree default_decl,
31229 const tree ifunc_alias_decl,
31230 basic_block *empty_bb)
31232 char *resolver_name;
31233 tree decl, type, decl_name, t;
31235 /* IFUNC's have to be globally visible. So, if the default_decl is
31236 not, then the name of the IFUNC should be made unique. */
31237 if (TREE_PUBLIC (default_decl) == 0)
31239 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
31240 symtab->change_decl_assembler_name (ifunc_alias_decl,
31241 get_identifier (ifunc_name));
31242 XDELETEVEC (ifunc_name);
31245 resolver_name = make_unique_name (default_decl, "resolver", false);
31247 /* The resolver function should return a (void *). */
31248 type = build_function_type_list (ptr_type_node, NULL_TREE);
31250 decl = build_fn_decl (resolver_name, type);
31251 decl_name = get_identifier (resolver_name);
31252 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31254 DECL_NAME (decl) = decl_name;
31255 TREE_USED (decl) = 1;
31256 DECL_ARTIFICIAL (decl) = 1;
31257 DECL_IGNORED_P (decl) = 1;
31258 TREE_PUBLIC (decl) = 0;
31259 DECL_UNINLINABLE (decl) = 1;
31261 /* Resolver is not external, body is generated. */
31262 DECL_EXTERNAL (decl) = 0;
31263 DECL_EXTERNAL (ifunc_alias_decl) = 0;
31265 DECL_CONTEXT (decl) = NULL_TREE;
31266 DECL_INITIAL (decl) = make_node (BLOCK);
31267 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31269 if (DECL_COMDAT_GROUP (default_decl)
31270 || TREE_PUBLIC (default_decl))
31272 /* In this case, each translation unit with a call to this
31273 versioned function will put out a resolver. Ensure it
31274 is comdat to keep just one copy. */
31275 DECL_COMDAT (decl) = 1;
31276 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
31278 /* Build result decl and add to function_decl. */
31279 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31280 DECL_ARTIFICIAL (t) = 1;
31281 DECL_IGNORED_P (t) = 1;
31282 DECL_RESULT (decl) = t;
31284 gimplify_function_tree (decl);
31285 push_cfun (DECL_STRUCT_FUNCTION (decl));
31286 *empty_bb = init_lowered_empty_function (decl, false,
31287 profile_count::uninitialized ());
31289 cgraph_node::add_new_function (decl, true);
31290 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31292 pop_cfun ();
31294 gcc_assert (ifunc_alias_decl != NULL);
31295 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
31296 DECL_ATTRIBUTES (ifunc_alias_decl)
31297 = make_attribute ("ifunc", resolver_name,
31298 DECL_ATTRIBUTES (ifunc_alias_decl));
31300 /* Create the alias for dispatch to resolver here. */
31301 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
31302 XDELETEVEC (resolver_name);
31303 return decl;
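/* Hand-written GNU C equivalent of the ifunc/resolver arrangement this
   function sets up (a sketch: the real resolver body is filled in later by
   dispatch_function_versions, and every name here is hypothetical).  */
static int g_default (void) { return 0; }
static int g_avx2 (void) { return 2; }

static void *
g_resolver (void)
{
  __builtin_cpu_init ();   /* IFUNC resolvers run before constructors.  */
  if (__builtin_cpu_supports ("avx2"))
    return (void *) g_avx2;
  return (void *) g_default;
}

int g (void) __attribute__ ((ifunc ("g_resolver")));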
31306 /* Generate the dispatching code body to dispatch multi-versioned function
31307 DECL. The target hook is called to process the "target" attributes and
31308 provide the code to dispatch the right function at run-time. NODE points
31309 to the dispatcher decl whose body will be created. */
31311 static tree
31312 ix86_generate_version_dispatcher_body (void *node_p)
31314 tree resolver_decl;
31315 basic_block empty_bb;
31316 tree default_ver_decl;
31317 struct cgraph_node *versn;
31318 struct cgraph_node *node;
31320 struct cgraph_function_version_info *node_version_info = NULL;
31321 struct cgraph_function_version_info *versn_info = NULL;
31323 node = (cgraph_node *)node_p;
31325 node_version_info = node->function_version ();
31326 gcc_assert (node->dispatcher_function
31327 && node_version_info != NULL);
31329 if (node_version_info->dispatcher_resolver)
31330 return node_version_info->dispatcher_resolver;
31332 /* The first version in the chain corresponds to the default version. */
31333 default_ver_decl = node_version_info->next->this_node->decl;
31335 /* node is going to be an alias, so remove the finalized bit. */
31336 node->definition = false;
31338 resolver_decl = make_resolver_func (default_ver_decl,
31339 node->decl, &empty_bb);
31341 node_version_info->dispatcher_resolver = resolver_decl;
31343 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
31345 auto_vec<tree, 2> fn_ver_vec;
31347 for (versn_info = node_version_info->next; versn_info;
31348 versn_info = versn_info->next)
31350 versn = versn_info->this_node;
31351 /* Check for virtual functions here again, as by this time it should
31352 have been determined if this function needs a vtable index or
31353 not. This happens for methods in derived classes that override
31354 virtual methods in base classes but are not explicitly marked as
31355 virtual. */
31356 if (DECL_VINDEX (versn->decl))
31357 sorry ("Virtual function multiversioning not supported");
31359 fn_ver_vec.safe_push (versn->decl);
31362 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
31363 cgraph_edge::rebuild_edges ();
31364 pop_cfun ();
31365 return resolver_decl;
31367 /* This builds the processor_model struct type defined in
31368 libgcc/config/i386/cpuinfo.c.  */
31370 static tree
31371 build_processor_model_struct (void)
31373 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
31374 "__cpu_features"};
31375 tree field = NULL_TREE, field_chain = NULL_TREE;
31376 int i;
31377 tree type = make_node (RECORD_TYPE);
31379 /* The first 3 fields are unsigned int. */
31380 for (i = 0; i < 3; ++i)
31382 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31383 get_identifier (field_name[i]), unsigned_type_node);
31384 if (field_chain != NULL_TREE)
31385 DECL_CHAIN (field) = field_chain;
31386 field_chain = field;
31389 /* The last field is an array of unsigned integers of size one. */
31390 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31391 get_identifier (field_name[3]),
31392 build_array_type (unsigned_type_node,
31393 build_index_type (size_one_node)));
31394 if (field_chain != NULL_TREE)
31395 DECL_CHAIN (field) = field_chain;
31396 field_chain = field;
31398 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
31399 return type;
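/* For reference, the record built above mirrors the object exported by
   libgcc (see libgcc/config/i386/cpuinfo.c); a matching C declaration is
   roughly: */
struct __processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
};
extern struct __processor_model __cpu_model;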
31402 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
31404 static tree
31405 make_var_decl (tree type, const char *name)
31407 tree new_decl;
31409 new_decl = build_decl (UNKNOWN_LOCATION,
31410 VAR_DECL,
31411 get_identifier(name),
31412 type);
31414 DECL_EXTERNAL (new_decl) = 1;
31415 TREE_STATIC (new_decl) = 1;
31416 TREE_PUBLIC (new_decl) = 1;
31417 DECL_INITIAL (new_decl) = 0;
31418 DECL_ARTIFICIAL (new_decl) = 0;
31419 DECL_PRESERVE_P (new_decl) = 1;
31421 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
31422 assemble_variable (new_decl, 0, 0, 0);
31424 return new_decl;
31427 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
31428 into an integer defined in libgcc/config/i386/cpuinfo.c */
31430 static tree
31431 fold_builtin_cpu (tree fndecl, tree *args)
31433 unsigned int i;
31434 enum ix86_builtins fn_code = (enum ix86_builtins)
31435 DECL_FUNCTION_CODE (fndecl);
31436 tree param_string_cst = NULL;
31438 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
31439 enum processor_features
31441 F_CMOV = 0,
31442 F_MMX,
31443 F_POPCNT,
31444 F_SSE,
31445 F_SSE2,
31446 F_SSE3,
31447 F_SSSE3,
31448 F_SSE4_1,
31449 F_SSE4_2,
31450 F_AVX,
31451 F_AVX2,
31452 F_SSE4_A,
31453 F_FMA4,
31454 F_XOP,
31455 F_FMA,
31456 F_AVX512F,
31457 F_BMI,
31458 F_BMI2,
31459 F_AES,
31460 F_PCLMUL,
31461 F_AVX512VL,
31462 F_AVX512BW,
31463 F_AVX512DQ,
31464 F_AVX512CD,
31465 F_AVX512ER,
31466 F_AVX512PF,
31467 F_AVX512VBMI,
31468 F_AVX512IFMA,
31469 F_AVX5124VNNIW,
31470 F_AVX5124FMAPS,
31471 F_AVX512VPOPCNTDQ,
31472 F_MAX
31475 /* These are the values for vendor types and cpu types and subtypes
31476 in cpuinfo.c. Cpu types and subtypes should be subtracted by
31477 the corresponding start value. */
31478 enum processor_model
31480 M_INTEL = 1,
31481 M_AMD,
31482 M_CPU_TYPE_START,
31483 M_INTEL_BONNELL,
31484 M_INTEL_CORE2,
31485 M_INTEL_COREI7,
31486 M_AMDFAM10H,
31487 M_AMDFAM15H,
31488 M_INTEL_SILVERMONT,
31489 M_INTEL_KNL,
31490 M_AMD_BTVER1,
31491 M_AMD_BTVER2,
31492 M_AMDFAM17H,
31493 M_INTEL_KNM,
31494 M_CPU_SUBTYPE_START,
31495 M_INTEL_COREI7_NEHALEM,
31496 M_INTEL_COREI7_WESTMERE,
31497 M_INTEL_COREI7_SANDYBRIDGE,
31498 M_AMDFAM10H_BARCELONA,
31499 M_AMDFAM10H_SHANGHAI,
31500 M_AMDFAM10H_ISTANBUL,
31501 M_AMDFAM15H_BDVER1,
31502 M_AMDFAM15H_BDVER2,
31503 M_AMDFAM15H_BDVER3,
31504 M_AMDFAM15H_BDVER4,
31505 M_AMDFAM17H_ZNVER1,
31506 M_INTEL_COREI7_IVYBRIDGE,
31507 M_INTEL_COREI7_HASWELL,
31508 M_INTEL_COREI7_BROADWELL,
31509 M_INTEL_COREI7_SKYLAKE,
31510 M_INTEL_COREI7_SKYLAKE_AVX512
31513 static struct _arch_names_table
31515 const char *const name;
31516 const enum processor_model model;
31518 const arch_names_table[] =
31520 {"amd", M_AMD},
31521 {"intel", M_INTEL},
31522 {"atom", M_INTEL_BONNELL},
31523 {"slm", M_INTEL_SILVERMONT},
31524 {"core2", M_INTEL_CORE2},
31525 {"corei7", M_INTEL_COREI7},
31526 {"nehalem", M_INTEL_COREI7_NEHALEM},
31527 {"westmere", M_INTEL_COREI7_WESTMERE},
31528 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
31529 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
31530 {"haswell", M_INTEL_COREI7_HASWELL},
31531 {"broadwell", M_INTEL_COREI7_BROADWELL},
31532 {"skylake", M_INTEL_COREI7_SKYLAKE},
31533 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
31534 {"bonnell", M_INTEL_BONNELL},
31535 {"silvermont", M_INTEL_SILVERMONT},
31536 {"knl", M_INTEL_KNL},
31537 {"knm", M_INTEL_KNM},
31538 {"amdfam10h", M_AMDFAM10H},
31539 {"barcelona", M_AMDFAM10H_BARCELONA},
31540 {"shanghai", M_AMDFAM10H_SHANGHAI},
31541 {"istanbul", M_AMDFAM10H_ISTANBUL},
31542 {"btver1", M_AMD_BTVER1},
31543 {"amdfam15h", M_AMDFAM15H},
31544 {"bdver1", M_AMDFAM15H_BDVER1},
31545 {"bdver2", M_AMDFAM15H_BDVER2},
31546 {"bdver3", M_AMDFAM15H_BDVER3},
31547 {"bdver4", M_AMDFAM15H_BDVER4},
31548 {"btver2", M_AMD_BTVER2},
31549 {"amdfam17h", M_AMDFAM17H},
31550 {"znver1", M_AMDFAM17H_ZNVER1},
31553 static struct _isa_names_table
31555 const char *const name;
31556 const enum processor_features feature;
31558 const isa_names_table[] =
31560 {"cmov", F_CMOV},
31561 {"mmx", F_MMX},
31562 {"popcnt", F_POPCNT},
31563 {"sse", F_SSE},
31564 {"sse2", F_SSE2},
31565 {"sse3", F_SSE3},
31566 {"ssse3", F_SSSE3},
31567 {"sse4a", F_SSE4_A},
31568 {"sse4.1", F_SSE4_1},
31569 {"sse4.2", F_SSE4_2},
31570 {"avx", F_AVX},
31571 {"fma4", F_FMA4},
31572 {"xop", F_XOP},
31573 {"fma", F_FMA},
31574 {"avx2", F_AVX2},
31575 {"avx512f", F_AVX512F},
31576 {"bmi", F_BMI},
31577 {"bmi2", F_BMI2},
31578 {"aes", F_AES},
31579 {"pclmul", F_PCLMUL},
31580 {"avx512vl",F_AVX512VL},
31581 {"avx512bw",F_AVX512BW},
31582 {"avx512dq",F_AVX512DQ},
31583 {"avx512cd",F_AVX512CD},
31584 {"avx512er",F_AVX512ER},
31585 {"avx512pf",F_AVX512PF},
31586 {"avx512vbmi",F_AVX512VBMI},
31587 {"avx512ifma",F_AVX512IFMA},
31588 {"avx5124vnniw",F_AVX5124VNNIW},
31589 {"avx5124fmaps",F_AVX5124FMAPS},
31590 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
31593 tree __processor_model_type = build_processor_model_struct ();
31594 tree __cpu_model_var = make_var_decl (__processor_model_type,
31595 "__cpu_model");
31598 varpool_node::add (__cpu_model_var);
31600 gcc_assert ((args != NULL) && (*args != NULL));
31602 param_string_cst = *args;
31603 while (param_string_cst
31604 && TREE_CODE (param_string_cst) != STRING_CST)
31606 /* *args must be an expr that can contain other EXPRs leading to a
31607 STRING_CST. */
31608 if (!EXPR_P (param_string_cst))
31610 error ("Parameter to builtin must be a string constant or literal");
31611 return integer_zero_node;
31613 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
31616 gcc_assert (param_string_cst);
31618 if (fn_code == IX86_BUILTIN_CPU_IS)
31620 tree ref;
31621 tree field;
31622 tree final;
31624 unsigned int field_val = 0;
31625 unsigned int NUM_ARCH_NAMES
31626 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
31628 for (i = 0; i < NUM_ARCH_NAMES; i++)
31629 if (strcmp (arch_names_table[i].name,
31630 TREE_STRING_POINTER (param_string_cst)) == 0)
31631 break;
31633 if (i == NUM_ARCH_NAMES)
31635 error ("Parameter to builtin not valid: %s",
31636 TREE_STRING_POINTER (param_string_cst));
31637 return integer_zero_node;
31640 field = TYPE_FIELDS (__processor_model_type);
31641 field_val = arch_names_table[i].model;
31643 /* CPU types are stored in the next field. */
31644 if (field_val > M_CPU_TYPE_START
31645 && field_val < M_CPU_SUBTYPE_START)
31647 field = DECL_CHAIN (field);
31648 field_val -= M_CPU_TYPE_START;
31651 /* CPU subtypes are stored in the next field. */
31652 if (field_val > M_CPU_SUBTYPE_START)
31654 field = DECL_CHAIN (DECL_CHAIN (field));
31655 field_val -= M_CPU_SUBTYPE_START;
31658 /* Get the appropriate field in __cpu_model. */
31659 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
31660 field, NULL_TREE);
31662 /* Check the value. */
31663 final = build2 (EQ_EXPR, unsigned_type_node, ref,
31664 build_int_cstu (unsigned_type_node, field_val));
31665 return build1 (CONVERT_EXPR, integer_type_node, final);
31667 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
31669 tree ref;
31670 tree array_elt;
31671 tree field;
31672 tree final;
31674 unsigned int field_val = 0;
31675 unsigned int NUM_ISA_NAMES
31676 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
31678 for (i = 0; i < NUM_ISA_NAMES; i++)
31679 if (strcmp (isa_names_table[i].name,
31680 TREE_STRING_POINTER (param_string_cst)) == 0)
31681 break;
31683 if (i == NUM_ISA_NAMES)
31685 error ("Parameter to builtin not valid: %s",
31686 TREE_STRING_POINTER (param_string_cst));
31687 return integer_zero_node;
31690 field = TYPE_FIELDS (__processor_model_type);
31691 /* Get the last field, which is __cpu_features. */
31692 while (DECL_CHAIN (field))
31693 field = DECL_CHAIN (field);
31695 /* Get the appropriate field: __cpu_model.__cpu_features */
31696 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
31697 field, NULL_TREE);
31699 /* Access the 0th element of __cpu_features array. */
31700 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
31701 integer_zero_node, NULL_TREE, NULL_TREE);
31703 field_val = (1 << isa_names_table[i].feature);
31704 /* Return __cpu_model.__cpu_features[0] & field_val */
31705 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
31706 build_int_cstu (unsigned_type_node, field_val));
31707 return build1 (CONVERT_EXPR, integer_type_node, final);
31709 gcc_unreachable ();
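/* Hedged summary of the folded forms produced above (the enum values are
   the internal ones from cpuinfo.c and are shown only for illustration):

     __builtin_cpu_is ("intel")      -> __cpu_model.__cpu_vendor == M_INTEL
     __builtin_cpu_is ("haswell")    -> __cpu_model.__cpu_subtype
                                          == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START
     __builtin_cpu_supports ("sse2") -> __cpu_model.__cpu_features[0] & (1 << F_SSE2)

   The result is converted to int, so user code can branch on it directly: */
static int
cpu_checks_example (void)
{
  return __builtin_cpu_is ("haswell") && __builtin_cpu_supports ("sse2");
}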
31712 static tree
31713 ix86_fold_builtin (tree fndecl, int n_args,
31714 tree *args, bool ignore ATTRIBUTE_UNUSED)
31716 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
31718 enum ix86_builtins fn_code = (enum ix86_builtins)
31719 DECL_FUNCTION_CODE (fndecl);
31720 switch (fn_code)
31722 case IX86_BUILTIN_CPU_IS:
31723 case IX86_BUILTIN_CPU_SUPPORTS:
31724 gcc_assert (n_args == 1);
31725 return fold_builtin_cpu (fndecl, args);
31727 case IX86_BUILTIN_NANQ:
31728 case IX86_BUILTIN_NANSQ:
31730 tree type = TREE_TYPE (TREE_TYPE (fndecl));
31731 const char *str = c_getstr (*args);
31732 int quiet = fn_code == IX86_BUILTIN_NANQ;
31733 REAL_VALUE_TYPE real;
31735 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
31736 return build_real (type, real);
31737 return NULL_TREE;
31740 case IX86_BUILTIN_INFQ:
31741 case IX86_BUILTIN_HUGE_VALQ:
31743 tree type = TREE_TYPE (TREE_TYPE (fndecl));
31744 REAL_VALUE_TYPE inf;
31745 real_inf (&inf);
31746 return build_real (type, inf);
31749 case IX86_BUILTIN_TZCNT16:
31750 case IX86_BUILTIN_CTZS:
31751 case IX86_BUILTIN_TZCNT32:
31752 case IX86_BUILTIN_TZCNT64:
31753 gcc_assert (n_args == 1);
31754 if (TREE_CODE (args[0]) == INTEGER_CST)
31756 tree type = TREE_TYPE (TREE_TYPE (fndecl));
31757 tree arg = args[0];
31758 if (fn_code == IX86_BUILTIN_TZCNT16
31759 || fn_code == IX86_BUILTIN_CTZS)
31760 arg = fold_convert (short_unsigned_type_node, arg);
31761 if (integer_zerop (arg))
31762 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
31763 else
31764 return fold_const_call (CFN_CTZ, type, arg);
31766 break;
31768 case IX86_BUILTIN_LZCNT16:
31769 case IX86_BUILTIN_CLZS:
31770 case IX86_BUILTIN_LZCNT32:
31771 case IX86_BUILTIN_LZCNT64:
31772 gcc_assert (n_args == 1);
31773 if (TREE_CODE (args[0]) == INTEGER_CST)
31775 tree type = TREE_TYPE (TREE_TYPE (fndecl));
31776 tree arg = args[0];
31777 if (fn_code == IX86_BUILTIN_LZCNT16
31778 || fn_code == IX86_BUILTIN_CLZS)
31779 arg = fold_convert (short_unsigned_type_node, arg);
31780 if (integer_zerop (arg))
31781 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
31782 else
31783 return fold_const_call (CFN_CLZ, type, arg);
31785 break;
31787 case IX86_BUILTIN_BEXTR32:
31788 case IX86_BUILTIN_BEXTR64:
31789 case IX86_BUILTIN_BEXTRI32:
31790 case IX86_BUILTIN_BEXTRI64:
31791 gcc_assert (n_args == 2);
31792 if (tree_fits_uhwi_p (args[1]))
31794 unsigned HOST_WIDE_INT res = 0;
31795 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
31796 unsigned int start = tree_to_uhwi (args[1]);
31797 unsigned int len = (start & 0xff00) >> 8;
31798 start &= 0xff;
31799 if (start >= prec || len == 0)
31800 res = 0;
31801 else if (!tree_fits_uhwi_p (args[0]))
31802 break;
31803 else
31804 res = tree_to_uhwi (args[0]) >> start;
31805 if (len > prec)
31806 len = prec;
31807 if (len < HOST_BITS_PER_WIDE_INT)
31808 res &= (HOST_WIDE_INT_1U << len) - 1;
31809 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
31811 break;
31813 case IX86_BUILTIN_BZHI32:
31814 case IX86_BUILTIN_BZHI64:
31815 gcc_assert (n_args == 2);
31816 if (tree_fits_uhwi_p (args[1]))
31818 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
31819 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
31820 return args[0];
31821 if (!tree_fits_uhwi_p (args[0]))
31822 break;
31823 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
31824 res &= ~(HOST_WIDE_INT_M1U << idx);
31825 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
31827 break;
31829 case IX86_BUILTIN_PDEP32:
31830 case IX86_BUILTIN_PDEP64:
31831 gcc_assert (n_args == 2);
31832 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
31834 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
31835 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
31836 unsigned HOST_WIDE_INT res = 0;
31837 unsigned HOST_WIDE_INT m, k = 1;
31838 for (m = 1; m; m <<= 1)
31839 if ((mask & m) != 0)
31841 if ((src & k) != 0)
31842 res |= m;
31843 k <<= 1;
31845 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
31847 break;
31849 case IX86_BUILTIN_PEXT32:
31850 case IX86_BUILTIN_PEXT64:
31851 gcc_assert (n_args == 2);
31852 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
31854 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
31855 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
31856 unsigned HOST_WIDE_INT res = 0;
31857 unsigned HOST_WIDE_INT m, k = 1;
31858 for (m = 1; m; m <<= 1)
31859 if ((mask & m) != 0)
31861 if ((src & m) != 0)
31862 res |= k;
31863 k <<= 1;
31865 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
31867 break;
31869 default:
31870 break;
31874 #ifdef SUBTARGET_FOLD_BUILTIN
31875 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
31876 #endif
31878 return NULL_TREE;
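/* Hedged examples of the constant folds implemented above (assuming -mbmi
   and -mbmi2 so these builtins are available); the commented results follow
   the exact code paths: */
static unsigned int
bit_builtin_fold_examples (void)
{
  return __builtin_ia32_bzhi_si (0xffffffffu, 4)     /* -> 0xf  (keep low 4 bits)   */
	 + __builtin_ia32_bextr_u32 (0xabcd, 0x0804) /* -> 0xbc (start 4, length 8) */
	 + __builtin_ia32_pdep_si (0x5, 0xf0)        /* -> 0x50 (deposit into mask) */
	 + __builtin_ia32_pext_si (0x50, 0xf0);      /* -> 0x5  (extract from mask) */
}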
31881 /* Fold an MD builtin (use ix86_fold_builtin for folding into a
31882 constant) in GIMPLE.  */
31884 bool
31885 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
31887 gimple *stmt = gsi_stmt (*gsi);
31888 tree fndecl = gimple_call_fndecl (stmt);
31889 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
31890 int n_args = gimple_call_num_args (stmt);
31891 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
31892 tree decl = NULL_TREE;
31893 tree arg0, arg1;
31895 switch (fn_code)
31897 case IX86_BUILTIN_TZCNT32:
31898 decl = builtin_decl_implicit (BUILT_IN_CTZ);
31899 goto fold_tzcnt_lzcnt;
31901 case IX86_BUILTIN_TZCNT64:
31902 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
31903 goto fold_tzcnt_lzcnt;
31905 case IX86_BUILTIN_LZCNT32:
31906 decl = builtin_decl_implicit (BUILT_IN_CLZ);
31907 goto fold_tzcnt_lzcnt;
31909 case IX86_BUILTIN_LZCNT64:
31910 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
31911 goto fold_tzcnt_lzcnt;
31913 fold_tzcnt_lzcnt:
31914 gcc_assert (n_args == 1);
31915 arg0 = gimple_call_arg (stmt, 0);
31916 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
31918 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
31919 /* If arg0 is provably non-zero, optimize into generic
31920 __builtin_c[tl]z{,ll} function the middle-end handles
31921 better. */
31922 if (!expr_not_equal_to (arg0, wi::zero (prec)))
31923 return false;
31925 location_t loc = gimple_location (stmt);
31926 gimple *g = gimple_build_call (decl, 1, arg0);
31927 gimple_set_location (g, loc);
31928 tree lhs = make_ssa_name (integer_type_node);
31929 gimple_call_set_lhs (g, lhs);
31930 gsi_insert_before (gsi, g, GSI_SAME_STMT);
31931 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
31932 gimple_set_location (g, loc);
31933 gsi_replace (gsi, g, false);
31934 return true;
31936 break;
31938 case IX86_BUILTIN_BZHI32:
31939 case IX86_BUILTIN_BZHI64:
31940 gcc_assert (n_args == 2);
31941 arg1 = gimple_call_arg (stmt, 1);
31942 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
31944 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
31945 arg0 = gimple_call_arg (stmt, 0);
31946 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
31947 break;
31948 location_t loc = gimple_location (stmt);
31949 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
31950 gimple_set_location (g, loc);
31951 gsi_replace (gsi, g, false);
31952 return true;
31954 break;
31956 case IX86_BUILTIN_PDEP32:
31957 case IX86_BUILTIN_PDEP64:
31958 case IX86_BUILTIN_PEXT32:
31959 case IX86_BUILTIN_PEXT64:
31960 gcc_assert (n_args == 2);
31961 arg1 = gimple_call_arg (stmt, 1);
31962 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
31964 location_t loc = gimple_location (stmt);
31965 arg0 = gimple_call_arg (stmt, 0);
31966 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
31967 gimple_set_location (g, loc);
31968 gsi_replace (gsi, g, false);
31969 return true;
31971 break;
31973 default:
31974 break;
31977 return false;
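/* Hedged example of the TZCNT fold above (assuming -mbmi): once the zero
   case is excluded, the range information lets the builtin be replaced by
   __builtin_ctz, which the middle end understands and optimizes better.  */
static unsigned int
count_trailing_zeros (unsigned int x)
{
  if (x == 0)
    return 32;
  return __builtin_ia32_tzcnt_u32 (x);  /* foldable to __builtin_ctz (x) */
}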
31980 /* Make builtins to detect cpu type and features supported. NAME is
31981 the builtin name, CODE is the builtin code, and FTYPE is the function
31982 type of the builtin. */
31984 static void
31985 make_cpu_type_builtin (const char* name, int code,
31986 enum ix86_builtin_func_type ftype, bool is_const)
31988 tree decl;
31989 tree type;
31991 type = ix86_get_builtin_func_type (ftype);
31992 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
31993 NULL, NULL_TREE);
31994 gcc_assert (decl != NULL_TREE);
31995 ix86_builtins[(int) code] = decl;
31996 TREE_READONLY (decl) = is_const;
31999 /* Make builtins to get CPU type and features supported. The created
32000 builtins are:
32002 __builtin_cpu_init (), to detect cpu type and features,
32003 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32004 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32007 static void
32008 ix86_init_platform_type_builtins (void)
32010 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32011 INT_FTYPE_VOID, false);
32012 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32013 INT_FTYPE_PCCHAR, true);
32014 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32015 INT_FTYPE_PCCHAR, true);
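/* Minimal usage sketch for the three builtins registered above: initialize
   once with __builtin_cpu_init, then query by string.  */
int
main (void)
{
  __builtin_cpu_init ();
  __builtin_printf ("amd: %d  avx: %d\n",
		    __builtin_cpu_is ("amd"),
		    __builtin_cpu_supports ("avx"));
  return 0;
}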
32018 /* Internal method for ix86_init_builtins. */
32020 static void
32021 ix86_init_builtins_va_builtins_abi (void)
32023 tree ms_va_ref, sysv_va_ref;
32024 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32025 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32026 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32027 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32029 if (!TARGET_64BIT)
32030 return;
32031 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32032 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32033 ms_va_ref = build_reference_type (ms_va_list_type_node);
32034 sysv_va_ref =
32035 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32037 fnvoid_va_end_ms =
32038 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32039 fnvoid_va_start_ms =
32040 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32041 fnvoid_va_end_sysv =
32042 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32043 fnvoid_va_start_sysv =
32044 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32045 NULL_TREE);
32046 fnvoid_va_copy_ms =
32047 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32048 NULL_TREE);
32049 fnvoid_va_copy_sysv =
32050 build_function_type_list (void_type_node, sysv_va_ref,
32051 sysv_va_ref, NULL_TREE);
32053 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32054 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32055 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32056 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32057 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32058 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32059 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32060 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32061 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32062 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32063 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32064 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32067 static void
32068 ix86_init_builtin_types (void)
32070 tree float80_type_node, const_string_type_node;
32072 /* The __float80 type. */
32073 float80_type_node = long_double_type_node;
32074 if (TYPE_MODE (float80_type_node) != XFmode)
32076 if (float64x_type_node != NULL_TREE
32077 && TYPE_MODE (float64x_type_node) == XFmode)
32078 float80_type_node = float64x_type_node;
32079 else
32081 /* The __float80 type. */
32082 float80_type_node = make_node (REAL_TYPE);
32084 TYPE_PRECISION (float80_type_node) = 80;
32085 layout_type (float80_type_node);
32088 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32090 /* The __float128 type. The node has already been created as
32091 _Float128, so we only need to register the __float128 name for
32092 it. */
32093 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32095 const_string_type_node
32096 = build_pointer_type (build_qualified_type
32097 (char_type_node, TYPE_QUAL_CONST));
32099 /* This macro is built by i386-builtin-types.awk. */
32100 DEFINE_BUILTIN_PRIMITIVE_TYPES;
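/* The type names registered above are then usable directly in user code on
   x86 targets; the w and q literal suffixes are the documented ones for
   these types (a sketch):  */
__float80  extended80 = 1.0w;
__float128 quad128    = 1.0q;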
32103 static void
32104 ix86_init_builtins (void)
32106 tree ftype, decl;
32108 ix86_init_builtin_types ();
32110 /* Builtins to get CPU type and features. */
32111 ix86_init_platform_type_builtins ();
32113 /* TFmode support builtins. */
32114 def_builtin_const (0, "__builtin_infq",
32115 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32116 def_builtin_const (0, "__builtin_huge_valq",
32117 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32119 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
32120 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
32121 BUILT_IN_MD, "nanq", NULL_TREE);
32122 TREE_READONLY (decl) = 1;
32123 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
32125 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
32126 BUILT_IN_MD, "nansq", NULL_TREE);
32127 TREE_READONLY (decl) = 1;
32128 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
32130 /* We will expand them to normal call if SSE isn't available since
32131 they are used by libgcc. */
32132 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32133 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
32134 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32135 TREE_READONLY (decl) = 1;
32136 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
32138 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32139 decl = add_builtin_function ("__builtin_copysignq", ftype,
32140 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
32141 "__copysigntf3", NULL_TREE);
32142 TREE_READONLY (decl) = 1;
32143 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
32145 ix86_init_tm_builtins ();
32146 ix86_init_mmx_sse_builtins ();
32147 ix86_init_mpx_builtins ();
32149 if (TARGET_LP64)
32150 ix86_init_builtins_va_builtins_abi ();
32152 #ifdef SUBTARGET_INIT_BUILTINS
32153 SUBTARGET_INIT_BUILTINS;
32154 #endif
32157 /* Return the ix86 builtin for CODE. */
32159 static tree
32160 ix86_builtin_decl (unsigned code, bool)
32162 if (code >= IX86_BUILTIN_MAX)
32163 return error_mark_node;
32165 return ix86_builtins[code];
32168 /* Errors in the source file can cause expand_expr to return const0_rtx
32169 where we expect a vector. To avoid crashing, use one of the vector
32170 clear instructions. */
32171 static rtx
32172 safe_vector_operand (rtx x, machine_mode mode)
32174 if (x == const0_rtx)
32175 x = CONST0_RTX (mode);
32176 return x;
32179 /* Fixup modeless constants to fit required mode. */
32180 static rtx
32181 fixup_modeless_constant (rtx x, machine_mode mode)
32183 if (GET_MODE (x) == VOIDmode)
32184 x = convert_to_mode (mode, x, 1);
32185 return x;
32188 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32190 static rtx
32191 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32193 rtx pat;
32194 tree arg0 = CALL_EXPR_ARG (exp, 0);
32195 tree arg1 = CALL_EXPR_ARG (exp, 1);
32196 rtx op0 = expand_normal (arg0);
32197 rtx op1 = expand_normal (arg1);
32198 machine_mode tmode = insn_data[icode].operand[0].mode;
32199 machine_mode mode0 = insn_data[icode].operand[1].mode;
32200 machine_mode mode1 = insn_data[icode].operand[2].mode;
32202 if (VECTOR_MODE_P (mode0))
32203 op0 = safe_vector_operand (op0, mode0);
32204 if (VECTOR_MODE_P (mode1))
32205 op1 = safe_vector_operand (op1, mode1);
32207 if (optimize || !target
32208 || GET_MODE (target) != tmode
32209 || !insn_data[icode].operand[0].predicate (target, tmode))
32210 target = gen_reg_rtx (tmode);
32212 if (GET_MODE (op1) == SImode && mode1 == TImode)
32214 rtx x = gen_reg_rtx (V4SImode);
32215 emit_insn (gen_sse2_loadd (x, op1));
32216 op1 = gen_lowpart (TImode, x);
32219 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32220 op0 = copy_to_mode_reg (mode0, op0);
32221 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32222 op1 = copy_to_mode_reg (mode1, op1);
32224 pat = GEN_FCN (icode) (target, op0, op1);
32225 if (! pat)
32226 return 0;
32228 emit_insn (pat);
32230 return target;
32233 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32235 static rtx
32236 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32237 enum ix86_builtin_func_type m_type,
32238 enum rtx_code sub_code)
32240 rtx pat;
32241 int i;
32242 int nargs;
32243 bool comparison_p = false;
32244 bool tf_p = false;
32245 bool last_arg_constant = false;
32246 int num_memory = 0;
32247 struct {
32248 rtx op;
32249 machine_mode mode;
32250 } args[4];
32252 machine_mode tmode = insn_data[icode].operand[0].mode;
32254 switch (m_type)
32256 case MULTI_ARG_4_DF2_DI_I:
32257 case MULTI_ARG_4_DF2_DI_I1:
32258 case MULTI_ARG_4_SF2_SI_I:
32259 case MULTI_ARG_4_SF2_SI_I1:
32260 nargs = 4;
32261 last_arg_constant = true;
32262 break;
32264 case MULTI_ARG_3_SF:
32265 case MULTI_ARG_3_DF:
32266 case MULTI_ARG_3_SF2:
32267 case MULTI_ARG_3_DF2:
32268 case MULTI_ARG_3_DI:
32269 case MULTI_ARG_3_SI:
32270 case MULTI_ARG_3_SI_DI:
32271 case MULTI_ARG_3_HI:
32272 case MULTI_ARG_3_HI_SI:
32273 case MULTI_ARG_3_QI:
32274 case MULTI_ARG_3_DI2:
32275 case MULTI_ARG_3_SI2:
32276 case MULTI_ARG_3_HI2:
32277 case MULTI_ARG_3_QI2:
32278 nargs = 3;
32279 break;
32281 case MULTI_ARG_2_SF:
32282 case MULTI_ARG_2_DF:
32283 case MULTI_ARG_2_DI:
32284 case MULTI_ARG_2_SI:
32285 case MULTI_ARG_2_HI:
32286 case MULTI_ARG_2_QI:
32287 nargs = 2;
32288 break;
32290 case MULTI_ARG_2_DI_IMM:
32291 case MULTI_ARG_2_SI_IMM:
32292 case MULTI_ARG_2_HI_IMM:
32293 case MULTI_ARG_2_QI_IMM:
32294 nargs = 2;
32295 last_arg_constant = true;
32296 break;
32298 case MULTI_ARG_1_SF:
32299 case MULTI_ARG_1_DF:
32300 case MULTI_ARG_1_SF2:
32301 case MULTI_ARG_1_DF2:
32302 case MULTI_ARG_1_DI:
32303 case MULTI_ARG_1_SI:
32304 case MULTI_ARG_1_HI:
32305 case MULTI_ARG_1_QI:
32306 case MULTI_ARG_1_SI_DI:
32307 case MULTI_ARG_1_HI_DI:
32308 case MULTI_ARG_1_HI_SI:
32309 case MULTI_ARG_1_QI_DI:
32310 case MULTI_ARG_1_QI_SI:
32311 case MULTI_ARG_1_QI_HI:
32312 nargs = 1;
32313 break;
32315 case MULTI_ARG_2_DI_CMP:
32316 case MULTI_ARG_2_SI_CMP:
32317 case MULTI_ARG_2_HI_CMP:
32318 case MULTI_ARG_2_QI_CMP:
32319 nargs = 2;
32320 comparison_p = true;
32321 break;
32323 case MULTI_ARG_2_SF_TF:
32324 case MULTI_ARG_2_DF_TF:
32325 case MULTI_ARG_2_DI_TF:
32326 case MULTI_ARG_2_SI_TF:
32327 case MULTI_ARG_2_HI_TF:
32328 case MULTI_ARG_2_QI_TF:
32329 nargs = 2;
32330 tf_p = true;
32331 break;
32333 default:
32334 gcc_unreachable ();
32337 if (optimize || !target
32338 || GET_MODE (target) != tmode
32339 || !insn_data[icode].operand[0].predicate (target, tmode))
32340 target = gen_reg_rtx (tmode);
32341 else if (memory_operand (target, tmode))
32342 num_memory++;
32344 gcc_assert (nargs <= 4);
32346 for (i = 0; i < nargs; i++)
32348 tree arg = CALL_EXPR_ARG (exp, i);
32349 rtx op = expand_normal (arg);
32350 int adjust = (comparison_p) ? 1 : 0;
32351 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32353 if (last_arg_constant && i == nargs - 1)
32355 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32357 enum insn_code new_icode = icode;
32358 switch (icode)
32360 case CODE_FOR_xop_vpermil2v2df3:
32361 case CODE_FOR_xop_vpermil2v4sf3:
32362 case CODE_FOR_xop_vpermil2v4df3:
32363 case CODE_FOR_xop_vpermil2v8sf3:
32364 error ("the last argument must be a 2-bit immediate");
32365 return gen_reg_rtx (tmode);
32366 case CODE_FOR_xop_rotlv2di3:
32367 new_icode = CODE_FOR_rotlv2di3;
32368 goto xop_rotl;
32369 case CODE_FOR_xop_rotlv4si3:
32370 new_icode = CODE_FOR_rotlv4si3;
32371 goto xop_rotl;
32372 case CODE_FOR_xop_rotlv8hi3:
32373 new_icode = CODE_FOR_rotlv8hi3;
32374 goto xop_rotl;
32375 case CODE_FOR_xop_rotlv16qi3:
32376 new_icode = CODE_FOR_rotlv16qi3;
32377 xop_rotl:
32378 if (CONST_INT_P (op))
32380 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
32381 op = GEN_INT (INTVAL (op) & mask);
32382 gcc_checking_assert
32383 (insn_data[icode].operand[i + 1].predicate (op, mode));
32385 else
32387 gcc_checking_assert
32388 (nargs == 2
32389 && insn_data[new_icode].operand[0].mode == tmode
32390 && insn_data[new_icode].operand[1].mode == tmode
32391 && insn_data[new_icode].operand[2].mode == mode
32392 && insn_data[new_icode].operand[0].predicate
32393 == insn_data[icode].operand[0].predicate
32394 && insn_data[new_icode].operand[1].predicate
32395 == insn_data[icode].operand[1].predicate);
32396 icode = new_icode;
32397 goto non_constant;
32399 break;
32400 default:
32401 gcc_unreachable ();
32405 else
32407 non_constant:
32408 if (VECTOR_MODE_P (mode))
32409 op = safe_vector_operand (op, mode);
32411 /* If we aren't optimizing, only allow one memory operand to be
32412 generated. */
32413 if (memory_operand (op, mode))
32414 num_memory++;
32416 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32418 if (optimize
32419 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32420 || num_memory > 1)
32421 op = force_reg (mode, op);
32424 args[i].op = op;
32425 args[i].mode = mode;
32428 switch (nargs)
32430 case 1:
32431 pat = GEN_FCN (icode) (target, args[0].op);
32432 break;
32434 case 2:
32435 if (tf_p)
32436 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32437 GEN_INT ((int)sub_code));
32438 else if (! comparison_p)
32439 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32440 else
32442 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32443 args[0].op,
32444 args[1].op);
32446 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32448 break;
32450 case 3:
32451 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32452 break;
32454 case 4:
32455 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32456 break;
32458 default:
32459 gcc_unreachable ();
32462 if (! pat)
32463 return 0;
32465 emit_insn (pat);
32466 return target;
32469 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32470 insns with vec_merge. */
32472 static rtx
32473 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32474 rtx target)
32476 rtx pat;
32477 tree arg0 = CALL_EXPR_ARG (exp, 0);
32478 rtx op1, op0 = expand_normal (arg0);
32479 machine_mode tmode = insn_data[icode].operand[0].mode;
32480 machine_mode mode0 = insn_data[icode].operand[1].mode;
32482 if (optimize || !target
32483 || GET_MODE (target) != tmode
32484 || !insn_data[icode].operand[0].predicate (target, tmode))
32485 target = gen_reg_rtx (tmode);
32487 if (VECTOR_MODE_P (mode0))
32488 op0 = safe_vector_operand (op0, mode0);
32490 if ((optimize && !register_operand (op0, mode0))
32491 || !insn_data[icode].operand[1].predicate (op0, mode0))
32492 op0 = copy_to_mode_reg (mode0, op0);
32494 op1 = op0;
32495 if (!insn_data[icode].operand[2].predicate (op1, mode0))
32496 op1 = copy_to_mode_reg (mode0, op1);
32498 pat = GEN_FCN (icode) (target, op0, op1);
32499 if (! pat)
32500 return 0;
32501 emit_insn (pat);
32502 return target;
32505 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
32507 static rtx
32508 ix86_expand_sse_compare (const struct builtin_description *d,
32509 tree exp, rtx target, bool swap)
32511 rtx pat;
32512 tree arg0 = CALL_EXPR_ARG (exp, 0);
32513 tree arg1 = CALL_EXPR_ARG (exp, 1);
32514 rtx op0 = expand_normal (arg0);
32515 rtx op1 = expand_normal (arg1);
32516 rtx op2;
32517 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32518 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32519 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32520 enum rtx_code comparison = d->comparison;
32522 if (VECTOR_MODE_P (mode0))
32523 op0 = safe_vector_operand (op0, mode0);
32524 if (VECTOR_MODE_P (mode1))
32525 op1 = safe_vector_operand (op1, mode1);
32527 /* Swap operands if we have a comparison that isn't available in
32528 hardware. */
32529 if (swap)
32530 std::swap (op0, op1);
32532 if (optimize || !target
32533 || GET_MODE (target) != tmode
32534 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32535 target = gen_reg_rtx (tmode);
32537 if ((optimize && !register_operand (op0, mode0))
32538 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
32539 op0 = copy_to_mode_reg (mode0, op0);
32540 if ((optimize && !register_operand (op1, mode1))
32541 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
32542 op1 = copy_to_mode_reg (mode1, op1);
32544 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
32545 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32546 if (! pat)
32547 return 0;
32548 emit_insn (pat);
32549 return target;
32552 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
32554 static rtx
32555 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
32556 rtx target)
32558 rtx pat;
32559 tree arg0 = CALL_EXPR_ARG (exp, 0);
32560 tree arg1 = CALL_EXPR_ARG (exp, 1);
32561 rtx op0 = expand_normal (arg0);
32562 rtx op1 = expand_normal (arg1);
32563 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
32564 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
32565 enum rtx_code comparison = d->comparison;
32567 if (VECTOR_MODE_P (mode0))
32568 op0 = safe_vector_operand (op0, mode0);
32569 if (VECTOR_MODE_P (mode1))
32570 op1 = safe_vector_operand (op1, mode1);
32572 /* Swap operands if we have a comparison that isn't available in
32573 hardware. */
32574 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
32575 std::swap (op0, op1);
32577 target = gen_reg_rtx (SImode);
32578 emit_move_insn (target, const0_rtx);
32579 target = gen_rtx_SUBREG (QImode, target, 0);
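/* The comparison result is written into the QImode lowpart of an
SImode pseudo that was zeroed above, so reading the SImode register
back yields a clean 0/1 value.  */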
32581 if ((optimize && !register_operand (op0, mode0))
32582 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32583 op0 = copy_to_mode_reg (mode0, op0);
32584 if ((optimize && !register_operand (op1, mode1))
32585 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32586 op1 = copy_to_mode_reg (mode1, op1);
32588 pat = GEN_FCN (d->icode) (op0, op1);
32589 if (! pat)
32590 return 0;
32591 emit_insn (pat);
32592 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32593 gen_rtx_fmt_ee (comparison, QImode,
32594 SET_DEST (pat),
32595 const0_rtx)));
32597 return SUBREG_REG (target);
32600 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
32602 static rtx
32603 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
32604 rtx target)
32606 rtx pat;
32607 tree arg0 = CALL_EXPR_ARG (exp, 0);
32608 rtx op1, op0 = expand_normal (arg0);
32609 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32610 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32612 if (optimize || target == 0
32613 || GET_MODE (target) != tmode
32614 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32615 target = gen_reg_rtx (tmode);
32617 if (VECTOR_MODE_P (mode0))
32618 op0 = safe_vector_operand (op0, mode0);
32620 if ((optimize && !register_operand (op0, mode0))
32621 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32622 op0 = copy_to_mode_reg (mode0, op0);
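/* For the round builtins the comparison field of the descriptor holds
the rounding-mode immediate (ROUND_FLOOR etc.), not an rtx comparison
code.  */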
32624 op1 = GEN_INT (d->comparison);
32626 pat = GEN_FCN (d->icode) (target, op0, op1);
32627 if (! pat)
32628 return 0;
32629 emit_insn (pat);
32630 return target;
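/* Like ix86_expand_sse_round, but for builtins that round two vector
arguments and pack the results into a single integer vector
(function types like V4SI_FTYPE_V2DF_V2DF_ROUND).  */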
32633 static rtx
32634 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
32635 tree exp, rtx target)
32637 rtx pat;
32638 tree arg0 = CALL_EXPR_ARG (exp, 0);
32639 tree arg1 = CALL_EXPR_ARG (exp, 1);
32640 rtx op0 = expand_normal (arg0);
32641 rtx op1 = expand_normal (arg1);
32642 rtx op2;
32643 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32644 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32645 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32647 if (optimize || target == 0
32648 || GET_MODE (target) != tmode
32649 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32650 target = gen_reg_rtx (tmode);
32652 op0 = safe_vector_operand (op0, mode0);
32653 op1 = safe_vector_operand (op1, mode1);
32655 if ((optimize && !register_operand (op0, mode0))
32656 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32657 op0 = copy_to_mode_reg (mode0, op0);
32658 if ((optimize && !register_operand (op1, mode1))
32659 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32660 op1 = copy_to_mode_reg (mode1, op1);
32662 op2 = GEN_INT (d->comparison);
32664 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32665 if (! pat)
32666 return 0;
32667 emit_insn (pat);
32668 return target;
32671 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
32673 static rtx
32674 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
32675 rtx target)
32677 rtx pat;
32678 tree arg0 = CALL_EXPR_ARG (exp, 0);
32679 tree arg1 = CALL_EXPR_ARG (exp, 1);
32680 rtx op0 = expand_normal (arg0);
32681 rtx op1 = expand_normal (arg1);
32682 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
32683 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
32684 enum rtx_code comparison = d->comparison;
32686 if (VECTOR_MODE_P (mode0))
32687 op0 = safe_vector_operand (op0, mode0);
32688 if (VECTOR_MODE_P (mode1))
32689 op1 = safe_vector_operand (op1, mode1);
32691 target = gen_reg_rtx (SImode);
32692 emit_move_insn (target, const0_rtx);
32693 target = gen_rtx_SUBREG (QImode, target, 0);
32695 if ((optimize && !register_operand (op0, mode0))
32696 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32697 op0 = copy_to_mode_reg (mode0, op0);
32698 if ((optimize && !register_operand (op1, mode1))
32699 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32700 op1 = copy_to_mode_reg (mode1, op1);
32702 pat = GEN_FCN (d->icode) (op0, op1);
32703 if (! pat)
32704 return 0;
32705 emit_insn (pat);
32706 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32707 gen_rtx_fmt_ee (comparison, QImode,
32708 SET_DEST (pat),
32709 const0_rtx)));
32711 return SUBREG_REG (target);
32714 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
32716 static rtx
32717 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
32718 tree exp, rtx target)
32720 rtx pat;
32721 tree arg0 = CALL_EXPR_ARG (exp, 0);
32722 tree arg1 = CALL_EXPR_ARG (exp, 1);
32723 tree arg2 = CALL_EXPR_ARG (exp, 2);
32724 tree arg3 = CALL_EXPR_ARG (exp, 3);
32725 tree arg4 = CALL_EXPR_ARG (exp, 4);
32726 rtx scratch0, scratch1;
32727 rtx op0 = expand_normal (arg0);
32728 rtx op1 = expand_normal (arg1);
32729 rtx op2 = expand_normal (arg2);
32730 rtx op3 = expand_normal (arg3);
32731 rtx op4 = expand_normal (arg4);
32732 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
32734 tmode0 = insn_data[d->icode].operand[0].mode;
32735 tmode1 = insn_data[d->icode].operand[1].mode;
32736 modev2 = insn_data[d->icode].operand[2].mode;
32737 modei3 = insn_data[d->icode].operand[3].mode;
32738 modev4 = insn_data[d->icode].operand[4].mode;
32739 modei5 = insn_data[d->icode].operand[5].mode;
32740 modeimm = insn_data[d->icode].operand[6].mode;
32742 if (VECTOR_MODE_P (modev2))
32743 op0 = safe_vector_operand (op0, modev2);
32744 if (VECTOR_MODE_P (modev4))
32745 op2 = safe_vector_operand (op2, modev4);
32747 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
32748 op0 = copy_to_mode_reg (modev2, op0);
32749 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
32750 op1 = copy_to_mode_reg (modei3, op1);
32751 if ((optimize && !register_operand (op2, modev4))
32752 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
32753 op2 = copy_to_mode_reg (modev4, op2);
32754 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
32755 op3 = copy_to_mode_reg (modei5, op3);
32757 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
32759 error ("the fifth argument must be an 8-bit immediate");
32760 return const0_rtx;
32763 if (d->code == IX86_BUILTIN_PCMPESTRI128)
32765 if (optimize || !target
32766 || GET_MODE (target) != tmode0
32767 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
32768 target = gen_reg_rtx (tmode0);
32770 scratch1 = gen_reg_rtx (tmode1);
32772 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
32774 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
32776 if (optimize || !target
32777 || GET_MODE (target) != tmode1
32778 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
32779 target = gen_reg_rtx (tmode1);
32781 scratch0 = gen_reg_rtx (tmode0);
32783 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
32785 else
32787 gcc_assert (d->flag);
32789 scratch0 = gen_reg_rtx (tmode0);
32790 scratch1 = gen_reg_rtx (tmode1);
32792 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
32795 if (! pat)
32796 return 0;
32798 emit_insn (pat);
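/* For the flag-returning pcmpestr variants, d->flag holds the mode of
the flags-register condition to test; turn that condition into a 0/1
result below.  */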
32800 if (d->flag)
32802 target = gen_reg_rtx (SImode);
32803 emit_move_insn (target, const0_rtx);
32804 target = gen_rtx_SUBREG (QImode, target, 0);
32806 emit_insn
32807 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32808 gen_rtx_fmt_ee (EQ, QImode,
32809 gen_rtx_REG ((machine_mode) d->flag,
32810 FLAGS_REG),
32811 const0_rtx)));
32812 return SUBREG_REG (target);
32814 else
32815 return target;
32819 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
32821 static rtx
32822 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
32823 tree exp, rtx target)
32825 rtx pat;
32826 tree arg0 = CALL_EXPR_ARG (exp, 0);
32827 tree arg1 = CALL_EXPR_ARG (exp, 1);
32828 tree arg2 = CALL_EXPR_ARG (exp, 2);
32829 rtx scratch0, scratch1;
32830 rtx op0 = expand_normal (arg0);
32831 rtx op1 = expand_normal (arg1);
32832 rtx op2 = expand_normal (arg2);
32833 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
32835 tmode0 = insn_data[d->icode].operand[0].mode;
32836 tmode1 = insn_data[d->icode].operand[1].mode;
32837 modev2 = insn_data[d->icode].operand[2].mode;
32838 modev3 = insn_data[d->icode].operand[3].mode;
32839 modeimm = insn_data[d->icode].operand[4].mode;
32841 if (VECTOR_MODE_P (modev2))
32842 op0 = safe_vector_operand (op0, modev2);
32843 if (VECTOR_MODE_P (modev3))
32844 op1 = safe_vector_operand (op1, modev3);
32846 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
32847 op0 = copy_to_mode_reg (modev2, op0);
32848 if ((optimize && !register_operand (op1, modev3))
32849 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
32850 op1 = copy_to_mode_reg (modev3, op1);
32852 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
32854 error ("the third argument must be an 8-bit immediate");
32855 return const0_rtx;
32858 if (d->code == IX86_BUILTIN_PCMPISTRI128)
32860 if (optimize || !target
32861 || GET_MODE (target) != tmode0
32862 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
32863 target = gen_reg_rtx (tmode0);
32865 scratch1 = gen_reg_rtx (tmode1);
32867 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
32869 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
32871 if (optimize || !target
32872 || GET_MODE (target) != tmode1
32873 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
32874 target = gen_reg_rtx (tmode1);
32876 scratch0 = gen_reg_rtx (tmode0);
32878 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
32880 else
32882 gcc_assert (d->flag);
32884 scratch0 = gen_reg_rtx (tmode0);
32885 scratch1 = gen_reg_rtx (tmode1);
32887 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
32890 if (! pat)
32891 return 0;
32893 emit_insn (pat);
32895 if (d->flag)
32897 target = gen_reg_rtx (SImode);
32898 emit_move_insn (target, const0_rtx);
32899 target = gen_rtx_SUBREG (QImode, target, 0);
32901 emit_insn
32902 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32903 gen_rtx_fmt_ee (EQ, QImode,
32904 gen_rtx_REG ((machine_mode) d->flag,
32905 FLAGS_REG),
32906 const0_rtx)));
32907 return SUBREG_REG (target);
32909 else
32910 return target;
32913 /* Subroutine of ix86_expand_builtin to take care of insns with
32914 a variable number of operands. */
32916 static rtx
32917 ix86_expand_args_builtin (const struct builtin_description *d,
32918 tree exp, rtx target)
32920 rtx pat, real_target;
32921 unsigned int i, nargs;
32922 unsigned int nargs_constant = 0;
32923 unsigned int mask_pos = 0;
32924 int num_memory = 0;
32925 struct
32927 rtx op;
32928 machine_mode mode;
32929 } args[6];
32930 bool second_arg_count = false;
32931 enum insn_code icode = d->icode;
32932 const struct insn_data_d *insn_p = &insn_data[icode];
32933 machine_mode tmode = insn_p->operand[0].mode;
32934 machine_mode rmode = VOIDmode;
32935 bool swap = false;
32936 enum rtx_code comparison = d->comparison;
32938 switch ((enum ix86_builtin_func_type) d->flag)
32940 case V2DF_FTYPE_V2DF_ROUND:
32941 case V4DF_FTYPE_V4DF_ROUND:
32942 case V8DF_FTYPE_V8DF_ROUND:
32943 case V4SF_FTYPE_V4SF_ROUND:
32944 case V8SF_FTYPE_V8SF_ROUND:
32945 case V16SF_FTYPE_V16SF_ROUND:
32946 case V4SI_FTYPE_V4SF_ROUND:
32947 case V8SI_FTYPE_V8SF_ROUND:
32948 case V16SI_FTYPE_V16SF_ROUND:
32949 return ix86_expand_sse_round (d, exp, target);
32950 case V4SI_FTYPE_V2DF_V2DF_ROUND:
32951 case V8SI_FTYPE_V4DF_V4DF_ROUND:
32952 case V16SI_FTYPE_V8DF_V8DF_ROUND:
32953 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
32954 case INT_FTYPE_V8SF_V8SF_PTEST:
32955 case INT_FTYPE_V4DI_V4DI_PTEST:
32956 case INT_FTYPE_V4DF_V4DF_PTEST:
32957 case INT_FTYPE_V4SF_V4SF_PTEST:
32958 case INT_FTYPE_V2DI_V2DI_PTEST:
32959 case INT_FTYPE_V2DF_V2DF_PTEST:
32960 return ix86_expand_sse_ptest (d, exp, target);
32961 case FLOAT128_FTYPE_FLOAT128:
32962 case FLOAT_FTYPE_FLOAT:
32963 case INT_FTYPE_INT:
32964 case UINT_FTYPE_UINT:
32965 case UINT16_FTYPE_UINT16:
32966 case UINT64_FTYPE_INT:
32967 case UINT64_FTYPE_UINT64:
32968 case INT64_FTYPE_INT64:
32969 case INT64_FTYPE_V4SF:
32970 case INT64_FTYPE_V2DF:
32971 case INT_FTYPE_V16QI:
32972 case INT_FTYPE_V8QI:
32973 case INT_FTYPE_V8SF:
32974 case INT_FTYPE_V4DF:
32975 case INT_FTYPE_V4SF:
32976 case INT_FTYPE_V2DF:
32977 case INT_FTYPE_V32QI:
32978 case V16QI_FTYPE_V16QI:
32979 case V8SI_FTYPE_V8SF:
32980 case V8SI_FTYPE_V4SI:
32981 case V8HI_FTYPE_V8HI:
32982 case V8HI_FTYPE_V16QI:
32983 case V8QI_FTYPE_V8QI:
32984 case V8SF_FTYPE_V8SF:
32985 case V8SF_FTYPE_V8SI:
32986 case V8SF_FTYPE_V4SF:
32987 case V8SF_FTYPE_V8HI:
32988 case V4SI_FTYPE_V4SI:
32989 case V4SI_FTYPE_V16QI:
32990 case V4SI_FTYPE_V4SF:
32991 case V4SI_FTYPE_V8SI:
32992 case V4SI_FTYPE_V8HI:
32993 case V4SI_FTYPE_V4DF:
32994 case V4SI_FTYPE_V2DF:
32995 case V4HI_FTYPE_V4HI:
32996 case V4DF_FTYPE_V4DF:
32997 case V4DF_FTYPE_V4SI:
32998 case V4DF_FTYPE_V4SF:
32999 case V4DF_FTYPE_V2DF:
33000 case V4SF_FTYPE_V4SF:
33001 case V4SF_FTYPE_V4SI:
33002 case V4SF_FTYPE_V8SF:
33003 case V4SF_FTYPE_V4DF:
33004 case V4SF_FTYPE_V8HI:
33005 case V4SF_FTYPE_V2DF:
33006 case V2DI_FTYPE_V2DI:
33007 case V2DI_FTYPE_V16QI:
33008 case V2DI_FTYPE_V8HI:
33009 case V2DI_FTYPE_V4SI:
33010 case V2DF_FTYPE_V2DF:
33011 case V2DF_FTYPE_V4SI:
33012 case V2DF_FTYPE_V4DF:
33013 case V2DF_FTYPE_V4SF:
33014 case V2DF_FTYPE_V2SI:
33015 case V2SI_FTYPE_V2SI:
33016 case V2SI_FTYPE_V4SF:
33017 case V2SI_FTYPE_V2SF:
33018 case V2SI_FTYPE_V2DF:
33019 case V2SF_FTYPE_V2SF:
33020 case V2SF_FTYPE_V2SI:
33021 case V32QI_FTYPE_V32QI:
33022 case V32QI_FTYPE_V16QI:
33023 case V16HI_FTYPE_V16HI:
33024 case V16HI_FTYPE_V8HI:
33025 case V8SI_FTYPE_V8SI:
33026 case V16HI_FTYPE_V16QI:
33027 case V8SI_FTYPE_V16QI:
33028 case V4DI_FTYPE_V16QI:
33029 case V8SI_FTYPE_V8HI:
33030 case V4DI_FTYPE_V8HI:
33031 case V4DI_FTYPE_V4SI:
33032 case V4DI_FTYPE_V2DI:
33033 case UQI_FTYPE_UQI:
33034 case UHI_FTYPE_UHI:
33035 case USI_FTYPE_USI:
33036 case USI_FTYPE_UQI:
33037 case USI_FTYPE_UHI:
33038 case UDI_FTYPE_UDI:
33039 case UHI_FTYPE_V16QI:
33040 case USI_FTYPE_V32QI:
33041 case UDI_FTYPE_V64QI:
33042 case V16QI_FTYPE_UHI:
33043 case V32QI_FTYPE_USI:
33044 case V64QI_FTYPE_UDI:
33045 case V8HI_FTYPE_UQI:
33046 case V16HI_FTYPE_UHI:
33047 case V32HI_FTYPE_USI:
33048 case V4SI_FTYPE_UQI:
33049 case V8SI_FTYPE_UQI:
33050 case V4SI_FTYPE_UHI:
33051 case V8SI_FTYPE_UHI:
33052 case UQI_FTYPE_V8HI:
33053 case UHI_FTYPE_V16HI:
33054 case USI_FTYPE_V32HI:
33055 case UQI_FTYPE_V4SI:
33056 case UQI_FTYPE_V8SI:
33057 case UHI_FTYPE_V16SI:
33058 case UQI_FTYPE_V2DI:
33059 case UQI_FTYPE_V4DI:
33060 case UQI_FTYPE_V8DI:
33061 case V16SI_FTYPE_UHI:
33062 case V2DI_FTYPE_UQI:
33063 case V4DI_FTYPE_UQI:
33064 case V16SI_FTYPE_INT:
33065 case V16SF_FTYPE_V8SF:
33066 case V16SI_FTYPE_V8SI:
33067 case V16SF_FTYPE_V4SF:
33068 case V16SI_FTYPE_V4SI:
33069 case V16SI_FTYPE_V16SF:
33070 case V16SI_FTYPE_V16SI:
33071 case V16SF_FTYPE_V16SF:
33072 case V8DI_FTYPE_UQI:
33073 case V8DI_FTYPE_V8DI:
33074 case V8DF_FTYPE_V4DF:
33075 case V8DF_FTYPE_V2DF:
33076 case V8DF_FTYPE_V8DF:
33077 nargs = 1;
33078 break;
33079 case V4SF_FTYPE_V4SF_VEC_MERGE:
33080 case V2DF_FTYPE_V2DF_VEC_MERGE:
33081 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33082 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33083 case V16QI_FTYPE_V16QI_V16QI:
33084 case V16QI_FTYPE_V8HI_V8HI:
33085 case V16SF_FTYPE_V16SF_V16SF:
33086 case V8QI_FTYPE_V8QI_V8QI:
33087 case V8QI_FTYPE_V4HI_V4HI:
33088 case V8HI_FTYPE_V8HI_V8HI:
33089 case V8HI_FTYPE_V16QI_V16QI:
33090 case V8HI_FTYPE_V4SI_V4SI:
33091 case V8SF_FTYPE_V8SF_V8SF:
33092 case V8SF_FTYPE_V8SF_V8SI:
33093 case V8DF_FTYPE_V8DF_V8DF:
33094 case V4SI_FTYPE_V4SI_V4SI:
33095 case V4SI_FTYPE_V8HI_V8HI:
33096 case V4SI_FTYPE_V2DF_V2DF:
33097 case V4HI_FTYPE_V4HI_V4HI:
33098 case V4HI_FTYPE_V8QI_V8QI:
33099 case V4HI_FTYPE_V2SI_V2SI:
33100 case V4DF_FTYPE_V4DF_V4DF:
33101 case V4DF_FTYPE_V4DF_V4DI:
33102 case V4SF_FTYPE_V4SF_V4SF:
33103 case V4SF_FTYPE_V4SF_V4SI:
33104 case V4SF_FTYPE_V4SF_V2SI:
33105 case V4SF_FTYPE_V4SF_V2DF:
33106 case V4SF_FTYPE_V4SF_UINT:
33107 case V4SF_FTYPE_V4SF_DI:
33108 case V4SF_FTYPE_V4SF_SI:
33109 case V2DI_FTYPE_V2DI_V2DI:
33110 case V2DI_FTYPE_V16QI_V16QI:
33111 case V2DI_FTYPE_V4SI_V4SI:
33112 case V2DI_FTYPE_V2DI_V16QI:
33113 case V2SI_FTYPE_V2SI_V2SI:
33114 case V2SI_FTYPE_V4HI_V4HI:
33115 case V2SI_FTYPE_V2SF_V2SF:
33116 case V2DF_FTYPE_V2DF_V2DF:
33117 case V2DF_FTYPE_V2DF_V4SF:
33118 case V2DF_FTYPE_V2DF_V2DI:
33119 case V2DF_FTYPE_V2DF_DI:
33120 case V2DF_FTYPE_V2DF_SI:
33121 case V2DF_FTYPE_V2DF_UINT:
33122 case V2SF_FTYPE_V2SF_V2SF:
33123 case V1DI_FTYPE_V1DI_V1DI:
33124 case V1DI_FTYPE_V8QI_V8QI:
33125 case V1DI_FTYPE_V2SI_V2SI:
33126 case V32QI_FTYPE_V16HI_V16HI:
33127 case V16HI_FTYPE_V8SI_V8SI:
33128 case V32QI_FTYPE_V32QI_V32QI:
33129 case V16HI_FTYPE_V32QI_V32QI:
33130 case V16HI_FTYPE_V16HI_V16HI:
33131 case V8SI_FTYPE_V4DF_V4DF:
33132 case V8SI_FTYPE_V8SI_V8SI:
33133 case V8SI_FTYPE_V16HI_V16HI:
33134 case V4DI_FTYPE_V4DI_V4DI:
33135 case V4DI_FTYPE_V8SI_V8SI:
33136 case V8DI_FTYPE_V64QI_V64QI:
33137 if (comparison == UNKNOWN)
33138 return ix86_expand_binop_builtin (icode, exp, target);
33139 nargs = 2;
33140 break;
33141 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33142 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33143 gcc_assert (comparison != UNKNOWN);
33144 nargs = 2;
33145 swap = true;
33146 break;
33147 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33148 case V16HI_FTYPE_V16HI_SI_COUNT:
33149 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33150 case V8SI_FTYPE_V8SI_SI_COUNT:
33151 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33152 case V4DI_FTYPE_V4DI_INT_COUNT:
33153 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33154 case V8HI_FTYPE_V8HI_SI_COUNT:
33155 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33156 case V4SI_FTYPE_V4SI_SI_COUNT:
33157 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33158 case V4HI_FTYPE_V4HI_SI_COUNT:
33159 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33160 case V2DI_FTYPE_V2DI_SI_COUNT:
33161 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33162 case V2SI_FTYPE_V2SI_SI_COUNT:
33163 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33164 case V1DI_FTYPE_V1DI_SI_COUNT:
33165 nargs = 2;
33166 second_arg_count = true;
33167 break;
33168 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
33169 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
33170 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
33171 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
33172 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
33173 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
33174 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
33175 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
33176 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
33177 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
33178 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
33179 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
33180 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
33181 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
33182 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
33183 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
33184 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
33185 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
33186 nargs = 4;
33187 second_arg_count = true;
33188 break;
33189 case UINT64_FTYPE_UINT64_UINT64:
33190 case UINT_FTYPE_UINT_UINT:
33191 case UINT_FTYPE_UINT_USHORT:
33192 case UINT_FTYPE_UINT_UCHAR:
33193 case UINT16_FTYPE_UINT16_INT:
33194 case UINT8_FTYPE_UINT8_INT:
33195 case UQI_FTYPE_UQI_UQI:
33196 case UHI_FTYPE_UHI_UHI:
33197 case USI_FTYPE_USI_USI:
33198 case UDI_FTYPE_UDI_UDI:
33199 case V16SI_FTYPE_V8DF_V8DF:
33200 nargs = 2;
33201 break;
33202 case V2DI_FTYPE_V2DI_INT_CONVERT:
33203 nargs = 2;
33204 rmode = V1TImode;
33205 nargs_constant = 1;
33206 break;
33207 case V4DI_FTYPE_V4DI_INT_CONVERT:
33208 nargs = 2;
33209 rmode = V2TImode;
33210 nargs_constant = 1;
33211 break;
33212 case V8DI_FTYPE_V8DI_INT_CONVERT:
33213 nargs = 2;
33214 rmode = V4TImode;
33215 nargs_constant = 1;
33216 break;
33217 case V8HI_FTYPE_V8HI_INT:
33218 case V8HI_FTYPE_V8SF_INT:
33219 case V16HI_FTYPE_V16SF_INT:
33220 case V8HI_FTYPE_V4SF_INT:
33221 case V8SF_FTYPE_V8SF_INT:
33222 case V4SF_FTYPE_V16SF_INT:
33223 case V16SF_FTYPE_V16SF_INT:
33224 case V4SI_FTYPE_V4SI_INT:
33225 case V4SI_FTYPE_V8SI_INT:
33226 case V4HI_FTYPE_V4HI_INT:
33227 case V4DF_FTYPE_V4DF_INT:
33228 case V4DF_FTYPE_V8DF_INT:
33229 case V4SF_FTYPE_V4SF_INT:
33230 case V4SF_FTYPE_V8SF_INT:
33231 case V2DI_FTYPE_V2DI_INT:
33232 case V2DF_FTYPE_V2DF_INT:
33233 case V2DF_FTYPE_V4DF_INT:
33234 case V16HI_FTYPE_V16HI_INT:
33235 case V8SI_FTYPE_V8SI_INT:
33236 case V16SI_FTYPE_V16SI_INT:
33237 case V4SI_FTYPE_V16SI_INT:
33238 case V4DI_FTYPE_V4DI_INT:
33239 case V2DI_FTYPE_V4DI_INT:
33240 case V4DI_FTYPE_V8DI_INT:
33241 case QI_FTYPE_V4SF_INT:
33242 case QI_FTYPE_V2DF_INT:
33243 case UQI_FTYPE_UQI_UQI_CONST:
33244 case UHI_FTYPE_UHI_UQI:
33245 case USI_FTYPE_USI_UQI:
33246 case UDI_FTYPE_UDI_UQI:
33247 nargs = 2;
33248 nargs_constant = 1;
33249 break;
33250 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33251 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33252 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33253 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33254 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33255 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33256 case UHI_FTYPE_V16SI_V16SI_UHI:
33257 case UQI_FTYPE_V8DI_V8DI_UQI:
33258 case V16HI_FTYPE_V16SI_V16HI_UHI:
33259 case V16QI_FTYPE_V16SI_V16QI_UHI:
33260 case V16QI_FTYPE_V8DI_V16QI_UQI:
33261 case V16SF_FTYPE_V16SF_V16SF_UHI:
33262 case V16SF_FTYPE_V4SF_V16SF_UHI:
33263 case V16SI_FTYPE_SI_V16SI_UHI:
33264 case V16SI_FTYPE_V16HI_V16SI_UHI:
33265 case V16SI_FTYPE_V16QI_V16SI_UHI:
33266 case V8SF_FTYPE_V4SF_V8SF_UQI:
33267 case V4DF_FTYPE_V2DF_V4DF_UQI:
33268 case V8SI_FTYPE_V4SI_V8SI_UQI:
33269 case V8SI_FTYPE_SI_V8SI_UQI:
33270 case V4SI_FTYPE_V4SI_V4SI_UQI:
33271 case V4SI_FTYPE_SI_V4SI_UQI:
33272 case V4DI_FTYPE_V2DI_V4DI_UQI:
33273 case V4DI_FTYPE_DI_V4DI_UQI:
33274 case V2DI_FTYPE_V2DI_V2DI_UQI:
33275 case V2DI_FTYPE_DI_V2DI_UQI:
33276 case V64QI_FTYPE_V64QI_V64QI_UDI:
33277 case V64QI_FTYPE_V16QI_V64QI_UDI:
33278 case V64QI_FTYPE_QI_V64QI_UDI:
33279 case V32QI_FTYPE_V32QI_V32QI_USI:
33280 case V32QI_FTYPE_V16QI_V32QI_USI:
33281 case V32QI_FTYPE_QI_V32QI_USI:
33282 case V16QI_FTYPE_V16QI_V16QI_UHI:
33283 case V16QI_FTYPE_QI_V16QI_UHI:
33284 case V32HI_FTYPE_V8HI_V32HI_USI:
33285 case V32HI_FTYPE_HI_V32HI_USI:
33286 case V16HI_FTYPE_V8HI_V16HI_UHI:
33287 case V16HI_FTYPE_HI_V16HI_UHI:
33288 case V8HI_FTYPE_V8HI_V8HI_UQI:
33289 case V8HI_FTYPE_HI_V8HI_UQI:
33290 case V8SF_FTYPE_V8HI_V8SF_UQI:
33291 case V4SF_FTYPE_V8HI_V4SF_UQI:
33292 case V8SI_FTYPE_V8SF_V8SI_UQI:
33293 case V4SI_FTYPE_V4SF_V4SI_UQI:
33294 case V4DI_FTYPE_V4SF_V4DI_UQI:
33295 case V2DI_FTYPE_V4SF_V2DI_UQI:
33296 case V4SF_FTYPE_V4DI_V4SF_UQI:
33297 case V4SF_FTYPE_V2DI_V4SF_UQI:
33298 case V4DF_FTYPE_V4DI_V4DF_UQI:
33299 case V2DF_FTYPE_V2DI_V2DF_UQI:
33300 case V16QI_FTYPE_V8HI_V16QI_UQI:
33301 case V16QI_FTYPE_V16HI_V16QI_UHI:
33302 case V16QI_FTYPE_V4SI_V16QI_UQI:
33303 case V16QI_FTYPE_V8SI_V16QI_UQI:
33304 case V8HI_FTYPE_V4SI_V8HI_UQI:
33305 case V8HI_FTYPE_V8SI_V8HI_UQI:
33306 case V16QI_FTYPE_V2DI_V16QI_UQI:
33307 case V16QI_FTYPE_V4DI_V16QI_UQI:
33308 case V8HI_FTYPE_V2DI_V8HI_UQI:
33309 case V8HI_FTYPE_V4DI_V8HI_UQI:
33310 case V4SI_FTYPE_V2DI_V4SI_UQI:
33311 case V4SI_FTYPE_V4DI_V4SI_UQI:
33312 case V32QI_FTYPE_V32HI_V32QI_USI:
33313 case UHI_FTYPE_V16QI_V16QI_UHI:
33314 case USI_FTYPE_V32QI_V32QI_USI:
33315 case UDI_FTYPE_V64QI_V64QI_UDI:
33316 case UQI_FTYPE_V8HI_V8HI_UQI:
33317 case UHI_FTYPE_V16HI_V16HI_UHI:
33318 case USI_FTYPE_V32HI_V32HI_USI:
33319 case UQI_FTYPE_V4SI_V4SI_UQI:
33320 case UQI_FTYPE_V8SI_V8SI_UQI:
33321 case UQI_FTYPE_V2DI_V2DI_UQI:
33322 case UQI_FTYPE_V4DI_V4DI_UQI:
33323 case V4SF_FTYPE_V2DF_V4SF_UQI:
33324 case V4SF_FTYPE_V4DF_V4SF_UQI:
33325 case V16SI_FTYPE_V16SI_V16SI_UHI:
33326 case V16SI_FTYPE_V4SI_V16SI_UHI:
33327 case V2DI_FTYPE_V4SI_V2DI_UQI:
33328 case V2DI_FTYPE_V8HI_V2DI_UQI:
33329 case V2DI_FTYPE_V16QI_V2DI_UQI:
33330 case V4DI_FTYPE_V4DI_V4DI_UQI:
33331 case V4DI_FTYPE_V4SI_V4DI_UQI:
33332 case V4DI_FTYPE_V8HI_V4DI_UQI:
33333 case V4DI_FTYPE_V16QI_V4DI_UQI:
33334 case V4DI_FTYPE_V4DF_V4DI_UQI:
33335 case V2DI_FTYPE_V2DF_V2DI_UQI:
33336 case V4SI_FTYPE_V4DF_V4SI_UQI:
33337 case V4SI_FTYPE_V2DF_V4SI_UQI:
33338 case V4SI_FTYPE_V8HI_V4SI_UQI:
33339 case V4SI_FTYPE_V16QI_V4SI_UQI:
33340 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33341 case V8DF_FTYPE_V2DF_V8DF_UQI:
33342 case V8DF_FTYPE_V4DF_V8DF_UQI:
33343 case V8DF_FTYPE_V8DF_V8DF_UQI:
33344 case V8SF_FTYPE_V8SF_V8SF_UQI:
33345 case V8SF_FTYPE_V8SI_V8SF_UQI:
33346 case V4DF_FTYPE_V4DF_V4DF_UQI:
33347 case V4SF_FTYPE_V4SF_V4SF_UQI:
33348 case V2DF_FTYPE_V2DF_V2DF_UQI:
33349 case V2DF_FTYPE_V4SF_V2DF_UQI:
33350 case V2DF_FTYPE_V4SI_V2DF_UQI:
33351 case V4SF_FTYPE_V4SI_V4SF_UQI:
33352 case V4DF_FTYPE_V4SF_V4DF_UQI:
33353 case V4DF_FTYPE_V4SI_V4DF_UQI:
33354 case V8SI_FTYPE_V8SI_V8SI_UQI:
33355 case V8SI_FTYPE_V8HI_V8SI_UQI:
33356 case V8SI_FTYPE_V16QI_V8SI_UQI:
33357 case V8DF_FTYPE_V8SI_V8DF_UQI:
33358 case V8DI_FTYPE_DI_V8DI_UQI:
33359 case V16SF_FTYPE_V8SF_V16SF_UHI:
33360 case V16SI_FTYPE_V8SI_V16SI_UHI:
33361 case V16HI_FTYPE_V16HI_V16HI_UHI:
33362 case V8HI_FTYPE_V16QI_V8HI_UQI:
33363 case V16HI_FTYPE_V16QI_V16HI_UHI:
33364 case V32HI_FTYPE_V32HI_V32HI_USI:
33365 case V32HI_FTYPE_V32QI_V32HI_USI:
33366 case V8DI_FTYPE_V16QI_V8DI_UQI:
33367 case V8DI_FTYPE_V2DI_V8DI_UQI:
33368 case V8DI_FTYPE_V4DI_V8DI_UQI:
33369 case V8DI_FTYPE_V8DI_V8DI_UQI:
33370 case V8DI_FTYPE_V8HI_V8DI_UQI:
33371 case V8DI_FTYPE_V8SI_V8DI_UQI:
33372 case V8HI_FTYPE_V8DI_V8HI_UQI:
33373 case V8SI_FTYPE_V8DI_V8SI_UQI:
33374 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33375 nargs = 3;
33376 break;
33377 case V32QI_FTYPE_V32QI_V32QI_INT:
33378 case V16HI_FTYPE_V16HI_V16HI_INT:
33379 case V16QI_FTYPE_V16QI_V16QI_INT:
33380 case V4DI_FTYPE_V4DI_V4DI_INT:
33381 case V8HI_FTYPE_V8HI_V8HI_INT:
33382 case V8SI_FTYPE_V8SI_V8SI_INT:
33383 case V8SI_FTYPE_V8SI_V4SI_INT:
33384 case V8SF_FTYPE_V8SF_V8SF_INT:
33385 case V8SF_FTYPE_V8SF_V4SF_INT:
33386 case V4SI_FTYPE_V4SI_V4SI_INT:
33387 case V4DF_FTYPE_V4DF_V4DF_INT:
33388 case V16SF_FTYPE_V16SF_V16SF_INT:
33389 case V16SF_FTYPE_V16SF_V4SF_INT:
33390 case V16SI_FTYPE_V16SI_V4SI_INT:
33391 case V4DF_FTYPE_V4DF_V2DF_INT:
33392 case V4SF_FTYPE_V4SF_V4SF_INT:
33393 case V2DI_FTYPE_V2DI_V2DI_INT:
33394 case V4DI_FTYPE_V4DI_V2DI_INT:
33395 case V2DF_FTYPE_V2DF_V2DF_INT:
33396 case UQI_FTYPE_V8DI_V8UDI_INT:
33397 case UQI_FTYPE_V8DF_V8DF_INT:
33398 case UQI_FTYPE_V2DF_V2DF_INT:
33399 case UQI_FTYPE_V4SF_V4SF_INT:
33400 case UHI_FTYPE_V16SI_V16SI_INT:
33401 case UHI_FTYPE_V16SF_V16SF_INT:
33402 nargs = 3;
33403 nargs_constant = 1;
33404 break;
33405 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33406 nargs = 3;
33407 rmode = V4DImode;
33408 nargs_constant = 1;
33409 break;
33410 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33411 nargs = 3;
33412 rmode = V2DImode;
33413 nargs_constant = 1;
33414 break;
33415 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33416 nargs = 3;
33417 rmode = DImode;
33418 nargs_constant = 1;
33419 break;
33420 case V2DI_FTYPE_V2DI_UINT_UINT:
33421 nargs = 3;
33422 nargs_constant = 2;
33423 break;
33424 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
33425 nargs = 3;
33426 rmode = V8DImode;
33427 nargs_constant = 1;
33428 break;
33429 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
33430 nargs = 5;
33431 rmode = V8DImode;
33432 mask_pos = 2;
33433 nargs_constant = 1;
33434 break;
33435 case QI_FTYPE_V8DF_INT_UQI:
33436 case QI_FTYPE_V4DF_INT_UQI:
33437 case QI_FTYPE_V2DF_INT_UQI:
33438 case HI_FTYPE_V16SF_INT_UHI:
33439 case QI_FTYPE_V8SF_INT_UQI:
33440 case QI_FTYPE_V4SF_INT_UQI:
33441 nargs = 3;
33442 mask_pos = 1;
33443 nargs_constant = 1;
33444 break;
33445 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
33446 nargs = 5;
33447 rmode = V4DImode;
33448 mask_pos = 2;
33449 nargs_constant = 1;
33450 break;
33451 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
33452 nargs = 5;
33453 rmode = V2DImode;
33454 mask_pos = 2;
33455 nargs_constant = 1;
33456 break;
33457 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
33458 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
33459 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
33460 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
33461 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
33462 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
33463 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
33464 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
33465 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
33466 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
33467 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
33468 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
33469 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
33470 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
33471 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
33472 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
33473 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
33474 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
33475 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
33476 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
33477 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
33478 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
33479 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
33480 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
33481 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
33482 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
33483 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
33484 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
33485 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
33486 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
33487 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
33488 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
33489 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
33490 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
33491 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
33492 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
33493 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
33494 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
33495 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
33496 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
33497 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
33498 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
33499 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
33500 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
33501 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
33502 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
33503 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
33504 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
33505 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
33506 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
33507 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
33508 nargs = 4;
33509 break;
33510 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33511 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33512 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33513 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33514 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33515 nargs = 4;
33516 nargs_constant = 1;
33517 break;
33518 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
33519 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
33520 case QI_FTYPE_V4DF_V4DF_INT_UQI:
33521 case QI_FTYPE_V8SF_V8SF_INT_UQI:
33522 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
33523 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
33524 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
33525 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
33526 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
33527 case USI_FTYPE_V32QI_V32QI_INT_USI:
33528 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
33529 case USI_FTYPE_V32HI_V32HI_INT_USI:
33530 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
33531 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
33532 nargs = 4;
33533 mask_pos = 1;
33534 nargs_constant = 1;
33535 break;
33536 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33537 nargs = 4;
33538 nargs_constant = 2;
33539 break;
33540 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33541 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33542 nargs = 4;
33543 break;
33544 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
33545 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
33546 mask_pos = 1;
33547 nargs = 4;
33548 nargs_constant = 1;
33549 break;
33550 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
33551 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
33552 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
33553 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
33554 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
33555 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
33556 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
33557 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
33558 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
33559 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
33560 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
33561 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
33562 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
33563 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
33564 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
33565 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
33566 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
33567 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
33568 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
33569 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
33570 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
33571 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
33572 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
33573 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
33574 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
33575 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
33576 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
33577 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
33578 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
33579 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
33580 nargs = 4;
33581 mask_pos = 2;
33582 nargs_constant = 1;
33583 break;
33584 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
33585 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
33586 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
33587 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
33588 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
33589 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
33590 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
33591 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
33592 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
33593 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
33594 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
33595 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
33596 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
33597 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
33598 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
33599 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
33600 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
33601 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
33602 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
33603 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
33604 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
33605 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
33606 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
33607 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
33608 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
33609 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
33610 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
33611 nargs = 5;
33612 mask_pos = 2;
33613 nargs_constant = 1;
33614 break;
33615 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
33616 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
33617 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
33618 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
33619 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
33620 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
33621 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
33622 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
33623 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
33624 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
33625 nargs = 5;
33626 mask_pos = 1;
33627 nargs_constant = 1;
33628 break;
33630 default:
33631 gcc_unreachable ();
33634 gcc_assert (nargs <= ARRAY_SIZE (args));
33636 if (comparison != UNKNOWN)
33638 gcc_assert (nargs == 2);
33639 return ix86_expand_sse_compare (d, exp, target, swap);
33642 if (rmode == VOIDmode || rmode == tmode)
33644 if (optimize
33645 || target == 0
33646 || GET_MODE (target) != tmode
33647 || !insn_p->operand[0].predicate (target, tmode))
33648 target = gen_reg_rtx (tmode);
33649 else if (memory_operand (target, tmode))
33650 num_memory++;
33651 real_target = target;
33653 else
33655 real_target = gen_reg_rtx (tmode);
33656 target = lowpart_subreg (rmode, real_target, tmode);
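/* The insn pattern produces its result in tmode, while the builtin's
return value is in rmode, so the value handed back to the caller is a
lowpart subreg of the real destination.  */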
33659 for (i = 0; i < nargs; i++)
33661 tree arg = CALL_EXPR_ARG (exp, i);
33662 rtx op = expand_normal (arg);
33663 machine_mode mode = insn_p->operand[i + 1].mode;
33664 bool match = insn_p->operand[i + 1].predicate (op, mode);
33666 if (second_arg_count && i == 1)
33668 /* SIMD shift insns take either an 8-bit immediate or a
33669 register as the count, but the builtin functions take an
33670 int. If the count doesn't match the operand predicate,
33671 put it in a register. The instructions use a 64-bit
33672 count; if op is only 32 bits wide, zero-extend it, since
33673 negative shift counts are undefined behavior and
33674 zero-extension is more efficient. */
33675 if (!match)
33677 if (SCALAR_INT_MODE_P (GET_MODE (op)))
33678 op = convert_modes (mode, GET_MODE (op), op, 1);
33679 else
33680 op = lowpart_subreg (mode, op, GET_MODE (op));
33681 if (!insn_p->operand[i + 1].predicate (op, mode))
33682 op = copy_to_reg (op);
33685 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33686 (!mask_pos && (nargs - i) <= nargs_constant))
33688 if (!match)
33689 switch (icode)
33691 case CODE_FOR_avx_vinsertf128v4di:
33692 case CODE_FOR_avx_vextractf128v4di:
33693 error ("the last argument must be a 1-bit immediate");
33694 return const0_rtx;
33696 case CODE_FOR_avx512f_cmpv8di3_mask:
33697 case CODE_FOR_avx512f_cmpv16si3_mask:
33698 case CODE_FOR_avx512f_ucmpv8di3_mask:
33699 case CODE_FOR_avx512f_ucmpv16si3_mask:
33700 case CODE_FOR_avx512vl_cmpv4di3_mask:
33701 case CODE_FOR_avx512vl_cmpv8si3_mask:
33702 case CODE_FOR_avx512vl_ucmpv4di3_mask:
33703 case CODE_FOR_avx512vl_ucmpv8si3_mask:
33704 case CODE_FOR_avx512vl_cmpv2di3_mask:
33705 case CODE_FOR_avx512vl_cmpv4si3_mask:
33706 case CODE_FOR_avx512vl_ucmpv2di3_mask:
33707 case CODE_FOR_avx512vl_ucmpv4si3_mask:
33708 error ("the last argument must be a 3-bit immediate");
33709 return const0_rtx;
33711 case CODE_FOR_sse4_1_roundsd:
33712 case CODE_FOR_sse4_1_roundss:
33714 case CODE_FOR_sse4_1_roundpd:
33715 case CODE_FOR_sse4_1_roundps:
33716 case CODE_FOR_avx_roundpd256:
33717 case CODE_FOR_avx_roundps256:
33719 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33720 case CODE_FOR_sse4_1_roundps_sfix:
33721 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33722 case CODE_FOR_avx_roundps_sfix256:
33724 case CODE_FOR_sse4_1_blendps:
33725 case CODE_FOR_avx_blendpd256:
33726 case CODE_FOR_avx_vpermilv4df:
33727 case CODE_FOR_avx_vpermilv4df_mask:
33728 case CODE_FOR_avx512f_getmantv8df_mask:
33729 case CODE_FOR_avx512f_getmantv16sf_mask:
33730 case CODE_FOR_avx512vl_getmantv8sf_mask:
33731 case CODE_FOR_avx512vl_getmantv4df_mask:
33732 case CODE_FOR_avx512vl_getmantv4sf_mask:
33733 case CODE_FOR_avx512vl_getmantv2df_mask:
33734 case CODE_FOR_avx512dq_rangepv8df_mask_round:
33735 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
33736 case CODE_FOR_avx512dq_rangepv4df_mask:
33737 case CODE_FOR_avx512dq_rangepv8sf_mask:
33738 case CODE_FOR_avx512dq_rangepv2df_mask:
33739 case CODE_FOR_avx512dq_rangepv4sf_mask:
33740 case CODE_FOR_avx_shufpd256_mask:
33741 error ("the last argument must be a 4-bit immediate");
33742 return const0_rtx;
33744 case CODE_FOR_sha1rnds4:
33745 case CODE_FOR_sse4_1_blendpd:
33746 case CODE_FOR_avx_vpermilv2df:
33747 case CODE_FOR_avx_vpermilv2df_mask:
33748 case CODE_FOR_xop_vpermil2v2df3:
33749 case CODE_FOR_xop_vpermil2v4sf3:
33750 case CODE_FOR_xop_vpermil2v4df3:
33751 case CODE_FOR_xop_vpermil2v8sf3:
33752 case CODE_FOR_avx512f_vinsertf32x4_mask:
33753 case CODE_FOR_avx512f_vinserti32x4_mask:
33754 case CODE_FOR_avx512f_vextractf32x4_mask:
33755 case CODE_FOR_avx512f_vextracti32x4_mask:
33756 case CODE_FOR_sse2_shufpd:
33757 case CODE_FOR_sse2_shufpd_mask:
33758 case CODE_FOR_avx512dq_shuf_f64x2_mask:
33759 case CODE_FOR_avx512dq_shuf_i64x2_mask:
33760 case CODE_FOR_avx512vl_shuf_i32x4_mask:
33761 case CODE_FOR_avx512vl_shuf_f32x4_mask:
33762 error ("the last argument must be a 2-bit immediate");
33763 return const0_rtx;
33765 case CODE_FOR_avx_vextractf128v4df:
33766 case CODE_FOR_avx_vextractf128v8sf:
33767 case CODE_FOR_avx_vextractf128v8si:
33768 case CODE_FOR_avx_vinsertf128v4df:
33769 case CODE_FOR_avx_vinsertf128v8sf:
33770 case CODE_FOR_avx_vinsertf128v8si:
33771 case CODE_FOR_avx512f_vinsertf64x4_mask:
33772 case CODE_FOR_avx512f_vinserti64x4_mask:
33773 case CODE_FOR_avx512f_vextractf64x4_mask:
33774 case CODE_FOR_avx512f_vextracti64x4_mask:
33775 case CODE_FOR_avx512dq_vinsertf32x8_mask:
33776 case CODE_FOR_avx512dq_vinserti32x8_mask:
33777 case CODE_FOR_avx512vl_vinsertv4df:
33778 case CODE_FOR_avx512vl_vinsertv4di:
33779 case CODE_FOR_avx512vl_vinsertv8sf:
33780 case CODE_FOR_avx512vl_vinsertv8si:
33781 error ("the last argument must be a 1-bit immediate");
33782 return const0_rtx;
33784 case CODE_FOR_avx_vmcmpv2df3:
33785 case CODE_FOR_avx_vmcmpv4sf3:
33786 case CODE_FOR_avx_cmpv2df3:
33787 case CODE_FOR_avx_cmpv4sf3:
33788 case CODE_FOR_avx_cmpv4df3:
33789 case CODE_FOR_avx_cmpv8sf3:
33790 case CODE_FOR_avx512f_cmpv8df3_mask:
33791 case CODE_FOR_avx512f_cmpv16sf3_mask:
33792 case CODE_FOR_avx512f_vmcmpv2df3_mask:
33793 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
33794 error ("the last argument must be a 5-bit immediate");
33795 return const0_rtx;
33797 default:
33798 switch (nargs_constant)
33800 case 2:
33801 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33802 (!mask_pos && (nargs - i) == nargs_constant))
33804 error ("the next to last argument must be an 8-bit immediate");
33805 break;
33807 /* FALLTHRU */
33808 case 1:
33809 error ("the last argument must be an 8-bit immediate");
33810 break;
33811 default:
33812 gcc_unreachable ();
33814 return const0_rtx;
33817 else
33819 if (VECTOR_MODE_P (mode))
33820 op = safe_vector_operand (op, mode);
33822 /* If we aren't optimizing, only allow one memory operand to
33823 be generated. */
33824 if (memory_operand (op, mode))
33825 num_memory++;
33827 op = fixup_modeless_constant (op, mode);
33829 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
33831 if (optimize || !match || num_memory > 1)
33832 op = copy_to_mode_reg (mode, op);
33834 else
33836 op = copy_to_reg (op);
33837 op = lowpart_subreg (mode, op, GET_MODE (op));
33841 args[i].op = op;
33842 args[i].mode = mode;
33845 switch (nargs)
33847 case 1:
33848 pat = GEN_FCN (icode) (real_target, args[0].op);
33849 break;
33850 case 2:
33851 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
33852 break;
33853 case 3:
33854 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33855 args[2].op);
33856 break;
33857 case 4:
33858 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33859 args[2].op, args[3].op);
33860 break;
33861 case 5:
33862 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33863 args[2].op, args[3].op, args[4].op);
33864 break;
33865 case 6:
33866 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33867 args[2].op, args[3].op, args[4].op,
33868 args[5].op);
33869 break;
33870 default:
33871 gcc_unreachable ();
33874 if (! pat)
33875 return 0;
33877 emit_insn (pat);
33878 return target;
33881 /* Transform a pattern of the following layout:
33882 (set A
33883 (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
33885 into:
33886 (set A B), i.e. drop the embedded rounding operand C. */
33888 static rtx
33889 ix86_erase_embedded_rounding (rtx pat)
33891 if (GET_CODE (pat) == INSN)
33892 pat = PATTERN (pat);
33894 gcc_assert (GET_CODE (pat) == SET);
33895 rtx src = SET_SRC (pat);
33896 gcc_assert (XVECLEN (src, 0) == 2);
33897 rtx p0 = XVECEXP (src, 0, 0);
33898 gcc_assert (GET_CODE (src) == UNSPEC
33899 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
33900 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
33901 return res;
33904 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
33905 with rounding. */
33906 static rtx
33907 ix86_expand_sse_comi_round (const struct builtin_description *d,
33908 tree exp, rtx target)
33910 rtx pat, set_dst;
33911 tree arg0 = CALL_EXPR_ARG (exp, 0);
33912 tree arg1 = CALL_EXPR_ARG (exp, 1);
33913 tree arg2 = CALL_EXPR_ARG (exp, 2);
33914 tree arg3 = CALL_EXPR_ARG (exp, 3);
33915 rtx op0 = expand_normal (arg0);
33916 rtx op1 = expand_normal (arg1);
33917 rtx op2 = expand_normal (arg2);
33918 rtx op3 = expand_normal (arg3);
33919 enum insn_code icode = d->icode;
33920 const struct insn_data_d *insn_p = &insn_data[icode];
33921 machine_mode mode0 = insn_p->operand[0].mode;
33922 machine_mode mode1 = insn_p->operand[1].mode;
33923 enum rtx_code comparison = UNEQ;
33924 bool need_ucomi = false;
33926 /* See avxintrin.h for values. */
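/* The table index is the comparison predicate passed as the third
builtin argument (the 32 _CMP_* constants, 0 through 31).  */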
33927 enum rtx_code comi_comparisons[32] =
33929 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
33930 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
33931 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
33933 bool need_ucomi_values[32] =
33935 true, false, false, true, true, false, false, true,
33936 true, false, false, true, true, false, false, true,
33937 false, true, true, false, false, true, true, false,
33938 false, true, true, false, false, true, true, false
33941 if (!CONST_INT_P (op2))
33943 error ("the third argument must be a comparison constant");
33944 return const0_rtx;
33946 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
33948 error ("incorrect comparison mode");
33949 return const0_rtx;
33952 if (!insn_p->operand[2].predicate (op3, SImode))
33954 error ("incorrect rounding operand");
33955 return const0_rtx;
33958 comparison = comi_comparisons[INTVAL (op2)];
33959 need_ucomi = need_ucomi_values[INTVAL (op2)];
33961 if (VECTOR_MODE_P (mode0))
33962 op0 = safe_vector_operand (op0, mode0);
33963 if (VECTOR_MODE_P (mode1))
33964 op1 = safe_vector_operand (op1, mode1);
33966 target = gen_reg_rtx (SImode);
33967 emit_move_insn (target, const0_rtx);
33968 target = gen_rtx_SUBREG (QImode, target, 0);
33970 if ((optimize && !register_operand (op0, mode0))
33971 || !insn_p->operand[0].predicate (op0, mode0))
33972 op0 = copy_to_mode_reg (mode0, op0);
33973 if ((optimize && !register_operand (op1, mode1))
33974 || !insn_p->operand[1].predicate (op1, mode1))
33975 op1 = copy_to_mode_reg (mode1, op1);
33977 if (need_ucomi)
33978 icode = icode == CODE_FOR_sse_comi_round
33979 ? CODE_FOR_sse_ucomi_round
33980 : CODE_FOR_sse2_ucomi_round;
33982 pat = GEN_FCN (icode) (op0, op1, op3);
33983 if (! pat)
33984 return 0;
33986 /* The rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
33987 if (INTVAL (op3) == NO_ROUND)
33989 pat = ix86_erase_embedded_rounding (pat);
33990 if (! pat)
33991 return 0;
33993 set_dst = SET_DEST (pat);
33995 else
33997 gcc_assert (GET_CODE (pat) == SET);
33998 set_dst = SET_DEST (pat);
34001 emit_insn (pat);
34002 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34003 gen_rtx_fmt_ee (comparison, QImode,
34004 set_dst,
34005 const0_rtx)));
34007 return SUBREG_REG (target);
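/* Subroutine of ix86_expand_builtin to take care of insns with
embedded rounding: the last argument of these builtins is the
rounding/SAE immediate.  */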
34010 static rtx
34011 ix86_expand_round_builtin (const struct builtin_description *d,
34012 tree exp, rtx target)
34014 rtx pat;
34015 unsigned int i, nargs;
34016 struct
34018 rtx op;
34019 machine_mode mode;
34020 } args[6];
34021 enum insn_code icode = d->icode;
34022 const struct insn_data_d *insn_p = &insn_data[icode];
34023 machine_mode tmode = insn_p->operand[0].mode;
34024 unsigned int nargs_constant = 0;
34025 unsigned int redundant_embed_rnd = 0;
34027 switch ((enum ix86_builtin_func_type) d->flag)
34029 case UINT64_FTYPE_V2DF_INT:
34030 case UINT64_FTYPE_V4SF_INT:
34031 case UINT_FTYPE_V2DF_INT:
34032 case UINT_FTYPE_V4SF_INT:
34033 case INT64_FTYPE_V2DF_INT:
34034 case INT64_FTYPE_V4SF_INT:
34035 case INT_FTYPE_V2DF_INT:
34036 case INT_FTYPE_V4SF_INT:
34037 nargs = 2;
34038 break;
34039 case V4SF_FTYPE_V4SF_UINT_INT:
34040 case V4SF_FTYPE_V4SF_UINT64_INT:
34041 case V2DF_FTYPE_V2DF_UINT64_INT:
34042 case V4SF_FTYPE_V4SF_INT_INT:
34043 case V4SF_FTYPE_V4SF_INT64_INT:
34044 case V2DF_FTYPE_V2DF_INT64_INT:
34045 case V4SF_FTYPE_V4SF_V4SF_INT:
34046 case V2DF_FTYPE_V2DF_V2DF_INT:
34047 case V4SF_FTYPE_V4SF_V2DF_INT:
34048 case V2DF_FTYPE_V2DF_V4SF_INT:
34049 nargs = 3;
34050 break;
34051 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34052 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34053 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34054 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
34055 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
34056 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
34057 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34058 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
34059 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34060 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34061 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34062 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34063 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34064 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34065 nargs = 4;
34066 break;
34067 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34068 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34069 nargs_constant = 2;
34070 nargs = 4;
34071 break;
34072 case INT_FTYPE_V4SF_V4SF_INT_INT:
34073 case INT_FTYPE_V2DF_V2DF_INT_INT:
34074 return ix86_expand_sse_comi_round (d, exp, target);
34075 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
34076 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
34077 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
34078 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34079 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34080 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34081 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34082 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34083 nargs = 5;
34084 break;
34085 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34086 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34087 nargs_constant = 4;
34088 nargs = 5;
34089 break;
34090 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
34091 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
34092 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
34093 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
34094 nargs_constant = 3;
34095 nargs = 5;
34096 break;
34097 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
34098 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
34099 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34100 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34101 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
34102 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
34103 nargs = 6;
34104 nargs_constant = 4;
34105 break;
34106 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34107 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34108 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34109 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34110 nargs = 6;
34111 nargs_constant = 3;
34112 break;
34113 default:
34114 gcc_unreachable ();
34116 gcc_assert (nargs <= ARRAY_SIZE (args));
34118 if (optimize
34119 || target == 0
34120 || GET_MODE (target) != tmode
34121 || !insn_p->operand[0].predicate (target, tmode))
34122 target = gen_reg_rtx (tmode);
34124 for (i = 0; i < nargs; i++)
34126 tree arg = CALL_EXPR_ARG (exp, i);
34127 rtx op = expand_normal (arg);
34128 machine_mode mode = insn_p->operand[i + 1].mode;
34129 bool match = insn_p->operand[i + 1].predicate (op, mode);
34131 if (i == nargs - nargs_constant)
34133 if (!match)
34135 switch (icode)
34137 case CODE_FOR_avx512f_getmantv8df_mask_round:
34138 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34139 case CODE_FOR_avx512f_vgetmantv2df_round:
34140 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
34141 case CODE_FOR_avx512f_vgetmantv4sf_round:
34142 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
34143 error ("the immediate argument must be a 4-bit immediate");
34144 return const0_rtx;
34145 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34146 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34147 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34148 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34149 error ("the immediate argument must be a 5-bit immediate");
34150 return const0_rtx;
34151 default:
34152 error ("the immediate argument must be an 8-bit immediate");
34153 return const0_rtx;
34157 else if (i == nargs-1)
34159 if (!insn_p->operand[nargs].predicate (op, SImode))
34161 error ("incorrect rounding operand");
34162 return const0_rtx;
34165 /* If there is no rounding, use the normal version of the pattern. */
34166 if (INTVAL (op) == NO_ROUND)
34167 redundant_embed_rnd = 1;
34169 else
34171 if (VECTOR_MODE_P (mode))
34172 op = safe_vector_operand (op, mode);
34174 op = fixup_modeless_constant (op, mode);
34176 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34178 if (optimize || !match)
34179 op = copy_to_mode_reg (mode, op);
34181 else
34183 op = copy_to_reg (op);
34184 op = lowpart_subreg (mode, op, GET_MODE (op));
34188 args[i].op = op;
34189 args[i].mode = mode;
34192 switch (nargs)
34194 case 1:
34195 pat = GEN_FCN (icode) (target, args[0].op);
34196 break;
34197 case 2:
34198 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34199 break;
34200 case 3:
34201 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34202 args[2].op);
34203 break;
34204 case 4:
34205 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34206 args[2].op, args[3].op);
34207 break;
34208 case 5:
34209 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34210 args[2].op, args[3].op, args[4].op);
34211 break;
34212 case 6:
34213 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34214 args[2].op, args[3].op, args[4].op,
34215 args[5].op);
34216 break;
34217 default:
34218 gcc_unreachable ();
34221 if (!pat)
34222 return 0;
34224 if (redundant_embed_rnd)
34225 pat = ix86_erase_embedded_rounding (pat);
34227 emit_insn (pat);
34228 return target;
34231 /* Subroutine of ix86_expand_builtin to take care of special insns
34232 with a variable number of operands. */
34234 static rtx
34235 ix86_expand_special_args_builtin (const struct builtin_description *d,
34236 tree exp, rtx target)
34238 tree arg;
34239 rtx pat, op;
34240 unsigned int i, nargs, arg_adjust, memory;
34241 bool aligned_mem = false;
34242 struct
34244 rtx op;
34245 machine_mode mode;
34246 } args[3];
34247 enum insn_code icode = d->icode;
34248 bool last_arg_constant = false;
34249 const struct insn_data_d *insn_p = &insn_data[icode];
34250 machine_mode tmode = insn_p->operand[0].mode;
34251 enum { load, store } klass;
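/* klass records whether the builtin behaves as a load (result
produced in TARGET) or a store (a memory operand is the
destination).  */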
34253 switch ((enum ix86_builtin_func_type) d->flag)
34255 case VOID_FTYPE_VOID:
34256 emit_insn (GEN_FCN (icode) (target));
34257 return 0;
34258 case VOID_FTYPE_UINT64:
34259 case VOID_FTYPE_UNSIGNED:
34260 nargs = 0;
34261 klass = store;
34262 memory = 0;
34263 break;
34265 case INT_FTYPE_VOID:
34266 case USHORT_FTYPE_VOID:
34267 case UINT64_FTYPE_VOID:
34268 case UNSIGNED_FTYPE_VOID:
34269 nargs = 0;
34270 klass = load;
34271 memory = 0;
34272 break;
34273 case UINT64_FTYPE_PUNSIGNED:
34274 case V2DI_FTYPE_PV2DI:
34275 case V4DI_FTYPE_PV4DI:
34276 case V32QI_FTYPE_PCCHAR:
34277 case V16QI_FTYPE_PCCHAR:
34278 case V8SF_FTYPE_PCV4SF:
34279 case V8SF_FTYPE_PCFLOAT:
34280 case V4SF_FTYPE_PCFLOAT:
34281 case V4DF_FTYPE_PCV2DF:
34282 case V4DF_FTYPE_PCDOUBLE:
34283 case V2DF_FTYPE_PCDOUBLE:
34284 case VOID_FTYPE_PVOID:
34285 case V8DI_FTYPE_PV8DI:
34286 nargs = 1;
34287 klass = load;
34288 memory = 0;
34289 switch (icode)
34291 case CODE_FOR_sse4_1_movntdqa:
34292 case CODE_FOR_avx2_movntdqa:
34293 case CODE_FOR_avx512f_movntdqa:
34294 aligned_mem = true;
34295 break;
34296 default:
34297 break;
34299 break;
34300 case VOID_FTYPE_PV2SF_V4SF:
34301 case VOID_FTYPE_PV8DI_V8DI:
34302 case VOID_FTYPE_PV4DI_V4DI:
34303 case VOID_FTYPE_PV2DI_V2DI:
34304 case VOID_FTYPE_PCHAR_V32QI:
34305 case VOID_FTYPE_PCHAR_V16QI:
34306 case VOID_FTYPE_PFLOAT_V16SF:
34307 case VOID_FTYPE_PFLOAT_V8SF:
34308 case VOID_FTYPE_PFLOAT_V4SF:
34309 case VOID_FTYPE_PDOUBLE_V8DF:
34310 case VOID_FTYPE_PDOUBLE_V4DF:
34311 case VOID_FTYPE_PDOUBLE_V2DF:
34312 case VOID_FTYPE_PLONGLONG_LONGLONG:
34313 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34314 case VOID_FTYPE_PINT_INT:
34315 nargs = 1;
34316 klass = store;
34317 /* Reserve memory operand for target. */
34318 memory = ARRAY_SIZE (args);
34319 switch (icode)
34321 /* These builtins and instructions require the memory
34322 to be properly aligned. */
34323 case CODE_FOR_avx_movntv4di:
34324 case CODE_FOR_sse2_movntv2di:
34325 case CODE_FOR_avx_movntv8sf:
34326 case CODE_FOR_sse_movntv4sf:
34327 case CODE_FOR_sse4a_vmmovntv4sf:
34328 case CODE_FOR_avx_movntv4df:
34329 case CODE_FOR_sse2_movntv2df:
34330 case CODE_FOR_sse4a_vmmovntv2df:
34331 case CODE_FOR_sse2_movntidi:
34332 case CODE_FOR_sse_movntq:
34333 case CODE_FOR_sse2_movntisi:
34334 case CODE_FOR_avx512f_movntv16sf:
34335 case CODE_FOR_avx512f_movntv8df:
34336 case CODE_FOR_avx512f_movntv8di:
34337 aligned_mem = true;
34338 break;
34339 default:
34340 break;
34342 break;
34343 case V4SF_FTYPE_V4SF_PCV2SF:
34344 case V2DF_FTYPE_V2DF_PCDOUBLE:
34345 nargs = 2;
34346 klass = load;
34347 memory = 1;
34348 break;
34349 case V8SF_FTYPE_PCV8SF_V8SI:
34350 case V4DF_FTYPE_PCV4DF_V4DI:
34351 case V4SF_FTYPE_PCV4SF_V4SI:
34352 case V2DF_FTYPE_PCV2DF_V2DI:
34353 case V8SI_FTYPE_PCV8SI_V8SI:
34354 case V4DI_FTYPE_PCV4DI_V4DI:
34355 case V4SI_FTYPE_PCV4SI_V4SI:
34356 case V2DI_FTYPE_PCV2DI_V2DI:
34357 case VOID_FTYPE_INT_INT64:
34358 nargs = 2;
34359 klass = load;
34360 memory = 0;
34361 break;
34362 case VOID_FTYPE_PV8DF_V8DF_UQI:
34363 case VOID_FTYPE_PV4DF_V4DF_UQI:
34364 case VOID_FTYPE_PV2DF_V2DF_UQI:
34365 case VOID_FTYPE_PV16SF_V16SF_UHI:
34366 case VOID_FTYPE_PV8SF_V8SF_UQI:
34367 case VOID_FTYPE_PV4SF_V4SF_UQI:
34368 case VOID_FTYPE_PV8DI_V8DI_UQI:
34369 case VOID_FTYPE_PV4DI_V4DI_UQI:
34370 case VOID_FTYPE_PV2DI_V2DI_UQI:
34371 case VOID_FTYPE_PV16SI_V16SI_UHI:
34372 case VOID_FTYPE_PV8SI_V8SI_UQI:
34373 case VOID_FTYPE_PV4SI_V4SI_UQI:
34374 switch (icode)
34376 /* These builtins and instructions require the memory
34377 to be properly aligned. */
34378 case CODE_FOR_avx512f_storev16sf_mask:
34379 case CODE_FOR_avx512f_storev16si_mask:
34380 case CODE_FOR_avx512f_storev8df_mask:
34381 case CODE_FOR_avx512f_storev8di_mask:
34382 case CODE_FOR_avx512vl_storev8sf_mask:
34383 case CODE_FOR_avx512vl_storev8si_mask:
34384 case CODE_FOR_avx512vl_storev4df_mask:
34385 case CODE_FOR_avx512vl_storev4di_mask:
34386 case CODE_FOR_avx512vl_storev4sf_mask:
34387 case CODE_FOR_avx512vl_storev4si_mask:
34388 case CODE_FOR_avx512vl_storev2df_mask:
34389 case CODE_FOR_avx512vl_storev2di_mask:
34390 aligned_mem = true;
34391 break;
34392 default:
34393 break;
34395 /* FALLTHRU */
34396 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34397 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34398 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34399 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34400 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34401 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34402 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34403 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34404 case VOID_FTYPE_PV8SI_V8DI_UQI:
34405 case VOID_FTYPE_PV8HI_V8DI_UQI:
34406 case VOID_FTYPE_PV16HI_V16SI_UHI:
34407 case VOID_FTYPE_PV16QI_V8DI_UQI:
34408 case VOID_FTYPE_PV16QI_V16SI_UHI:
34409 case VOID_FTYPE_PV4SI_V4DI_UQI:
34410 case VOID_FTYPE_PV4SI_V2DI_UQI:
34411 case VOID_FTYPE_PV8HI_V4DI_UQI:
34412 case VOID_FTYPE_PV8HI_V2DI_UQI:
34413 case VOID_FTYPE_PV8HI_V8SI_UQI:
34414 case VOID_FTYPE_PV8HI_V4SI_UQI:
34415 case VOID_FTYPE_PV16QI_V4DI_UQI:
34416 case VOID_FTYPE_PV16QI_V2DI_UQI:
34417 case VOID_FTYPE_PV16QI_V8SI_UQI:
34418 case VOID_FTYPE_PV16QI_V4SI_UQI:
34419 case VOID_FTYPE_PCHAR_V64QI_UDI:
34420 case VOID_FTYPE_PCHAR_V32QI_USI:
34421 case VOID_FTYPE_PCHAR_V16QI_UHI:
34422 case VOID_FTYPE_PSHORT_V32HI_USI:
34423 case VOID_FTYPE_PSHORT_V16HI_UHI:
34424 case VOID_FTYPE_PSHORT_V8HI_UQI:
34425 case VOID_FTYPE_PINT_V16SI_UHI:
34426 case VOID_FTYPE_PINT_V8SI_UQI:
34427 case VOID_FTYPE_PINT_V4SI_UQI:
34428 case VOID_FTYPE_PINT64_V8DI_UQI:
34429 case VOID_FTYPE_PINT64_V4DI_UQI:
34430 case VOID_FTYPE_PINT64_V2DI_UQI:
34431 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
34432 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
34433 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
34434 case VOID_FTYPE_PFLOAT_V16SF_UHI:
34435 case VOID_FTYPE_PFLOAT_V8SF_UQI:
34436 case VOID_FTYPE_PFLOAT_V4SF_UQI:
34437 case VOID_FTYPE_PV32QI_V32HI_USI:
34438 case VOID_FTYPE_PV16QI_V16HI_UHI:
34439 case VOID_FTYPE_PV8QI_V8HI_UQI:
34440 nargs = 2;
34441 klass = store;
34442 /* Reserve memory operand for target. */
34443 memory = ARRAY_SIZE (args);
34444 break;
34445 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
34446 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
34447 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
34448 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
34449 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
34450 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
34451 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
34452 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
34453 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
34454 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
34455 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
34456 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
34457 switch (icode)
34459 /* These builtins and instructions require the memory
34460 to be properly aligned. */
34461 case CODE_FOR_avx512f_loadv16sf_mask:
34462 case CODE_FOR_avx512f_loadv16si_mask:
34463 case CODE_FOR_avx512f_loadv8df_mask:
34464 case CODE_FOR_avx512f_loadv8di_mask:
34465 case CODE_FOR_avx512vl_loadv8sf_mask:
34466 case CODE_FOR_avx512vl_loadv8si_mask:
34467 case CODE_FOR_avx512vl_loadv4df_mask:
34468 case CODE_FOR_avx512vl_loadv4di_mask:
34469 case CODE_FOR_avx512vl_loadv4sf_mask:
34470 case CODE_FOR_avx512vl_loadv4si_mask:
34471 case CODE_FOR_avx512vl_loadv2df_mask:
34472 case CODE_FOR_avx512vl_loadv2di_mask:
34473 case CODE_FOR_avx512bw_loadv64qi_mask:
34474 case CODE_FOR_avx512vl_loadv32qi_mask:
34475 case CODE_FOR_avx512vl_loadv16qi_mask:
34476 case CODE_FOR_avx512bw_loadv32hi_mask:
34477 case CODE_FOR_avx512vl_loadv16hi_mask:
34478 case CODE_FOR_avx512vl_loadv8hi_mask:
34479 aligned_mem = true;
34480 break;
34481 default:
34482 break;
34484 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
34485 case V32QI_FTYPE_PCCHAR_V32QI_USI:
34486 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
34487 case V32HI_FTYPE_PCSHORT_V32HI_USI:
34488 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
34489 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
34490 case V16SI_FTYPE_PCINT_V16SI_UHI:
34491 case V8SI_FTYPE_PCINT_V8SI_UQI:
34492 case V4SI_FTYPE_PCINT_V4SI_UQI:
34493 case V8DI_FTYPE_PCINT64_V8DI_UQI:
34494 case V4DI_FTYPE_PCINT64_V4DI_UQI:
34495 case V2DI_FTYPE_PCINT64_V2DI_UQI:
34496 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
34497 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
34498 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
34499 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
34500 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
34501 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
34502 nargs = 3;
34503 klass = load;
34504 memory = 0;
34505 break;
34506 case VOID_FTYPE_UINT_UINT_UINT:
34507 case VOID_FTYPE_UINT64_UINT_UINT:
34508 case UCHAR_FTYPE_UINT_UINT_UINT:
34509 case UCHAR_FTYPE_UINT64_UINT_UINT:
34510 nargs = 3;
34511 klass = load;
34512 memory = ARRAY_SIZE (args);
34513 last_arg_constant = true;
34514 break;
34515 default:
34516 gcc_unreachable ();
34519 gcc_assert (nargs <= ARRAY_SIZE (args));
34521 if (klass == store)
34523 arg = CALL_EXPR_ARG (exp, 0);
34524 op = expand_normal (arg);
34525 gcc_assert (target == 0);
34526 if (memory)
34528 op = ix86_zero_extend_to_Pmode (op);
34529 target = gen_rtx_MEM (tmode, op);
34530 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34531 on it. Try to improve it using get_pointer_alignment,
34532 and if the special builtin is one that requires strict
34533	     mode alignment, also from its GET_MODE_ALIGNMENT.
34534 Failure to do so could lead to ix86_legitimate_combined_insn
34535 rejecting all changes to such insns. */
34536 unsigned int align = get_pointer_alignment (arg);
34537 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34538 align = GET_MODE_ALIGNMENT (tmode);
34539 if (MEM_ALIGN (target) < align)
34540 set_mem_align (target, align);
34542 else
34543 target = force_reg (tmode, op);
34544 arg_adjust = 1;
34546 else
34548 arg_adjust = 0;
34549 if (optimize
34550 || target == 0
34551 || !register_operand (target, tmode)
34552 || GET_MODE (target) != tmode)
34553 target = gen_reg_rtx (tmode);
34556 for (i = 0; i < nargs; i++)
34558 machine_mode mode = insn_p->operand[i + 1].mode;
34559 bool match;
34561 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34562 op = expand_normal (arg);
34563 match = insn_p->operand[i + 1].predicate (op, mode);
34565 if (last_arg_constant && (i + 1) == nargs)
34567 if (!match)
34569 if (icode == CODE_FOR_lwp_lwpvalsi3
34570 || icode == CODE_FOR_lwp_lwpinssi3
34571 || icode == CODE_FOR_lwp_lwpvaldi3
34572 || icode == CODE_FOR_lwp_lwpinsdi3)
34573 error ("the last argument must be a 32-bit immediate");
34574 else
34575 error ("the last argument must be an 8-bit immediate");
34576 return const0_rtx;
34579 else
34581 if (i == memory)
34583 /* This must be the memory operand. */
34584 op = ix86_zero_extend_to_Pmode (op);
34585 op = gen_rtx_MEM (mode, op);
34586 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34587 on it. Try to improve it using get_pointer_alignment,
34588 and if the special builtin is one that requires strict
34589	       mode alignment, also from its GET_MODE_ALIGNMENT.
34590 Failure to do so could lead to ix86_legitimate_combined_insn
34591 rejecting all changes to such insns. */
34592 unsigned int align = get_pointer_alignment (arg);
34593 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34594 align = GET_MODE_ALIGNMENT (mode);
34595 if (MEM_ALIGN (op) < align)
34596 set_mem_align (op, align);
34598 else
34600	      /* This must be a register.  */
34601 if (VECTOR_MODE_P (mode))
34602 op = safe_vector_operand (op, mode);
34604 op = fixup_modeless_constant (op, mode);
34606 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34607 op = copy_to_mode_reg (mode, op);
34608 else
34610 op = copy_to_reg (op);
34611 op = lowpart_subreg (mode, op, GET_MODE (op));
34616 args[i].op = op;
34617 args[i].mode = mode;
34620 switch (nargs)
34622 case 0:
34623 pat = GEN_FCN (icode) (target);
34624 break;
34625 case 1:
34626 pat = GEN_FCN (icode) (target, args[0].op);
34627 break;
34628 case 2:
34629 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34630 break;
34631 case 3:
34632 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34633 break;
34634 default:
34635 gcc_unreachable ();
34638 if (! pat)
34639 return 0;
34640 emit_insn (pat);
34641 return klass == store ? 0 : target;
34644 /* Return the integer constant in ARG. Constrain it to be in the range
34645 of the subparts of VEC_TYPE; issue an error if not. */
34647 static int
34648 get_element_number (tree vec_type, tree arg)
34650 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34652 if (!tree_fits_uhwi_p (arg)
34653 || (elt = tree_to_uhwi (arg), elt > max))
34655 error ("selector must be an integer constant in the range 0..%wi", max);
34656 return 0;
34659 return elt;
34662 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34663 ix86_expand_vector_init. We DO have language-level syntax for this, in
34664 the form of (type){ init-list }. Except that since we can't place emms
34665 instructions from inside the compiler, we can't allow the use of MMX
34666 registers unless the user explicitly asks for it. So we do *not* define
34667 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34668   we have builtins invoked by mmintrin.h that give us license to emit
34669 these sorts of instructions. */
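/* (Illustrative note, an assumption rather than something spelled out in
   this file: mmintrin.h-style intrinsics such as _mm_set_pi32 are built
   on top of these IX86_BUILTIN_VEC_INIT_* builtins, which is how user
   code ends up in this expander.)  */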
34671 static rtx
34672 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34674 machine_mode tmode = TYPE_MODE (type);
34675 machine_mode inner_mode = GET_MODE_INNER (tmode);
34676 int i, n_elt = GET_MODE_NUNITS (tmode);
34677 rtvec v = rtvec_alloc (n_elt);
34679 gcc_assert (VECTOR_MODE_P (tmode));
34680 gcc_assert (call_expr_nargs (exp) == n_elt);
34682 for (i = 0; i < n_elt; ++i)
34684 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34685 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34688 if (!target || !register_operand (target, tmode))
34689 target = gen_reg_rtx (tmode);
34691 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34692 return target;
34695 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34696 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34697 had a language-level syntax for referencing vector elements. */
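/* (Illustrative note, an assumption about the header-level mapping:
   intrinsics such as _mm_extract_epi16 reach this expander through the
   IX86_BUILTIN_VEC_EXT_* builtins used in the *intrin.h headers.)  */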
34699 static rtx
34700 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34702 machine_mode tmode, mode0;
34703 tree arg0, arg1;
34704 int elt;
34705 rtx op0;
34707 arg0 = CALL_EXPR_ARG (exp, 0);
34708 arg1 = CALL_EXPR_ARG (exp, 1);
34710 op0 = expand_normal (arg0);
34711 elt = get_element_number (TREE_TYPE (arg0), arg1);
34713 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34714 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34715 gcc_assert (VECTOR_MODE_P (mode0));
34717 op0 = force_reg (mode0, op0);
34719 if (optimize || !target || !register_operand (target, tmode))
34720 target = gen_reg_rtx (tmode);
34722 ix86_expand_vector_extract (true, target, op0, elt);
34724 return target;
34727 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34728 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34729 a language-level syntax for referencing vector elements. */
34731 static rtx
34732 ix86_expand_vec_set_builtin (tree exp)
34734 machine_mode tmode, mode1;
34735 tree arg0, arg1, arg2;
34736 int elt;
34737 rtx op0, op1, target;
34739 arg0 = CALL_EXPR_ARG (exp, 0);
34740 arg1 = CALL_EXPR_ARG (exp, 1);
34741 arg2 = CALL_EXPR_ARG (exp, 2);
34743 tmode = TYPE_MODE (TREE_TYPE (arg0));
34744 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34745 gcc_assert (VECTOR_MODE_P (tmode));
34747 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34748 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34749 elt = get_element_number (TREE_TYPE (arg0), arg2);
34751 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34752 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34754 op0 = force_reg (tmode, op0);
34755 op1 = force_reg (mode1, op1);
34757 /* OP0 is the source of these builtin functions and shouldn't be
34758      modified.  Create a copy, use it, and return it as the target.  */
34759 target = gen_reg_rtx (tmode);
34760 emit_move_insn (target, op0);
34761 ix86_expand_vector_set (true, target, op1, elt);
34763 return target;
34766 /* Emit conditional move of SRC to DST with condition
34767 OP1 CODE OP2. */
34768 static void
34769 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
34771 rtx t;
34773 if (TARGET_CMOVE)
34775 t = ix86_expand_compare (code, op1, op2);
34776 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
34777 src, dst)));
34779 else
34781 rtx_code_label *nomove = gen_label_rtx ();
34782 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
34783 const0_rtx, GET_MODE (op1), 1, nomove);
34784 emit_move_insn (dst, src);
34785 emit_label (nomove);
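      /* Sketch of the non-CMOV fallback above, in C terms:

	   if (!(op1 CODE op2))
	     goto nomove;
	   dst = src;
	 nomove:;

	 hence the branch on the reversed condition.  */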
34789 /* Choose the max of DST and SRC and put it in DST.  */
34790 static void
34791 ix86_emit_move_max (rtx dst, rtx src)
34793 ix86_emit_cmove (dst, src, LTU, dst, src);
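  /* (Equivalently, in C terms: if (dst < src) dst = src; -- an unsigned
     max, which is what the bound narrowing/intersection code below
     relies on.)  */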
34796 /* Expand an expression EXP that calls a built-in function,
34797 with result going to TARGET if that's convenient
34798 (and in mode MODE if that's convenient).
34799 SUBTARGET may be used as the target for computing one of EXP's operands.
34800 IGNORE is nonzero if the value is to be ignored. */
34802 static rtx
34803 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34804 machine_mode mode, int ignore)
34806 size_t i;
34807 enum insn_code icode;
34808 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34809 tree arg0, arg1, arg2, arg3, arg4;
34810 rtx op0, op1, op2, op3, op4, pat, insn;
34811 machine_mode mode0, mode1, mode2, mode3, mode4;
34812 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34814 /* For CPU builtins that can be folded, fold first and expand the fold. */
34815 switch (fcode)
34817 case IX86_BUILTIN_CPU_INIT:
34819 /* Make it call __cpu_indicator_init in libgcc. */
34820 tree call_expr, fndecl, type;
34821 type = build_function_type_list (integer_type_node, NULL_TREE);
34822 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34823 call_expr = build_call_expr (fndecl, 0);
34824 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34826 case IX86_BUILTIN_CPU_IS:
34827 case IX86_BUILTIN_CPU_SUPPORTS:
34829 tree arg0 = CALL_EXPR_ARG (exp, 0);
34830 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34831 gcc_assert (fold_expr != NULL_TREE);
34832 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34836 /* Determine whether the builtin function is available under the current ISA.
34837 Originally the builtin was not created if it wasn't applicable to the
34838 current ISA based on the command line switches. With function specific
34839 options, we need to check in the context of the function making the call
34840 whether it is supported. Treat AVX512VL and MMX specially. For other flags,
34841      if isa includes more than one ISA bit, treat those as requiring any
34842 of them. For AVX512VL, require both AVX512VL and the non-AVX512VL
34843 ISAs. Likewise for MMX, require both MMX and the non-MMX ISAs.
34844 Similarly for 64BIT, but we shouldn't be building such builtins
34845      at all, since -m64 is a whole-TU option.  */
34846 if (((ix86_builtins_isa[fcode].isa
34847 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
34848 | OPTION_MASK_ISA_64BIT))
34849 && !(ix86_builtins_isa[fcode].isa
34850 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
34851 | OPTION_MASK_ISA_64BIT)
34852 & ix86_isa_flags))
34853 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
34854 && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
34855 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_MMX)
34856 && !(ix86_isa_flags & OPTION_MASK_ISA_MMX))
34857 || (ix86_builtins_isa[fcode].isa2
34858 && !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
34860 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa,
34861 ix86_builtins_isa[fcode].isa2, 0, 0,
34862 NULL, NULL, (enum fpmath_unit) 0,
34863 false);
34864 if (!opts)
34865 error ("%qE needs unknown isa option", fndecl);
34866 else
34868 gcc_assert (opts != NULL);
34869 error ("%qE needs isa option %s", fndecl, opts);
34870 free (opts);
34872 return expand_call (exp, target, ignore);
34875 switch (fcode)
34877 case IX86_BUILTIN_BNDMK:
34878 if (!target
34879 || GET_MODE (target) != BNDmode
34880 || !register_operand (target, BNDmode))
34881 target = gen_reg_rtx (BNDmode);
34883 arg0 = CALL_EXPR_ARG (exp, 0);
34884 arg1 = CALL_EXPR_ARG (exp, 1);
34886 op0 = expand_normal (arg0);
34887 op1 = expand_normal (arg1);
34889 if (!register_operand (op0, Pmode))
34890 op0 = ix86_zero_extend_to_Pmode (op0);
34891 if (!register_operand (op1, Pmode))
34892 op1 = ix86_zero_extend_to_Pmode (op1);
34894      /* Builtin arg1 is the size of the block, but instruction op1
34895	 should be (size - 1).  */
34896 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
34897 NULL_RTX, 1, OPTAB_DIRECT);
34899 emit_insn (BNDmode == BND64mode
34900 ? gen_bnd64_mk (target, op0, op1)
34901 : gen_bnd32_mk (target, op0, op1));
34902 return target;
34904 case IX86_BUILTIN_BNDSTX:
34905 arg0 = CALL_EXPR_ARG (exp, 0);
34906 arg1 = CALL_EXPR_ARG (exp, 1);
34907 arg2 = CALL_EXPR_ARG (exp, 2);
34909 op0 = expand_normal (arg0);
34910 op1 = expand_normal (arg1);
34911 op2 = expand_normal (arg2);
34913 if (!register_operand (op0, Pmode))
34914 op0 = ix86_zero_extend_to_Pmode (op0);
34915 if (!register_operand (op1, BNDmode))
34916 op1 = copy_to_mode_reg (BNDmode, op1);
34917 if (!register_operand (op2, Pmode))
34918 op2 = ix86_zero_extend_to_Pmode (op2);
34920 emit_insn (BNDmode == BND64mode
34921 ? gen_bnd64_stx (op2, op0, op1)
34922 : gen_bnd32_stx (op2, op0, op1));
34923 return 0;
34925 case IX86_BUILTIN_BNDLDX:
34926 if (!target
34927 || GET_MODE (target) != BNDmode
34928 || !register_operand (target, BNDmode))
34929 target = gen_reg_rtx (BNDmode);
34931 arg0 = CALL_EXPR_ARG (exp, 0);
34932 arg1 = CALL_EXPR_ARG (exp, 1);
34934 op0 = expand_normal (arg0);
34935 op1 = expand_normal (arg1);
34937 if (!register_operand (op0, Pmode))
34938 op0 = ix86_zero_extend_to_Pmode (op0);
34939 if (!register_operand (op1, Pmode))
34940 op1 = ix86_zero_extend_to_Pmode (op1);
34942 emit_insn (BNDmode == BND64mode
34943 ? gen_bnd64_ldx (target, op0, op1)
34944 : gen_bnd32_ldx (target, op0, op1));
34945 return target;
34947 case IX86_BUILTIN_BNDCL:
34948 arg0 = CALL_EXPR_ARG (exp, 0);
34949 arg1 = CALL_EXPR_ARG (exp, 1);
34951 op0 = expand_normal (arg0);
34952 op1 = expand_normal (arg1);
34954 if (!register_operand (op0, Pmode))
34955 op0 = ix86_zero_extend_to_Pmode (op0);
34956 if (!register_operand (op1, BNDmode))
34957 op1 = copy_to_mode_reg (BNDmode, op1);
34959 emit_insn (BNDmode == BND64mode
34960 ? gen_bnd64_cl (op1, op0)
34961 : gen_bnd32_cl (op1, op0));
34962 return 0;
34964 case IX86_BUILTIN_BNDCU:
34965 arg0 = CALL_EXPR_ARG (exp, 0);
34966 arg1 = CALL_EXPR_ARG (exp, 1);
34968 op0 = expand_normal (arg0);
34969 op1 = expand_normal (arg1);
34971 if (!register_operand (op0, Pmode))
34972 op0 = ix86_zero_extend_to_Pmode (op0);
34973 if (!register_operand (op1, BNDmode))
34974 op1 = copy_to_mode_reg (BNDmode, op1);
34976 emit_insn (BNDmode == BND64mode
34977 ? gen_bnd64_cu (op1, op0)
34978 : gen_bnd32_cu (op1, op0));
34979 return 0;
34981 case IX86_BUILTIN_BNDRET:
34982 arg0 = CALL_EXPR_ARG (exp, 0);
34983 target = chkp_get_rtl_bounds (arg0);
34985 /* If no bounds were specified for returned value,
34986	 then use INIT bounds.  This usually happens when
34987 some built-in function is expanded. */
34988 if (!target)
34990 rtx t1 = gen_reg_rtx (Pmode);
34991 rtx t2 = gen_reg_rtx (Pmode);
34992 target = gen_reg_rtx (BNDmode);
34993 emit_move_insn (t1, const0_rtx);
34994 emit_move_insn (t2, constm1_rtx);
34995 emit_insn (BNDmode == BND64mode
34996 ? gen_bnd64_mk (target, t1, t2)
34997 : gen_bnd32_mk (target, t1, t2));
35000 gcc_assert (target && REG_P (target));
35001 return target;
35003 case IX86_BUILTIN_BNDNARROW:
35005 rtx m1, m1h1, m1h2, lb, ub, t1;
35007 /* Return value and lb. */
35008 arg0 = CALL_EXPR_ARG (exp, 0);
35009 /* Bounds. */
35010 arg1 = CALL_EXPR_ARG (exp, 1);
35011 /* Size. */
35012 arg2 = CALL_EXPR_ARG (exp, 2);
35014 lb = expand_normal (arg0);
35015 op1 = expand_normal (arg1);
35016 op2 = expand_normal (arg2);
35018	/* Size was passed, but we need to use (size - 1), as for bndmk.  */
35019 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
35020 NULL_RTX, 1, OPTAB_DIRECT);
35022	/* Add LB to size and invert to get UB.  */
35023 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
35024 op2, 1, OPTAB_DIRECT);
35025 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
35027 if (!register_operand (lb, Pmode))
35028 lb = ix86_zero_extend_to_Pmode (lb);
35029 if (!register_operand (ub, Pmode))
35030 ub = ix86_zero_extend_to_Pmode (ub);
35032 /* We need to move bounds to memory before any computations. */
35033 if (MEM_P (op1))
35034 m1 = op1;
35035 else
35037 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
35038 emit_move_insn (m1, op1);
35041	/* Generate mem expressions used to access LB and UB.  */
35042 m1h1 = adjust_address (m1, Pmode, 0);
35043 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
35045 t1 = gen_reg_rtx (Pmode);
35047 /* Compute LB. */
35048 emit_move_insn (t1, m1h1);
35049 ix86_emit_move_max (t1, lb);
35050 emit_move_insn (m1h1, t1);
35052 /* Compute UB. UB is stored in 1's complement form. Therefore
35053 we also use max here. */
35054 emit_move_insn (t1, m1h2);
35055 ix86_emit_move_max (t1, ub);
35056 emit_move_insn (m1h2, t1);
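	/* (Since both upper bounds are kept in complemented form, taking
	   the unsigned max of the complements is the same as taking the
	   min of the real upper bounds -- the narrower bound wins.)  */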
35058 op2 = gen_reg_rtx (BNDmode);
35059 emit_move_insn (op2, m1);
35061 return chkp_join_splitted_slot (lb, op2);
35064 case IX86_BUILTIN_BNDINT:
35066 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
35068 if (!target
35069 || GET_MODE (target) != BNDmode
35070 || !register_operand (target, BNDmode))
35071 target = gen_reg_rtx (BNDmode);
35073 arg0 = CALL_EXPR_ARG (exp, 0);
35074 arg1 = CALL_EXPR_ARG (exp, 1);
35076 op0 = expand_normal (arg0);
35077 op1 = expand_normal (arg1);
35079 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
35080 rh1 = adjust_address (res, Pmode, 0);
35081 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
35083	/* Put the first bounds into temporaries.  */
35084 lb1 = gen_reg_rtx (Pmode);
35085 ub1 = gen_reg_rtx (Pmode);
35086 if (MEM_P (op0))
35088 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
35089 emit_move_insn (ub1, adjust_address (op0, Pmode,
35090 GET_MODE_SIZE (Pmode)));
35092 else
35094 emit_move_insn (res, op0);
35095 emit_move_insn (lb1, rh1);
35096 emit_move_insn (ub1, rh2);
35099	/* Put the second bounds into temporaries.  */
35100 lb2 = gen_reg_rtx (Pmode);
35101 ub2 = gen_reg_rtx (Pmode);
35102 if (MEM_P (op1))
35104 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
35105 emit_move_insn (ub2, adjust_address (op1, Pmode,
35106 GET_MODE_SIZE (Pmode)));
35108 else
35110 emit_move_insn (res, op1);
35111 emit_move_insn (lb2, rh1);
35112 emit_move_insn (ub2, rh2);
35115 /* Compute LB. */
35116 ix86_emit_move_max (lb1, lb2);
35117 emit_move_insn (rh1, lb1);
35119 /* Compute UB. UB is stored in 1's complement form. Therefore
35120 we also use max here. */
35121 ix86_emit_move_max (ub1, ub2);
35122 emit_move_insn (rh2, ub1);
35124 emit_move_insn (target, res);
35126 return target;
35129 case IX86_BUILTIN_SIZEOF:
35131 tree name;
35132 rtx symbol;
35134 if (!target
35135 || GET_MODE (target) != Pmode
35136 || !register_operand (target, Pmode))
35137 target = gen_reg_rtx (Pmode);
35139 arg0 = CALL_EXPR_ARG (exp, 0);
35140 gcc_assert (VAR_P (arg0));
35142 name = DECL_ASSEMBLER_NAME (arg0);
35143 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
35145 emit_insn (Pmode == SImode
35146 ? gen_move_size_reloc_si (target, symbol)
35147 : gen_move_size_reloc_di (target, symbol));
35149 return target;
35152 case IX86_BUILTIN_BNDLOWER:
35154 rtx mem, hmem;
35156 if (!target
35157 || GET_MODE (target) != Pmode
35158 || !register_operand (target, Pmode))
35159 target = gen_reg_rtx (Pmode);
35161 arg0 = CALL_EXPR_ARG (exp, 0);
35162 op0 = expand_normal (arg0);
35164 /* We need to move bounds to memory first. */
35165 if (MEM_P (op0))
35166 mem = op0;
35167 else
35169 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35170 emit_move_insn (mem, op0);
35173 /* Generate mem expression to access LB and load it. */
35174 hmem = adjust_address (mem, Pmode, 0);
35175 emit_move_insn (target, hmem);
35177 return target;
35180 case IX86_BUILTIN_BNDUPPER:
35182 rtx mem, hmem, res;
35184 if (!target
35185 || GET_MODE (target) != Pmode
35186 || !register_operand (target, Pmode))
35187 target = gen_reg_rtx (Pmode);
35189 arg0 = CALL_EXPR_ARG (exp, 0);
35190 op0 = expand_normal (arg0);
35192 /* We need to move bounds to memory first. */
35193 if (MEM_P (op0))
35194 mem = op0;
35195 else
35197 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35198 emit_move_insn (mem, op0);
35201 /* Generate mem expression to access UB. */
35202 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
35205	/* We need to invert all bits of UB.  */
35205 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
35207 if (res != target)
35208 emit_move_insn (target, res);
35210 return target;
35213 case IX86_BUILTIN_MASKMOVQ:
35214 case IX86_BUILTIN_MASKMOVDQU:
35215 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35216 ? CODE_FOR_mmx_maskmovq
35217 : CODE_FOR_sse2_maskmovdqu);
35218 /* Note the arg order is different from the operand order. */
35219 arg1 = CALL_EXPR_ARG (exp, 0);
35220 arg2 = CALL_EXPR_ARG (exp, 1);
35221 arg0 = CALL_EXPR_ARG (exp, 2);
35222 op0 = expand_normal (arg0);
35223 op1 = expand_normal (arg1);
35224 op2 = expand_normal (arg2);
35225 mode0 = insn_data[icode].operand[0].mode;
35226 mode1 = insn_data[icode].operand[1].mode;
35227 mode2 = insn_data[icode].operand[2].mode;
35229 op0 = ix86_zero_extend_to_Pmode (op0);
35230 op0 = gen_rtx_MEM (mode1, op0);
35232 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35233 op0 = copy_to_mode_reg (mode0, op0);
35234 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35235 op1 = copy_to_mode_reg (mode1, op1);
35236 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35237 op2 = copy_to_mode_reg (mode2, op2);
35238 pat = GEN_FCN (icode) (op0, op1, op2);
35239 if (! pat)
35240 return 0;
35241 emit_insn (pat);
35242 return 0;
35244 case IX86_BUILTIN_LDMXCSR:
35245 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35246 target = assign_386_stack_local (SImode, SLOT_TEMP);
35247 emit_move_insn (target, op0);
35248 emit_insn (gen_sse_ldmxcsr (target));
35249 return 0;
35251 case IX86_BUILTIN_STMXCSR:
35252 target = assign_386_stack_local (SImode, SLOT_TEMP);
35253 emit_insn (gen_sse_stmxcsr (target));
35254 return copy_to_mode_reg (SImode, target);
35256 case IX86_BUILTIN_CLFLUSH:
35257 arg0 = CALL_EXPR_ARG (exp, 0);
35258 op0 = expand_normal (arg0);
35259 icode = CODE_FOR_sse2_clflush;
35260 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35261 op0 = ix86_zero_extend_to_Pmode (op0);
35263 emit_insn (gen_sse2_clflush (op0));
35264 return 0;
35266 case IX86_BUILTIN_CLWB:
35267 arg0 = CALL_EXPR_ARG (exp, 0);
35268 op0 = expand_normal (arg0);
35269 icode = CODE_FOR_clwb;
35270 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35271 op0 = ix86_zero_extend_to_Pmode (op0);
35273 emit_insn (gen_clwb (op0));
35274 return 0;
35276 case IX86_BUILTIN_CLFLUSHOPT:
35277 arg0 = CALL_EXPR_ARG (exp, 0);
35278 op0 = expand_normal (arg0);
35279 icode = CODE_FOR_clflushopt;
35280 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35281 op0 = ix86_zero_extend_to_Pmode (op0);
35283 emit_insn (gen_clflushopt (op0));
35284 return 0;
35286 case IX86_BUILTIN_MONITOR:
35287 case IX86_BUILTIN_MONITORX:
35288 arg0 = CALL_EXPR_ARG (exp, 0);
35289 arg1 = CALL_EXPR_ARG (exp, 1);
35290 arg2 = CALL_EXPR_ARG (exp, 2);
35291 op0 = expand_normal (arg0);
35292 op1 = expand_normal (arg1);
35293 op2 = expand_normal (arg2);
35294 if (!REG_P (op0))
35295 op0 = ix86_zero_extend_to_Pmode (op0);
35296 if (!REG_P (op1))
35297 op1 = copy_to_mode_reg (SImode, op1);
35298 if (!REG_P (op2))
35299 op2 = copy_to_mode_reg (SImode, op2);
35301 emit_insn (fcode == IX86_BUILTIN_MONITOR
35302 ? ix86_gen_monitor (op0, op1, op2)
35303 : ix86_gen_monitorx (op0, op1, op2));
35304 return 0;
35306 case IX86_BUILTIN_MWAIT:
35307 arg0 = CALL_EXPR_ARG (exp, 0);
35308 arg1 = CALL_EXPR_ARG (exp, 1);
35309 op0 = expand_normal (arg0);
35310 op1 = expand_normal (arg1);
35311 if (!REG_P (op0))
35312 op0 = copy_to_mode_reg (SImode, op0);
35313 if (!REG_P (op1))
35314 op1 = copy_to_mode_reg (SImode, op1);
35315 emit_insn (gen_sse3_mwait (op0, op1));
35316 return 0;
35318 case IX86_BUILTIN_MWAITX:
35319 arg0 = CALL_EXPR_ARG (exp, 0);
35320 arg1 = CALL_EXPR_ARG (exp, 1);
35321 arg2 = CALL_EXPR_ARG (exp, 2);
35322 op0 = expand_normal (arg0);
35323 op1 = expand_normal (arg1);
35324 op2 = expand_normal (arg2);
35325 if (!REG_P (op0))
35326 op0 = copy_to_mode_reg (SImode, op0);
35327 if (!REG_P (op1))
35328 op1 = copy_to_mode_reg (SImode, op1);
35329 if (!REG_P (op2))
35330 op2 = copy_to_mode_reg (SImode, op2);
35331 emit_insn (gen_mwaitx (op0, op1, op2));
35332 return 0;
35334 case IX86_BUILTIN_CLZERO:
35335 arg0 = CALL_EXPR_ARG (exp, 0);
35336 op0 = expand_normal (arg0);
35337 if (!REG_P (op0))
35338 op0 = ix86_zero_extend_to_Pmode (op0);
35339 emit_insn (ix86_gen_clzero (op0));
35340 return 0;
35342 case IX86_BUILTIN_VEC_INIT_V2SI:
35343 case IX86_BUILTIN_VEC_INIT_V4HI:
35344 case IX86_BUILTIN_VEC_INIT_V8QI:
35345 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35347 case IX86_BUILTIN_VEC_EXT_V2DF:
35348 case IX86_BUILTIN_VEC_EXT_V2DI:
35349 case IX86_BUILTIN_VEC_EXT_V4SF:
35350 case IX86_BUILTIN_VEC_EXT_V4SI:
35351 case IX86_BUILTIN_VEC_EXT_V8HI:
35352 case IX86_BUILTIN_VEC_EXT_V2SI:
35353 case IX86_BUILTIN_VEC_EXT_V4HI:
35354 case IX86_BUILTIN_VEC_EXT_V16QI:
35355 return ix86_expand_vec_ext_builtin (exp, target);
35357 case IX86_BUILTIN_VEC_SET_V2DI:
35358 case IX86_BUILTIN_VEC_SET_V4SF:
35359 case IX86_BUILTIN_VEC_SET_V4SI:
35360 case IX86_BUILTIN_VEC_SET_V8HI:
35361 case IX86_BUILTIN_VEC_SET_V4HI:
35362 case IX86_BUILTIN_VEC_SET_V16QI:
35363 return ix86_expand_vec_set_builtin (exp);
35365 case IX86_BUILTIN_NANQ:
35366 case IX86_BUILTIN_NANSQ:
35367 return expand_call (exp, target, ignore);
35369 case IX86_BUILTIN_RDPMC:
35370 case IX86_BUILTIN_RDTSC:
35371 case IX86_BUILTIN_RDTSCP:
35372 case IX86_BUILTIN_XGETBV:
35374 op0 = gen_reg_rtx (DImode);
35375 op1 = gen_reg_rtx (DImode);
35377 if (fcode == IX86_BUILTIN_RDPMC)
35379 arg0 = CALL_EXPR_ARG (exp, 0);
35380 op2 = expand_normal (arg0);
35381 if (!register_operand (op2, SImode))
35382 op2 = copy_to_mode_reg (SImode, op2);
35384 insn = (TARGET_64BIT
35385 ? gen_rdpmc_rex64 (op0, op1, op2)
35386 : gen_rdpmc (op0, op2));
35387 emit_insn (insn);
35389 else if (fcode == IX86_BUILTIN_XGETBV)
35391 arg0 = CALL_EXPR_ARG (exp, 0);
35392 op2 = expand_normal (arg0);
35393 if (!register_operand (op2, SImode))
35394 op2 = copy_to_mode_reg (SImode, op2);
35396 insn = (TARGET_64BIT
35397 ? gen_xgetbv_rex64 (op0, op1, op2)
35398 : gen_xgetbv (op0, op2));
35399 emit_insn (insn);
35401 else if (fcode == IX86_BUILTIN_RDTSC)
35403 insn = (TARGET_64BIT
35404 ? gen_rdtsc_rex64 (op0, op1)
35405 : gen_rdtsc (op0));
35406 emit_insn (insn);
35408 else
35410 op2 = gen_reg_rtx (SImode);
35412 insn = (TARGET_64BIT
35413 ? gen_rdtscp_rex64 (op0, op1, op2)
35414 : gen_rdtscp (op0, op2));
35415 emit_insn (insn);
35417 arg0 = CALL_EXPR_ARG (exp, 0);
35418 op4 = expand_normal (arg0);
35419 if (!address_operand (op4, VOIDmode))
35421 op4 = convert_memory_address (Pmode, op4);
35422 op4 = copy_addr_to_reg (op4);
35424 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35427 if (target == 0)
35429 /* mode is VOIDmode if __builtin_rd* has been called
35430 without lhs. */
35431 if (mode == VOIDmode)
35432 return target;
35433 target = gen_reg_rtx (mode);
35436 if (TARGET_64BIT)
35438 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35439 op1, 1, OPTAB_DIRECT);
35440 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35441 op0, 1, OPTAB_DIRECT);
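	  /* The rd* patterns return the result split across two DImode
	     registers holding the EAX and EDX halves; the shift and IOR
	     above recombine them as result = lo | (hi << 32).  */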
35444 emit_move_insn (target, op0);
35445 return target;
35447 case IX86_BUILTIN_FXSAVE:
35448 case IX86_BUILTIN_FXRSTOR:
35449 case IX86_BUILTIN_FXSAVE64:
35450 case IX86_BUILTIN_FXRSTOR64:
35451 case IX86_BUILTIN_FNSTENV:
35452 case IX86_BUILTIN_FLDENV:
35453 mode0 = BLKmode;
35454 switch (fcode)
35456 case IX86_BUILTIN_FXSAVE:
35457 icode = CODE_FOR_fxsave;
35458 break;
35459 case IX86_BUILTIN_FXRSTOR:
35460 icode = CODE_FOR_fxrstor;
35461 break;
35462 case IX86_BUILTIN_FXSAVE64:
35463 icode = CODE_FOR_fxsave64;
35464 break;
35465 case IX86_BUILTIN_FXRSTOR64:
35466 icode = CODE_FOR_fxrstor64;
35467 break;
35468 case IX86_BUILTIN_FNSTENV:
35469 icode = CODE_FOR_fnstenv;
35470 break;
35471 case IX86_BUILTIN_FLDENV:
35472 icode = CODE_FOR_fldenv;
35473 break;
35474 default:
35475 gcc_unreachable ();
35478 arg0 = CALL_EXPR_ARG (exp, 0);
35479 op0 = expand_normal (arg0);
35481 if (!address_operand (op0, VOIDmode))
35483 op0 = convert_memory_address (Pmode, op0);
35484 op0 = copy_addr_to_reg (op0);
35486 op0 = gen_rtx_MEM (mode0, op0);
35488 pat = GEN_FCN (icode) (op0);
35489 if (pat)
35490 emit_insn (pat);
35491 return 0;
35493 case IX86_BUILTIN_XSETBV:
35494 arg0 = CALL_EXPR_ARG (exp, 0);
35495 arg1 = CALL_EXPR_ARG (exp, 1);
35496 op0 = expand_normal (arg0);
35497 op1 = expand_normal (arg1);
35499 if (!REG_P (op0))
35500 op0 = copy_to_mode_reg (SImode, op0);
35502 if (TARGET_64BIT)
35504 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35505 NULL, 1, OPTAB_DIRECT);
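	  /* xsetbv takes the 64-bit value in EDX:EAX, so the shift above
	     extracts the high 32 bits into OP2 and both halves are narrowed
	     to SImode below before generating the rex64 pattern.  */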
35507 op2 = gen_lowpart (SImode, op2);
35508 op1 = gen_lowpart (SImode, op1);
35509 if (!REG_P (op1))
35510 op1 = copy_to_mode_reg (SImode, op1);
35511 if (!REG_P (op2))
35512 op2 = copy_to_mode_reg (SImode, op2);
35513 icode = CODE_FOR_xsetbv_rex64;
35514 pat = GEN_FCN (icode) (op0, op1, op2);
35516 else
35518 if (!REG_P (op1))
35519 op1 = copy_to_mode_reg (DImode, op1);
35520 icode = CODE_FOR_xsetbv;
35521 pat = GEN_FCN (icode) (op0, op1);
35523 if (pat)
35524 emit_insn (pat);
35525 return 0;
35527 case IX86_BUILTIN_XSAVE:
35528 case IX86_BUILTIN_XRSTOR:
35529 case IX86_BUILTIN_XSAVE64:
35530 case IX86_BUILTIN_XRSTOR64:
35531 case IX86_BUILTIN_XSAVEOPT:
35532 case IX86_BUILTIN_XSAVEOPT64:
35533 case IX86_BUILTIN_XSAVES:
35534 case IX86_BUILTIN_XRSTORS:
35535 case IX86_BUILTIN_XSAVES64:
35536 case IX86_BUILTIN_XRSTORS64:
35537 case IX86_BUILTIN_XSAVEC:
35538 case IX86_BUILTIN_XSAVEC64:
35539 arg0 = CALL_EXPR_ARG (exp, 0);
35540 arg1 = CALL_EXPR_ARG (exp, 1);
35541 op0 = expand_normal (arg0);
35542 op1 = expand_normal (arg1);
35544 if (!address_operand (op0, VOIDmode))
35546 op0 = convert_memory_address (Pmode, op0);
35547 op0 = copy_addr_to_reg (op0);
35549 op0 = gen_rtx_MEM (BLKmode, op0);
35551 op1 = force_reg (DImode, op1);
35553 if (TARGET_64BIT)
35555 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35556 NULL, 1, OPTAB_DIRECT);
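	  /* The xsave/xrstor family likewise passes its 64-bit feature mask
	     in EDX:EAX, so the mask is split into high (OP2) and low halves
	     here before picking the _rex64 pattern below.  */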
35557 switch (fcode)
35559 case IX86_BUILTIN_XSAVE:
35560 icode = CODE_FOR_xsave_rex64;
35561 break;
35562 case IX86_BUILTIN_XRSTOR:
35563 icode = CODE_FOR_xrstor_rex64;
35564 break;
35565 case IX86_BUILTIN_XSAVE64:
35566 icode = CODE_FOR_xsave64;
35567 break;
35568 case IX86_BUILTIN_XRSTOR64:
35569 icode = CODE_FOR_xrstor64;
35570 break;
35571 case IX86_BUILTIN_XSAVEOPT:
35572 icode = CODE_FOR_xsaveopt_rex64;
35573 break;
35574 case IX86_BUILTIN_XSAVEOPT64:
35575 icode = CODE_FOR_xsaveopt64;
35576 break;
35577 case IX86_BUILTIN_XSAVES:
35578 icode = CODE_FOR_xsaves_rex64;
35579 break;
35580 case IX86_BUILTIN_XRSTORS:
35581 icode = CODE_FOR_xrstors_rex64;
35582 break;
35583 case IX86_BUILTIN_XSAVES64:
35584 icode = CODE_FOR_xsaves64;
35585 break;
35586 case IX86_BUILTIN_XRSTORS64:
35587 icode = CODE_FOR_xrstors64;
35588 break;
35589 case IX86_BUILTIN_XSAVEC:
35590 icode = CODE_FOR_xsavec_rex64;
35591 break;
35592 case IX86_BUILTIN_XSAVEC64:
35593 icode = CODE_FOR_xsavec64;
35594 break;
35595 default:
35596 gcc_unreachable ();
35599 op2 = gen_lowpart (SImode, op2);
35600 op1 = gen_lowpart (SImode, op1);
35601 pat = GEN_FCN (icode) (op0, op1, op2);
35603 else
35605 switch (fcode)
35607 case IX86_BUILTIN_XSAVE:
35608 icode = CODE_FOR_xsave;
35609 break;
35610 case IX86_BUILTIN_XRSTOR:
35611 icode = CODE_FOR_xrstor;
35612 break;
35613 case IX86_BUILTIN_XSAVEOPT:
35614 icode = CODE_FOR_xsaveopt;
35615 break;
35616 case IX86_BUILTIN_XSAVES:
35617 icode = CODE_FOR_xsaves;
35618 break;
35619 case IX86_BUILTIN_XRSTORS:
35620 icode = CODE_FOR_xrstors;
35621 break;
35622 case IX86_BUILTIN_XSAVEC:
35623 icode = CODE_FOR_xsavec;
35624 break;
35625 default:
35626 gcc_unreachable ();
35628 pat = GEN_FCN (icode) (op0, op1);
35631 if (pat)
35632 emit_insn (pat);
35633 return 0;
35635 case IX86_BUILTIN_LLWPCB:
35636 arg0 = CALL_EXPR_ARG (exp, 0);
35637 op0 = expand_normal (arg0);
35638 icode = CODE_FOR_lwp_llwpcb;
35639 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35640 op0 = ix86_zero_extend_to_Pmode (op0);
35641 emit_insn (gen_lwp_llwpcb (op0));
35642 return 0;
35644 case IX86_BUILTIN_SLWPCB:
35645 icode = CODE_FOR_lwp_slwpcb;
35646 if (!target
35647 || !insn_data[icode].operand[0].predicate (target, Pmode))
35648 target = gen_reg_rtx (Pmode);
35649 emit_insn (gen_lwp_slwpcb (target));
35650 return target;
35652 case IX86_BUILTIN_BEXTRI32:
35653 case IX86_BUILTIN_BEXTRI64:
35654 arg0 = CALL_EXPR_ARG (exp, 0);
35655 arg1 = CALL_EXPR_ARG (exp, 1);
35656 op0 = expand_normal (arg0);
35657 op1 = expand_normal (arg1);
35658 icode = (fcode == IX86_BUILTIN_BEXTRI32
35659 ? CODE_FOR_tbm_bextri_si
35660 : CODE_FOR_tbm_bextri_di);
35661 if (!CONST_INT_P (op1))
35663 error ("last argument must be an immediate");
35664 return const0_rtx;
35666 else
35668 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35669 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35670 op1 = GEN_INT (length);
35671 op2 = GEN_INT (lsb_index);
35672 pat = GEN_FCN (icode) (target, op0, op1, op2);
35673 if (pat)
35674 emit_insn (pat);
35675 return target;
35678 case IX86_BUILTIN_RDRAND16_STEP:
35679 icode = CODE_FOR_rdrandhi_1;
35680 mode0 = HImode;
35681 goto rdrand_step;
35683 case IX86_BUILTIN_RDRAND32_STEP:
35684 icode = CODE_FOR_rdrandsi_1;
35685 mode0 = SImode;
35686 goto rdrand_step;
35688 case IX86_BUILTIN_RDRAND64_STEP:
35689 icode = CODE_FOR_rdranddi_1;
35690 mode0 = DImode;
35692 rdrand_step:
35693 arg0 = CALL_EXPR_ARG (exp, 0);
35694 op1 = expand_normal (arg0);
35695 if (!address_operand (op1, VOIDmode))
35697 op1 = convert_memory_address (Pmode, op1);
35698 op1 = copy_addr_to_reg (op1);
35701 op0 = gen_reg_rtx (mode0);
35702 emit_insn (GEN_FCN (icode) (op0));
35704 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35706 op1 = gen_reg_rtx (SImode);
35707 emit_move_insn (op1, CONST1_RTX (SImode));
35709 /* Emit SImode conditional move. */
35710 if (mode0 == HImode)
35712 if (TARGET_ZERO_EXTEND_WITH_AND
35713 && optimize_function_for_speed_p (cfun))
35715 op2 = force_reg (SImode, const0_rtx);
35717 emit_insn (gen_movstricthi
35718 (gen_lowpart (HImode, op2), op0));
35720 else
35722 op2 = gen_reg_rtx (SImode);
35724 emit_insn (gen_zero_extendhisi2 (op2, op0));
35727 else if (mode0 == SImode)
35728 op2 = op0;
35729 else
35730 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35732 if (target == 0
35733 || !register_operand (target, SImode))
35734 target = gen_reg_rtx (SImode);
35736 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35737 const0_rtx);
35738 emit_insn (gen_rtx_SET (target,
35739 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
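      /* GEU on CCCmode is the carry-clear test, so TARGET becomes 1 (OP1)
	 when rdrand succeeded (CF set) and the destination value (OP2)
	 otherwise; since the instruction is documented to zero its
	 destination when no random number is available, the builtin
	 returns the usual 0/1 status.  */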
35740 return target;
35742 case IX86_BUILTIN_RDSEED16_STEP:
35743 icode = CODE_FOR_rdseedhi_1;
35744 mode0 = HImode;
35745 goto rdseed_step;
35747 case IX86_BUILTIN_RDSEED32_STEP:
35748 icode = CODE_FOR_rdseedsi_1;
35749 mode0 = SImode;
35750 goto rdseed_step;
35752 case IX86_BUILTIN_RDSEED64_STEP:
35753 icode = CODE_FOR_rdseeddi_1;
35754 mode0 = DImode;
35756 rdseed_step:
35757 arg0 = CALL_EXPR_ARG (exp, 0);
35758 op1 = expand_normal (arg0);
35759 if (!address_operand (op1, VOIDmode))
35761 op1 = convert_memory_address (Pmode, op1);
35762 op1 = copy_addr_to_reg (op1);
35765 op0 = gen_reg_rtx (mode0);
35766 emit_insn (GEN_FCN (icode) (op0));
35768 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35770 op2 = gen_reg_rtx (QImode);
35772 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35773 const0_rtx);
35774 emit_insn (gen_rtx_SET (op2, pat));
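      /* LTU on CCCmode is the carry-set test, so OP2 holds 1 exactly when
	 rdseed produced a value; the zero-extension below turns that into
	 the int status returned to the caller.  */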
35776 if (target == 0
35777 || !register_operand (target, SImode))
35778 target = gen_reg_rtx (SImode);
35780 emit_insn (gen_zero_extendqisi2 (target, op2));
35781 return target;
35783 case IX86_BUILTIN_SBB32:
35784 icode = CODE_FOR_subborrowsi;
35785 mode0 = SImode;
35786 goto handlecarry;
35788 case IX86_BUILTIN_SBB64:
35789 icode = CODE_FOR_subborrowdi;
35790 mode0 = DImode;
35791 goto handlecarry;
35793 case IX86_BUILTIN_ADDCARRYX32:
35794 icode = CODE_FOR_addcarrysi;
35795 mode0 = SImode;
35796 goto handlecarry;
35798 case IX86_BUILTIN_ADDCARRYX64:
35799 icode = CODE_FOR_addcarrydi;
35800 mode0 = DImode;
35802 handlecarry:
35803 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35804 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35805 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35806 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35808 op1 = expand_normal (arg0);
35809 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35811 op2 = expand_normal (arg1);
35812 if (!register_operand (op2, mode0))
35813 op2 = copy_to_mode_reg (mode0, op2);
35815 op3 = expand_normal (arg2);
35816 if (!register_operand (op3, mode0))
35817 op3 = copy_to_mode_reg (mode0, op3);
35819 op4 = expand_normal (arg3);
35820 if (!address_operand (op4, VOIDmode))
35822 op4 = convert_memory_address (Pmode, op4);
35823 op4 = copy_addr_to_reg (op4);
35826 /* Generate CF from input operand. */
35827 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
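      /* Adding -1 (0xff) to the QImode carry-in overflows exactly when the
	 carry-in is nonzero, so this leaves CF == (c_in != 0) for the
	 addcarry/subborrow pattern generated next.  */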
35829 /* Generate instruction that consumes CF. */
35830 op0 = gen_reg_rtx (mode0);
35832 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
35833 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
35834 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
35836 /* Return current CF value. */
35837 if (target == 0)
35838 target = gen_reg_rtx (QImode);
35840 PUT_MODE (pat, QImode);
35841 emit_insn (gen_rtx_SET (target, pat));
35843 /* Store the result. */
35844 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35846 return target;
35848 case IX86_BUILTIN_READ_FLAGS:
35849 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35851 if (optimize
35852 || target == NULL_RTX
35853 || !nonimmediate_operand (target, word_mode)
35854 || GET_MODE (target) != word_mode)
35855 target = gen_reg_rtx (word_mode);
35857 emit_insn (gen_pop (target));
35858 return target;
35860 case IX86_BUILTIN_WRITE_FLAGS:
35862 arg0 = CALL_EXPR_ARG (exp, 0);
35863 op0 = expand_normal (arg0);
35864 if (!general_no_elim_operand (op0, word_mode))
35865 op0 = copy_to_mode_reg (word_mode, op0);
35867 emit_insn (gen_push (op0));
35868 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35869 return 0;
35871 case IX86_BUILTIN_KTESTC8:
35872 icode = CODE_FOR_ktestqi;
35873 mode3 = CCCmode;
35874 goto kortest;
35876 case IX86_BUILTIN_KTESTZ8:
35877 icode = CODE_FOR_ktestqi;
35878 mode3 = CCZmode;
35879 goto kortest;
35881 case IX86_BUILTIN_KTESTC16:
35882 icode = CODE_FOR_ktesthi;
35883 mode3 = CCCmode;
35884 goto kortest;
35886 case IX86_BUILTIN_KTESTZ16:
35887 icode = CODE_FOR_ktesthi;
35888 mode3 = CCZmode;
35889 goto kortest;
35891 case IX86_BUILTIN_KTESTC32:
35892 icode = CODE_FOR_ktestsi;
35893 mode3 = CCCmode;
35894 goto kortest;
35896 case IX86_BUILTIN_KTESTZ32:
35897 icode = CODE_FOR_ktestsi;
35898 mode3 = CCZmode;
35899 goto kortest;
35901 case IX86_BUILTIN_KTESTC64:
35902 icode = CODE_FOR_ktestdi;
35903 mode3 = CCCmode;
35904 goto kortest;
35906 case IX86_BUILTIN_KTESTZ64:
35907 icode = CODE_FOR_ktestdi;
35908 mode3 = CCZmode;
35909 goto kortest;
35911 case IX86_BUILTIN_KORTESTC8:
35912 icode = CODE_FOR_kortestqi;
35913 mode3 = CCCmode;
35914 goto kortest;
35916 case IX86_BUILTIN_KORTESTZ8:
35917 icode = CODE_FOR_kortestqi;
35918 mode3 = CCZmode;
35919 goto kortest;
35921 case IX86_BUILTIN_KORTESTC16:
35922 icode = CODE_FOR_kortesthi;
35923 mode3 = CCCmode;
35924 goto kortest;
35926 case IX86_BUILTIN_KORTESTZ16:
35927 icode = CODE_FOR_kortesthi;
35928 mode3 = CCZmode;
35929 goto kortest;
35931 case IX86_BUILTIN_KORTESTC32:
35932 icode = CODE_FOR_kortestsi;
35933 mode3 = CCCmode;
35934 goto kortest;
35936 case IX86_BUILTIN_KORTESTZ32:
35937 icode = CODE_FOR_kortestsi;
35938 mode3 = CCZmode;
35939 goto kortest;
35941 case IX86_BUILTIN_KORTESTC64:
35942 icode = CODE_FOR_kortestdi;
35943 mode3 = CCCmode;
35944 goto kortest;
35946 case IX86_BUILTIN_KORTESTZ64:
35947 icode = CODE_FOR_kortestdi;
35948 mode3 = CCZmode;
35950 kortest:
35951 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35952 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35953 op0 = expand_normal (arg0);
35954 op1 = expand_normal (arg1);
35956 mode0 = insn_data[icode].operand[0].mode;
35957 mode1 = insn_data[icode].operand[1].mode;
35959 if (GET_MODE (op0) != VOIDmode)
35960 op0 = force_reg (GET_MODE (op0), op0);
35962 op0 = gen_lowpart (mode0, op0);
35964 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35965 op0 = copy_to_mode_reg (mode0, op0);
35967 if (GET_MODE (op1) != VOIDmode)
35968 op1 = force_reg (GET_MODE (op1), op1);
35970 op1 = gen_lowpart (mode1, op1);
35972 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35973 op1 = copy_to_mode_reg (mode1, op1);
35975 target = gen_reg_rtx (QImode);
35977 /* Emit kortest. */
35978 emit_insn (GEN_FCN (icode) (op0, op1));
35979 /* And use setcc to return result from flags. */
35980 ix86_expand_setcc (target, EQ,
35981 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
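      /* MODE3 was chosen per builtin above: CCCmode for the *C variants
	 (carry flag) and CCZmode for the *Z variants (zero flag), so the
	 setcc above reads exactly the flag the intrinsic documents.  */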
35982 return target;
35984 case IX86_BUILTIN_GATHERSIV2DF:
35985 icode = CODE_FOR_avx2_gathersiv2df;
35986 goto gather_gen;
35987 case IX86_BUILTIN_GATHERSIV4DF:
35988 icode = CODE_FOR_avx2_gathersiv4df;
35989 goto gather_gen;
35990 case IX86_BUILTIN_GATHERDIV2DF:
35991 icode = CODE_FOR_avx2_gatherdiv2df;
35992 goto gather_gen;
35993 case IX86_BUILTIN_GATHERDIV4DF:
35994 icode = CODE_FOR_avx2_gatherdiv4df;
35995 goto gather_gen;
35996 case IX86_BUILTIN_GATHERSIV4SF:
35997 icode = CODE_FOR_avx2_gathersiv4sf;
35998 goto gather_gen;
35999 case IX86_BUILTIN_GATHERSIV8SF:
36000 icode = CODE_FOR_avx2_gathersiv8sf;
36001 goto gather_gen;
36002 case IX86_BUILTIN_GATHERDIV4SF:
36003 icode = CODE_FOR_avx2_gatherdiv4sf;
36004 goto gather_gen;
36005 case IX86_BUILTIN_GATHERDIV8SF:
36006 icode = CODE_FOR_avx2_gatherdiv8sf;
36007 goto gather_gen;
36008 case IX86_BUILTIN_GATHERSIV2DI:
36009 icode = CODE_FOR_avx2_gathersiv2di;
36010 goto gather_gen;
36011 case IX86_BUILTIN_GATHERSIV4DI:
36012 icode = CODE_FOR_avx2_gathersiv4di;
36013 goto gather_gen;
36014 case IX86_BUILTIN_GATHERDIV2DI:
36015 icode = CODE_FOR_avx2_gatherdiv2di;
36016 goto gather_gen;
36017 case IX86_BUILTIN_GATHERDIV4DI:
36018 icode = CODE_FOR_avx2_gatherdiv4di;
36019 goto gather_gen;
36020 case IX86_BUILTIN_GATHERSIV4SI:
36021 icode = CODE_FOR_avx2_gathersiv4si;
36022 goto gather_gen;
36023 case IX86_BUILTIN_GATHERSIV8SI:
36024 icode = CODE_FOR_avx2_gathersiv8si;
36025 goto gather_gen;
36026 case IX86_BUILTIN_GATHERDIV4SI:
36027 icode = CODE_FOR_avx2_gatherdiv4si;
36028 goto gather_gen;
36029 case IX86_BUILTIN_GATHERDIV8SI:
36030 icode = CODE_FOR_avx2_gatherdiv8si;
36031 goto gather_gen;
36032 case IX86_BUILTIN_GATHERALTSIV4DF:
36033 icode = CODE_FOR_avx2_gathersiv4df;
36034 goto gather_gen;
36035 case IX86_BUILTIN_GATHERALTDIV8SF:
36036 icode = CODE_FOR_avx2_gatherdiv8sf;
36037 goto gather_gen;
36038 case IX86_BUILTIN_GATHERALTSIV4DI:
36039 icode = CODE_FOR_avx2_gathersiv4di;
36040 goto gather_gen;
36041 case IX86_BUILTIN_GATHERALTDIV8SI:
36042 icode = CODE_FOR_avx2_gatherdiv8si;
36043 goto gather_gen;
36044 case IX86_BUILTIN_GATHER3SIV16SF:
36045 icode = CODE_FOR_avx512f_gathersiv16sf;
36046 goto gather_gen;
36047 case IX86_BUILTIN_GATHER3SIV8DF:
36048 icode = CODE_FOR_avx512f_gathersiv8df;
36049 goto gather_gen;
36050 case IX86_BUILTIN_GATHER3DIV16SF:
36051 icode = CODE_FOR_avx512f_gatherdiv16sf;
36052 goto gather_gen;
36053 case IX86_BUILTIN_GATHER3DIV8DF:
36054 icode = CODE_FOR_avx512f_gatherdiv8df;
36055 goto gather_gen;
36056 case IX86_BUILTIN_GATHER3SIV16SI:
36057 icode = CODE_FOR_avx512f_gathersiv16si;
36058 goto gather_gen;
36059 case IX86_BUILTIN_GATHER3SIV8DI:
36060 icode = CODE_FOR_avx512f_gathersiv8di;
36061 goto gather_gen;
36062 case IX86_BUILTIN_GATHER3DIV16SI:
36063 icode = CODE_FOR_avx512f_gatherdiv16si;
36064 goto gather_gen;
36065 case IX86_BUILTIN_GATHER3DIV8DI:
36066 icode = CODE_FOR_avx512f_gatherdiv8di;
36067 goto gather_gen;
36068 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36069 icode = CODE_FOR_avx512f_gathersiv8df;
36070 goto gather_gen;
36071 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36072 icode = CODE_FOR_avx512f_gatherdiv16sf;
36073 goto gather_gen;
36074 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36075 icode = CODE_FOR_avx512f_gathersiv8di;
36076 goto gather_gen;
36077 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36078 icode = CODE_FOR_avx512f_gatherdiv16si;
36079 goto gather_gen;
36080 case IX86_BUILTIN_GATHER3SIV2DF:
36081 icode = CODE_FOR_avx512vl_gathersiv2df;
36082 goto gather_gen;
36083 case IX86_BUILTIN_GATHER3SIV4DF:
36084 icode = CODE_FOR_avx512vl_gathersiv4df;
36085 goto gather_gen;
36086 case IX86_BUILTIN_GATHER3DIV2DF:
36087 icode = CODE_FOR_avx512vl_gatherdiv2df;
36088 goto gather_gen;
36089 case IX86_BUILTIN_GATHER3DIV4DF:
36090 icode = CODE_FOR_avx512vl_gatherdiv4df;
36091 goto gather_gen;
36092 case IX86_BUILTIN_GATHER3SIV4SF:
36093 icode = CODE_FOR_avx512vl_gathersiv4sf;
36094 goto gather_gen;
36095 case IX86_BUILTIN_GATHER3SIV8SF:
36096 icode = CODE_FOR_avx512vl_gathersiv8sf;
36097 goto gather_gen;
36098 case IX86_BUILTIN_GATHER3DIV4SF:
36099 icode = CODE_FOR_avx512vl_gatherdiv4sf;
36100 goto gather_gen;
36101 case IX86_BUILTIN_GATHER3DIV8SF:
36102 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36103 goto gather_gen;
36104 case IX86_BUILTIN_GATHER3SIV2DI:
36105 icode = CODE_FOR_avx512vl_gathersiv2di;
36106 goto gather_gen;
36107 case IX86_BUILTIN_GATHER3SIV4DI:
36108 icode = CODE_FOR_avx512vl_gathersiv4di;
36109 goto gather_gen;
36110 case IX86_BUILTIN_GATHER3DIV2DI:
36111 icode = CODE_FOR_avx512vl_gatherdiv2di;
36112 goto gather_gen;
36113 case IX86_BUILTIN_GATHER3DIV4DI:
36114 icode = CODE_FOR_avx512vl_gatherdiv4di;
36115 goto gather_gen;
36116 case IX86_BUILTIN_GATHER3SIV4SI:
36117 icode = CODE_FOR_avx512vl_gathersiv4si;
36118 goto gather_gen;
36119 case IX86_BUILTIN_GATHER3SIV8SI:
36120 icode = CODE_FOR_avx512vl_gathersiv8si;
36121 goto gather_gen;
36122 case IX86_BUILTIN_GATHER3DIV4SI:
36123 icode = CODE_FOR_avx512vl_gatherdiv4si;
36124 goto gather_gen;
36125 case IX86_BUILTIN_GATHER3DIV8SI:
36126 icode = CODE_FOR_avx512vl_gatherdiv8si;
36127 goto gather_gen;
36128 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36129 icode = CODE_FOR_avx512vl_gathersiv4df;
36130 goto gather_gen;
36131 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36132 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36133 goto gather_gen;
36134 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36135 icode = CODE_FOR_avx512vl_gathersiv4di;
36136 goto gather_gen;
36137 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36138 icode = CODE_FOR_avx512vl_gatherdiv8si;
36139 goto gather_gen;
36140 case IX86_BUILTIN_SCATTERSIV16SF:
36141 icode = CODE_FOR_avx512f_scattersiv16sf;
36142 goto scatter_gen;
36143 case IX86_BUILTIN_SCATTERSIV8DF:
36144 icode = CODE_FOR_avx512f_scattersiv8df;
36145 goto scatter_gen;
36146 case IX86_BUILTIN_SCATTERDIV16SF:
36147 icode = CODE_FOR_avx512f_scatterdiv16sf;
36148 goto scatter_gen;
36149 case IX86_BUILTIN_SCATTERDIV8DF:
36150 icode = CODE_FOR_avx512f_scatterdiv8df;
36151 goto scatter_gen;
36152 case IX86_BUILTIN_SCATTERSIV16SI:
36153 icode = CODE_FOR_avx512f_scattersiv16si;
36154 goto scatter_gen;
36155 case IX86_BUILTIN_SCATTERSIV8DI:
36156 icode = CODE_FOR_avx512f_scattersiv8di;
36157 goto scatter_gen;
36158 case IX86_BUILTIN_SCATTERDIV16SI:
36159 icode = CODE_FOR_avx512f_scatterdiv16si;
36160 goto scatter_gen;
36161 case IX86_BUILTIN_SCATTERDIV8DI:
36162 icode = CODE_FOR_avx512f_scatterdiv8di;
36163 goto scatter_gen;
36164 case IX86_BUILTIN_SCATTERSIV8SF:
36165 icode = CODE_FOR_avx512vl_scattersiv8sf;
36166 goto scatter_gen;
36167 case IX86_BUILTIN_SCATTERSIV4SF:
36168 icode = CODE_FOR_avx512vl_scattersiv4sf;
36169 goto scatter_gen;
36170 case IX86_BUILTIN_SCATTERSIV4DF:
36171 icode = CODE_FOR_avx512vl_scattersiv4df;
36172 goto scatter_gen;
36173 case IX86_BUILTIN_SCATTERSIV2DF:
36174 icode = CODE_FOR_avx512vl_scattersiv2df;
36175 goto scatter_gen;
36176 case IX86_BUILTIN_SCATTERDIV8SF:
36177 icode = CODE_FOR_avx512vl_scatterdiv8sf;
36178 goto scatter_gen;
36179 case IX86_BUILTIN_SCATTERDIV4SF:
36180 icode = CODE_FOR_avx512vl_scatterdiv4sf;
36181 goto scatter_gen;
36182 case IX86_BUILTIN_SCATTERDIV4DF:
36183 icode = CODE_FOR_avx512vl_scatterdiv4df;
36184 goto scatter_gen;
36185 case IX86_BUILTIN_SCATTERDIV2DF:
36186 icode = CODE_FOR_avx512vl_scatterdiv2df;
36187 goto scatter_gen;
36188 case IX86_BUILTIN_SCATTERSIV8SI:
36189 icode = CODE_FOR_avx512vl_scattersiv8si;
36190 goto scatter_gen;
36191 case IX86_BUILTIN_SCATTERSIV4SI:
36192 icode = CODE_FOR_avx512vl_scattersiv4si;
36193 goto scatter_gen;
36194 case IX86_BUILTIN_SCATTERSIV4DI:
36195 icode = CODE_FOR_avx512vl_scattersiv4di;
36196 goto scatter_gen;
36197 case IX86_BUILTIN_SCATTERSIV2DI:
36198 icode = CODE_FOR_avx512vl_scattersiv2di;
36199 goto scatter_gen;
36200 case IX86_BUILTIN_SCATTERDIV8SI:
36201 icode = CODE_FOR_avx512vl_scatterdiv8si;
36202 goto scatter_gen;
36203 case IX86_BUILTIN_SCATTERDIV4SI:
36204 icode = CODE_FOR_avx512vl_scatterdiv4si;
36205 goto scatter_gen;
36206 case IX86_BUILTIN_SCATTERDIV4DI:
36207 icode = CODE_FOR_avx512vl_scatterdiv4di;
36208 goto scatter_gen;
36209 case IX86_BUILTIN_SCATTERDIV2DI:
36210 icode = CODE_FOR_avx512vl_scatterdiv2di;
36211 goto scatter_gen;
36212 case IX86_BUILTIN_GATHERPFDPD:
36213 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36214 goto vec_prefetch_gen;
36215 case IX86_BUILTIN_SCATTERALTSIV8DF:
36216 icode = CODE_FOR_avx512f_scattersiv8df;
36217 goto scatter_gen;
36218 case IX86_BUILTIN_SCATTERALTDIV16SF:
36219 icode = CODE_FOR_avx512f_scatterdiv16sf;
36220 goto scatter_gen;
36221 case IX86_BUILTIN_SCATTERALTSIV8DI:
36222 icode = CODE_FOR_avx512f_scattersiv8di;
36223 goto scatter_gen;
36224 case IX86_BUILTIN_SCATTERALTDIV16SI:
36225 icode = CODE_FOR_avx512f_scatterdiv16si;
36226 goto scatter_gen;
36227 case IX86_BUILTIN_GATHERPFDPS:
36228 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36229 goto vec_prefetch_gen;
36230 case IX86_BUILTIN_GATHERPFQPD:
36231 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36232 goto vec_prefetch_gen;
36233 case IX86_BUILTIN_GATHERPFQPS:
36234 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36235 goto vec_prefetch_gen;
36236 case IX86_BUILTIN_SCATTERPFDPD:
36237 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36238 goto vec_prefetch_gen;
36239 case IX86_BUILTIN_SCATTERPFDPS:
36240 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36241 goto vec_prefetch_gen;
36242 case IX86_BUILTIN_SCATTERPFQPD:
36243 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36244 goto vec_prefetch_gen;
36245 case IX86_BUILTIN_SCATTERPFQPS:
36246 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36247 goto vec_prefetch_gen;
36249 gather_gen:
36250 rtx half;
36251 rtx (*gen) (rtx, rtx);
36253 arg0 = CALL_EXPR_ARG (exp, 0);
36254 arg1 = CALL_EXPR_ARG (exp, 1);
36255 arg2 = CALL_EXPR_ARG (exp, 2);
36256 arg3 = CALL_EXPR_ARG (exp, 3);
36257 arg4 = CALL_EXPR_ARG (exp, 4);
36258 op0 = expand_normal (arg0);
36259 op1 = expand_normal (arg1);
36260 op2 = expand_normal (arg2);
36261 op3 = expand_normal (arg3);
36262 op4 = expand_normal (arg4);
36263 /* Note the arg order is different from the operand order. */
36264 mode0 = insn_data[icode].operand[1].mode;
36265 mode2 = insn_data[icode].operand[3].mode;
36266 mode3 = insn_data[icode].operand[4].mode;
36267 mode4 = insn_data[icode].operand[5].mode;
36269 if (target == NULL_RTX
36270 || GET_MODE (target) != insn_data[icode].operand[0].mode
36271 || !insn_data[icode].operand[0].predicate (target,
36272 GET_MODE (target)))
36273 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36274 else
36275 subtarget = target;
36277 switch (fcode)
36279 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36280 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36281 half = gen_reg_rtx (V8SImode);
36282 if (!nonimmediate_operand (op2, V16SImode))
36283 op2 = copy_to_mode_reg (V16SImode, op2);
36284 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36285 op2 = half;
36286 break;
36287 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36288 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36289 case IX86_BUILTIN_GATHERALTSIV4DF:
36290 case IX86_BUILTIN_GATHERALTSIV4DI:
36291 half = gen_reg_rtx (V4SImode);
36292 if (!nonimmediate_operand (op2, V8SImode))
36293 op2 = copy_to_mode_reg (V8SImode, op2);
36294 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36295 op2 = half;
36296 break;
36297 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36298 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36299 half = gen_reg_rtx (mode0);
36300 if (mode0 == V8SFmode)
36301 gen = gen_vec_extract_lo_v16sf;
36302 else
36303 gen = gen_vec_extract_lo_v16si;
36304 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36305 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36306 emit_insn (gen (half, op0));
36307 op0 = half;
36308 if (GET_MODE (op3) != VOIDmode)
36310 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36311 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36312 emit_insn (gen (half, op3));
36313 op3 = half;
36315 break;
36316 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36317 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36318 case IX86_BUILTIN_GATHERALTDIV8SF:
36319 case IX86_BUILTIN_GATHERALTDIV8SI:
36320 half = gen_reg_rtx (mode0);
36321 if (mode0 == V4SFmode)
36322 gen = gen_vec_extract_lo_v8sf;
36323 else
36324 gen = gen_vec_extract_lo_v8si;
36325 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36326 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36327 emit_insn (gen (half, op0));
36328 op0 = half;
36329 if (GET_MODE (op3) != VOIDmode)
36331 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36332 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36333 emit_insn (gen (half, op3));
36334 op3 = half;
36336 break;
36337 default:
36338 break;
36341 /* Force memory operand only with base register here. But we
36342 don't want to do it on memory operand for other builtin
36343 functions. */
36344 op1 = ix86_zero_extend_to_Pmode (op1);
36346 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36347 op0 = copy_to_mode_reg (mode0, op0);
36348 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36349 op1 = copy_to_mode_reg (Pmode, op1);
36350 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36351 op2 = copy_to_mode_reg (mode2, op2);
36353 op3 = fixup_modeless_constant (op3, mode3);
36355 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36357 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36358 op3 = copy_to_mode_reg (mode3, op3);
36360 else
36362 op3 = copy_to_reg (op3);
36363 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
36365 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36367 error ("the last argument must be scale 1, 2, 4, 8");
36368 return const0_rtx;
36371 /* Optimize. If mask is known to have all high bits set,
36372 replace op0 with pc_rtx to signal that the instruction
36373 overwrites the whole destination and doesn't use its
36374 previous contents. */
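/* For illustration (based on the checks below): a literal all-ones
   integer mask, or a VECTOR_CST whose every element has its sign bit
   set, makes op0 become pc_rtx; an SSA_NAME mask defined by a
   self-comparison such as _mm_cmpeq_pd (src, src) is recognized the
   same way further down.  */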
36375 if (optimize)
36377 if (TREE_CODE (arg3) == INTEGER_CST)
36379 if (integer_all_onesp (arg3))
36380 op0 = pc_rtx;
36382 else if (TREE_CODE (arg3) == VECTOR_CST)
36384 unsigned int negative = 0;
36385 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36387 tree cst = VECTOR_CST_ELT (arg3, i);
36388 if (TREE_CODE (cst) == INTEGER_CST
36389 && tree_int_cst_sign_bit (cst))
36390 negative++;
36391 else if (TREE_CODE (cst) == REAL_CST
36392 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36393 negative++;
36395 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36396 op0 = pc_rtx;
36398 else if (TREE_CODE (arg3) == SSA_NAME
36399 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36401 /* Recognize also when mask is like:
36402 __v2df src = _mm_setzero_pd ();
36403 __v2df mask = _mm_cmpeq_pd (src, src);
36405 __v8sf src = _mm256_setzero_ps ();
36406 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36407 as that is a cheaper way to load all ones into
36408 a register than having to load a constant from
36409 memory. */
36410 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
36411 if (is_gimple_call (def_stmt))
36413 tree fndecl = gimple_call_fndecl (def_stmt);
36414 if (fndecl
36415 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36416 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36418 case IX86_BUILTIN_CMPPD:
36419 case IX86_BUILTIN_CMPPS:
36420 case IX86_BUILTIN_CMPPD256:
36421 case IX86_BUILTIN_CMPPS256:
36422 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36423 break;
36424 /* FALLTHRU */
36425 case IX86_BUILTIN_CMPEQPD:
36426 case IX86_BUILTIN_CMPEQPS:
36427 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36428 && initializer_zerop (gimple_call_arg (def_stmt,
36429 1)))
36430 op0 = pc_rtx;
36431 break;
36432 default:
36433 break;
36439 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36440 if (! pat)
36441 return const0_rtx;
36442 emit_insn (pat);
36444 switch (fcode)
36446 case IX86_BUILTIN_GATHER3DIV16SF:
36447 if (target == NULL_RTX)
36448 target = gen_reg_rtx (V8SFmode);
36449 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36450 break;
36451 case IX86_BUILTIN_GATHER3DIV16SI:
36452 if (target == NULL_RTX)
36453 target = gen_reg_rtx (V8SImode);
36454 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36455 break;
36456 case IX86_BUILTIN_GATHER3DIV8SF:
36457 case IX86_BUILTIN_GATHERDIV8SF:
36458 if (target == NULL_RTX)
36459 target = gen_reg_rtx (V4SFmode);
36460 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36461 break;
36462 case IX86_BUILTIN_GATHER3DIV8SI:
36463 case IX86_BUILTIN_GATHERDIV8SI:
36464 if (target == NULL_RTX)
36465 target = gen_reg_rtx (V4SImode);
36466 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36467 break;
36468 default:
36469 target = subtarget;
36470 break;
36472 return target;
36474 scatter_gen:
36475 arg0 = CALL_EXPR_ARG (exp, 0);
36476 arg1 = CALL_EXPR_ARG (exp, 1);
36477 arg2 = CALL_EXPR_ARG (exp, 2);
36478 arg3 = CALL_EXPR_ARG (exp, 3);
36479 arg4 = CALL_EXPR_ARG (exp, 4);
36480 op0 = expand_normal (arg0);
36481 op1 = expand_normal (arg1);
36482 op2 = expand_normal (arg2);
36483 op3 = expand_normal (arg3);
36484 op4 = expand_normal (arg4);
36485 mode1 = insn_data[icode].operand[1].mode;
36486 mode2 = insn_data[icode].operand[2].mode;
36487 mode3 = insn_data[icode].operand[3].mode;
36488 mode4 = insn_data[icode].operand[4].mode;
36490 /* Scatter instruction stores operand op3 to memory with
36491 indices from op2 and scale from op4 under writemask op1.
36492 If index operand op2 has more elements than source operand
36493 op3, one needs to use only its low half. And vice versa. */
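/* For illustration (see the cases handled below): for
   IX86_BUILTIN_SCATTERALTSIV8DF the V16SI index has more elements than
   the V8DF data, so only the low V8SI half of the index is extracted;
   for IX86_BUILTIN_SCATTERALTDIV16SF the V16SF data is wider than the
   V8DI index, so only the low half of the data is used for the store.  */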
36494 switch (fcode)
36496 case IX86_BUILTIN_SCATTERALTSIV8DF:
36497 case IX86_BUILTIN_SCATTERALTSIV8DI:
36498 half = gen_reg_rtx (V8SImode);
36499 if (!nonimmediate_operand (op2, V16SImode))
36500 op2 = copy_to_mode_reg (V16SImode, op2);
36501 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36502 op2 = half;
36503 break;
36504 case IX86_BUILTIN_SCATTERALTDIV16SF:
36505 case IX86_BUILTIN_SCATTERALTDIV16SI:
36506 half = gen_reg_rtx (mode3);
36507 if (mode3 == V8SFmode)
36508 gen = gen_vec_extract_lo_v16sf;
36509 else
36510 gen = gen_vec_extract_lo_v16si;
36511 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36512 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36513 emit_insn (gen (half, op3));
36514 op3 = half;
36515 break;
36516 default:
36517 break;
36520 /* Force memory operand only with base register here. But we
36521 don't want to do it on memory operand for other builtin
36522 functions. */
36523 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36525 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36526 op0 = copy_to_mode_reg (Pmode, op0);
36528 op1 = fixup_modeless_constant (op1, mode1);
36530 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36532 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36533 op1 = copy_to_mode_reg (mode1, op1);
36535 else
36537 op1 = copy_to_reg (op1);
36538 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
36541 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36542 op2 = copy_to_mode_reg (mode2, op2);
36544 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36545 op3 = copy_to_mode_reg (mode3, op3);
36547 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36549 error ("the last argument must be scale 1, 2, 4, 8");
36550 return const0_rtx;
36553 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36554 if (! pat)
36555 return const0_rtx;
36557 emit_insn (pat);
36558 return 0;
36560 vec_prefetch_gen:
36561 arg0 = CALL_EXPR_ARG (exp, 0);
36562 arg1 = CALL_EXPR_ARG (exp, 1);
36563 arg2 = CALL_EXPR_ARG (exp, 2);
36564 arg3 = CALL_EXPR_ARG (exp, 3);
36565 arg4 = CALL_EXPR_ARG (exp, 4);
36566 op0 = expand_normal (arg0);
36567 op1 = expand_normal (arg1);
36568 op2 = expand_normal (arg2);
36569 op3 = expand_normal (arg3);
36570 op4 = expand_normal (arg4);
36571 mode0 = insn_data[icode].operand[0].mode;
36572 mode1 = insn_data[icode].operand[1].mode;
36573 mode3 = insn_data[icode].operand[3].mode;
36574 mode4 = insn_data[icode].operand[4].mode;
36576 op0 = fixup_modeless_constant (op0, mode0);
36578 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
36580 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36581 op0 = copy_to_mode_reg (mode0, op0);
36583 else
36585 op0 = copy_to_reg (op0);
36586 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
36589 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36590 op1 = copy_to_mode_reg (mode1, op1);
36592 /* Force memory operand only with base register here. But we
36593 don't want to do it on memory operand for other builtin
36594 functions. */
36595 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36597 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36598 op2 = copy_to_mode_reg (Pmode, op2);
36600 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36602 error ("the fourth argument must be scale 1, 2, 4, 8");
36603 return const0_rtx;
36606 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36608 error ("incorrect hint operand");
36609 return const0_rtx;
36612 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36613 if (! pat)
36614 return const0_rtx;
36616 emit_insn (pat);
36618 return 0;
36620 case IX86_BUILTIN_XABORT:
36621 icode = CODE_FOR_xabort;
36622 arg0 = CALL_EXPR_ARG (exp, 0);
36623 op0 = expand_normal (arg0);
36624 mode0 = insn_data[icode].operand[0].mode;
36625 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36627 error ("the xabort's argument must be an 8-bit immediate");
36628 return const0_rtx;
36630 emit_insn (gen_xabort (op0));
36631 return 0;
36633 default:
36634 break;
36637 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
36638 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
36640 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
36641 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
36642 target);
36645 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
36646 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
36648 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
36649 switch (fcode)
36651 case IX86_BUILTIN_FABSQ:
36652 case IX86_BUILTIN_COPYSIGNQ:
36653 if (!TARGET_SSE)
36654 /* Emit a normal call if SSE isn't available. */
36655 return expand_call (exp, target, ignore);
36656 /* FALLTHRU */
36657 default:
36658 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
36662 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
36663 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
36665 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
36666 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
36667 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
36668 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
36669 int masked = 1;
36670 machine_mode mode, wide_mode, nar_mode;
36672 nar_mode = V4SFmode;
36673 mode = V16SFmode;
36674 wide_mode = V64SFmode;
36675 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
36676 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
36678 switch (fcode)
36680 case IX86_BUILTIN_4FMAPS:
36681 fcn = gen_avx5124fmaddps_4fmaddps;
36682 masked = 0;
36683 goto v4fma_expand;
36685 case IX86_BUILTIN_4DPWSSD:
36686 nar_mode = V4SImode;
36687 mode = V16SImode;
36688 wide_mode = V64SImode;
36689 fcn = gen_avx5124vnniw_vp4dpwssd;
36690 masked = 0;
36691 goto v4fma_expand;
36693 case IX86_BUILTIN_4DPWSSDS:
36694 nar_mode = V4SImode;
36695 mode = V16SImode;
36696 wide_mode = V64SImode;
36697 fcn = gen_avx5124vnniw_vp4dpwssds;
36698 masked = 0;
36699 goto v4fma_expand;
36701 case IX86_BUILTIN_4FNMAPS:
36702 fcn = gen_avx5124fmaddps_4fnmaddps;
36703 masked = 0;
36704 goto v4fma_expand;
36706 case IX86_BUILTIN_4FNMAPS_MASK:
36707 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
36708 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
36709 goto v4fma_expand;
36711 case IX86_BUILTIN_4DPWSSD_MASK:
36712 nar_mode = V4SImode;
36713 mode = V16SImode;
36714 wide_mode = V64SImode;
36715 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
36716 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
36717 goto v4fma_expand;
36719 case IX86_BUILTIN_4DPWSSDS_MASK:
36720 nar_mode = V4SImode;
36721 mode = V16SImode;
36722 wide_mode = V64SImode;
36723 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
36724 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
36725 goto v4fma_expand;
36727 case IX86_BUILTIN_4FMAPS_MASK:
36729 tree args[4];
36730 rtx ops[4];
36731 rtx wide_reg;
36732 rtx accum;
36733 rtx addr;
36734 rtx mem;
36736 v4fma_expand:
36737 wide_reg = gen_reg_rtx (wide_mode);
36738 for (i = 0; i < 4; i++)
36740 args[i] = CALL_EXPR_ARG (exp, i);
36741 ops[i] = expand_normal (args[i]);
36743 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
36744 ops[i]);
36747 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
36748 accum = force_reg (mode, accum);
36750 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
36751 addr = force_reg (Pmode, addr);
36753 mem = gen_rtx_MEM (nar_mode, addr);
36755 target = gen_reg_rtx (mode);
36757 emit_move_insn (target, accum);
36759 if (! masked)
36760 emit_insn (fcn (target, accum, wide_reg, mem));
36761 else
36763 rtx merge, mask;
36764 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
36766 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
36768 if (CONST_INT_P (mask))
36769 mask = fixup_modeless_constant (mask, HImode);
36771 mask = force_reg (HImode, mask);
36773 if (GET_MODE (mask) != HImode)
36774 mask = gen_rtx_SUBREG (HImode, mask, 0);
36776 /* If merge is 0 then we're about to emit z-masked variant. */
36777 if (const0_operand (merge, mode))
36778 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
36779 /* If merge is the same as accum then emit merge-masked variant. */
36780 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
36782 merge = force_reg (mode, merge);
36783 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
36785 /* Merge with something unknown might happen if we z-mask w/ -O0. */
36786 else
36788 target = gen_reg_rtx (mode);
36789 emit_move_insn (target, merge);
36790 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
36793 return target;
36796 case IX86_BUILTIN_4FNMASS:
36797 fcn = gen_avx5124fmaddps_4fnmaddss;
36798 masked = 0;
36799 goto s4fma_expand;
36801 case IX86_BUILTIN_4FMASS:
36802 fcn = gen_avx5124fmaddps_4fmaddss;
36803 masked = 0;
36804 goto s4fma_expand;
36806 case IX86_BUILTIN_4FNMASS_MASK:
36807 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
36808 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
36809 goto s4fma_expand;
36811 case IX86_BUILTIN_4FMASS_MASK:
36813 tree args[4];
36814 rtx ops[4];
36815 rtx wide_reg;
36816 rtx accum;
36817 rtx addr;
36818 rtx mem;
36820 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
36821 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
36823 s4fma_expand:
36824 mode = V4SFmode;
36825 wide_reg = gen_reg_rtx (V64SFmode);
36826 for (i = 0; i < 4; i++)
36828 rtx tmp;
36829 args[i] = CALL_EXPR_ARG (exp, i);
36830 ops[i] = expand_normal (args[i]);
36832 tmp = gen_reg_rtx (SFmode);
36833 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
36835 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
36836 gen_rtx_SUBREG (V16SFmode, tmp, 0));
36839 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
36840 accum = force_reg (V4SFmode, accum);
36842 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
36843 addr = force_reg (Pmode, addr);
36845 mem = gen_rtx_MEM (V4SFmode, addr);
36847 target = gen_reg_rtx (V4SFmode);
36849 emit_move_insn (target, accum);
36851 if (! masked)
36852 emit_insn (fcn (target, accum, wide_reg, mem));
36853 else
36855 rtx merge, mask;
36856 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
36858 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
36860 if (CONST_INT_P (mask))
36861 mask = fixup_modeless_constant (mask, QImode);
36863 mask = force_reg (QImode, mask);
36865 if (GET_MODE (mask) != QImode)
36866 mask = gen_rtx_SUBREG (QImode, mask, 0);
36868 /* If merge is 0 then we're about to emit z-masked variant. */
36869 if (const0_operand (merge, mode))
36870 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
36871 /* If merge is the same as accum then emit merge-masked
36872 variant. */
36873 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
36875 merge = force_reg (mode, merge);
36876 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
36878 /* Merge with something unknown might happen if we z-mask
36879 w/ -O0. */
36880 else
36882 target = gen_reg_rtx (mode);
36883 emit_move_insn (target, merge);
36884 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
36887 return target;
36889 case IX86_BUILTIN_RDPID:
36890 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
36891 target);
36892 default:
36893 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
36897 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
36898 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
36900 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
36901 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
36904 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
36905 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
36907 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
36908 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
36911 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
36912 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
36914 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
36915 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
36918 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
36919 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
36921 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
36922 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
36925 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
36926 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
36928 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
36929 const struct builtin_description *d = bdesc_multi_arg + i;
36930 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36931 (enum ix86_builtin_func_type)
36932 d->flag, d->comparison);
36935 gcc_unreachable ();
36938 /* This returns the target-specific builtin with code CODE if
36939 current_function_decl has visibility on this builtin, which is checked
36940 using isa flags. Returns NULL_TREE otherwise. */
36942 static tree ix86_get_builtin (enum ix86_builtins code)
36944 struct cl_target_option *opts;
36945 tree target_tree = NULL_TREE;
36947 /* Determine the isa flags of current_function_decl. */
36949 if (current_function_decl)
36950 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36952 if (target_tree == NULL)
36953 target_tree = target_option_default_node;
36955 opts = TREE_TARGET_OPTION (target_tree);
36957 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36958 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
36959 return ix86_builtin_decl (code, true);
36960 else
36961 return NULL_TREE;
36964 /* Return the function decl for the target-specific builtin
36965 corresponding to the MPX builtin passed in FCODE. */
36966 static tree
36967 ix86_builtin_mpx_function (unsigned fcode)
36969 switch (fcode)
36971 case BUILT_IN_CHKP_BNDMK:
36972 return ix86_builtins[IX86_BUILTIN_BNDMK];
36974 case BUILT_IN_CHKP_BNDSTX:
36975 return ix86_builtins[IX86_BUILTIN_BNDSTX];
36977 case BUILT_IN_CHKP_BNDLDX:
36978 return ix86_builtins[IX86_BUILTIN_BNDLDX];
36980 case BUILT_IN_CHKP_BNDCL:
36981 return ix86_builtins[IX86_BUILTIN_BNDCL];
36983 case BUILT_IN_CHKP_BNDCU:
36984 return ix86_builtins[IX86_BUILTIN_BNDCU];
36986 case BUILT_IN_CHKP_BNDRET:
36987 return ix86_builtins[IX86_BUILTIN_BNDRET];
36989 case BUILT_IN_CHKP_INTERSECT:
36990 return ix86_builtins[IX86_BUILTIN_BNDINT];
36992 case BUILT_IN_CHKP_NARROW:
36993 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
36995 case BUILT_IN_CHKP_SIZEOF:
36996 return ix86_builtins[IX86_BUILTIN_SIZEOF];
36998 case BUILT_IN_CHKP_EXTRACT_LOWER:
36999 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
37001 case BUILT_IN_CHKP_EXTRACT_UPPER:
37002 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
37004 default:
37005 return NULL_TREE;
37008 gcc_unreachable ();
37011 /* Helper function for ix86_load_bounds and ix86_store_bounds.
37013 Return an address to be used to load/store bounds for pointer
37014 passed in SLOT.
37016 SLOT_NO is an integer constant holding the number of a target
37017 dependent special slot to be used in case SLOT is not a memory.
37019 SPECIAL_BASE is a pointer to be used as a base of fake address
37020 to access special slots in Bounds Table. SPECIAL_BASE[-1],
37021 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
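/* Illustrative example: with 64-bit Pmode, special slot 0 is addressed
   at SPECIAL_BASE - 8 and slot 1 at SPECIAL_BASE - 16; in general slot
   N lives at SPECIAL_BASE - (N + 1) * GET_MODE_SIZE (Pmode), which is
   what the plus_constant call below computes.  */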
37023 static rtx
37024 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
37026 rtx addr = NULL;
37028 /* A NULL slot means we pass bounds for a pointer not passed to the
37029 function at all. A register slot means we pass the pointer in a
37030 register. In both these cases bounds are passed via the Bounds
37031 Table. Since we do not have the actual pointer stored in memory,
37032 we have to use fake addresses to access the Bounds Table. We
37033 start with (special_base - sizeof (void*)) and decrease this
37034 address by pointer size to get addresses for other slots. */
37035 if (!slot || REG_P (slot))
37037 gcc_assert (CONST_INT_P (slot_no));
37038 addr = plus_constant (Pmode, special_base,
37039 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
37041 /* If the pointer is passed in memory then its address is used to
37042 access the Bounds Table. */
37043 else if (MEM_P (slot))
37045 addr = XEXP (slot, 0);
37046 if (!register_operand (addr, Pmode))
37047 addr = copy_addr_to_reg (addr);
37049 else
37050 gcc_unreachable ();
37052 return addr;
37055 /* Expand pass uses this hook to load bounds for function parameter
37056 PTR passed in SLOT in case its bounds are not passed in a register.
37058 If SLOT is a memory, then bounds are loaded as for a regular pointer
37059 loaded from memory. PTR may be NULL in case SLOT is a memory;
37060 in that case the value of PTR (if required) may be loaded from SLOT.
37062 If SLOT is NULL or a register then SLOT_NO is an integer constant
37063 holding the number of the target dependent special slot which should be
37064 used to obtain bounds.
37066 Return loaded bounds. */
37068 static rtx
37069 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
37071 rtx reg = gen_reg_rtx (BNDmode);
37072 rtx addr;
37074 /* Get address to be used to access Bounds Table. Special slots start
37075 at the location of return address of the current function. */
37076 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
37078 /* Load pointer value from a memory if we don't have it. */
37079 if (!ptr)
37081 gcc_assert (MEM_P (slot));
37082 ptr = copy_addr_to_reg (slot);
37085 if (!register_operand (ptr, Pmode))
37086 ptr = ix86_zero_extend_to_Pmode (ptr);
37088 emit_insn (BNDmode == BND64mode
37089 ? gen_bnd64_ldx (reg, addr, ptr)
37090 : gen_bnd32_ldx (reg, addr, ptr));
37092 return reg;
37095 /* Expand pass uses this hook to store BOUNDS for call argument PTR
37096 passed in SLOT in case BOUNDS are not passed in a register.
37098 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
37099 stored in memory. PTR may be NULL in case SLOT is a memory;
37100 in that case the value of PTR (if required) may be loaded from SLOT.
37102 If SLOT is NULL or a register then SLOT_NO is an integer constant
37103 holding the number of the target dependent special slot which should be
37104 used to store BOUNDS. */
37106 static void
37107 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
37109 rtx addr;
37111 /* Get address to be used to access Bounds Table. Special slots start
37112 at the location of return address of a called function. */
37113 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
37115 /* Load pointer value from a memory if we don't have it. */
37116 if (!ptr)
37118 gcc_assert (MEM_P (slot));
37119 ptr = copy_addr_to_reg (slot);
37122 if (!register_operand (ptr, Pmode))
37123 ptr = ix86_zero_extend_to_Pmode (ptr);
37125 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
37126 if (!register_operand (bounds, BNDmode))
37127 bounds = copy_to_mode_reg (BNDmode, bounds);
37129 emit_insn (BNDmode == BND64mode
37130 ? gen_bnd64_stx (addr, ptr, bounds)
37131 : gen_bnd32_stx (addr, ptr, bounds));
37134 /* Load and return bounds returned by function in SLOT. */
37136 static rtx
37137 ix86_load_returned_bounds (rtx slot)
37139 rtx res;
37141 gcc_assert (REG_P (slot));
37142 res = gen_reg_rtx (BNDmode);
37143 emit_move_insn (res, slot);
37145 return res;
37148 /* Store BOUNDS returned by function into SLOT. */
37150 static void
37151 ix86_store_returned_bounds (rtx slot, rtx bounds)
37153 gcc_assert (REG_P (slot));
37154 emit_move_insn (slot, bounds);
37157 /* Returns a function decl for a vectorized version of the combined function
37158 with combined_fn code FN and the result vector type TYPE_OUT, or NULL_TREE
37159 if it is not available. */
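/* Illustrative example (assuming TARGET_SSE4_1 and !flag_trapping_math):
   vectorizing floor () with a V4DFmode result and input type resolves to
   the decl for IX86_BUILTIN_FLOORPD256 via the CASE_CFN_FLOOR handling
   below; combinations not matched here may still be resolved by the
   vector math library handler at the end of the function.  */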
37161 static tree
37162 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
37163 tree type_in)
37165 machine_mode in_mode, out_mode;
37166 int in_n, out_n;
37168 if (TREE_CODE (type_out) != VECTOR_TYPE
37169 || TREE_CODE (type_in) != VECTOR_TYPE)
37170 return NULL_TREE;
37172 out_mode = TYPE_MODE (TREE_TYPE (type_out));
37173 out_n = TYPE_VECTOR_SUBPARTS (type_out);
37174 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37175 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37177 switch (fn)
37179 CASE_CFN_EXP2:
37180 if (out_mode == SFmode && in_mode == SFmode)
37182 if (out_n == 16 && in_n == 16)
37183 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
37185 break;
37187 CASE_CFN_IFLOOR:
37188 CASE_CFN_LFLOOR:
37189 CASE_CFN_LLFLOOR:
37190 /* The round insn does not trap on denormals. */
37191 if (flag_trapping_math || !TARGET_SSE4_1)
37192 break;
37194 if (out_mode == SImode && in_mode == DFmode)
37196 if (out_n == 4 && in_n == 2)
37197 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
37198 else if (out_n == 8 && in_n == 4)
37199 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
37200 else if (out_n == 16 && in_n == 8)
37201 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
37203 if (out_mode == SImode && in_mode == SFmode)
37205 if (out_n == 4 && in_n == 4)
37206 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
37207 else if (out_n == 8 && in_n == 8)
37208 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
37209 else if (out_n == 16 && in_n == 16)
37210 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
37212 break;
37214 CASE_CFN_ICEIL:
37215 CASE_CFN_LCEIL:
37216 CASE_CFN_LLCEIL:
37217 /* The round insn does not trap on denormals. */
37218 if (flag_trapping_math || !TARGET_SSE4_1)
37219 break;
37221 if (out_mode == SImode && in_mode == DFmode)
37223 if (out_n == 4 && in_n == 2)
37224 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
37225 else if (out_n == 8 && in_n == 4)
37226 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
37227 else if (out_n == 16 && in_n == 8)
37228 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
37230 if (out_mode == SImode && in_mode == SFmode)
37232 if (out_n == 4 && in_n == 4)
37233 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
37234 else if (out_n == 8 && in_n == 8)
37235 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
37236 else if (out_n == 16 && in_n == 16)
37237 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
37239 break;
37241 CASE_CFN_IRINT:
37242 CASE_CFN_LRINT:
37243 CASE_CFN_LLRINT:
37244 if (out_mode == SImode && in_mode == DFmode)
37246 if (out_n == 4 && in_n == 2)
37247 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
37248 else if (out_n == 8 && in_n == 4)
37249 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
37250 else if (out_n == 16 && in_n == 8)
37251 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
37253 if (out_mode == SImode && in_mode == SFmode)
37255 if (out_n == 4 && in_n == 4)
37256 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
37257 else if (out_n == 8 && in_n == 8)
37258 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
37259 else if (out_n == 16 && in_n == 16)
37260 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
37262 break;
37264 CASE_CFN_IROUND:
37265 CASE_CFN_LROUND:
37266 CASE_CFN_LLROUND:
37267 /* The round insn does not trap on denormals. */
37268 if (flag_trapping_math || !TARGET_SSE4_1)
37269 break;
37271 if (out_mode == SImode && in_mode == DFmode)
37273 if (out_n == 4 && in_n == 2)
37274 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
37275 else if (out_n == 8 && in_n == 4)
37276 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
37277 else if (out_n == 16 && in_n == 8)
37278 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
37280 if (out_mode == SImode && in_mode == SFmode)
37282 if (out_n == 4 && in_n == 4)
37283 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
37284 else if (out_n == 8 && in_n == 8)
37285 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
37286 else if (out_n == 16 && in_n == 16)
37287 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
37289 break;
37291 CASE_CFN_FLOOR:
37292 /* The round insn does not trap on denormals. */
37293 if (flag_trapping_math || !TARGET_SSE4_1)
37294 break;
37296 if (out_mode == DFmode && in_mode == DFmode)
37298 if (out_n == 2 && in_n == 2)
37299 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
37300 else if (out_n == 4 && in_n == 4)
37301 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
37302 else if (out_n == 8 && in_n == 8)
37303 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
37305 if (out_mode == SFmode && in_mode == SFmode)
37307 if (out_n == 4 && in_n == 4)
37308 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
37309 else if (out_n == 8 && in_n == 8)
37310 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
37311 else if (out_n == 16 && in_n == 16)
37312 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
37314 break;
37316 CASE_CFN_CEIL:
37317 /* The round insn does not trap on denormals. */
37318 if (flag_trapping_math || !TARGET_SSE4_1)
37319 break;
37321 if (out_mode == DFmode && in_mode == DFmode)
37323 if (out_n == 2 && in_n == 2)
37324 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
37325 else if (out_n == 4 && in_n == 4)
37326 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
37327 else if (out_n == 8 && in_n == 8)
37328 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
37330 if (out_mode == SFmode && in_mode == SFmode)
37332 if (out_n == 4 && in_n == 4)
37333 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
37334 else if (out_n == 8 && in_n == 8)
37335 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
37336 else if (out_n == 16 && in_n == 16)
37337 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
37339 break;
37341 CASE_CFN_TRUNC:
37342 /* The round insn does not trap on denormals. */
37343 if (flag_trapping_math || !TARGET_SSE4_1)
37344 break;
37346 if (out_mode == DFmode && in_mode == DFmode)
37348 if (out_n == 2 && in_n == 2)
37349 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
37350 else if (out_n == 4 && in_n == 4)
37351 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
37352 else if (out_n == 8 && in_n == 8)
37353 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
37355 if (out_mode == SFmode && in_mode == SFmode)
37357 if (out_n == 4 && in_n == 4)
37358 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
37359 else if (out_n == 8 && in_n == 8)
37360 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
37361 else if (out_n == 16 && in_n == 16)
37362 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
37364 break;
37366 CASE_CFN_RINT:
37367 /* The round insn does not trap on denormals. */
37368 if (flag_trapping_math || !TARGET_SSE4_1)
37369 break;
37371 if (out_mode == DFmode && in_mode == DFmode)
37373 if (out_n == 2 && in_n == 2)
37374 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
37375 else if (out_n == 4 && in_n == 4)
37376 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
37378 if (out_mode == SFmode && in_mode == SFmode)
37380 if (out_n == 4 && in_n == 4)
37381 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
37382 else if (out_n == 8 && in_n == 8)
37383 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
37385 break;
37387 CASE_CFN_FMA:
37388 if (out_mode == DFmode && in_mode == DFmode)
37390 if (out_n == 2 && in_n == 2)
37391 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
37392 if (out_n == 4 && in_n == 4)
37393 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
37395 if (out_mode == SFmode && in_mode == SFmode)
37397 if (out_n == 4 && in_n == 4)
37398 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
37399 if (out_n == 8 && in_n == 8)
37400 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
37402 break;
37404 default:
37405 break;
37408 /* Dispatch to a handler for a vectorization library. */
37409 if (ix86_veclib_handler)
37410 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
37412 return NULL_TREE;
37415 /* Handler for an SVML-style interface to
37416 a library with vectorized intrinsics. */
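/* Name mangling sketch (derived from the code below): a 4-wide
   single-precision sin becomes "vmlsSin4" and a 2-wide double-precision
   sin becomes "vmldSin2", while logf and log are special-cased as
   "vmlsLn4" and "vmldLn2".  */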
37418 static tree
37419 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
37421 char name[20];
37422 tree fntype, new_fndecl, args;
37423 unsigned arity;
37424 const char *bname;
37425 machine_mode el_mode, in_mode;
37426 int n, in_n;
37428 /* The SVML is suitable for unsafe math only. */
37429 if (!flag_unsafe_math_optimizations)
37430 return NULL_TREE;
37432 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37433 n = TYPE_VECTOR_SUBPARTS (type_out);
37434 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37435 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37436 if (el_mode != in_mode
37437 || n != in_n)
37438 return NULL_TREE;
37440 switch (fn)
37442 CASE_CFN_EXP:
37443 CASE_CFN_LOG:
37444 CASE_CFN_LOG10:
37445 CASE_CFN_POW:
37446 CASE_CFN_TANH:
37447 CASE_CFN_TAN:
37448 CASE_CFN_ATAN:
37449 CASE_CFN_ATAN2:
37450 CASE_CFN_ATANH:
37451 CASE_CFN_CBRT:
37452 CASE_CFN_SINH:
37453 CASE_CFN_SIN:
37454 CASE_CFN_ASINH:
37455 CASE_CFN_ASIN:
37456 CASE_CFN_COSH:
37457 CASE_CFN_COS:
37458 CASE_CFN_ACOSH:
37459 CASE_CFN_ACOS:
37460 if ((el_mode != DFmode || n != 2)
37461 && (el_mode != SFmode || n != 4))
37462 return NULL_TREE;
37463 break;
37465 default:
37466 return NULL_TREE;
37469 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
37470 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
37472 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
37473 strcpy (name, "vmlsLn4");
37474 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
37475 strcpy (name, "vmldLn2");
37476 else if (n == 4)
37478 sprintf (name, "vmls%s", bname+10);
37479 name[strlen (name)-1] = '4';
37481 else
37482 sprintf (name, "vmld%s2", bname+10);
37484 /* Convert to uppercase. */
37485 name[4] &= ~0x20;
37487 arity = 0;
37488 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
37489 arity++;
37491 if (arity == 1)
37492 fntype = build_function_type_list (type_out, type_in, NULL);
37493 else
37494 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37496 /* Build a function declaration for the vectorized function. */
37497 new_fndecl = build_decl (BUILTINS_LOCATION,
37498 FUNCTION_DECL, get_identifier (name), fntype);
37499 TREE_PUBLIC (new_fndecl) = 1;
37500 DECL_EXTERNAL (new_fndecl) = 1;
37501 DECL_IS_NOVOPS (new_fndecl) = 1;
37502 TREE_READONLY (new_fndecl) = 1;
37504 return new_fndecl;
37507 /* Handler for an ACML-style interface to
37508 a library with vectorized intrinsics. */
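/* Name mangling sketch (derived from the code below): the "__vr.._"
   template is filled in per element mode and width, so a 2-wide double
   sin maps to "__vrd2_sin" and a 4-wide float sin to "__vrs4_sinf".  */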
37510 static tree
37511 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
37513 char name[20] = "__vr.._";
37514 tree fntype, new_fndecl, args;
37515 unsigned arity;
37516 const char *bname;
37517 machine_mode el_mode, in_mode;
37518 int n, in_n;
37520 /* The ACML is 64-bit only and suitable for unsafe math only, as
37521 it does not correctly support parts of IEEE with the required
37522 precision such as denormals. */
37523 if (!TARGET_64BIT
37524 || !flag_unsafe_math_optimizations)
37525 return NULL_TREE;
37527 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37528 n = TYPE_VECTOR_SUBPARTS (type_out);
37529 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37530 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37531 if (el_mode != in_mode
37532 || n != in_n)
37533 return NULL_TREE;
37535 switch (fn)
37537 CASE_CFN_SIN:
37538 CASE_CFN_COS:
37539 CASE_CFN_EXP:
37540 CASE_CFN_LOG:
37541 CASE_CFN_LOG2:
37542 CASE_CFN_LOG10:
37543 if (el_mode == DFmode && n == 2)
37545 name[4] = 'd';
37546 name[5] = '2';
37548 else if (el_mode == SFmode && n == 4)
37550 name[4] = 's';
37551 name[5] = '4';
37553 else
37554 return NULL_TREE;
37555 break;
37557 default:
37558 return NULL_TREE;
37561 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
37562 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
37563 sprintf (name + 7, "%s", bname+10);
37565 arity = 0;
37566 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
37567 arity++;
37569 if (arity == 1)
37570 fntype = build_function_type_list (type_out, type_in, NULL);
37571 else
37572 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37574 /* Build a function declaration for the vectorized function. */
37575 new_fndecl = build_decl (BUILTINS_LOCATION,
37576 FUNCTION_DECL, get_identifier (name), fntype);
37577 TREE_PUBLIC (new_fndecl) = 1;
37578 DECL_EXTERNAL (new_fndecl) = 1;
37579 DECL_IS_NOVOPS (new_fndecl) = 1;
37580 TREE_READONLY (new_fndecl) = 1;
37582 return new_fndecl;
37585 /* Returns a decl of a function that implements gather load with
37586 memory type MEM_VECTYPE and index type INDEX_TYPE and SCALE.
37587 Return NULL_TREE if it is not available. */
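/* Illustrative example: with TARGET_AVX2 and TARGET_AVX512VL, gathering
   V2DFmode data with a signed SImode index type and scale 8 resolves to
   the decl for IX86_BUILTIN_GATHER3SIV2DF in the switch below; without
   AVX512VL the AVX2 IX86_BUILTIN_GATHERSIV2DF decl is returned
   instead.  */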
37589 static tree
37590 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37591 const_tree index_type, int scale)
37593 bool si;
37594 enum ix86_builtins code;
37596 if (! TARGET_AVX2)
37597 return NULL_TREE;
37599 if ((TREE_CODE (index_type) != INTEGER_TYPE
37600 && !POINTER_TYPE_P (index_type))
37601 || (TYPE_MODE (index_type) != SImode
37602 && TYPE_MODE (index_type) != DImode))
37603 return NULL_TREE;
37605 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37606 return NULL_TREE;
37608 /* v*gather* insn sign extends index to pointer mode. */
37609 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37610 && TYPE_UNSIGNED (index_type))
37611 return NULL_TREE;
37613 if (scale <= 0
37614 || scale > 8
37615 || (scale & (scale - 1)) != 0)
37616 return NULL_TREE;
37618 si = TYPE_MODE (index_type) == SImode;
37619 switch (TYPE_MODE (mem_vectype))
37621 case E_V2DFmode:
37622 if (TARGET_AVX512VL)
37623 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
37624 else
37625 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37626 break;
37627 case E_V4DFmode:
37628 if (TARGET_AVX512VL)
37629 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
37630 else
37631 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37632 break;
37633 case E_V2DImode:
37634 if (TARGET_AVX512VL)
37635 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
37636 else
37637 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37638 break;
37639 case E_V4DImode:
37640 if (TARGET_AVX512VL)
37641 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
37642 else
37643 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37644 break;
37645 case E_V4SFmode:
37646 if (TARGET_AVX512VL)
37647 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
37648 else
37649 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37650 break;
37651 case E_V8SFmode:
37652 if (TARGET_AVX512VL)
37653 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
37654 else
37655 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37656 break;
37657 case E_V4SImode:
37658 if (TARGET_AVX512VL)
37659 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
37660 else
37661 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37662 break;
37663 case E_V8SImode:
37664 if (TARGET_AVX512VL)
37665 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
37666 else
37667 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37668 break;
37669 case E_V8DFmode:
37670 if (TARGET_AVX512F)
37671 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37672 else
37673 return NULL_TREE;
37674 break;
37675 case E_V8DImode:
37676 if (TARGET_AVX512F)
37677 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37678 else
37679 return NULL_TREE;
37680 break;
37681 case E_V16SFmode:
37682 if (TARGET_AVX512F)
37683 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37684 else
37685 return NULL_TREE;
37686 break;
37687 case E_V16SImode:
37688 if (TARGET_AVX512F)
37689 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37690 else
37691 return NULL_TREE;
37692 break;
37693 default:
37694 return NULL_TREE;
37697 return ix86_get_builtin (code);
37700 /* Returns a decl of a function that implements scatter store with
37701 register type VECTYPE and index type INDEX_TYPE and SCALE.
37702 Return NULL_TREE if it is not available. */
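/* Illustrative example: with TARGET_AVX512F, scattering V8DFmode data
   with a signed SImode index type and scale 4 selects
   IX86_BUILTIN_SCATTERALTSIV8DF in the switch below, while a DImode
   index selects IX86_BUILTIN_SCATTERDIV8DF.  */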
37704 static tree
37705 ix86_vectorize_builtin_scatter (const_tree vectype,
37706 const_tree index_type, int scale)
37708 bool si;
37709 enum ix86_builtins code;
37711 if (!TARGET_AVX512F)
37712 return NULL_TREE;
37714 if ((TREE_CODE (index_type) != INTEGER_TYPE
37715 && !POINTER_TYPE_P (index_type))
37716 || (TYPE_MODE (index_type) != SImode
37717 && TYPE_MODE (index_type) != DImode))
37718 return NULL_TREE;
37720 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37721 return NULL_TREE;
37723 /* v*scatter* insn sign extends index to pointer mode. */
37724 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37725 && TYPE_UNSIGNED (index_type))
37726 return NULL_TREE;
37728 /* Scale can be 1, 2, 4 or 8. */
37729 if (scale <= 0
37730 || scale > 8
37731 || (scale & (scale - 1)) != 0)
37732 return NULL_TREE;
37734 si = TYPE_MODE (index_type) == SImode;
37735 switch (TYPE_MODE (vectype))
37737 case E_V8DFmode:
37738 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
37739 break;
37740 case E_V8DImode:
37741 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
37742 break;
37743 case E_V16SFmode:
37744 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
37745 break;
37746 case E_V16SImode:
37747 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
37748 break;
37749 default:
37750 return NULL_TREE;
37753 return ix86_builtins[code];
37756 /* Return true if it is safe to use the rsqrt optabs to optimize
37757 1.0/sqrt. */
37759 static bool
37760 use_rsqrt_p ()
37762 return (TARGET_SSE_MATH
37763 && flag_finite_math_only
37764 && !flag_trapping_math
37765 && flag_unsafe_math_optimizations);
37768 /* Returns a decl for a target-specific builtin that implements
37769 the reciprocal of the function, or NULL_TREE if not available. */
37771 static tree
37772 ix86_builtin_reciprocal (tree fndecl)
37774 switch (DECL_FUNCTION_CODE (fndecl))
37776 /* Vectorized version of sqrt to rsqrt conversion. */
37777 case IX86_BUILTIN_SQRTPS_NR:
37778 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37780 case IX86_BUILTIN_SQRTPS_NR256:
37781 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37783 default:
37784 return NULL_TREE;
37788 /* Helper for avx_vpermilps256_operand et al. This is also used by
37789 the expansion functions to turn the parallel back into a mask.
37790 The return value is 0 for no match and the imm8+1 for a match. */
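/* Worked example: for a V4SFmode parallel (1 0 3 2) each element index
   is packed two bits at a time, giving imm8 0xb1, so the function
   returns 0xb2 (the mask plus one, to keep success non-zero).  */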
37793 avx_vpermilp_parallel (rtx par, machine_mode mode)
37795 unsigned i, nelt = GET_MODE_NUNITS (mode);
37796 unsigned mask = 0;
37797 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37799 if (XVECLEN (par, 0) != (int) nelt)
37800 return 0;
37802 /* Validate that all of the elements are constants, and not totally
37803 out of range. Copy the data into an integral array to make the
37804 subsequent checks easier. */
37805 for (i = 0; i < nelt; ++i)
37807 rtx er = XVECEXP (par, 0, i);
37808 unsigned HOST_WIDE_INT ei;
37810 if (!CONST_INT_P (er))
37811 return 0;
37812 ei = INTVAL (er);
37813 if (ei >= nelt)
37814 return 0;
37815 ipar[i] = ei;
37818 switch (mode)
37820 case E_V8DFmode:
37821 /* In the 512-bit DFmode case, we can only move elements within
37822 a 128-bit lane. First fill the second part of the mask,
37823 then fallthru. */
37824 for (i = 4; i < 6; ++i)
37826 if (ipar[i] < 4 || ipar[i] >= 6)
37827 return 0;
37828 mask |= (ipar[i] - 4) << i;
37830 for (i = 6; i < 8; ++i)
37832 if (ipar[i] < 6)
37833 return 0;
37834 mask |= (ipar[i] - 6) << i;
37836 /* FALLTHRU */
37838 case E_V4DFmode:
37839 /* In the 256-bit DFmode case, we can only move elements within
37840 a 128-bit lane. */
37841 for (i = 0; i < 2; ++i)
37843 if (ipar[i] >= 2)
37844 return 0;
37845 mask |= ipar[i] << i;
37847 for (i = 2; i < 4; ++i)
37849 if (ipar[i] < 2)
37850 return 0;
37851 mask |= (ipar[i] - 2) << i;
37853 break;
37855 case E_V16SFmode:
37856 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37857 must mirror the permutation in the lower 256 bits. */
37858 for (i = 0; i < 8; ++i)
37859 if (ipar[i] + 8 != ipar[i + 8])
37860 return 0;
37861 /* FALLTHRU */
37863 case E_V8SFmode:
37864 /* In the 256-bit SFmode case, we have full freedom of
37865 movement within the low 128-bit lane, but the high 128-bit
37866 lane must mirror the exact same pattern. */
37867 for (i = 0; i < 4; ++i)
37868 if (ipar[i] + 4 != ipar[i + 4])
37869 return 0;
37870 nelt = 4;
37871 /* FALLTHRU */
37873 case E_V2DFmode:
37874 case E_V4SFmode:
37875 /* In the 128-bit case, we have full freedom in the placement of
37876 the elements from the source operand. */
37877 for (i = 0; i < nelt; ++i)
37878 mask |= ipar[i] << (i * (nelt / 2));
37879 break;
37881 default:
37882 gcc_unreachable ();
37885 /* Make sure success has a non-zero value by adding one. */
37886 return mask + 1;
37889 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37890 the expansion functions to turn the parallel back into a mask.
37891 The return value is 0 for no match and the imm8+1 for a match. */
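/* Worked example: for a V8SFmode parallel (0 1 2 3 12 13 14 15) the
   selected 128-bit halves are 0 (low half of the first operand) and 3
   (high half of the second operand), giving imm8 0x30 and a return
   value of 0x31.  */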
37894 avx_vperm2f128_parallel (rtx par, machine_mode mode)
37896 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37897 unsigned mask = 0;
37898 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37900 if (XVECLEN (par, 0) != (int) nelt)
37901 return 0;
37903 /* Validate that all of the elements are constants, and not totally
37904 out of range. Copy the data into an integral array to make the
37905 subsequent checks easier. */
37906 for (i = 0; i < nelt; ++i)
37908 rtx er = XVECEXP (par, 0, i);
37909 unsigned HOST_WIDE_INT ei;
37911 if (!CONST_INT_P (er))
37912 return 0;
37913 ei = INTVAL (er);
37914 if (ei >= 2 * nelt)
37915 return 0;
37916 ipar[i] = ei;
37919 /* Validate that the halves of the permute are halves. */
37920 for (i = 0; i < nelt2 - 1; ++i)
37921 if (ipar[i] + 1 != ipar[i + 1])
37922 return 0;
37923 for (i = nelt2; i < nelt - 1; ++i)
37924 if (ipar[i] + 1 != ipar[i + 1])
37925 return 0;
37927 /* Reconstruct the mask. */
37928 for (i = 0; i < 2; ++i)
37930 unsigned e = ipar[i * nelt2];
37931 if (e % nelt2)
37932 return 0;
37933 e /= nelt2;
37934 mask |= e << (i * 4);
37937 /* Make sure success has a non-zero value by adding one. */
37938 return mask + 1;
37941 /* Return a register priority for hard reg REGNO. */
37942 static int
37943 ix86_register_priority (int hard_regno)
37945 /* ebp and r13 as the base always want a displacement, r12 as the
37946 base always wants an index. So discourage their usage in an
37947 address. */
37948 if (hard_regno == R12_REG || hard_regno == R13_REG)
37949 return 0;
37950 if (hard_regno == BP_REG)
37951 return 1;
37952 /* New x86-64 int registers result in bigger code size. Discourage
37953 them. */
37954 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37955 return 2;
37956 /* New x86-64 SSE registers result in bigger code size. Discourage
37957 them. */
37958 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37959 return 2;
37960 /* Usage of AX register results in smaller code. Prefer it. */
37961 if (hard_regno == AX_REG)
37962 return 4;
37963 return 3;
37966 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37968 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37969 QImode must go into class Q_REGS.
37970 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37971 movdf to do mem-to-mem moves through integer regs. */
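/* For example: a nonzero CONST_DOUBLE requested in an SSE (or MMX or
   mask) class yields NO_REGS below, forcing the constant into memory,
   while non-constant QImode data is narrowed towards Q_REGS.  */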
37973 static reg_class_t
37974 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37976 machine_mode mode = GET_MODE (x);
37978 /* We're only allowed to return a subclass of CLASS. Many of the
37979 following checks fail for NO_REGS, so eliminate that early. */
37980 if (regclass == NO_REGS)
37981 return NO_REGS;
37983 /* All classes can load zeros. */
37984 if (x == CONST0_RTX (mode))
37985 return regclass;
37987 /* Force constants into memory if we are loading a (nonzero) constant into
37988 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37989 instructions to load from a constant. */
37990 if (CONSTANT_P (x)
37991 && (MAYBE_MMX_CLASS_P (regclass)
37992 || MAYBE_SSE_CLASS_P (regclass)
37993 || MAYBE_MASK_CLASS_P (regclass)))
37994 return NO_REGS;
37996 /* Floating-point constants need more complex checks. */
37997 if (CONST_DOUBLE_P (x))
37999 /* General regs can load everything. */
38000 if (INTEGER_CLASS_P (regclass))
38001 return regclass;
38003 /* Floats can load 0 and 1 plus some others. Note that we eliminated
38004 zero above. We only want to wind up preferring 80387 registers if
38005 we plan on doing computation with them. */
38006 if (IS_STACK_MODE (mode)
38007 && standard_80387_constant_p (x) > 0)
38009 /* Limit class to FP regs. */
38010 if (FLOAT_CLASS_P (regclass))
38011 return FLOAT_REGS;
38012 else if (regclass == FP_TOP_SSE_REGS)
38013 return FP_TOP_REG;
38014 else if (regclass == FP_SECOND_SSE_REGS)
38015 return FP_SECOND_REG;
38018 return NO_REGS;
38021 /* Prefer SSE regs only, if we can use them for math. */
38022 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38023 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
38025 /* Generally when we see PLUS here, it's the function invariant
38026 (plus soft-fp const_int). Which can only be computed into general
38027 regs. */
38028 if (GET_CODE (x) == PLUS)
38029 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
38031 /* QImode constants are easy to load, but non-constant QImode data
38032 must go into Q_REGS. */
38033 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
38035 if (Q_CLASS_P (regclass))
38036 return regclass;
38037 else if (reg_class_subset_p (Q_REGS, regclass))
38038 return Q_REGS;
38039 else
38040 return NO_REGS;
38043 return regclass;
38046 /* Discourage putting floating-point values in SSE registers unless
38047 SSE math is being used, and likewise for the 387 registers. */
38048 static reg_class_t
38049 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
38051 machine_mode mode = GET_MODE (x);
38053 /* Restrict the output reload class to the register bank that we are doing
38054 math on. If we would like not to return a subset of CLASS, reject this
38055 alternative: if reload cannot do this, it will still use its choice. */
38056 mode = GET_MODE (x);
38057 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38058 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
38060 if (IS_STACK_MODE (mode))
38062 if (regclass == FP_TOP_SSE_REGS)
38063 return FP_TOP_REG;
38064 else if (regclass == FP_SECOND_SSE_REGS)
38065 return FP_SECOND_REG;
38066 else
38067 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
38070 return regclass;
38073 static reg_class_t
38074 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
38075 machine_mode mode, secondary_reload_info *sri)
38077 /* Double-word spills from general registers to non-offsettable memory
38078 references (zero-extended addresses) require special handling. */
38079 if (TARGET_64BIT
38080 && MEM_P (x)
38081 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
38082 && INTEGER_CLASS_P (rclass)
38083 && !offsettable_memref_p (x))
38085 sri->icode = (in_p
38086 ? CODE_FOR_reload_noff_load
38087 : CODE_FOR_reload_noff_store);
38088 /* Add the cost of moving address to a temporary. */
38089 sri->extra_cost = 1;
38091 return NO_REGS;
38094 /* QImode spills from non-QI registers require
38095 intermediate register on 32bit targets. */
38096 if (mode == QImode
38097 && ((!TARGET_64BIT && !in_p
38098 && INTEGER_CLASS_P (rclass)
38099 && MAYBE_NON_Q_CLASS_P (rclass))
38100 || (!TARGET_AVX512DQ
38101 && MAYBE_MASK_CLASS_P (rclass))))
38103 int regno = true_regnum (x);
38105 /* Return Q_REGS if the operand is in memory. */
38106 if (regno == -1)
38107 return Q_REGS;
38109 return NO_REGS;
38112 /* This condition handles the corner case where an expression involving
38113 pointers gets vectorized. We're trying to use the address of a
38114 stack slot as a vector initializer.
38116 (set (reg:V2DI 74 [ vect_cst_.2 ])
38117 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
38119 Eventually frame gets turned into sp+offset like this:
38121 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38122 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38123 (const_int 392 [0x188]))))
38125 That later gets turned into:
38127 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38128 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38129 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
38131 We'll have the following reload recorded:
38133 Reload 0: reload_in (DI) =
38134 (plus:DI (reg/f:DI 7 sp)
38135 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
38136 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38137 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
38138 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
38139 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38140 reload_reg_rtx: (reg:V2DI 22 xmm1)
38142 This isn't going to work, since SSE instructions can't handle scalar
38143 additions. Returning GENERAL_REGS forces the addition into an integer
38144 register, and reload can then handle the subsequent reloads without problems. */
38146 if (in_p && GET_CODE (x) == PLUS
38147 && SSE_CLASS_P (rclass)
38148 && SCALAR_INT_MODE_P (mode))
38149 return GENERAL_REGS;
38151 return NO_REGS;
38154 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
38156 static bool
38157 ix86_class_likely_spilled_p (reg_class_t rclass)
38159 switch (rclass)
38161 case AREG:
38162 case DREG:
38163 case CREG:
38164 case BREG:
38165 case AD_REGS:
38166 case SIREG:
38167 case DIREG:
38168 case SSE_FIRST_REG:
38169 case FP_TOP_REG:
38170 case FP_SECOND_REG:
38171 case BND_REGS:
38172 return true;
38174 default:
38175 break;
38178 return false;
38181 /* If we are copying between registers from different register sets
38182 (e.g. FP and integer), we may need a memory location.
38184 The function can't work reliably when one of the CLASSES is a class
38185 containing registers from multiple sets. We avoid this by never combining
38186 different sets in a single alternative in the machine description.
38187 Ensure that this constraint holds to avoid surprises.
38189 When STRICT is false, we are being called from REGISTER_MOVE_COST,
38190 so do not enforce these sanity checks.
38192 To optimize register_move_cost performance, define inline variant. */
38194 static inline bool
38195 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38196 reg_class_t class2, int strict)
38198 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
38199 return false;
38201 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
38202 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
38203 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
38204 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
38205 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
38206 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
38207 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
38208 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
38210 gcc_assert (!strict || lra_in_progress);
38211 return true;
38214 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
38215 return true;
38217 /* Between mask and general, we have moves no larger than word size. */
38218 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
38219 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
38220 return true;
38222 /* ??? This is a lie. We do have moves between mmx/general, and for
38223 mmx/sse2. But by saying we need secondary memory we discourage the
38224 register allocator from using the mmx registers unless needed. */
38225 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
38226 return true;
38228 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38230 /* SSE1 doesn't have any direct moves from other classes. */
38231 if (!TARGET_SSE2)
38232 return true;
38234 /* If the target says that inter-unit moves are more expensive
38235 than moving through memory, then don't generate them. */
38236 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
38237 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
38238 return true;
38240 /* Between SSE and general, we have moves no larger than word size. */
38241 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38242 return true;
38245 return false;
38248 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
38250 static bool
38251 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38252 reg_class_t class2)
38254 return inline_secondary_memory_needed (mode, class1, class2, true);
38257 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
38259 get_secondary_mem widens integral modes to BITS_PER_WORD.
38260 There is no need to emit a full 64-bit move on 64-bit targets
38261 for integral modes that can be moved with a 32-bit move. */
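/* For example, a QImode or HImode spill copy is widened only to SImode,
   so a 32-bit move suffices even on 64-bit targets. */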
38263 static machine_mode
38264 ix86_secondary_memory_needed_mode (machine_mode mode)
38266 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
38267 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
38268 return mode;
38271 /* Implement the TARGET_CLASS_MAX_NREGS hook.
38273 On the 80386, this is the size of MODE in words,
38274 except in the FP regs, where a single reg is always enough. */
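/* For example, a TImode value in an integer class needs two registers in
   64-bit mode and four in 32-bit mode, while any non-integer class needs
   just one register (two for complex modes). */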
38276 static unsigned char
38277 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
38279 if (MAYBE_INTEGER_CLASS_P (rclass))
38281 if (mode == XFmode)
38282 return (TARGET_64BIT ? 2 : 3);
38283 else if (mode == XCmode)
38284 return (TARGET_64BIT ? 4 : 6);
38285 else
38286 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38288 else
38290 if (COMPLEX_MODE_P (mode))
38291 return 2;
38292 else
38293 return 1;
38297 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
38299 static bool
38300 ix86_can_change_mode_class (machine_mode from, machine_mode to,
38301 reg_class_t regclass)
38303 if (from == to)
38304 return true;
38306 /* x87 registers can't do subreg at all, as all values are reformatted
38307 to extended precision. */
38308 if (MAYBE_FLOAT_CLASS_P (regclass))
38309 return false;
38311 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
38313 /* Vector registers do not support QI or HImode loads. If we don't
38314 disallow a change to these modes, reload will assume it's ok to
38315 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
38316 the vec_dupv4hi pattern. */
38317 if (GET_MODE_SIZE (from) < 4)
38318 return false;
38321 return true;
38324 /* Return the cost of moving data of mode M between a
38325 register and memory. A value of 2 is the default; this cost is
38326 relative to those in `REGISTER_MOVE_COST'.
38328 This function is used extensively by register_move_cost, which is used to
38329 build tables at startup, so make it inline in this case.
38330 When IN is 2, return maximum of in and out move cost.
38332 If moving between registers and memory is more expensive than
38333 between two registers, you should define this macro to express the
38334 relative cost.
38336 Also model the increased cost of moving QImode registers in
38337 non-Q_REGS classes.
38339 static inline int
38340 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
38341 int in)
38343 int cost;
38344 if (FLOAT_CLASS_P (regclass))
38346 int index;
38347 switch (mode)
38349 case E_SFmode:
38350 index = 0;
38351 break;
38352 case E_DFmode:
38353 index = 1;
38354 break;
38355 case E_XFmode:
38356 index = 2;
38357 break;
38358 default:
38359 return 100;
38361 if (in == 2)
38362 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
38363 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
38365 if (SSE_CLASS_P (regclass))
38367 int index;
38368 switch (GET_MODE_SIZE (mode))
38370 case 4:
38371 index = 0;
38372 break;
38373 case 8:
38374 index = 1;
38375 break;
38376 case 16:
38377 index = 2;
38378 break;
38379 default:
38380 return 100;
38382 if (in == 2)
38383 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
38384 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
38386 if (MMX_CLASS_P (regclass))
38388 int index;
38389 switch (GET_MODE_SIZE (mode))
38391 case 4:
38392 index = 0;
38393 break;
38394 case 8:
38395 index = 1;
38396 break;
38397 default:
38398 return 100;
38400 if (in == 2)
38401 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
38402 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
38404 switch (GET_MODE_SIZE (mode))
38406 case 1:
38407 if (Q_CLASS_P (regclass) || TARGET_64BIT)
38409 if (!in)
38410 return ix86_cost->int_store[0];
38411 if (TARGET_PARTIAL_REG_DEPENDENCY
38412 && optimize_function_for_speed_p (cfun))
38413 cost = ix86_cost->movzbl_load;
38414 else
38415 cost = ix86_cost->int_load[0];
38416 if (in == 2)
38417 return MAX (cost, ix86_cost->int_store[0]);
38418 return cost;
38420 else
38422 if (in == 2)
38423 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
38424 if (in)
38425 return ix86_cost->movzbl_load;
38426 else
38427 return ix86_cost->int_store[0] + 4;
38429 break;
38430 case 2:
38431 if (in == 2)
38432 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
38433 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
38434 default:
38435 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
38436 if (mode == TFmode)
38437 mode = XFmode;
38438 if (in == 2)
38439 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
38440 else if (in)
38441 cost = ix86_cost->int_load[2];
38442 else
38443 cost = ix86_cost->int_store[2];
38444 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
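/* Implement TARGET_MEMORY_MOVE_COST; a thin wrapper around the inline
   variant above. IN is a bool here, so the IN == 2 (maximum of load and
   store cost) case is reached via ix86_register_move_cost below. */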
38448 static int
38449 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
38450 bool in)
38452 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
38456 /* Return the cost of moving data from a register in class CLASS1 to
38457 one in class CLASS2.
38459 It is not required that the cost always equal 2 when FROM is the same as TO;
38460 on some machines it is expensive to move between registers if they are not
38461 general registers. */
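/* Implement TARGET_REGISTER_MOVE_COST. */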
38463 static int
38464 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
38465 reg_class_t class2_i)
38467 enum reg_class class1 = (enum reg_class) class1_i;
38468 enum reg_class class2 = (enum reg_class) class2_i;
38470 /* In case we require secondary memory, compute cost of the store followed
38471 by load. In order to avoid bad register allocation choices, we need
38472 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
38474 if (inline_secondary_memory_needed (mode, class1, class2, false))
38476 int cost = 1;
38478 cost += inline_memory_move_cost (mode, class1, 2);
38479 cost += inline_memory_move_cost (mode, class2, 2);
38481 /* When copying from a general purpose register we may emit multiple
38482 stores followed by a single load, causing a memory size mismatch stall.
38483 Count this as an arbitrarily high cost of 20. */
38484 if (targetm.class_max_nregs (class1, mode)
38485 > targetm.class_max_nregs (class2, mode))
38486 cost += 20;
38488 /* In the case of FP/MMX moves, the registers actually overlap, and we
38489 have to switch modes in order to treat them differently. */
38490 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
38491 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
38492 cost += 20;
38494 return cost;
38497 /* Moves between SSE/MMX and integer unit are expensive. */
38498 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
38499 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38501 /* ??? By keeping the returned value relatively high, we limit the number
38502 of moves between integer and MMX/SSE registers for all targets.
38503 Additionally, a high value prevents a problem with ix86_modes_tieable_p (),
38504 where integer modes in MMX/SSE registers are not tieable
38505 because of missing QImode and HImode moves to, from or between
38506 MMX/SSE registers. */
38507 return MAX (8, ix86_cost->mmxsse_to_integer);
38509 if (MAYBE_FLOAT_CLASS_P (class1))
38510 return ix86_cost->fp_move;
38511 if (MAYBE_SSE_CLASS_P (class1))
38512 return ix86_cost->sse_move;
38513 if (MAYBE_MMX_CLASS_P (class1))
38514 return ix86_cost->mmx_move;
38515 return 2;
38518 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
38519 words of a value of mode MODE but can be less for certain modes in
38520 special long registers.
38522 Actually there are no two word move instructions for consecutive
38523 registers. And only registers 0-3 may have mov byte instructions
38524 applied to them. */
38526 static unsigned int
38527 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
38529 if (GENERAL_REGNO_P (regno))
38531 if (mode == XFmode)
38532 return TARGET_64BIT ? 2 : 3;
38533 if (mode == XCmode)
38534 return TARGET_64BIT ? 4 : 6;
38535 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38537 if (COMPLEX_MODE_P (mode))
38538 return 2;
38539 if (mode == V64SFmode || mode == V64SImode)
38540 return 4;
38541 return 1;
38544 /* Implement TARGET_HARD_REGNO_MODE_OK. */
38546 static bool
38547 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
38549 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
38550 if (CC_REGNO_P (regno))
38551 return GET_MODE_CLASS (mode) == MODE_CC;
38552 if (GET_MODE_CLASS (mode) == MODE_CC
38553 || GET_MODE_CLASS (mode) == MODE_RANDOM
38554 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
38555 return false;
38556 if (STACK_REGNO_P (regno))
38557 return VALID_FP_MODE_P (mode);
38558 if (MASK_REGNO_P (regno))
38559 return (VALID_MASK_REG_MODE (mode)
38560 || (TARGET_AVX512BW
38561 && VALID_MASK_AVX512BW_MODE (mode)));
38562 if (BND_REGNO_P (regno))
38563 return VALID_BND_REG_MODE (mode);
38564 if (SSE_REGNO_P (regno))
38566 /* We implement the move patterns for all vector modes into and
38567 out of SSE registers, even when no operation instructions
38568 are available. */
38570 /* For AVX-512 we allow, regardless of regno:
38571 - XI mode
38572 - any 512-bit wide vector mode
38573 - any scalar mode. */
38574 if (TARGET_AVX512F
38575 && (mode == XImode
38576 || VALID_AVX512F_REG_MODE (mode)
38577 || VALID_AVX512F_SCALAR_MODE (mode)))
38578 return true;
38580 /* For AVX-5124FMAPS allow V64SFmode for special regnos. */
38581 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
38582 && MOD4_SSE_REGNO_P (regno)
38583 && mode == V64SFmode)
38584 return true;
38586 /* For AVX-5124VNNIW allow V64SImode for special regnos. */
38587 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
38588 && MOD4_SSE_REGNO_P (regno)
38589 && mode == V64SImode)
38590 return true;
38592 /* TODO check for QI/HI scalars. */
38593 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
38594 if (TARGET_AVX512VL
38595 && (mode == OImode
38596 || mode == TImode
38597 || VALID_AVX256_REG_MODE (mode)
38598 || VALID_AVX512VL_128_REG_MODE (mode)))
38599 return true;
38601 /* xmm16-xmm31 are only available for AVX-512. */
38602 if (EXT_REX_SSE_REGNO_P (regno))
38603 return false;
38605 /* OImode and AVX modes are available only when AVX is enabled. */
38606 return ((TARGET_AVX
38607 && VALID_AVX256_REG_OR_OI_MODE (mode))
38608 || VALID_SSE_REG_MODE (mode)
38609 || VALID_SSE2_REG_MODE (mode)
38610 || VALID_MMX_REG_MODE (mode)
38611 || VALID_MMX_REG_MODE_3DNOW (mode));
38613 if (MMX_REGNO_P (regno))
38615 /* We implement the move patterns for 3DNOW modes even in MMX mode,
38616 so if the register is available at all, then we can move data of
38617 the given mode into or out of it. */
38618 return (VALID_MMX_REG_MODE (mode)
38619 || VALID_MMX_REG_MODE_3DNOW (mode));
38622 if (mode == QImode)
38624 /* Take care with QImode values - they can live in non-QI regs,
38625 but they then cause partial register stalls. */
38626 if (ANY_QI_REGNO_P (regno))
38627 return true;
38628 if (!TARGET_PARTIAL_REG_STALL)
38629 return true;
38630 /* LRA checks if the hard register is OK for the given mode.
38631 QImode values can live in non-QI regs, so we allow all
38632 registers here. */
38633 if (lra_in_progress)
38634 return true;
38635 return !can_create_pseudo_p ();
38637 /* We handle both integers and floats in the general purpose registers. */
38638 else if (VALID_INT_MODE_P (mode))
38639 return true;
38640 else if (VALID_FP_MODE_P (mode))
38641 return true;
38642 else if (VALID_DFP_MODE_P (mode))
38643 return true;
38644 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
38645 on to use that value in smaller contexts, this can easily force a
38646 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
38647 supporting DImode, allow it. */
38648 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
38649 return true;
38651 return false;
38654 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
38655 saves SSE registers across calls is Win64 (thus no need to check the
38656 current ABI here), and with AVX enabled Win64 only guarantees that
38657 the low 16 bytes are saved. */
38659 static bool
38660 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
38662 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
38665 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
38666 tieable integer mode. */
38668 static bool
38669 ix86_tieable_integer_mode_p (machine_mode mode)
38671 switch (mode)
38673 case E_HImode:
38674 case E_SImode:
38675 return true;
38677 case E_QImode:
38678 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
38680 case E_DImode:
38681 return TARGET_64BIT;
38683 default:
38684 return false;
38688 /* Implement TARGET_MODES_TIEABLE_P.
38690 Return true if MODE1 is accessible in a register that can hold MODE2
38691 without copying. That is, all register classes that can hold MODE2
38692 can also hold MODE1. */
38694 static bool
38695 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
38697 if (mode1 == mode2)
38698 return true;
38700 if (ix86_tieable_integer_mode_p (mode1)
38701 && ix86_tieable_integer_mode_p (mode2))
38702 return true;
38704 /* MODE2 being XFmode implies fp stack or general regs, which means we
38705 can tie any smaller floating point modes to it. Note that we do not
38706 tie this with TFmode. */
38707 if (mode2 == XFmode)
38708 return mode1 == SFmode || mode1 == DFmode;
38710 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
38711 that we can tie it with SFmode. */
38712 if (mode2 == DFmode)
38713 return mode1 == SFmode;
38715 /* If MODE2 is only appropriate for an SSE register, then tie with
38716 any other mode acceptable to SSE registers. */
38717 if (GET_MODE_SIZE (mode2) == 32
38718 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38719 return (GET_MODE_SIZE (mode1) == 32
38720 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38721 if (GET_MODE_SIZE (mode2) == 16
38722 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38723 return (GET_MODE_SIZE (mode1) == 16
38724 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38726 /* If MODE2 is appropriate for an MMX register, then tie
38727 with any other mode acceptable to MMX registers. */
38728 if (GET_MODE_SIZE (mode2) == 8
38729 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
38730 return (GET_MODE_SIZE (mode1) == 8
38731 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
38733 return false;
38736 /* Return the cost of moving between two registers of mode MODE. */
38738 static int
38739 ix86_set_reg_reg_cost (machine_mode mode)
38741 unsigned int units = UNITS_PER_WORD;
38743 switch (GET_MODE_CLASS (mode))
38745 default:
38746 break;
38748 case MODE_CC:
38749 units = GET_MODE_SIZE (CCmode);
38750 break;
38752 case MODE_FLOAT:
38753 if ((TARGET_SSE && mode == TFmode)
38754 || (TARGET_80387 && mode == XFmode)
38755 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
38756 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
38757 units = GET_MODE_SIZE (mode);
38758 break;
38760 case MODE_COMPLEX_FLOAT:
38761 if ((TARGET_SSE && mode == TCmode)
38762 || (TARGET_80387 && mode == XCmode)
38763 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
38764 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38765 units = GET_MODE_SIZE (mode);
38766 break;
38768 case MODE_VECTOR_INT:
38769 case MODE_VECTOR_FLOAT:
38770 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38771 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38772 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38773 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38774 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38775 units = GET_MODE_SIZE (mode);
38778 /* Return the cost of moving between two registers of mode MODE,
38779 assuming that the move will be in pieces of at most UNITS bytes. */
38780 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
38783 /* Return the cost of a vector operation in MODE, given that the scalar
38784 version has cost COST. If PARALLEL is true, assume that the CPU has
38785 more than one unit performing the operation. */
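/* For example, a V4SFmode operation with PARALLEL false costs 4 * COST;
   with PARALLEL true it normally costs just COST (2 * COST on
   TARGET_SSE_SPLIT_REGS targets). */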
38787 static int
38788 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
38790 if (!VECTOR_MODE_P (mode))
38791 return cost;
38793 if (!parallel)
38794 return cost * GET_MODE_NUNITS (mode);
38795 if (GET_MODE_BITSIZE (mode) == 128
38796 && TARGET_SSE_SPLIT_REGS)
38797 return cost * 2;
38798 if (GET_MODE_BITSIZE (mode) > 128
38799 && TARGET_AVX128_OPTIMAL)
38800 return cost * GET_MODE_BITSIZE (mode) / 128;
38801 return cost;
38804 /* Compute a (partial) cost for rtx X. Return true if the complete
38805 cost has been computed, and false if subexpressions should be
38806 scanned. In either case, *TOTAL contains the cost result. */
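/* Implement TARGET_RTX_COSTS. */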
38808 static bool
38809 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
38810 int *total, bool speed)
38812 rtx mask;
38813 enum rtx_code code = GET_CODE (x);
38814 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38815 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38816 int src_cost;
38817 machine_mode inner_mode = mode;
38818 if (VECTOR_MODE_P (mode))
38819 inner_mode = GET_MODE_INNER (mode);
38821 switch (code)
38823 case SET:
38824 if (register_operand (SET_DEST (x), VOIDmode)
38825 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38827 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38828 return true;
38831 if (register_operand (SET_SRC (x), VOIDmode))
38832 /* Avoid potentially incorrect high cost from rtx_costs
38833 for non-tieable SUBREGs. */
38834 src_cost = 0;
38835 else
38837 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
38839 if (CONSTANT_P (SET_SRC (x)))
38840 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
38841 a small value, possibly zero for cheap constants. */
38842 src_cost += COSTS_N_INSNS (1);
38845 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
38846 return true;
38848 case CONST_INT:
38849 case CONST:
38850 case LABEL_REF:
38851 case SYMBOL_REF:
38852 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38853 *total = 3;
38854 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38855 *total = 2;
38856 else if (flag_pic && SYMBOLIC_CONST (x)
38857 && !(TARGET_64BIT
38858 && (GET_CODE (x) == LABEL_REF
38859 || (GET_CODE (x) == SYMBOL_REF
38860 && SYMBOL_REF_LOCAL_P (x))))
38861 /* Use 0 cost for CONST to improve its propagation. */
38862 && (TARGET_64BIT || GET_CODE (x) != CONST))
38863 *total = 1;
38864 else
38865 *total = 0;
38866 return true;
38868 case CONST_DOUBLE:
38869 if (IS_STACK_MODE (mode))
38870 switch (standard_80387_constant_p (x))
38872 case -1:
38873 case 0:
38874 break;
38875 case 1: /* 0.0 */
38876 *total = 1;
38877 return true;
38878 default: /* Other constants */
38879 *total = 2;
38880 return true;
38882 /* FALLTHRU */
38884 case CONST_VECTOR:
38885 switch (standard_sse_constant_p (x, mode))
38887 case 0:
38888 break;
38889 case 1: /* 0: xor eliminates false dependency */
38890 *total = 0;
38891 return true;
38892 default: /* -1: cmp contains false dependency */
38893 *total = 1;
38894 return true;
38896 /* FALLTHRU */
38898 case CONST_WIDE_INT:
38899 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38900 it'll probably end up. Add a penalty for size. */
38901 *total = (COSTS_N_INSNS (1)
38902 + (!TARGET_64BIT && flag_pic)
38903 + (GET_MODE_SIZE (mode) <= 4
38904 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
38905 return true;
38907 case ZERO_EXTEND:
38908 /* The zero extension is often completely free on x86_64, so make
38909 it as cheap as possible. */
38910 if (TARGET_64BIT && mode == DImode
38911 && GET_MODE (XEXP (x, 0)) == SImode)
38912 *total = 1;
38913 else if (TARGET_ZERO_EXTEND_WITH_AND)
38914 *total = cost->add;
38915 else
38916 *total = cost->movzx;
38917 return false;
38919 case SIGN_EXTEND:
38920 *total = cost->movsx;
38921 return false;
38923 case ASHIFT:
38924 if (SCALAR_INT_MODE_P (mode)
38925 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38926 && CONST_INT_P (XEXP (x, 1)))
38928 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38929 if (value == 1)
38931 *total = cost->add;
38932 return false;
38934 if ((value == 2 || value == 3)
38935 && cost->lea <= cost->shift_const)
38937 *total = cost->lea;
38938 return false;
38941 /* FALLTHRU */
38943 case ROTATE:
38944 case ASHIFTRT:
38945 case LSHIFTRT:
38946 case ROTATERT:
38947 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38949 /* ??? Should be SSE vector operation cost. */
38950 /* At least for published AMD latencies, this really is the same
38951 as the latency for a simple fpu operation like fabs. */
38952 /* V*QImode is emulated with 1-11 insns. */
38953 if (mode == V16QImode || mode == V32QImode)
38955 int count = 11;
38956 if (TARGET_XOP && mode == V16QImode)
38958 /* For XOP we use vpshab, which requires a broadcast of the
38959 value to the variable shift insn. For constants this
38960 means a V16QImode constant in memory; even when we can perform the
38961 shift with one insn, set the cost to prefer paddb. */
38962 if (CONSTANT_P (XEXP (x, 1)))
38964 *total = ix86_vec_cost (mode,
38965 cost->sse_op
38966 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
38967 + (speed ? 2 : COSTS_N_BYTES (16)), true);
38968 return true;
38970 count = 3;
38972 else if (TARGET_SSSE3)
38973 count = 7;
38974 *total = ix86_vec_cost (mode, cost->sse_op * count, true);
38976 else
38977 *total = ix86_vec_cost (mode, cost->sse_op, true);
38979 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38981 if (CONST_INT_P (XEXP (x, 1)))
38983 if (INTVAL (XEXP (x, 1)) > 32)
38984 *total = cost->shift_const + COSTS_N_INSNS (2);
38985 else
38986 *total = cost->shift_const * 2;
38988 else
38990 if (GET_CODE (XEXP (x, 1)) == AND)
38991 *total = cost->shift_var * 2;
38992 else
38993 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38996 else
38998 if (CONST_INT_P (XEXP (x, 1)))
38999 *total = cost->shift_const;
39000 else if (SUBREG_P (XEXP (x, 1))
39001 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
39003 /* Return the cost after shift-and truncation. */
39004 *total = cost->shift_var;
39005 return true;
39007 else
39008 *total = cost->shift_var;
39010 return false;
39012 case FMA:
39014 rtx sub;
39016 gcc_assert (FLOAT_MODE_P (mode));
39017 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
39019 *total = ix86_vec_cost (mode,
39020 mode == SFmode ? cost->fmass : cost->fmasd,
39021 true);
39022 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
39024 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
39025 sub = XEXP (x, 0);
39026 if (GET_CODE (sub) == NEG)
39027 sub = XEXP (sub, 0);
39028 *total += rtx_cost (sub, mode, FMA, 0, speed);
39030 sub = XEXP (x, 2);
39031 if (GET_CODE (sub) == NEG)
39032 sub = XEXP (sub, 0);
39033 *total += rtx_cost (sub, mode, FMA, 2, speed);
39034 return true;
39037 case MULT:
39038 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39040 *total = inner_mode == DFmode ? cost->mulsd : cost->mulss;
39041 return false;
39043 else if (X87_FLOAT_MODE_P (mode))
39045 *total = cost->fmul;
39046 return false;
39048 else if (FLOAT_MODE_P (mode))
39050 *total = ix86_vec_cost (mode,
39051 inner_mode == DFmode
39052 ? cost->mulsd : cost->mulss, true);
39053 return false;
39055 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39057 /* V*QImode is emulated with 7-13 insns. */
39058 if (mode == V16QImode || mode == V32QImode)
39060 int extra = 11;
39061 if (TARGET_XOP && mode == V16QImode)
39062 extra = 5;
39063 else if (TARGET_SSSE3)
39064 extra = 6;
39065 *total = ix86_vec_cost (mode,
39066 cost->mulss * 2 + cost->sse_op * extra,
39067 true);
39069 /* V*DImode is emulated with 5-8 insns. */
39070 else if (mode == V2DImode || mode == V4DImode)
39072 if (TARGET_XOP && mode == V2DImode)
39073 *total = ix86_vec_cost (mode,
39074 cost->mulss * 2 + cost->sse_op * 3,
39075 true);
39076 else
39077 *total = ix86_vec_cost (mode,
39078 cost->mulss * 3 + cost->sse_op * 5,
39079 true);
39081 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
39082 insns, including two PMULUDQ. */
39083 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
39084 *total = ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
39085 true);
39086 else
39087 *total = ix86_vec_cost (mode, cost->mulss, true);
39088 return false;
39090 else
39092 rtx op0 = XEXP (x, 0);
39093 rtx op1 = XEXP (x, 1);
39094 int nbits;
39095 if (CONST_INT_P (XEXP (x, 1)))
39097 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39098 for (nbits = 0; value != 0; value &= value - 1)
39099 nbits++;
39101 else
39102 /* This is arbitrary. */
39103 nbits = 7;
39105 /* Compute costs correctly for widening multiplication. */
39106 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
39107 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
39108 == GET_MODE_SIZE (mode))
39110 int is_mulwiden = 0;
39111 machine_mode inner_mode = GET_MODE (op0);
39113 if (GET_CODE (op0) == GET_CODE (op1))
39114 is_mulwiden = 1, op1 = XEXP (op1, 0);
39115 else if (CONST_INT_P (op1))
39117 if (GET_CODE (op0) == SIGN_EXTEND)
39118 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
39119 == INTVAL (op1);
39120 else
39121 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
39124 if (is_mulwiden)
39125 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
39128 *total = (cost->mult_init[MODE_INDEX (mode)]
39129 + nbits * cost->mult_bit
39130 + rtx_cost (op0, mode, outer_code, opno, speed)
39131 + rtx_cost (op1, mode, outer_code, opno, speed));
39133 return true;
39136 case DIV:
39137 case UDIV:
39138 case MOD:
39139 case UMOD:
39140 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39141 *total = inner_mode == DFmode ? cost->divsd : cost->divss;
39142 else if (X87_FLOAT_MODE_P (mode))
39143 *total = cost->fdiv;
39144 else if (FLOAT_MODE_P (mode))
39145 *total = ix86_vec_cost (mode,
39146 inner_mode == DFmode ? cost->divsd : cost->divss,
39147 true);
39148 else
39149 *total = cost->divide[MODE_INDEX (mode)];
39150 return false;
39152 case PLUS:
39153 if (GET_MODE_CLASS (mode) == MODE_INT
39154 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
39156 if (GET_CODE (XEXP (x, 0)) == PLUS
39157 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
39158 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
39159 && CONSTANT_P (XEXP (x, 1)))
39161 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
39162 if (val == 2 || val == 4 || val == 8)
39164 *total = cost->lea;
39165 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39166 outer_code, opno, speed);
39167 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
39168 outer_code, opno, speed);
39169 *total += rtx_cost (XEXP (x, 1), mode,
39170 outer_code, opno, speed);
39171 return true;
39174 else if (GET_CODE (XEXP (x, 0)) == MULT
39175 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
39177 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
39178 if (val == 2 || val == 4 || val == 8)
39180 *total = cost->lea;
39181 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39182 outer_code, opno, speed);
39183 *total += rtx_cost (XEXP (x, 1), mode,
39184 outer_code, opno, speed);
39185 return true;
39188 else if (GET_CODE (XEXP (x, 0)) == PLUS)
39190 /* Add with carry, ignore the cost of adding a carry flag. */
39191 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
39192 *total = cost->add;
39193 else
39195 *total = cost->lea;
39196 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39197 outer_code, opno, speed);
39200 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39201 outer_code, opno, speed);
39202 *total += rtx_cost (XEXP (x, 1), mode,
39203 outer_code, opno, speed);
39204 return true;
39207 /* FALLTHRU */
39209 case MINUS:
39210 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
39211 if (GET_MODE_CLASS (mode) == MODE_INT
39212 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
39213 && GET_CODE (XEXP (x, 0)) == MINUS
39214 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
39216 *total = cost->add;
39217 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39218 outer_code, opno, speed);
39219 *total += rtx_cost (XEXP (x, 1), mode,
39220 outer_code, opno, speed);
39221 return true;
39224 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39226 *total = cost->addss;
39227 return false;
39229 else if (X87_FLOAT_MODE_P (mode))
39231 *total = cost->fadd;
39232 return false;
39234 else if (FLOAT_MODE_P (mode))
39236 *total = ix86_vec_cost (mode, cost->addss, true);
39237 return false;
39239 /* FALLTHRU */
39241 case AND:
39242 case IOR:
39243 case XOR:
39244 if (GET_MODE_CLASS (mode) == MODE_INT
39245 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39247 *total = (cost->add * 2
39248 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
39249 << (GET_MODE (XEXP (x, 0)) != DImode))
39250 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
39251 << (GET_MODE (XEXP (x, 1)) != DImode)));
39252 return true;
39254 /* FALLTHRU */
39256 case NEG:
39257 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39259 *total = cost->sse_op;
39260 return false;
39262 else if (X87_FLOAT_MODE_P (mode))
39264 *total = cost->fchs;
39265 return false;
39267 else if (FLOAT_MODE_P (mode))
39269 *total = ix86_vec_cost (mode, cost->sse_op, true);
39270 return false;
39272 /* FALLTHRU */
39274 case NOT:
39275 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39276 *total = ix86_vec_cost (mode, cost->sse_op, true);
39277 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39278 *total = cost->add * 2;
39279 else
39280 *total = cost->add;
39281 return false;
39283 case COMPARE:
39284 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
39285 && XEXP (XEXP (x, 0), 1) == const1_rtx
39286 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
39287 && XEXP (x, 1) == const0_rtx)
39289 /* This kind of construct is implemented using test[bwl].
39290 Treat it as if we had an AND. */
39291 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
39292 *total = (cost->add
39293 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
39294 opno, speed)
39295 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
39296 return true;
39299 /* The embedded comparison operand is completely free. */
39300 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
39301 && XEXP (x, 1) == const0_rtx)
39302 *total = 0;
39304 return false;
39306 case FLOAT_EXTEND:
39307 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39308 *total = 0;
39309 else
39310 *total = ix86_vec_cost (mode, cost->addss, true);
39311 return false;
39313 case FLOAT_TRUNCATE:
39314 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39315 *total = cost->fadd;
39316 else
39317 *total = ix86_vec_cost (mode, cost->addss, true);
39318 return false;
39320 case ABS:
39321 /* SSE requires a memory load for the constant operand. It may make
39322 sense to account for this. Of course, the constant operand may or
39323 may not be reused. */
39324 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39325 *total = cost->sse_op;
39326 else if (X87_FLOAT_MODE_P (mode))
39327 *total = cost->fabs;
39328 else if (FLOAT_MODE_P (mode))
39329 *total = ix86_vec_cost (mode, cost->sse_op, true);
39330 return false;
39332 case SQRT:
39333 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39334 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
39335 else if (X87_FLOAT_MODE_P (mode))
39336 *total = cost->fsqrt;
39337 else if (FLOAT_MODE_P (mode))
39338 *total = ix86_vec_cost (mode,
39339 mode == SFmode ? cost->sqrtss : cost->sqrtsd,
39340 true);
39341 return false;
39343 case UNSPEC:
39344 if (XINT (x, 1) == UNSPEC_TP)
39345 *total = 0;
39346 return false;
39348 case VEC_SELECT:
39349 case VEC_CONCAT:
39350 case VEC_DUPLICATE:
39351 /* ??? Assume all of these vector manipulation patterns are
39352 recognizable, in which case they all have pretty much the
39353 same cost. */
39354 *total = cost->sse_op;
39355 return true;
39356 case VEC_MERGE:
39357 mask = XEXP (x, 2);
39358 /* This is a masked instruction; assume the same cost as
39359 the non-masked variant. */
39360 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
39361 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
39362 else
39363 *total = cost->sse_op;
39364 return true;
39366 default:
39367 return false;
39371 #if TARGET_MACHO
39373 static int current_machopic_label_num;
39375 /* Given a symbol name and its associated stub, write out the
39376 definition of the stub. */
39378 void
39379 machopic_output_stub (FILE *file, const char *symb, const char *stub)
39381 unsigned int length;
39382 char *binder_name, *symbol_name, lazy_ptr_name[32];
39383 int label = ++current_machopic_label_num;
39385 /* For 64-bit we shouldn't get here. */
39386 gcc_assert (!TARGET_64BIT);
39388 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
39389 symb = targetm.strip_name_encoding (symb);
39391 length = strlen (stub);
39392 binder_name = XALLOCAVEC (char, length + 32);
39393 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
39395 length = strlen (symb);
39396 symbol_name = XALLOCAVEC (char, length + 32);
39397 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
39399 sprintf (lazy_ptr_name, "L%d$lz", label);
39401 if (MACHOPIC_ATT_STUB)
39402 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
39403 else if (MACHOPIC_PURE)
39404 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
39405 else
39406 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
39408 fprintf (file, "%s:\n", stub);
39409 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39411 if (MACHOPIC_ATT_STUB)
39413 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
39415 else if (MACHOPIC_PURE)
39417 /* PIC stub. */
39418 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39419 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
39420 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
39421 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
39422 label, lazy_ptr_name, label);
39423 fprintf (file, "\tjmp\t*%%ecx\n");
39425 else
39426 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
39428 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
39429 it needs no stub-binding-helper. */
39430 if (MACHOPIC_ATT_STUB)
39431 return;
39433 fprintf (file, "%s:\n", binder_name);
39435 if (MACHOPIC_PURE)
39437 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
39438 fprintf (file, "\tpushl\t%%ecx\n");
39440 else
39441 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
39443 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
39445 /* N.B. Keep the correspondence of these
39446 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
39447 old-pic/new-pic/non-pic stubs; altering this will break
39448 compatibility with existing dylibs. */
39449 if (MACHOPIC_PURE)
39451 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39452 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
39454 else
39455 /* 16-byte -mdynamic-no-pic stub. */
39456 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
39458 fprintf (file, "%s:\n", lazy_ptr_name);
39459 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39460 fprintf (file, ASM_LONG "%s\n", binder_name);
39462 #endif /* TARGET_MACHO */
39464 /* Order the registers for the register allocator. */
39466 void
39467 x86_order_regs_for_local_alloc (void)
39469 int pos = 0;
39470 int i;
39472 /* First allocate the local general purpose registers. */
39473 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39474 if (GENERAL_REGNO_P (i) && call_used_regs[i])
39475 reg_alloc_order [pos++] = i;
39477 /* Global general purpose registers. */
39478 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39479 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
39480 reg_alloc_order [pos++] = i;
39482 /* x87 registers come first in case we are doing FP math
39483 using them. */
39484 if (!TARGET_SSE_MATH)
39485 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39486 reg_alloc_order [pos++] = i;
39488 /* SSE registers. */
39489 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
39490 reg_alloc_order [pos++] = i;
39491 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
39492 reg_alloc_order [pos++] = i;
39494 /* Extended REX SSE registers. */
39495 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
39496 reg_alloc_order [pos++] = i;
39498 /* Mask register. */
39499 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
39500 reg_alloc_order [pos++] = i;
39502 /* MPX bound registers. */
39503 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
39504 reg_alloc_order [pos++] = i;
39506 /* x87 registers. */
39507 if (TARGET_SSE_MATH)
39508 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39509 reg_alloc_order [pos++] = i;
39511 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
39512 reg_alloc_order [pos++] = i;
39514 /* Initialize the rest of the array, as we do not allocate some registers
39515 at all. */
39516 while (pos < FIRST_PSEUDO_REGISTER)
39517 reg_alloc_order [pos++] = 0;
39520 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
39521 in struct attribute_spec.handler. */
39522 static tree
39523 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
39524 tree args,
39525 int,
39526 bool *no_add_attrs)
39528 if (TREE_CODE (*node) != FUNCTION_TYPE
39529 && TREE_CODE (*node) != METHOD_TYPE
39530 && TREE_CODE (*node) != FIELD_DECL
39531 && TREE_CODE (*node) != TYPE_DECL)
39533 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39534 name);
39535 *no_add_attrs = true;
39536 return NULL_TREE;
39538 if (TARGET_64BIT)
39540 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
39541 name);
39542 *no_add_attrs = true;
39543 return NULL_TREE;
39545 if (is_attribute_p ("callee_pop_aggregate_return", name))
39547 tree cst;
39549 cst = TREE_VALUE (args);
39550 if (TREE_CODE (cst) != INTEGER_CST)
39552 warning (OPT_Wattributes,
39553 "%qE attribute requires an integer constant argument",
39554 name);
39555 *no_add_attrs = true;
39557 else if (compare_tree_int (cst, 0) != 0
39558 && compare_tree_int (cst, 1) != 0)
39560 warning (OPT_Wattributes,
39561 "argument to %qE attribute is neither zero, nor one",
39562 name);
39563 *no_add_attrs = true;
39566 return NULL_TREE;
39569 return NULL_TREE;
39572 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
39573 struct attribute_spec.handler. */
39574 static tree
39575 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
39576 bool *no_add_attrs)
39578 if (TREE_CODE (*node) != FUNCTION_TYPE
39579 && TREE_CODE (*node) != METHOD_TYPE
39580 && TREE_CODE (*node) != FIELD_DECL
39581 && TREE_CODE (*node) != TYPE_DECL)
39583 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39584 name);
39585 *no_add_attrs = true;
39586 return NULL_TREE;
39589 /* Can combine regparm with all attributes but fastcall. */
39590 if (is_attribute_p ("ms_abi", name))
39592 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
39594 error ("ms_abi and sysv_abi attributes are not compatible");
39597 return NULL_TREE;
39599 else if (is_attribute_p ("sysv_abi", name))
39601 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
39603 error ("ms_abi and sysv_abi attributes are not compatible");
39606 return NULL_TREE;
39609 return NULL_TREE;
39612 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
39613 struct attribute_spec.handler. */
39614 static tree
39615 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
39616 bool *no_add_attrs)
39618 tree *type = NULL;
39619 if (DECL_P (*node))
39621 if (TREE_CODE (*node) == TYPE_DECL)
39622 type = &TREE_TYPE (*node);
39624 else
39625 type = node;
39627 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
39629 warning (OPT_Wattributes, "%qE attribute ignored",
39630 name);
39631 *no_add_attrs = true;
39634 else if ((is_attribute_p ("ms_struct", name)
39635 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
39636 || ((is_attribute_p ("gcc_struct", name)
39637 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
39639 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
39640 name);
39641 *no_add_attrs = true;
39644 return NULL_TREE;
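/* Handle an attribute that may only be applied to a function declaration;
   warn and drop the attribute otherwise. */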
39647 static tree
39648 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
39649 bool *no_add_attrs)
39651 if (TREE_CODE (*node) != FUNCTION_DECL)
39653 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39654 name);
39655 *no_add_attrs = true;
39657 return NULL_TREE;
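/* Handle a "no_caller_saved_registers" attribute; no checking beyond the
   generic attribute handling is needed. */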
39660 static tree
39661 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
39662 int, bool *)
39664 return NULL_TREE;
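/* Handle an "interrupt" attribute; verify that the function type looks like
   a valid interrupt service routine: a pointer first argument, an optional
   word-sized integer second argument, and a void return type. */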
39667 static tree
39668 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
39670 /* DECL_RESULT and DECL_ARGUMENTS do not exist here yet,
39671 but the function type contains the argument and return type data. */
39672 tree func_type = *node;
39673 tree return_type = TREE_TYPE (func_type);
39675 int nargs = 0;
39676 tree current_arg_type = TYPE_ARG_TYPES (func_type);
39677 while (current_arg_type
39678 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
39680 if (nargs == 0)
39682 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
39683 error ("interrupt service routine should have a pointer "
39684 "as the first argument");
39686 else if (nargs == 1)
39688 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
39689 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
39690 error ("interrupt service routine should have unsigned %s"
39691 "int as the second argument",
39692 TARGET_64BIT
39693 ? (TARGET_X32 ? "long long " : "long ")
39694 : "");
39696 nargs++;
39697 current_arg_type = TREE_CHAIN (current_arg_type);
39699 if (!nargs || nargs > 2)
39700 error ("interrupt service routine can only have a pointer argument "
39701 "and an optional integer argument");
39702 if (! VOID_TYPE_P (return_type))
39703 error ("interrupt service routine can't have non-void return value");
39705 return NULL_TREE;
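/* Implement TARGET_MS_BITFIELD_LAYOUT_P. */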
39708 static bool
39709 ix86_ms_bitfield_layout_p (const_tree record_type)
39711 return ((TARGET_MS_BITFIELD_LAYOUT
39712 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
39713 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
39716 /* Returns an expression indicating where the this parameter is
39717 located on entry to the FUNCTION. */
39719 static rtx
39720 x86_this_parameter (tree function)
39722 tree type = TREE_TYPE (function);
39723 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
39724 int nregs;
39726 if (TARGET_64BIT)
39728 const int *parm_regs;
39730 if (ix86_function_type_abi (type) == MS_ABI)
39731 parm_regs = x86_64_ms_abi_int_parameter_registers;
39732 else
39733 parm_regs = x86_64_int_parameter_registers;
39734 return gen_rtx_REG (Pmode, parm_regs[aggr]);
39737 nregs = ix86_function_regparm (type, function);
39739 if (nregs > 0 && !stdarg_p (type))
39741 int regno;
39742 unsigned int ccvt = ix86_get_callcvt (type);
39744 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39745 regno = aggr ? DX_REG : CX_REG;
39746 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39748 regno = CX_REG;
39749 if (aggr)
39750 return gen_rtx_MEM (SImode,
39751 plus_constant (Pmode, stack_pointer_rtx, 4));
39753 else
39755 regno = AX_REG;
39756 if (aggr)
39758 regno = DX_REG;
39759 if (nregs == 1)
39760 return gen_rtx_MEM (SImode,
39761 plus_constant (Pmode,
39762 stack_pointer_rtx, 4));
39765 return gen_rtx_REG (SImode, regno);
39768 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
39769 aggr ? 8 : 4));
39772 /* Determine whether x86_output_mi_thunk can succeed. */
39774 static bool
39775 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
39776 const_tree function)
39778 /* 64-bit can handle anything. */
39779 if (TARGET_64BIT)
39780 return true;
39782 /* For 32-bit, everything's fine if we have one free register. */
39783 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
39784 return true;
39786 /* Need a free register for vcall_offset. */
39787 if (vcall_offset)
39788 return false;
39790 /* Need a free register for GOT references. */
39791 if (flag_pic && !targetm.binds_local_p (function))
39792 return false;
39794 /* Otherwise ok. */
39795 return true;
39798 /* Output the assembler code for a thunk function. THUNK_DECL is the
39799 declaration for the thunk function itself, FUNCTION is the decl for
39800 the target function. DELTA is an immediate constant offset to be
39801 added to THIS. If VCALL_OFFSET is nonzero, the word at
39802 *(*this + vcall_offset) should be added to THIS. */
39804 static void
39805 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
39806 HOST_WIDE_INT vcall_offset, tree function)
39808 rtx this_param = x86_this_parameter (function);
39809 rtx this_reg, tmp, fnaddr;
39810 unsigned int tmp_regno;
39811 rtx_insn *insn;
39813 if (TARGET_64BIT)
39814 tmp_regno = R10_REG;
39815 else
39817 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
39818 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39819 tmp_regno = AX_REG;
39820 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39821 tmp_regno = DX_REG;
39822 else
39823 tmp_regno = CX_REG;
39826 emit_note (NOTE_INSN_PROLOGUE_END);
39828 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
39829 pull it in now and let DELTA benefit. */
39830 if (REG_P (this_param))
39831 this_reg = this_param;
39832 else if (vcall_offset)
39834 /* Put the this parameter into %eax. */
39835 this_reg = gen_rtx_REG (Pmode, AX_REG);
39836 emit_move_insn (this_reg, this_param);
39838 else
39839 this_reg = NULL_RTX;
39841 /* Adjust the this parameter by a fixed constant. */
39842 if (delta)
39844 rtx delta_rtx = GEN_INT (delta);
39845 rtx delta_dst = this_reg ? this_reg : this_param;
39847 if (TARGET_64BIT)
39849 if (!x86_64_general_operand (delta_rtx, Pmode))
39851 tmp = gen_rtx_REG (Pmode, tmp_regno);
39852 emit_move_insn (tmp, delta_rtx);
39853 delta_rtx = tmp;
39857 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
39860 /* Adjust the this parameter by a value stored in the vtable. */
39861 if (vcall_offset)
39863 rtx vcall_addr, vcall_mem, this_mem;
39865 tmp = gen_rtx_REG (Pmode, tmp_regno);
39867 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
39868 if (Pmode != ptr_mode)
39869 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
39870 emit_move_insn (tmp, this_mem);
39872 /* Adjust the this parameter. */
39873 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
39874 if (TARGET_64BIT
39875 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
39877 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
39878 emit_move_insn (tmp2, GEN_INT (vcall_offset));
39879 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
39882 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
39883 if (Pmode != ptr_mode)
39884 emit_insn (gen_addsi_1_zext (this_reg,
39885 gen_rtx_REG (ptr_mode,
39886 REGNO (this_reg)),
39887 vcall_mem));
39888 else
39889 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
39892 /* If necessary, drop THIS back to its stack slot. */
39893 if (this_reg && this_reg != this_param)
39894 emit_move_insn (this_param, this_reg);
39896 fnaddr = XEXP (DECL_RTL (function), 0);
39897 if (TARGET_64BIT)
39899 if (!flag_pic || targetm.binds_local_p (function)
39900 || TARGET_PECOFF)
39902 else
39904 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39905 tmp = gen_rtx_CONST (Pmode, tmp);
39906 fnaddr = gen_const_mem (Pmode, tmp);
39909 else
39911 if (!flag_pic || targetm.binds_local_p (function))
39913 #if TARGET_MACHO
39914 else if (TARGET_MACHO)
39916 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39917 fnaddr = XEXP (fnaddr, 0);
39919 #endif /* TARGET_MACHO */
39920 else
39922 tmp = gen_rtx_REG (Pmode, CX_REG);
39923 output_set_got (tmp, NULL_RTX);
39925 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39926 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39927 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39928 fnaddr = gen_const_mem (Pmode, fnaddr);
39932 /* Our sibling call patterns do not allow memories, because we have no
39933 predicate that can distinguish between frame and non-frame memory.
39934 For our purposes here, we can get away with (ab)using a jump pattern,
39935 because we're going to do no optimization. */
39936 if (MEM_P (fnaddr))
39938 if (sibcall_insn_operand (fnaddr, word_mode))
39940 fnaddr = XEXP (DECL_RTL (function), 0);
39941 tmp = gen_rtx_MEM (QImode, fnaddr);
39942 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39943 tmp = emit_call_insn (tmp);
39944 SIBLING_CALL_P (tmp) = 1;
39946 else
39947 emit_jump_insn (gen_indirect_jump (fnaddr));
39949 else
39951 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39953 // CM_LARGE_PIC always uses a pseudo PIC register, which is
39954 // uninitialized. Since FUNCTION is local and calling it
39955 // doesn't go through the PLT, we use scratch register %r11 as
39956 // the PIC register and initialize it here.
39957 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
39958 ix86_init_large_pic_reg (tmp_regno);
39959 fnaddr = legitimize_pic_address (fnaddr,
39960 gen_rtx_REG (Pmode, tmp_regno));
39963 if (!sibcall_insn_operand (fnaddr, word_mode))
39965 tmp = gen_rtx_REG (word_mode, tmp_regno);
39966 if (GET_MODE (fnaddr) != word_mode)
39967 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39968 emit_move_insn (tmp, fnaddr);
39969 fnaddr = tmp;
39972 tmp = gen_rtx_MEM (QImode, fnaddr);
39973 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39974 tmp = emit_call_insn (tmp);
39975 SIBLING_CALL_P (tmp) = 1;
39977 emit_barrier ();
39979 /* Emit just enough of rest_of_compilation to get the insns emitted.
39980 Note that use_thunk calls assemble_start_function et al. */
39981 insn = get_insns ();
39982 shorten_branches (insn);
39983 final_start_function (insn, file, 1);
39984 final (insn, file, 1);
39985 final_end_function ();
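/* Implement TARGET_ASM_FILE_START. */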
39988 static void
39989 x86_file_start (void)
39991 default_file_start ();
39992 if (TARGET_16BIT)
39993 fputs ("\t.code16gcc\n", asm_out_file);
39994 #if TARGET_MACHO
39995 darwin_file_start ();
39996 #endif
39997 if (X86_FILE_START_VERSION_DIRECTIVE)
39998 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39999 if (X86_FILE_START_FLTUSED)
40000 fputs ("\t.global\t__fltused\n", asm_out_file);
40001 if (ix86_asm_dialect == ASM_INTEL)
40002 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
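/* Compute the alignment to give a field of type TYPE when the front end has
   computed alignment COMPUTED. On 32-bit targets without -malign-double,
   fields with integer, DFmode or DCmode types are capped at 32-bit alignment;
   IAMCU targets use iamcu_alignment instead. */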
40006 x86_field_alignment (tree type, int computed)
40008 machine_mode mode;
40010 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
40011 return computed;
40012 if (TARGET_IAMCU)
40013 return iamcu_alignment (type, computed);
40014 mode = TYPE_MODE (strip_array_types (type));
40015 if (mode == DFmode || mode == DCmode
40016 || GET_MODE_CLASS (mode) == MODE_INT
40017 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
40018 return MIN (32, computed);
40019 return computed;
40022 /* Print call to TARGET to FILE. */
40024 static void
40025 x86_print_call_or_nop (FILE *file, const char *target)
40027 if (flag_nop_mcount)
40028 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
40029 else
40030 fprintf (file, "1:\tcall\t%s\n", target);
40033 /* Output assembler code to FILE to increment profiler label # LABELNO
40034 for profiling a function entry. */
40035 void
40036 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
40038 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
40039 : MCOUNT_NAME);
40040 if (TARGET_64BIT)
40042 #ifndef NO_PROFILE_COUNTERS
40043 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
40044 #endif
40046 if (!TARGET_PECOFF && flag_pic)
40047 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
40048 else
40049 x86_print_call_or_nop (file, mcount_name);
40051 else if (flag_pic)
40053 #ifndef NO_PROFILE_COUNTERS
40054 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
40055 LPREFIX, labelno);
40056 #endif
40057 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
40059 else
40061 #ifndef NO_PROFILE_COUNTERS
40062 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
40063 LPREFIX, labelno);
40064 #endif
40065 x86_print_call_or_nop (file, mcount_name);
40068 if (flag_record_mcount)
40070 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
40071 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
40072 fprintf (file, "\t.previous\n");
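/* For illustration (a sketch, not the only possible output): on a 64-bit
   target without PIC, the code above emits roughly

	1:	call	mcount		# or a 5-byte NOP under -mnop-mcount
		.section __mcount_loc, "a",@progbits
		.quad	1b
		.previous

   assuming the profiler entry point is named "mcount"; -mrecord-mcount can
   then locate every profiling call site through the __mcount_loc entries.  */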
40076 /* We don't have exact information about insn sizes, but we may quite
40077 safely assume that we know about all 1-byte insns and memory
40078 address sizes. This is enough to eliminate unnecessary padding in
40079 99% of cases. */
40082 ix86_min_insn_size (rtx_insn *insn)
40084 int l = 0, len;
40086 if (!INSN_P (insn) || !active_insn_p (insn))
40087 return 0;
40089 /* Discard alignments we've emitted, and jump instructions. */
40090 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
40091 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
40092 return 0;
40094 /* Important case: calls are always 5 bytes.
40095 It is common to have many calls in a row. */
40096 if (CALL_P (insn)
40097 && symbolic_reference_mentioned_p (PATTERN (insn))
40098 && !SIBLING_CALL_P (insn))
40099 return 5;
40100 len = get_attr_length (insn);
40101 if (len <= 1)
40102 return 1;
40104 /* For normal instructions we rely on get_attr_length being exact,
40105 with a few exceptions. */
40106 if (!JUMP_P (insn))
40108 enum attr_type type = get_attr_type (insn);
40110 switch (type)
40112 case TYPE_MULTI:
40113 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
40114 || asm_noperands (PATTERN (insn)) >= 0)
40115 return 0;
40116 break;
40117 case TYPE_OTHER:
40118 case TYPE_FCMP:
40119 break;
40120 default:
40121 /* Otherwise trust get_attr_length. */
40122 return len;
40125 l = get_attr_length_address (insn);
40126 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
40127 l = 4;
40129 if (l)
40130 return 1+l;
40131 else
40132 return 2;
40135 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
40137 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
40138 window. */
40140 static void
40141 ix86_avoid_jump_mispredicts (void)
40143 rtx_insn *insn, *start = get_insns ();
40144 int nbytes = 0, njumps = 0;
40145 bool isjump = false;
40147 /* Look for all minimal intervals of instructions containing 4 jumps.
40148 The intervals are bounded by START and INSN. NBYTES is the total
40149 size of the instructions in the interval, including INSN and not
40150 including START. When NBYTES is smaller than 16 bytes, it is possible
40151 that the end of START and INSN end up in the same 16-byte window.
40153 The smallest offset in the window at which INSN can start is the case
40154 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
40155 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
40157 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
40158 have to, control transfer to its label(s) can be performed through other
40159 means, and we also estimate the minimum length of all asm stmts as 0. */
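/* Worked example of the padding computed below: if the three preceding
   jumps plus the insns between them add up to NBYTES == 12 and the fourth
   jump INSN is estimated at 2 bytes, the pad is 15 - 12 + 2 == 5 bytes.
   The interval then covers 17 bytes, so INSN cannot begin before offset 15
   of the 16-byte window containing the end of START, and the four jumps no
   longer fit in a single window.  */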
40160 for (insn = start; insn; insn = NEXT_INSN (insn))
40162 int min_size;
40164 if (LABEL_P (insn))
40166 int align = label_to_alignment (insn);
40167 int max_skip = label_to_max_skip (insn);
40169 if (max_skip > 15)
40170 max_skip = 15;
40171 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
40172 already in the current 16 byte page, because otherwise
40173 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
40174 bytes to reach 16 byte boundary. */
40175 if (align <= 0
40176 || (align <= 3 && max_skip != (1 << align) - 1))
40177 max_skip = 0;
40178 if (dump_file)
40179 fprintf (dump_file, "Label %i with max_skip %i\n",
40180 INSN_UID (insn), max_skip);
40181 if (max_skip)
40183 while (nbytes + max_skip >= 16)
40185 start = NEXT_INSN (start);
40186 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40187 || CALL_P (start))
40188 njumps--, isjump = true;
40189 else
40190 isjump = false;
40191 nbytes -= ix86_min_insn_size (start);
40194 continue;
40197 min_size = ix86_min_insn_size (insn);
40198 nbytes += min_size;
40199 if (dump_file)
40200 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
40201 INSN_UID (insn), min_size);
40202 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
40203 || CALL_P (insn))
40204 njumps++;
40205 else
40206 continue;
40208 while (njumps > 3)
40210 start = NEXT_INSN (start);
40211 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40212 || CALL_P (start))
40213 njumps--, isjump = true;
40214 else
40215 isjump = false;
40216 nbytes -= ix86_min_insn_size (start);
40218 gcc_assert (njumps >= 0);
40219 if (dump_file)
40220 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
40221 INSN_UID (start), INSN_UID (insn), nbytes);
40223 if (njumps == 3 && isjump && nbytes < 16)
40225 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
40227 if (dump_file)
40228 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
40229 INSN_UID (insn), padsize);
40230 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
40234 #endif
40236 /* AMD Athlon works faster
40237 when RET is not the destination of a conditional jump or directly preceded
40238 by another jump instruction. We avoid the penalty by emitting a longer form
40239 of the return in such cases. */
40240 static void
40241 ix86_pad_returns (void)
40243 edge e;
40244 edge_iterator ei;
40246 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40248 basic_block bb = e->src;
40249 rtx_insn *ret = BB_END (bb);
40250 rtx_insn *prev;
40251 bool replace = false;
40253 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
40254 || optimize_bb_for_size_p (bb))
40255 continue;
40256 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
40257 if (active_insn_p (prev) || LABEL_P (prev))
40258 break;
40259 if (prev && LABEL_P (prev))
40261 edge e;
40262 edge_iterator ei;
40264 FOR_EACH_EDGE (e, ei, bb->preds)
40265 if (EDGE_FREQUENCY (e) && e->src->index >= 0
40266 && !(e->flags & EDGE_FALLTHRU))
40268 replace = true;
40269 break;
40272 if (!replace)
40274 prev = prev_active_insn (ret);
40275 if (prev
40276 && ((JUMP_P (prev) && any_condjump_p (prev))
40277 || CALL_P (prev)))
40278 replace = true;
40279 /* Empty functions get a branch mispredict even when
40280 the jump destination is not visible to us. */
40281 if (!prev && !optimize_function_for_size_p (cfun))
40282 replace = true;
40284 if (replace)
40286 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
40287 delete_insn (ret);
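/* Note on the replacement above: gen_simple_return_internal_long replaces
   the plain one-byte RET with a longer return encoding (typically the
   two-byte "rep; ret" idiom on the affected AMD parts; the exact encoding
   comes from the simple_return_internal_long pattern in i386.md), so the
   return no longer triggers the predictor penalty described in the comment
   before this function.  */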
40292 /* Count the minimum number of instructions in BB. Return 4 if the
40293 number of instructions >= 4. */
40295 static int
40296 ix86_count_insn_bb (basic_block bb)
40298 rtx_insn *insn;
40299 int insn_count = 0;
40301 /* Count number of instructions in this block. Return 4 if the number
40302 of instructions >= 4. */
40303 FOR_BB_INSNS (bb, insn)
40305 /* This can only happen in exit blocks. */
40306 if (JUMP_P (insn)
40307 && ANY_RETURN_P (PATTERN (insn)))
40308 break;
40310 if (NONDEBUG_INSN_P (insn)
40311 && GET_CODE (PATTERN (insn)) != USE
40312 && GET_CODE (PATTERN (insn)) != CLOBBER)
40314 insn_count++;
40315 if (insn_count >= 4)
40316 return insn_count;
40320 return insn_count;
40324 /* Count the minimum number of instructions in code path in BB.
40325 Return 4 if the number of instructions >= 4. */
40327 static int
40328 ix86_count_insn (basic_block bb)
40330 edge e;
40331 edge_iterator ei;
40332 int min_prev_count;
40334 /* Only bother counting instructions along paths with no
40335 more than 2 basic blocks between entry and exit. Given
40336 that BB has an edge to exit, determine if a predecessor
40337 of BB has an edge from entry. If so, compute the number
40338 of instructions in the predecessor block. If there
40339 happen to be multiple such blocks, compute the minimum. */
40340 min_prev_count = 4;
40341 FOR_EACH_EDGE (e, ei, bb->preds)
40343 edge prev_e;
40344 edge_iterator prev_ei;
40346 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40348 min_prev_count = 0;
40349 break;
40351 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
40353 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40355 int count = ix86_count_insn_bb (e->src);
40356 if (count < min_prev_count)
40357 min_prev_count = count;
40358 break;
40363 if (min_prev_count < 4)
40364 min_prev_count += ix86_count_insn_bb (bb);
40366 return min_prev_count;
40369 /* Pad short function to 4 instructions. */
40371 static void
40372 ix86_pad_short_function (void)
40374 edge e;
40375 edge_iterator ei;
40377 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40379 rtx_insn *ret = BB_END (e->src);
40380 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
40382 int insn_count = ix86_count_insn (e->src);
40384 /* Pad short function. */
40385 if (insn_count < 4)
40387 rtx_insn *insn = ret;
40389 /* Find epilogue. */
40390 while (insn
40391 && (!NOTE_P (insn)
40392 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
40393 insn = PREV_INSN (insn);
40395 if (!insn)
40396 insn = ret;
40398 /* Two NOPs count as one instruction. */
40399 insn_count = 2 * (4 - insn_count);
40400 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
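/* For example, a path counted at 2 insns gets 2 * (4 - 2) == 4 NOPs
   emitted just before the epilogue; since two NOPs are counted as one
   instruction here, that brings the effective path length up to the
   required 4 instructions.  */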
40406 /* Fix up a Windows system unwinder issue. If an EH region falls through into
40407 the epilogue, the Windows system unwinder will apply epilogue logic and
40408 produce incorrect offsets. This can be avoided by adding a nop between
40409 the last insn that can throw and the first insn of the epilogue. */
40411 static void
40412 ix86_seh_fixup_eh_fallthru (void)
40414 edge e;
40415 edge_iterator ei;
40417 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40419 rtx_insn *insn, *next;
40421 /* Find the beginning of the epilogue. */
40422 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
40423 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
40424 break;
40425 if (insn == NULL)
40426 continue;
40428 /* We only care about preceding insns that can throw. */
40429 insn = prev_active_insn (insn);
40430 if (insn == NULL || !can_throw_internal (insn))
40431 continue;
40433 /* Do not separate calls from their debug information. */
40434 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
40435 if (NOTE_P (next)
40436 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
40437 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
40438 insn = next;
40439 else
40440 break;
40442 emit_insn_after (gen_nops (const1_rtx), insn);
40446 /* Given a register number BASE, the lowest of a group of registers, update
40447 regsets IN and OUT with the registers that should be avoided in input
40448 and output operands respectively when trying to avoid generating a modr/m
40449 byte for -fmitigate-rop. */
40451 static void
40452 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
40454 SET_HARD_REG_BIT (out, base);
40455 SET_HARD_REG_BIT (out, base + 1);
40456 SET_HARD_REG_BIT (in, base + 2);
40457 SET_HARD_REG_BIT (in, base + 3);
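/* Concretely, for BASE == FIRST_SSE_REG the calls above mark the first two
   SSE registers (%xmm0 and %xmm1) as risky when they appear as outputs and
   the next two (%xmm2 and %xmm3) as risky when they appear as inputs; the
   caller below repeats this for the REX integer, REX SSE, extended SSE,
   mask and bound register groups.  */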
40460 /* Called if -fmitigate-rop is in effect. Try to rewrite instructions so
40461 that certain encodings of modr/m bytes do not occur. */
40462 static void
40463 ix86_mitigate_rop (void)
40465 HARD_REG_SET input_risky;
40466 HARD_REG_SET output_risky;
40467 HARD_REG_SET inout_risky;
40469 CLEAR_HARD_REG_SET (output_risky);
40470 CLEAR_HARD_REG_SET (input_risky);
40471 SET_HARD_REG_BIT (output_risky, AX_REG);
40472 SET_HARD_REG_BIT (output_risky, CX_REG);
40473 SET_HARD_REG_BIT (input_risky, BX_REG);
40474 SET_HARD_REG_BIT (input_risky, DX_REG);
40475 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
40476 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
40477 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
40478 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
40479 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
40480 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
40481 COPY_HARD_REG_SET (inout_risky, input_risky);
40482 IOR_HARD_REG_SET (inout_risky, output_risky);
40484 df_note_add_problem ();
40485 /* Fix up what stack-regs did. */
40486 df_insn_rescan_all ();
40487 df_analyze ();
40489 regrename_init (true);
40490 regrename_analyze (NULL);
40492 auto_vec<du_head_p> cands;
40494 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
40496 if (!NONDEBUG_INSN_P (insn))
40497 continue;
40499 if (GET_CODE (PATTERN (insn)) == USE
40500 || GET_CODE (PATTERN (insn)) == CLOBBER)
40501 continue;
40503 extract_insn (insn);
40505 int opno0, opno1;
40506 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
40507 recog_data.n_operands, &opno0,
40508 &opno1);
40510 if (!ix86_rop_should_change_byte_p (modrm))
40511 continue;
40513 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
40515 /* This happens when regrename has to fail a block. */
40516 if (!info->op_info)
40517 continue;
40519 if (info->op_info[opno0].n_chains != 0)
40521 gcc_assert (info->op_info[opno0].n_chains == 1);
40522 du_head_p op0c;
40523 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
40524 if (op0c->target_data_1 + op0c->target_data_2 == 0
40525 && !op0c->cannot_rename)
40526 cands.safe_push (op0c);
40528 op0c->target_data_1++;
40530 if (info->op_info[opno1].n_chains != 0)
40532 gcc_assert (info->op_info[opno1].n_chains == 1);
40533 du_head_p op1c;
40534 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
40535 if (op1c->target_data_1 + op1c->target_data_2 == 0
40536 && !op1c->cannot_rename)
40537 cands.safe_push (op1c);
40539 op1c->target_data_2++;
40543 int i;
40544 du_head_p head;
40545 FOR_EACH_VEC_ELT (cands, i, head)
40547 int old_reg, best_reg;
40548 HARD_REG_SET unavailable;
40550 CLEAR_HARD_REG_SET (unavailable);
40551 if (head->target_data_1)
40552 IOR_HARD_REG_SET (unavailable, output_risky);
40553 if (head->target_data_2)
40554 IOR_HARD_REG_SET (unavailable, input_risky);
40556 int n_uses;
40557 reg_class superclass = regrename_find_superclass (head, &n_uses,
40558 &unavailable);
40559 old_reg = head->regno;
40560 best_reg = find_rename_reg (head, superclass, &unavailable,
40561 old_reg, false);
40562 bool ok = regrename_do_replace (head, best_reg);
40563 gcc_assert (ok);
40564 if (dump_file)
40565 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
40566 reg_names[best_reg], reg_class_names[superclass]);
40570 regrename_finish ();
40572 df_analyze ();
40574 basic_block bb;
40575 regset_head live;
40577 INIT_REG_SET (&live);
40579 FOR_EACH_BB_FN (bb, cfun)
40581 rtx_insn *insn;
40583 COPY_REG_SET (&live, DF_LR_OUT (bb));
40584 df_simulate_initialize_backwards (bb, &live);
40586 FOR_BB_INSNS_REVERSE (bb, insn)
40588 if (!NONDEBUG_INSN_P (insn))
40589 continue;
40591 df_simulate_one_insn_backwards (bb, insn, &live);
40593 if (GET_CODE (PATTERN (insn)) == USE
40594 || GET_CODE (PATTERN (insn)) == CLOBBER)
40595 continue;
40597 extract_insn (insn);
40598 constrain_operands_cached (insn, reload_completed);
40599 int opno0, opno1;
40600 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
40601 recog_data.n_operands, &opno0,
40602 &opno1);
40603 if (modrm < 0
40604 || !ix86_rop_should_change_byte_p (modrm)
40605 || opno0 == opno1)
40606 continue;
40608 rtx oldreg = recog_data.operand[opno1];
40609 preprocess_constraints (insn);
40610 const operand_alternative *alt = which_op_alt ();
40612 int i;
40613 for (i = 0; i < recog_data.n_operands; i++)
40614 if (i != opno1
40615 && alt[i].earlyclobber
40616 && reg_overlap_mentioned_p (recog_data.operand[i],
40617 oldreg))
40618 break;
40620 if (i < recog_data.n_operands)
40621 continue;
40623 if (dump_file)
40624 fprintf (dump_file,
40625 "attempting to fix modrm byte in insn %d:"
40626 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
40627 reg_class_names[alt[opno1].cl]);
40629 HARD_REG_SET unavailable;
40630 REG_SET_TO_HARD_REG_SET (unavailable, &live);
40631 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
40632 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
40633 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
40634 IOR_HARD_REG_SET (unavailable, output_risky);
40635 IOR_COMPL_HARD_REG_SET (unavailable,
40636 reg_class_contents[alt[opno1].cl]);
40638 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
40639 if (!TEST_HARD_REG_BIT (unavailable, i))
40640 break;
40641 if (i == FIRST_PSEUDO_REGISTER)
40643 if (dump_file)
40644 fprintf (dump_file, ", none available\n");
40645 continue;
40647 if (dump_file)
40648 fprintf (dump_file, " -> %d\n", i);
40649 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
40650 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
40651 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
40656 /* Implement machine specific optimizations. We implement padding of returns
40657 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
40658 static void
40659 ix86_reorg (void)
40661 /* We are freeing block_for_insn in the toplev to keep compatibility
40662 with old MDEP_REORGS that are not CFG based. Recompute it now. */
40663 compute_bb_for_insn ();
40665 if (flag_mitigate_rop)
40666 ix86_mitigate_rop ();
40668 if (TARGET_SEH && current_function_has_exception_handlers ())
40669 ix86_seh_fixup_eh_fallthru ();
40671 if (optimize && optimize_function_for_speed_p (cfun))
40673 if (TARGET_PAD_SHORT_FUNCTION)
40674 ix86_pad_short_function ();
40675 else if (TARGET_PAD_RETURNS)
40676 ix86_pad_returns ();
40677 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
40678 if (TARGET_FOUR_JUMP_LIMIT)
40679 ix86_avoid_jump_mispredicts ();
40680 #endif
40684 /* Return nonzero when a QImode register that must be represented via a REX
40685 prefix is used. */
40686 bool
40687 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
40689 int i;
40690 extract_insn_cached (insn);
40691 for (i = 0; i < recog_data.n_operands; i++)
40692 if (GENERAL_REG_P (recog_data.operand[i])
40693 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
40694 return true;
40695 return false;
40698 /* Return true when INSN mentions a register that must be encoded using a
40699 REX prefix. */
40700 bool
40701 x86_extended_reg_mentioned_p (rtx insn)
40703 subrtx_iterator::array_type array;
40704 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
40706 const_rtx x = *iter;
40707 if (REG_P (x)
40708 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
40709 return true;
40711 return false;
40714 /* If profitable, negate (without causing overflow) the integer constant
40715 of mode MODE at location LOC. Return true if we did. */
40716 bool
40717 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
40719 HOST_WIDE_INT val;
40721 if (!CONST_INT_P (*loc))
40722 return false;
40724 switch (mode)
40726 case E_DImode:
40727 /* DImode x86_64 constants must fit in 32 bits. */
40728 gcc_assert (x86_64_immediate_operand (*loc, mode));
40730 mode = SImode;
40731 break;
40733 case E_SImode:
40734 case E_HImode:
40735 case E_QImode:
40736 break;
40738 default:
40739 gcc_unreachable ();
40742 /* Avoid overflows. */
40743 if (mode_signbit_p (mode, *loc))
40744 return false;
40746 val = INTVAL (*loc);
40748 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
40749 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
40750 if ((val < 0 && val != -128)
40751 || val == 128)
40753 *loc = GEN_INT (-val);
40754 return true;
40757 return false;
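/* Examples of the transformation above: for an "add" of $-4 the constant
   becomes $4 and the caller can emit "subl $4, %eax" instead of
   "addl $-4, %eax"; for $128 it becomes $-128, which still fits in a
   sign-extended 8-bit immediate while $128 would not.  $-128 itself is
   left untouched for the same encoding reason.  */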
40760 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
40761 optabs would emit if we didn't have TFmode patterns. */
40763 void
40764 x86_emit_floatuns (rtx operands[2])
40766 rtx_code_label *neglab, *donelab;
40767 rtx i0, i1, f0, in, out;
40768 machine_mode mode, inmode;
40770 inmode = GET_MODE (operands[1]);
40771 gcc_assert (inmode == SImode || inmode == DImode);
40773 out = operands[0];
40774 in = force_reg (inmode, operands[1]);
40775 mode = GET_MODE (out);
40776 neglab = gen_label_rtx ();
40777 donelab = gen_label_rtx ();
40778 f0 = gen_reg_rtx (mode);
40780 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
40782 expand_float (out, in, 0);
40784 emit_jump_insn (gen_jump (donelab));
40785 emit_barrier ();
40787 emit_label (neglab);
40789 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
40790 1, OPTAB_DIRECT);
40791 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
40792 1, OPTAB_DIRECT);
40793 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
40795 expand_float (f0, i0, 0);
40797 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
40799 emit_label (donelab);
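/* The negative branch above uses the usual halve-and-double trick for
   inputs with the sign bit set: IN is shifted right by one and the
   discarded low bit is OR'd back in as a sticky bit, the halved value
   (now nonnegative) goes through an ordinary signed conversion, and the
   result is doubled with f0 + f0, which only adjusts the exponent and is
   therefore exact.  The sticky bit keeps the doubled result correctly
   rounded instead of silently dropping the low-order bit.  */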
40802 static bool canonicalize_perm (struct expand_vec_perm_d *d);
40803 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
40804 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
40805 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
40807 /* Get a vector mode of the same size as the original but with elements
40808 twice as wide. This is only guaranteed to apply to integral vectors. */
40810 static inline machine_mode
40811 get_mode_wider_vector (machine_mode o)
40813 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
40814 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
40815 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
40816 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
40817 return n;
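/* For example, V16QImode maps to V8HImode here: the same 16-byte size and
   half as many elements, each twice as wide, which is exactly what the two
   asserts above check.  */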
40820 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
40821 fill target with val via vec_duplicate. */
40823 static bool
40824 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
40826 bool ok;
40827 rtx_insn *insn;
40828 rtx dup;
40830 /* First attempt to recognize VAL as-is. */
40831 dup = gen_rtx_VEC_DUPLICATE (mode, val);
40832 insn = emit_insn (gen_rtx_SET (target, dup));
40833 if (recog_memoized (insn) < 0)
40835 rtx_insn *seq;
40836 machine_mode innermode = GET_MODE_INNER (mode);
40837 rtx reg;
40839 /* If that fails, force VAL into a register. */
40841 start_sequence ();
40842 reg = force_reg (innermode, val);
40843 if (GET_MODE (reg) != innermode)
40844 reg = gen_lowpart (innermode, reg);
40845 XEXP (dup, 0) = reg;
40846 seq = get_insns ();
40847 end_sequence ();
40848 if (seq)
40849 emit_insn_before (seq, insn);
40851 ok = recog_memoized (insn) >= 0;
40852 gcc_assert (ok);
40854 return true;
40857 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40858 with all elements equal to VAR. Return true if successful. */
40860 static bool
40861 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
40862 rtx target, rtx val)
40864 bool ok;
40866 switch (mode)
40868 case E_V2SImode:
40869 case E_V2SFmode:
40870 if (!mmx_ok)
40871 return false;
40872 /* FALLTHRU */
40874 case E_V4DFmode:
40875 case E_V4DImode:
40876 case E_V8SFmode:
40877 case E_V8SImode:
40878 case E_V2DFmode:
40879 case E_V2DImode:
40880 case E_V4SFmode:
40881 case E_V4SImode:
40882 case E_V16SImode:
40883 case E_V8DImode:
40884 case E_V16SFmode:
40885 case E_V8DFmode:
40886 return ix86_vector_duplicate_value (mode, target, val);
40888 case E_V4HImode:
40889 if (!mmx_ok)
40890 return false;
40891 if (TARGET_SSE || TARGET_3DNOW_A)
40893 rtx x;
40895 val = gen_lowpart (SImode, val);
40896 x = gen_rtx_TRUNCATE (HImode, val);
40897 x = gen_rtx_VEC_DUPLICATE (mode, x);
40898 emit_insn (gen_rtx_SET (target, x));
40899 return true;
40901 goto widen;
40903 case E_V8QImode:
40904 if (!mmx_ok)
40905 return false;
40906 goto widen;
40908 case E_V8HImode:
40909 if (TARGET_AVX2)
40910 return ix86_vector_duplicate_value (mode, target, val);
40912 if (TARGET_SSE2)
40914 struct expand_vec_perm_d dperm;
40915 rtx tmp1, tmp2;
40917 permute:
40918 memset (&dperm, 0, sizeof (dperm));
40919 dperm.target = target;
40920 dperm.vmode = mode;
40921 dperm.nelt = GET_MODE_NUNITS (mode);
40922 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
40923 dperm.one_operand_p = true;
40925 /* Extend to SImode using a paradoxical SUBREG. */
40926 tmp1 = gen_reg_rtx (SImode);
40927 emit_move_insn (tmp1, gen_lowpart (SImode, val));
40929 /* Insert the SImode value as low element of a V4SImode vector. */
40930 tmp2 = gen_reg_rtx (V4SImode);
40931 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
40932 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
40934 ok = (expand_vec_perm_1 (&dperm)
40935 || expand_vec_perm_broadcast_1 (&dperm));
40936 gcc_assert (ok);
40937 return ok;
40939 goto widen;
40941 case E_V16QImode:
40942 if (TARGET_AVX2)
40943 return ix86_vector_duplicate_value (mode, target, val);
40945 if (TARGET_SSE2)
40946 goto permute;
40947 goto widen;
40949 widen:
40950 /* Replicate the value once into the next wider mode and recurse. */
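/* For instance, broadcasting a QImode value v into V8QImode (the MMX case)
   first builds the HImode value (v << 8) | v, recurses to broadcast that
   into V4HImode, and finally takes the V8QImode lowpart of the result.  */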
40952 machine_mode smode, wsmode, wvmode;
40953 rtx x;
40955 smode = GET_MODE_INNER (mode);
40956 wvmode = get_mode_wider_vector (mode);
40957 wsmode = GET_MODE_INNER (wvmode);
40959 val = convert_modes (wsmode, smode, val, true);
40960 x = expand_simple_binop (wsmode, ASHIFT, val,
40961 GEN_INT (GET_MODE_BITSIZE (smode)),
40962 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40963 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
40965 x = gen_reg_rtx (wvmode);
40966 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
40967 gcc_assert (ok);
40968 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
40969 return ok;
40972 case E_V16HImode:
40973 case E_V32QImode:
40974 if (TARGET_AVX2)
40975 return ix86_vector_duplicate_value (mode, target, val);
40976 else
40978 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
40979 rtx x = gen_reg_rtx (hvmode);
40981 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
40982 gcc_assert (ok);
40984 x = gen_rtx_VEC_CONCAT (mode, x, x);
40985 emit_insn (gen_rtx_SET (target, x));
40987 return true;
40989 case E_V64QImode:
40990 case E_V32HImode:
40991 if (TARGET_AVX512BW)
40992 return ix86_vector_duplicate_value (mode, target, val);
40993 else
40995 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
40996 rtx x = gen_reg_rtx (hvmode);
40998 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
40999 gcc_assert (ok);
41001 x = gen_rtx_VEC_CONCAT (mode, x, x);
41002 emit_insn (gen_rtx_SET (target, x));
41004 return true;
41006 default:
41007 return false;
41011 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41012 whose ONE_VAR element is VAR, and other elements are zero. Return true
41013 if successful. */
41015 static bool
41016 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
41017 rtx target, rtx var, int one_var)
41019 machine_mode vsimode;
41020 rtx new_target;
41021 rtx x, tmp;
41022 bool use_vector_set = false;
41024 switch (mode)
41026 case E_V2DImode:
41027 /* For SSE4.1, we normally use vector set. But if the second
41028 element is zero and inter-unit moves are OK, we use movq
41029 instead. */
41030 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
41031 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
41032 && one_var == 0));
41033 break;
41034 case E_V16QImode:
41035 case E_V4SImode:
41036 case E_V4SFmode:
41037 use_vector_set = TARGET_SSE4_1;
41038 break;
41039 case E_V8HImode:
41040 use_vector_set = TARGET_SSE2;
41041 break;
41042 case E_V4HImode:
41043 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
41044 break;
41045 case E_V32QImode:
41046 case E_V16HImode:
41047 case E_V8SImode:
41048 case E_V8SFmode:
41049 case E_V4DFmode:
41050 use_vector_set = TARGET_AVX;
41051 break;
41052 case E_V4DImode:
41053 /* Use ix86_expand_vector_set in 64bit mode only. */
41054 use_vector_set = TARGET_AVX && TARGET_64BIT;
41055 break;
41056 default:
41057 break;
41060 if (use_vector_set)
41062 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
41063 var = force_reg (GET_MODE_INNER (mode), var);
41064 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41065 return true;
41068 switch (mode)
41070 case E_V2SFmode:
41071 case E_V2SImode:
41072 if (!mmx_ok)
41073 return false;
41074 /* FALLTHRU */
41076 case E_V2DFmode:
41077 case E_V2DImode:
41078 if (one_var != 0)
41079 return false;
41080 var = force_reg (GET_MODE_INNER (mode), var);
41081 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
41082 emit_insn (gen_rtx_SET (target, x));
41083 return true;
41085 case E_V4SFmode:
41086 case E_V4SImode:
41087 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
41088 new_target = gen_reg_rtx (mode);
41089 else
41090 new_target = target;
41091 var = force_reg (GET_MODE_INNER (mode), var);
41092 x = gen_rtx_VEC_DUPLICATE (mode, var);
41093 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
41094 emit_insn (gen_rtx_SET (new_target, x));
41095 if (one_var != 0)
41097 /* We need to shuffle the value to the correct position, so
41098 create a new pseudo to store the intermediate result. */
41100 /* With SSE2, we can use the integer shuffle insns. */
41101 if (mode != V4SFmode && TARGET_SSE2)
41103 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
41104 const1_rtx,
41105 GEN_INT (one_var == 1 ? 0 : 1),
41106 GEN_INT (one_var == 2 ? 0 : 1),
41107 GEN_INT (one_var == 3 ? 0 : 1)));
41108 if (target != new_target)
41109 emit_move_insn (target, new_target);
41110 return true;
41113 /* Otherwise convert the intermediate result to V4SFmode and
41114 use the SSE1 shuffle instructions. */
41115 if (mode != V4SFmode)
41117 tmp = gen_reg_rtx (V4SFmode);
41118 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
41120 else
41121 tmp = new_target;
41123 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
41124 const1_rtx,
41125 GEN_INT (one_var == 1 ? 0 : 1),
41126 GEN_INT (one_var == 2 ? 0+4 : 1+4),
41127 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
41129 if (mode != V4SFmode)
41130 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
41131 else if (tmp != target)
41132 emit_move_insn (target, tmp);
41134 else if (target != new_target)
41135 emit_move_insn (target, new_target);
41136 return true;
41138 case E_V8HImode:
41139 case E_V16QImode:
41140 vsimode = V4SImode;
41141 goto widen;
41142 case E_V4HImode:
41143 case E_V8QImode:
41144 if (!mmx_ok)
41145 return false;
41146 vsimode = V2SImode;
41147 goto widen;
41148 widen:
41149 if (one_var != 0)
41150 return false;
41152 /* Zero extend the variable element to SImode and recurse. */
41153 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
41155 x = gen_reg_rtx (vsimode);
41156 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
41157 var, one_var))
41158 gcc_unreachable ();
41160 emit_move_insn (target, gen_lowpart (mode, x));
41161 return true;
41163 default:
41164 return false;
41168 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41169 consisting of the values in VALS. It is known that all elements
41170 except ONE_VAR are constants. Return true if successful. */
41172 static bool
41173 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
41174 rtx target, rtx vals, int one_var)
41176 rtx var = XVECEXP (vals, 0, one_var);
41177 machine_mode wmode;
41178 rtx const_vec, x;
41180 const_vec = copy_rtx (vals);
41181 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
41182 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
41184 switch (mode)
41186 case E_V2DFmode:
41187 case E_V2DImode:
41188 case E_V2SFmode:
41189 case E_V2SImode:
41190 /* For the two element vectors, it's just as easy to use
41191 the general case. */
41192 return false;
41194 case E_V4DImode:
41195 /* Use ix86_expand_vector_set in 64bit mode only. */
41196 if (!TARGET_64BIT)
41197 return false;
41198 /* FALLTHRU */
41199 case E_V4DFmode:
41200 case E_V8SFmode:
41201 case E_V8SImode:
41202 case E_V16HImode:
41203 case E_V32QImode:
41204 case E_V4SFmode:
41205 case E_V4SImode:
41206 case E_V8HImode:
41207 case E_V4HImode:
41208 break;
41210 case E_V16QImode:
41211 if (TARGET_SSE4_1)
41212 break;
41213 wmode = V8HImode;
41214 goto widen;
41215 case E_V8QImode:
41216 wmode = V4HImode;
41217 goto widen;
41218 widen:
41219 /* There's no way to set one QImode entry easily. Combine
41220 the variable value with its adjacent constant value, and
41221 promote to an HImode set. */
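/* As an example, for a V16QImode vector whose only variable element is
   number 5, the partner is the constant at element 4 (5 ^ 1); since 5 is
   odd, the variable byte is shifted into the high half, the pair is IOR'd
   into a single HImode value, and that value is inserted at element 2
   (5 >> 1) of the V8HImode view of the constant vector.  */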
41222 x = XVECEXP (vals, 0, one_var ^ 1);
41223 if (one_var & 1)
41225 var = convert_modes (HImode, QImode, var, true);
41226 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
41227 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41228 x = GEN_INT (INTVAL (x) & 0xff);
41230 else
41232 var = convert_modes (HImode, QImode, var, true);
41233 x = gen_int_mode (INTVAL (x) << 8, HImode);
41235 if (x != const0_rtx)
41236 var = expand_simple_binop (HImode, IOR, var, x, var,
41237 1, OPTAB_LIB_WIDEN);
41239 x = gen_reg_rtx (wmode);
41240 emit_move_insn (x, gen_lowpart (wmode, const_vec));
41241 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
41243 emit_move_insn (target, gen_lowpart (mode, x));
41244 return true;
41246 default:
41247 return false;
41250 emit_move_insn (target, const_vec);
41251 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41252 return true;
41255 /* A subroutine of ix86_expand_vector_init_general. Use vector
41256 concatenate to handle the most general case: all values variable,
41257 and none identical. */
41259 static void
41260 ix86_expand_vector_init_concat (machine_mode mode,
41261 rtx target, rtx *ops, int n)
41263 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
41264 rtx first[16], second[8], third[4];
41265 rtvec v;
41266 int i, j;
41268 switch (n)
41270 case 2:
41271 switch (mode)
41273 case E_V16SImode:
41274 cmode = V8SImode;
41275 break;
41276 case E_V16SFmode:
41277 cmode = V8SFmode;
41278 break;
41279 case E_V8DImode:
41280 cmode = V4DImode;
41281 break;
41282 case E_V8DFmode:
41283 cmode = V4DFmode;
41284 break;
41285 case E_V8SImode:
41286 cmode = V4SImode;
41287 break;
41288 case E_V8SFmode:
41289 cmode = V4SFmode;
41290 break;
41291 case E_V4DImode:
41292 cmode = V2DImode;
41293 break;
41294 case E_V4DFmode:
41295 cmode = V2DFmode;
41296 break;
41297 case E_V4SImode:
41298 cmode = V2SImode;
41299 break;
41300 case E_V4SFmode:
41301 cmode = V2SFmode;
41302 break;
41303 case E_V2DImode:
41304 cmode = DImode;
41305 break;
41306 case E_V2SImode:
41307 cmode = SImode;
41308 break;
41309 case E_V2DFmode:
41310 cmode = DFmode;
41311 break;
41312 case E_V2SFmode:
41313 cmode = SFmode;
41314 break;
41315 default:
41316 gcc_unreachable ();
41319 if (!register_operand (ops[1], cmode))
41320 ops[1] = force_reg (cmode, ops[1]);
41321 if (!register_operand (ops[0], cmode))
41322 ops[0] = force_reg (cmode, ops[0]);
41323 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
41324 ops[1])));
41325 break;
41327 case 4:
41328 switch (mode)
41330 case E_V4DImode:
41331 cmode = V2DImode;
41332 break;
41333 case E_V4DFmode:
41334 cmode = V2DFmode;
41335 break;
41336 case E_V4SImode:
41337 cmode = V2SImode;
41338 break;
41339 case E_V4SFmode:
41340 cmode = V2SFmode;
41341 break;
41342 default:
41343 gcc_unreachable ();
41345 goto half;
41347 case 8:
41348 switch (mode)
41350 case E_V8DImode:
41351 cmode = V2DImode;
41352 hmode = V4DImode;
41353 break;
41354 case E_V8DFmode:
41355 cmode = V2DFmode;
41356 hmode = V4DFmode;
41357 break;
41358 case E_V8SImode:
41359 cmode = V2SImode;
41360 hmode = V4SImode;
41361 break;
41362 case E_V8SFmode:
41363 cmode = V2SFmode;
41364 hmode = V4SFmode;
41365 break;
41366 default:
41367 gcc_unreachable ();
41369 goto half;
41371 case 16:
41372 switch (mode)
41374 case E_V16SImode:
41375 cmode = V2SImode;
41376 hmode = V4SImode;
41377 gmode = V8SImode;
41378 break;
41379 case E_V16SFmode:
41380 cmode = V2SFmode;
41381 hmode = V4SFmode;
41382 gmode = V8SFmode;
41383 break;
41384 default:
41385 gcc_unreachable ();
41387 goto half;
41389 half:
41390 /* FIXME: We process inputs backward to help RA. PR 36222. */
41391 i = n - 1;
41392 j = (n >> 1) - 1;
41393 for (; i > 0; i -= 2, j--)
41395 first[j] = gen_reg_rtx (cmode);
41396 v = gen_rtvec (2, ops[i - 1], ops[i]);
41397 ix86_expand_vector_init (false, first[j],
41398 gen_rtx_PARALLEL (cmode, v));
41401 n >>= 1;
41402 if (n > 4)
41404 gcc_assert (hmode != VOIDmode);
41405 gcc_assert (gmode != VOIDmode);
41406 for (i = j = 0; i < n; i += 2, j++)
41408 second[j] = gen_reg_rtx (hmode);
41409 ix86_expand_vector_init_concat (hmode, second [j],
41410 &first [i], 2);
41412 n >>= 1;
41413 for (i = j = 0; i < n; i += 2, j++)
41415 third[j] = gen_reg_rtx (gmode);
41416 ix86_expand_vector_init_concat (gmode, third[j],
41417 &second[i], 2);
41419 n >>= 1;
41420 ix86_expand_vector_init_concat (mode, target, third, n);
41422 else if (n > 2)
41424 gcc_assert (hmode != VOIDmode);
41425 for (i = j = 0; i < n; i += 2, j++)
41427 second[j] = gen_reg_rtx (hmode);
41428 ix86_expand_vector_init_concat (hmode, second [j],
41429 &first [i], 2);
41431 n >>= 1;
41432 ix86_expand_vector_init_concat (mode, target, second, n);
41434 else
41435 ix86_expand_vector_init_concat (mode, target, first, n);
41436 break;
41438 default:
41439 gcc_unreachable ();
41443 /* A subroutine of ix86_expand_vector_init_general. Use vector
41444 interleave to handle the most general case: all values variable,
41445 and none identical. */
41447 static void
41448 ix86_expand_vector_init_interleave (machine_mode mode,
41449 rtx target, rtx *ops, int n)
41451 machine_mode first_imode, second_imode, third_imode, inner_mode;
41452 int i, j;
41453 rtx op0, op1;
41454 rtx (*gen_load_even) (rtx, rtx, rtx);
41455 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
41456 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
41458 switch (mode)
41460 case E_V8HImode:
41461 gen_load_even = gen_vec_setv8hi;
41462 gen_interleave_first_low = gen_vec_interleave_lowv4si;
41463 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41464 inner_mode = HImode;
41465 first_imode = V4SImode;
41466 second_imode = V2DImode;
41467 third_imode = VOIDmode;
41468 break;
41469 case E_V16QImode:
41470 gen_load_even = gen_vec_setv16qi;
41471 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
41472 gen_interleave_second_low = gen_vec_interleave_lowv4si;
41473 inner_mode = QImode;
41474 first_imode = V8HImode;
41475 second_imode = V4SImode;
41476 third_imode = V2DImode;
41477 break;
41478 default:
41479 gcc_unreachable ();
41482 for (i = 0; i < n; i++)
41484 /* Extend the odd element to SImode using a paradoxical SUBREG. */
41485 op0 = gen_reg_rtx (SImode);
41486 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
41488 /* Insert the SImode value as low element of V4SImode vector. */
41489 op1 = gen_reg_rtx (V4SImode);
41490 op0 = gen_rtx_VEC_MERGE (V4SImode,
41491 gen_rtx_VEC_DUPLICATE (V4SImode,
41492 op0),
41493 CONST0_RTX (V4SImode),
41494 const1_rtx);
41495 emit_insn (gen_rtx_SET (op1, op0));
41497 /* Cast the V4SImode vector back to a vector in the original mode. */
41498 op0 = gen_reg_rtx (mode);
41499 emit_move_insn (op0, gen_lowpart (mode, op1));
41501 /* Load even elements into the second position. */
41502 emit_insn (gen_load_even (op0,
41503 force_reg (inner_mode,
41504 ops [i + i + 1]),
41505 const1_rtx));
41507 /* Cast vector to FIRST_IMODE vector. */
41508 ops[i] = gen_reg_rtx (first_imode);
41509 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
41512 /* Interleave low FIRST_IMODE vectors. */
41513 for (i = j = 0; i < n; i += 2, j++)
41515 op0 = gen_reg_rtx (first_imode);
41516 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
41518 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
41519 ops[j] = gen_reg_rtx (second_imode);
41520 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
41523 /* Interleave low SECOND_IMODE vectors. */
41524 switch (second_imode)
41526 case E_V4SImode:
41527 for (i = j = 0; i < n / 2; i += 2, j++)
41529 op0 = gen_reg_rtx (second_imode);
41530 emit_insn (gen_interleave_second_low (op0, ops[i],
41531 ops[i + 1]));
41533 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
41534 vector. */
41535 ops[j] = gen_reg_rtx (third_imode);
41536 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
41538 second_imode = V2DImode;
41539 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41540 /* FALLTHRU */
41542 case E_V2DImode:
41543 op0 = gen_reg_rtx (second_imode);
41544 emit_insn (gen_interleave_second_low (op0, ops[0],
41545 ops[1]));
41547 /* Cast the SECOND_IMODE vector back to a vector in the original
41548 mode. */
41549 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
41550 break;
41552 default:
41553 gcc_unreachable ();
41557 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
41558 all values variable, and none identical. */
41560 static void
41561 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
41562 rtx target, rtx vals)
41564 rtx ops[64], op0, op1, op2, op3, op4, op5;
41565 machine_mode half_mode = VOIDmode;
41566 machine_mode quarter_mode = VOIDmode;
41567 int n, i;
41569 switch (mode)
41571 case E_V2SFmode:
41572 case E_V2SImode:
41573 if (!mmx_ok && !TARGET_SSE)
41574 break;
41575 /* FALLTHRU */
41577 case E_V16SImode:
41578 case E_V16SFmode:
41579 case E_V8DFmode:
41580 case E_V8DImode:
41581 case E_V8SFmode:
41582 case E_V8SImode:
41583 case E_V4DFmode:
41584 case E_V4DImode:
41585 case E_V4SFmode:
41586 case E_V4SImode:
41587 case E_V2DFmode:
41588 case E_V2DImode:
41589 n = GET_MODE_NUNITS (mode);
41590 for (i = 0; i < n; i++)
41591 ops[i] = XVECEXP (vals, 0, i);
41592 ix86_expand_vector_init_concat (mode, target, ops, n);
41593 return;
41595 case E_V2TImode:
41596 for (i = 0; i < 2; i++)
41597 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
41598 op0 = gen_reg_rtx (V4DImode);
41599 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
41600 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
41601 return;
41603 case E_V4TImode:
41604 for (i = 0; i < 4; i++)
41605 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
41606 ops[4] = gen_reg_rtx (V4DImode);
41607 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
41608 ops[5] = gen_reg_rtx (V4DImode);
41609 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
41610 op0 = gen_reg_rtx (V8DImode);
41611 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
41612 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
41613 return;
41615 case E_V32QImode:
41616 half_mode = V16QImode;
41617 goto half;
41619 case E_V16HImode:
41620 half_mode = V8HImode;
41621 goto half;
41623 half:
41624 n = GET_MODE_NUNITS (mode);
41625 for (i = 0; i < n; i++)
41626 ops[i] = XVECEXP (vals, 0, i);
41627 op0 = gen_reg_rtx (half_mode);
41628 op1 = gen_reg_rtx (half_mode);
41629 ix86_expand_vector_init_interleave (half_mode, op0, ops,
41630 n >> 2);
41631 ix86_expand_vector_init_interleave (half_mode, op1,
41632 &ops [n >> 1], n >> 2);
41633 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
41634 return;
41636 case E_V64QImode:
41637 quarter_mode = V16QImode;
41638 half_mode = V32QImode;
41639 goto quarter;
41641 case E_V32HImode:
41642 quarter_mode = V8HImode;
41643 half_mode = V16HImode;
41644 goto quarter;
41646 quarter:
41647 n = GET_MODE_NUNITS (mode);
41648 for (i = 0; i < n; i++)
41649 ops[i] = XVECEXP (vals, 0, i);
41650 op0 = gen_reg_rtx (quarter_mode);
41651 op1 = gen_reg_rtx (quarter_mode);
41652 op2 = gen_reg_rtx (quarter_mode);
41653 op3 = gen_reg_rtx (quarter_mode);
41654 op4 = gen_reg_rtx (half_mode);
41655 op5 = gen_reg_rtx (half_mode);
41656 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
41657 n >> 3);
41658 ix86_expand_vector_init_interleave (quarter_mode, op1,
41659 &ops [n >> 2], n >> 3);
41660 ix86_expand_vector_init_interleave (quarter_mode, op2,
41661 &ops [n >> 1], n >> 3);
41662 ix86_expand_vector_init_interleave (quarter_mode, op3,
41663 &ops [(n >> 1) | (n >> 2)], n >> 3);
41664 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
41665 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
41666 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
41667 return;
41669 case E_V16QImode:
41670 if (!TARGET_SSE4_1)
41671 break;
41672 /* FALLTHRU */
41674 case E_V8HImode:
41675 if (!TARGET_SSE2)
41676 break;
41678 /* Don't use ix86_expand_vector_init_interleave if we can't
41679 move from GPR to SSE register directly. */
41680 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
41681 break;
41683 n = GET_MODE_NUNITS (mode);
41684 for (i = 0; i < n; i++)
41685 ops[i] = XVECEXP (vals, 0, i);
41686 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
41687 return;
41689 case E_V4HImode:
41690 case E_V8QImode:
41691 break;
41693 default:
41694 gcc_unreachable ();
41698 int i, j, n_elts, n_words, n_elt_per_word;
41699 machine_mode inner_mode;
41700 rtx words[4], shift;
41702 inner_mode = GET_MODE_INNER (mode);
41703 n_elts = GET_MODE_NUNITS (mode);
41704 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
41705 n_elt_per_word = n_elts / n_words;
41706 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
41708 for (i = 0; i < n_words; ++i)
41710 rtx word = NULL_RTX;
41712 for (j = 0; j < n_elt_per_word; ++j)
41714 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
41715 elt = convert_modes (word_mode, inner_mode, elt, true);
41717 if (j == 0)
41718 word = elt;
41719 else
41721 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
41722 word, 1, OPTAB_LIB_WIDEN);
41723 word = expand_simple_binop (word_mode, IOR, word, elt,
41724 word, 1, OPTAB_LIB_WIDEN);
41728 words[i] = word;
41731 if (n_words == 1)
41732 emit_move_insn (target, gen_lowpart (mode, words[0]));
41733 else if (n_words == 2)
41735 rtx tmp = gen_reg_rtx (mode);
41736 emit_clobber (tmp);
41737 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
41738 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
41739 emit_move_insn (target, tmp);
41741 else if (n_words == 4)
41743 rtx tmp = gen_reg_rtx (V4SImode);
41744 gcc_assert (word_mode == SImode);
41745 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
41746 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
41747 emit_move_insn (target, gen_lowpart (mode, tmp));
41749 else
41750 gcc_unreachable ();
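/* Packing example for the word-building fallback above: initializing a
   V8QImode vector on a 32-bit target uses n_words == 2 and
   n_elt_per_word == 4; word 0 is accumulated from elements 3, 2, 1, 0 as
   (e3 << 24) | (e2 << 16) | (e1 << 8) | e0, so element 0 lands in the low
   byte as the little-endian layout requires, and the two words are then
   moved into the low and high halves of the result.  */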
41754 /* Initialize vector TARGET via VALS. Suppress the use of MMX
41755 instructions unless MMX_OK is true. */
41757 void
41758 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
41760 machine_mode mode = GET_MODE (target);
41761 machine_mode inner_mode = GET_MODE_INNER (mode);
41762 int n_elts = GET_MODE_NUNITS (mode);
41763 int n_var = 0, one_var = -1;
41764 bool all_same = true, all_const_zero = true;
41765 int i;
41766 rtx x;
41768 /* Handle first initialization from vector elts. */
41769 if (n_elts != XVECLEN (vals, 0))
41771 rtx subtarget = target;
41772 x = XVECEXP (vals, 0, 0);
41773 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
41774 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
41776 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
41777 if (inner_mode == QImode || inner_mode == HImode)
41779 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
41780 mode = mode_for_vector (SImode, n_bits / 4).require ();
41781 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
41782 ops[0] = gen_lowpart (inner_mode, ops[0]);
41783 ops[1] = gen_lowpart (inner_mode, ops[1]);
41784 subtarget = gen_reg_rtx (mode);
41786 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
41787 if (subtarget != target)
41788 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
41789 return;
41791 gcc_unreachable ();
41794 for (i = 0; i < n_elts; ++i)
41796 x = XVECEXP (vals, 0, i);
41797 if (!(CONST_SCALAR_INT_P (x)
41798 || CONST_DOUBLE_P (x)
41799 || CONST_FIXED_P (x)))
41800 n_var++, one_var = i;
41801 else if (x != CONST0_RTX (inner_mode))
41802 all_const_zero = false;
41803 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
41804 all_same = false;
41807 /* Constants are best loaded from the constant pool. */
41808 if (n_var == 0)
41810 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
41811 return;
41814 /* If all values are identical, broadcast the value. */
41815 if (all_same
41816 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
41817 XVECEXP (vals, 0, 0)))
41818 return;
41820 /* Values where only one field is non-constant are best loaded from
41821 the pool and overwritten via move later. */
41822 if (n_var == 1)
41824 if (all_const_zero
41825 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
41826 XVECEXP (vals, 0, one_var),
41827 one_var))
41828 return;
41830 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
41831 return;
41834 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
41837 void
41838 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
41840 machine_mode mode = GET_MODE (target);
41841 machine_mode inner_mode = GET_MODE_INNER (mode);
41842 machine_mode half_mode;
41843 bool use_vec_merge = false;
41844 rtx tmp;
41845 static rtx (*gen_extract[6][2]) (rtx, rtx)
41847 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
41848 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
41849 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
41850 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
41851 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
41852 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
41854 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
41856 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
41857 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
41858 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
41859 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
41860 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
41861 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
41863 int i, j, n;
41864 machine_mode mmode = VOIDmode;
41865 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
41867 switch (mode)
41869 case E_V2SFmode:
41870 case E_V2SImode:
41871 if (mmx_ok)
41873 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
41874 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
41875 if (elt == 0)
41876 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
41877 else
41878 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
41879 emit_insn (gen_rtx_SET (target, tmp));
41880 return;
41882 break;
41884 case E_V2DImode:
41885 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
41886 if (use_vec_merge)
41887 break;
41889 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
41890 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
41891 if (elt == 0)
41892 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
41893 else
41894 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
41895 emit_insn (gen_rtx_SET (target, tmp));
41896 return;
41898 case E_V2DFmode:
41900 rtx op0, op1;
41902 /* For the two element vectors, we implement a VEC_CONCAT with
41903 the extraction of the other element. */
41905 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
41906 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
41908 if (elt == 0)
41909 op0 = val, op1 = tmp;
41910 else
41911 op0 = tmp, op1 = val;
41913 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
41914 emit_insn (gen_rtx_SET (target, tmp));
41916 return;
41918 case E_V4SFmode:
41919 use_vec_merge = TARGET_SSE4_1;
41920 if (use_vec_merge)
41921 break;
41923 switch (elt)
41925 case 0:
41926 use_vec_merge = true;
41927 break;
41929 case 1:
41930 /* tmp = target = A B C D */
41931 tmp = copy_to_reg (target);
41932 /* target = A A B B */
41933 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
41934 /* target = X A B B */
41935 ix86_expand_vector_set (false, target, val, 0);
41936 /* target = A X C D */
41937 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
41938 const1_rtx, const0_rtx,
41939 GEN_INT (2+4), GEN_INT (3+4)));
41940 return;
41942 case 2:
41943 /* tmp = target = A B C D */
41944 tmp = copy_to_reg (target);
41945 /* tmp = X B C D */
41946 ix86_expand_vector_set (false, tmp, val, 0);
41947 /* target = A B X D */
41948 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
41949 const0_rtx, const1_rtx,
41950 GEN_INT (0+4), GEN_INT (3+4)));
41951 return;
41953 case 3:
41954 /* tmp = target = A B C D */
41955 tmp = copy_to_reg (target);
41956 /* tmp = X B C D */
41957 ix86_expand_vector_set (false, tmp, val, 0);
41958 /* target = A B C X */
41959 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
41960 const0_rtx, const1_rtx,
41961 GEN_INT (2+4), GEN_INT (0+4)));
41962 return;
41964 default:
41965 gcc_unreachable ();
41967 break;
41969 case E_V4SImode:
41970 use_vec_merge = TARGET_SSE4_1;
41971 if (use_vec_merge)
41972 break;
41974 /* Element 0 handled by vec_merge below. */
41975 if (elt == 0)
41977 use_vec_merge = true;
41978 break;
41981 if (TARGET_SSE2)
41983 /* With SSE2, use integer shuffles to swap element 0 and ELT,
41984 store into element 0, then shuffle them back. */
41986 rtx order[4];
41988 order[0] = GEN_INT (elt);
41989 order[1] = const1_rtx;
41990 order[2] = const2_rtx;
41991 order[3] = GEN_INT (3);
41992 order[elt] = const0_rtx;
41994 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
41995 order[1], order[2], order[3]));
41997 ix86_expand_vector_set (false, target, val, 0);
41999 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42000 order[1], order[2], order[3]));
42002 else
42004 /* For SSE1, we have to reuse the V4SF code. */
42005 rtx t = gen_reg_rtx (V4SFmode);
42006 emit_move_insn (t, gen_lowpart (V4SFmode, target));
42007 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
42008 emit_move_insn (target, gen_lowpart (mode, t));
42010 return;
42012 case E_V8HImode:
42013 use_vec_merge = TARGET_SSE2;
42014 break;
42015 case E_V4HImode:
42016 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42017 break;
42019 case E_V16QImode:
42020 use_vec_merge = TARGET_SSE4_1;
42021 break;
42023 case E_V8QImode:
42024 break;
42026 case E_V32QImode:
42027 half_mode = V16QImode;
42028 j = 0;
42029 n = 16;
42030 goto half;
42032 case E_V16HImode:
42033 half_mode = V8HImode;
42034 j = 1;
42035 n = 8;
42036 goto half;
42038 case E_V8SImode:
42039 half_mode = V4SImode;
42040 j = 2;
42041 n = 4;
42042 goto half;
42044 case E_V4DImode:
42045 half_mode = V2DImode;
42046 j = 3;
42047 n = 2;
42048 goto half;
42050 case E_V8SFmode:
42051 half_mode = V4SFmode;
42052 j = 4;
42053 n = 4;
42054 goto half;
42056 case E_V4DFmode:
42057 half_mode = V2DFmode;
42058 j = 5;
42059 n = 2;
42060 goto half;
42062 half:
42063 /* Compute offset. */
42064 i = elt / n;
42065 elt %= n;
42067 gcc_assert (i <= 1);
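/* E.g. setting element 5 of a V8SImode vector: n == 4, so i == 1 and elt
   becomes 1; the high V4SImode half is extracted, VAL is stored at element
   1 of it by the recursive call, and the half is then inserted back into
   the full vector.  */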
42069 /* Extract the half. */
42070 tmp = gen_reg_rtx (half_mode);
42071 emit_insn (gen_extract[j][i] (tmp, target));
42073 /* Put val in tmp at elt. */
42074 ix86_expand_vector_set (false, tmp, val, elt);
42076 /* Put it back. */
42077 emit_insn (gen_insert[j][i] (target, target, tmp));
42078 return;
42080 case E_V8DFmode:
42081 if (TARGET_AVX512F)
42083 mmode = QImode;
42084 gen_blendm = gen_avx512f_blendmv8df;
42086 break;
42088 case E_V8DImode:
42089 if (TARGET_AVX512F)
42091 mmode = QImode;
42092 gen_blendm = gen_avx512f_blendmv8di;
42094 break;
42096 case E_V16SFmode:
42097 if (TARGET_AVX512F)
42099 mmode = HImode;
42100 gen_blendm = gen_avx512f_blendmv16sf;
42102 break;
42104 case E_V16SImode:
42105 if (TARGET_AVX512F)
42107 mmode = HImode;
42108 gen_blendm = gen_avx512f_blendmv16si;
42110 break;
42112 case E_V32HImode:
42113 if (TARGET_AVX512F && TARGET_AVX512BW)
42115 mmode = SImode;
42116 gen_blendm = gen_avx512bw_blendmv32hi;
42118 break;
42120 case E_V64QImode:
42121 if (TARGET_AVX512F && TARGET_AVX512BW)
42123 mmode = DImode;
42124 gen_blendm = gen_avx512bw_blendmv64qi;
42126 break;
42128 default:
42129 break;
42132 if (mmode != VOIDmode)
42134 tmp = gen_reg_rtx (mode);
42135 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
42136 /* The avx512*_blendm<mode> expanders have a different operand order
42137 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
42138 elements where the mask is set and the second input operand otherwise;
42139 in {sse,avx}*_*blend* the first input operand is used for elements
42140 where the mask is clear and the second input operand otherwise. */
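/* Mask example: for V8DFmode and ELT == 3 the mask register holds the
   QImode value 1 << 3 == 0x08, so the blendm takes lane 3 from TMP (the
   broadcast of VAL) and every other lane from TARGET, per the operand
   order described above.  */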
42141 emit_insn (gen_blendm (target, target, tmp,
42142 force_reg (mmode,
42143 gen_int_mode (1 << elt, mmode))));
42145 else if (use_vec_merge)
42147 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
42148 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
42149 emit_insn (gen_rtx_SET (target, tmp));
42151 else
42153 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42155 emit_move_insn (mem, target);
42157 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42158 emit_move_insn (tmp, val);
42160 emit_move_insn (target, mem);
42164 void
42165 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
42167 machine_mode mode = GET_MODE (vec);
42168 machine_mode inner_mode = GET_MODE_INNER (mode);
42169 bool use_vec_extr = false;
42170 rtx tmp;
42172 switch (mode)
42174 case E_V2SImode:
42175 case E_V2SFmode:
42176 if (!mmx_ok)
42177 break;
42178 /* FALLTHRU */
42180 case E_V2DFmode:
42181 case E_V2DImode:
42182 case E_V2TImode:
42183 case E_V4TImode:
42184 use_vec_extr = true;
42185 break;
42187 case E_V4SFmode:
42188 use_vec_extr = TARGET_SSE4_1;
42189 if (use_vec_extr)
42190 break;
42192 switch (elt)
42194 case 0:
42195 tmp = vec;
42196 break;
42198 case 1:
42199 case 3:
42200 tmp = gen_reg_rtx (mode);
42201 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
42202 GEN_INT (elt), GEN_INT (elt),
42203 GEN_INT (elt+4), GEN_INT (elt+4)));
42204 break;
42206 case 2:
42207 tmp = gen_reg_rtx (mode);
42208 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
42209 break;
42211 default:
42212 gcc_unreachable ();
42214 vec = tmp;
42215 use_vec_extr = true;
42216 elt = 0;
42217 break;
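/* Without SSE4.1, the shufps above (for ELT 1 or 3) uses selectors
   (elt, elt, elt+4, elt+4) on VEC concatenated with itself, which picks
   element ELT from both copies and so places it in lane 0; the element
   is then read out below as element 0.  */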
42219 case E_V4SImode:
42220 use_vec_extr = TARGET_SSE4_1;
42221 if (use_vec_extr)
42222 break;
42224 if (TARGET_SSE2)
42226 switch (elt)
42228 case 0:
42229 tmp = vec;
42230 break;
42232 case 1:
42233 case 3:
42234 tmp = gen_reg_rtx (mode);
42235 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
42236 GEN_INT (elt), GEN_INT (elt),
42237 GEN_INT (elt), GEN_INT (elt)));
42238 break;
42240 case 2:
42241 tmp = gen_reg_rtx (mode);
42242 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
42243 break;
42245 default:
42246 gcc_unreachable ();
42248 vec = tmp;
42249 use_vec_extr = true;
42250 elt = 0;
42252 else
42254 /* For SSE1, we have to reuse the V4SF code. */
42255 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
42256 gen_lowpart (V4SFmode, vec), elt);
42257 return;
42259 break;
42261 case E_V8HImode:
42262 use_vec_extr = TARGET_SSE2;
42263 break;
42264 case E_V4HImode:
42265 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42266 break;
42268 case E_V16QImode:
42269 use_vec_extr = TARGET_SSE4_1;
42270 break;
42272 case E_V8SFmode:
42273 if (TARGET_AVX)
42275 tmp = gen_reg_rtx (V4SFmode);
42276 if (elt < 4)
42277 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
42278 else
42279 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
42280 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42281 return;
42283 break;
42285 case E_V4DFmode:
42286 if (TARGET_AVX)
42288 tmp = gen_reg_rtx (V2DFmode);
42289 if (elt < 2)
42290 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
42291 else
42292 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
42293 ix86_expand_vector_extract (false, target, tmp, elt & 1);
42294 return;
42296 break;
42298 case E_V32QImode:
42299 if (TARGET_AVX)
42301 tmp = gen_reg_rtx (V16QImode);
42302 if (elt < 16)
42303 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
42304 else
42305 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
42306 ix86_expand_vector_extract (false, target, tmp, elt & 15);
42307 return;
42309 break;
42311 case E_V16HImode:
42312 if (TARGET_AVX)
42314 tmp = gen_reg_rtx (V8HImode);
42315 if (elt < 8)
42316 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
42317 else
42318 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
42319 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42320 return;
42322 break;
42324 case E_V8SImode:
42325 if (TARGET_AVX)
42327 tmp = gen_reg_rtx (V4SImode);
42328 if (elt < 4)
42329 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
42330 else
42331 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
42332 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42333 return;
42335 break;
42337 case E_V4DImode:
42338 if (TARGET_AVX)
42340 tmp = gen_reg_rtx (V2DImode);
42341 if (elt < 2)
42342 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
42343 else
42344 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
42345 ix86_expand_vector_extract (false, target, tmp, elt & 1);
42346 return;
42348 break;
42350 case E_V32HImode:
42351 if (TARGET_AVX512BW)
42353 tmp = gen_reg_rtx (V16HImode);
42354 if (elt < 16)
42355 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
42356 else
42357 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
42358 ix86_expand_vector_extract (false, target, tmp, elt & 15);
42359 return;
42361 break;
42363 case E_V64QImode:
42364 if (TARGET_AVX512BW)
42366 tmp = gen_reg_rtx (V32QImode);
42367 if (elt < 32)
42368 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
42369 else
42370 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
42371 ix86_expand_vector_extract (false, target, tmp, elt & 31);
42372 return;
42374 break;
42376 case E_V16SFmode:
42377 tmp = gen_reg_rtx (V8SFmode);
42378 if (elt < 8)
42379 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
42380 else
42381 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
42382 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42383 return;
42385 case E_V8DFmode:
42386 tmp = gen_reg_rtx (V4DFmode);
42387 if (elt < 4)
42388 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
42389 else
42390 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
42391 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42392 return;
42394 case E_V16SImode:
42395 tmp = gen_reg_rtx (V8SImode);
42396 if (elt < 8)
42397 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
42398 else
42399 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
42400 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42401 return;
42403 case E_V8DImode:
42404 tmp = gen_reg_rtx (V4DImode);
42405 if (elt < 4)
42406 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
42407 else
42408 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
42409 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42410 return;
42412 case E_V8QImode:
42413 /* ??? Could extract the appropriate HImode element and shift. */
42414 default:
42415 break;
42418 if (use_vec_extr)
42420 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
42421 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
42423 /* Let the rtl optimizers know about the zero extension performed. */
42424 if (inner_mode == QImode || inner_mode == HImode)
42426 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
42427 target = gen_lowpart (SImode, target);
42430 emit_insn (gen_rtx_SET (target, tmp));
42432 else
42434 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42436 emit_move_insn (mem, vec);
42438 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42439 emit_move_insn (target, tmp);
42443 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
42444 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
42445 The upper bits of DEST are undefined, though they shouldn't cause
42446 exceptions (some bits from src or all zeros are ok). */
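/* For example, for V8SFmode with I == 256 this is done with a vperm2f128
   that swaps the two 128-bit lanes, and for the 128-bit integer modes
   with a whole-register logical shift right by I/2 bits.  */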
42448 static void
42449 emit_reduc_half (rtx dest, rtx src, int i)
42451 rtx tem, d = dest;
42452 switch (GET_MODE (src))
42454 case E_V4SFmode:
42455 if (i == 128)
42456 tem = gen_sse_movhlps (dest, src, src);
42457 else
42458 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
42459 GEN_INT (1 + 4), GEN_INT (1 + 4));
42460 break;
42461 case E_V2DFmode:
42462 tem = gen_vec_interleave_highv2df (dest, src, src);
42463 break;
42464 case E_V16QImode:
42465 case E_V8HImode:
42466 case E_V4SImode:
42467 case E_V2DImode:
42468 d = gen_reg_rtx (V1TImode);
42469 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
42470 GEN_INT (i / 2));
42471 break;
42472 case E_V8SFmode:
42473 if (i == 256)
42474 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
42475 else
42476 tem = gen_avx_shufps256 (dest, src, src,
42477 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
42478 break;
42479 case E_V4DFmode:
42480 if (i == 256)
42481 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
42482 else
42483 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
42484 break;
42485 case E_V32QImode:
42486 case E_V16HImode:
42487 case E_V8SImode:
42488 case E_V4DImode:
42489 if (i == 256)
42491 if (GET_MODE (dest) != V4DImode)
42492 d = gen_reg_rtx (V4DImode);
42493 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
42494 gen_lowpart (V4DImode, src),
42495 const1_rtx);
42497 else
42499 d = gen_reg_rtx (V2TImode);
42500 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
42501 GEN_INT (i / 2));
42503 break;
42504 case E_V64QImode:
42505 case E_V32HImode:
42506 case E_V16SImode:
42507 case E_V16SFmode:
42508 case E_V8DImode:
42509 case E_V8DFmode:
42510 if (i > 128)
42511 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
42512 gen_lowpart (V16SImode, src),
42513 gen_lowpart (V16SImode, src),
42514 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
42515 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
42516 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
42517 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
42518 GEN_INT (0xC), GEN_INT (0xD),
42519 GEN_INT (0xE), GEN_INT (0xF),
42520 GEN_INT (0x10), GEN_INT (0x11),
42521 GEN_INT (0x12), GEN_INT (0x13),
42522 GEN_INT (0x14), GEN_INT (0x15),
42523 GEN_INT (0x16), GEN_INT (0x17));
42524 else
42525 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
42526 gen_lowpart (V16SImode, src),
42527 GEN_INT (i == 128 ? 0x2 : 0x1),
42528 GEN_INT (0x3),
42529 GEN_INT (0x3),
42530 GEN_INT (0x3),
42531 GEN_INT (i == 128 ? 0x6 : 0x5),
42532 GEN_INT (0x7),
42533 GEN_INT (0x7),
42534 GEN_INT (0x7),
42535 GEN_INT (i == 128 ? 0xA : 0x9),
42536 GEN_INT (0xB),
42537 GEN_INT (0xB),
42538 GEN_INT (0xB),
42539 GEN_INT (i == 128 ? 0xE : 0xD),
42540 GEN_INT (0xF),
42541 GEN_INT (0xF),
42542 GEN_INT (0xF));
42543 break;
42544 default:
42545 gcc_unreachable ();
42547 emit_insn (tem);
42548 if (d != dest)
42549 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
42552 /* Expand a vector reduction. FN is the binary pattern to reduce;
42553 DEST is the destination; IN is the input vector. */
42555 void
42556 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
42558 rtx half, dst, vec = in;
42559 machine_mode mode = GET_MODE (in);
42560 int i;
42562 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
42563 if (TARGET_SSE4_1
42564 && mode == V8HImode
42565 && fn == gen_uminv8hi3)
42567 emit_insn (gen_sse4_1_phminposuw (dest, in));
42568 return;
42571 for (i = GET_MODE_BITSIZE (mode);
42572 i > GET_MODE_UNIT_BITSIZE (mode);
42573 i >>= 1)
42575 half = gen_reg_rtx (mode);
42576 emit_reduc_half (half, vec, i);
42577 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
42578 dst = dest;
42579 else
42580 dst = gen_reg_rtx (mode);
42581 emit_insn (fn (dst, half, vec));
42582 vec = dst;
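/* For a V4SImode reduction the loop above runs with i = 128 and i = 64;
   each emit_reduc_half moves the upper half of the remaining data into
   the low elements and FN combines the two halves, so the final scalar
   result ends up in element 0 of DEST.  */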
42586 /* Target hook for scalar_mode_supported_p. */
42587 static bool
42588 ix86_scalar_mode_supported_p (scalar_mode mode)
42590 if (DECIMAL_FLOAT_MODE_P (mode))
42591 return default_decimal_float_supported_p ();
42592 else if (mode == TFmode)
42593 return true;
42594 else
42595 return default_scalar_mode_supported_p (mode);
42598 /* Implements target hook vector_mode_supported_p. */
42599 static bool
42600 ix86_vector_mode_supported_p (machine_mode mode)
42602 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42603 return true;
42604 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42605 return true;
42606 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42607 return true;
42608 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42609 return true;
42610 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
42611 return true;
42612 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
42613 return true;
42614 return false;
42617 /* Target hook for c_mode_for_suffix. */
42618 static machine_mode
42619 ix86_c_mode_for_suffix (char suffix)
42621 if (suffix == 'q')
42622 return TFmode;
42623 if (suffix == 'w')
42624 return XFmode;
42626 return VOIDmode;
42629 /* Worker function for TARGET_MD_ASM_ADJUST.
42631 We implement asm flag outputs, and maintain source compatibility
42632 with the old cc0-based compiler. */
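/* A sketch of how the "=@cc<cond>" outputs handled below are used from C
   (variable names are purely illustrative):

     int le;
     asm ("cmp %2, %1" : "=@ccle" (le) : "r" (a), "r" (b));

   LE then receives the "less or equal" flag condition left by the
   comparison, roughly le = (a <= b).  */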
42634 static rtx_insn *
42635 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
42636 vec<const char *> &constraints,
42637 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
42639 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
42640 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
42642 bool saw_asm_flag = false;
42644 start_sequence ();
42645 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
42647 const char *con = constraints[i];
42648 if (strncmp (con, "=@cc", 4) != 0)
42649 continue;
42650 con += 4;
42651 if (strchr (con, ',') != NULL)
42653 error ("alternatives not allowed in asm flag output");
42654 continue;
42657 bool invert = false;
42658 if (con[0] == 'n')
42659 invert = true, con++;
42661 machine_mode mode = CCmode;
42662 rtx_code code = UNKNOWN;
42664 switch (con[0])
42666 case 'a':
42667 if (con[1] == 0)
42668 mode = CCAmode, code = EQ;
42669 else if (con[1] == 'e' && con[2] == 0)
42670 mode = CCCmode, code = NE;
42671 break;
42672 case 'b':
42673 if (con[1] == 0)
42674 mode = CCCmode, code = EQ;
42675 else if (con[1] == 'e' && con[2] == 0)
42676 mode = CCAmode, code = NE;
42677 break;
42678 case 'c':
42679 if (con[1] == 0)
42680 mode = CCCmode, code = EQ;
42681 break;
42682 case 'e':
42683 if (con[1] == 0)
42684 mode = CCZmode, code = EQ;
42685 break;
42686 case 'g':
42687 if (con[1] == 0)
42688 mode = CCGCmode, code = GT;
42689 else if (con[1] == 'e' && con[2] == 0)
42690 mode = CCGCmode, code = GE;
42691 break;
42692 case 'l':
42693 if (con[1] == 0)
42694 mode = CCGCmode, code = LT;
42695 else if (con[1] == 'e' && con[2] == 0)
42696 mode = CCGCmode, code = LE;
42697 break;
42698 case 'o':
42699 if (con[1] == 0)
42700 mode = CCOmode, code = EQ;
42701 break;
42702 case 'p':
42703 if (con[1] == 0)
42704 mode = CCPmode, code = EQ;
42705 break;
42706 case 's':
42707 if (con[1] == 0)
42708 mode = CCSmode, code = EQ;
42709 break;
42710 case 'z':
42711 if (con[1] == 0)
42712 mode = CCZmode, code = EQ;
42713 break;
42715 if (code == UNKNOWN)
42717 error ("unknown asm flag output %qs", constraints[i]);
42718 continue;
42720 if (invert)
42721 code = reverse_condition (code);
42723 rtx dest = outputs[i];
42724 if (!saw_asm_flag)
42726 /* This is the first asm flag output. Here we put the flags
42727 register in as the real output and adjust the condition to
42728 allow it. */
42729 constraints[i] = "=Bf";
42730 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
42731 saw_asm_flag = true;
42733 else
42735 /* We don't need the flags register as output twice. */
42736 constraints[i] = "=X";
42737 outputs[i] = gen_rtx_SCRATCH (SImode);
42740 rtx x = gen_rtx_REG (mode, FLAGS_REG);
42741 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
42743 machine_mode dest_mode = GET_MODE (dest);
42744 if (!SCALAR_INT_MODE_P (dest_mode))
42746 error ("invalid type for asm flag output");
42747 continue;
42750 if (dest_mode == DImode && !TARGET_64BIT)
42751 dest_mode = SImode;
42753 if (dest_mode != QImode)
42755 rtx destqi = gen_reg_rtx (QImode);
42756 emit_insn (gen_rtx_SET (destqi, x));
42758 if (TARGET_ZERO_EXTEND_WITH_AND
42759 && optimize_function_for_speed_p (cfun))
42761 x = force_reg (dest_mode, const0_rtx);
42763 emit_insn (gen_movstrictqi
42764 (gen_lowpart (QImode, x), destqi));
42766 else
42767 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
42770 if (dest_mode != GET_MODE (dest))
42772 rtx tmp = gen_reg_rtx (SImode);
42774 emit_insn (gen_rtx_SET (tmp, x));
42775 emit_insn (gen_zero_extendsidi2 (dest, tmp));
42777 else
42778 emit_insn (gen_rtx_SET (dest, x));
42780 rtx_insn *seq = get_insns ();
42781 end_sequence ();
42783 if (saw_asm_flag)
42784 return seq;
42785 else
42787 /* If we had no asm flag outputs, clobber the flags. */
42788 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
42789 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
42790 return NULL;
42794 /* Implements target hook targetm.asm.encode_section_info. */
42796 static void ATTRIBUTE_UNUSED
42797 ix86_encode_section_info (tree decl, rtx rtl, int first)
42799 default_encode_section_info (decl, rtl, first);
42801 if (ix86_in_large_data_p (decl))
42802 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
42805 /* Worker function for REVERSE_CONDITION. */
42807 enum rtx_code
42808 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
42810 return (mode != CCFPmode && mode != CCFPUmode
42811 ? reverse_condition (code)
42812 : reverse_condition_maybe_unordered (code));
42815 /* Output code to perform an x87 FP register move, from OPERANDS[1]
42816 to OPERANDS[0]. */
42818 const char *
42819 output_387_reg_move (rtx_insn *insn, rtx *operands)
42821 if (REG_P (operands[0]))
42823 if (REG_P (operands[1])
42824 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
42826 if (REGNO (operands[0]) == FIRST_STACK_REG)
42827 return output_387_ffreep (operands, 0);
42828 return "fstp\t%y0";
42830 if (STACK_TOP_P (operands[0]))
42831 return "fld%Z1\t%y1";
42832 return "fst\t%y0";
42834 else if (MEM_P (operands[0]))
42836 gcc_assert (REG_P (operands[1]));
42837 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
42838 return "fstp%Z0\t%y0";
42839 else
42841 /* There is no non-popping store to memory for XFmode.
42842 So if we need one, follow the store with a load. */
42843 if (GET_MODE (operands[0]) == XFmode)
42844 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
42845 else
42846 return "fst%Z0\t%y0";
42849 else
42850 gcc_unreachable();
42853 /* Output code to perform a conditional jump to LABEL, if C2 flag in
42854 FP status register is set. */
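/* C2 is bit 10 of the x87 status word, i.e. bit 2 of the high byte
   stored by fnstsw; hence the 0x04 test used below when SAHF is not
   available, while the SAHF path loads that byte into EFLAGS and tests
   the unordered condition instead.  */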
42856 void
42857 ix86_emit_fp_unordered_jump (rtx label)
42859 rtx reg = gen_reg_rtx (HImode);
42860 rtx temp;
42862 emit_insn (gen_x86_fnstsw_1 (reg));
42864 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
42866 emit_insn (gen_x86_sahf_1 (reg));
42868 temp = gen_rtx_REG (CCmode, FLAGS_REG);
42869 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
42871 else
42873 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
42875 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
42876 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
42879 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
42880 gen_rtx_LABEL_REF (VOIDmode, label),
42881 pc_rtx);
42882 temp = gen_rtx_SET (pc_rtx, temp);
42884 emit_jump_insn (temp);
42885 predict_jump (REG_BR_PROB_BASE * 10 / 100);
42888 /* Output code to perform a log1p XFmode calculation. */
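/* fyl2xp1 is only specified for |x| below roughly 1 - sqrt(2)/2 (about
   0.2928932, the constant tested below), so for larger |x| the code
   falls back to fyl2x on 1 + x; both paths scale by ln(2) to turn the
   base-2 logarithm into a natural logarithm.  */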
42890 void ix86_emit_i387_log1p (rtx op0, rtx op1)
42892 rtx_code_label *label1 = gen_label_rtx ();
42893 rtx_code_label *label2 = gen_label_rtx ();
42895 rtx tmp = gen_reg_rtx (XFmode);
42896 rtx tmp2 = gen_reg_rtx (XFmode);
42897 rtx test;
42899 emit_insn (gen_absxf2 (tmp, op1));
42900 test = gen_rtx_GE (VOIDmode, tmp,
42901 const_double_from_real_value (
42902 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
42903 XFmode));
42904 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
42906 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
42907 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
42908 emit_jump (label2);
42910 emit_label (label1);
42911 emit_move_insn (tmp, CONST1_RTX (XFmode));
42912 emit_insn (gen_addxf3 (tmp, op1, tmp));
42913 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
42914 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
42916 emit_label (label2);
42919 /* Emit code for round calculation. */
42920 void ix86_emit_i387_round (rtx op0, rtx op1)
42922 machine_mode inmode = GET_MODE (op1);
42923 machine_mode outmode = GET_MODE (op0);
42924 rtx e1, e2, res, tmp, tmp1, half;
42925 rtx scratch = gen_reg_rtx (HImode);
42926 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
42927 rtx_code_label *jump_label = gen_label_rtx ();
42928 rtx insn;
42929 rtx (*gen_abs) (rtx, rtx);
42930 rtx (*gen_neg) (rtx, rtx);
42932 switch (inmode)
42934 case E_SFmode:
42935 gen_abs = gen_abssf2;
42936 break;
42937 case E_DFmode:
42938 gen_abs = gen_absdf2;
42939 break;
42940 case E_XFmode:
42941 gen_abs = gen_absxf2;
42942 break;
42943 default:
42944 gcc_unreachable ();
42947 switch (outmode)
42949 case E_SFmode:
42950 gen_neg = gen_negsf2;
42951 break;
42952 case E_DFmode:
42953 gen_neg = gen_negdf2;
42954 break;
42955 case E_XFmode:
42956 gen_neg = gen_negxf2;
42957 break;
42958 case E_HImode:
42959 gen_neg = gen_neghi2;
42960 break;
42961 case E_SImode:
42962 gen_neg = gen_negsi2;
42963 break;
42964 case E_DImode:
42965 gen_neg = gen_negdi2;
42966 break;
42967 default:
42968 gcc_unreachable ();
42971 e1 = gen_reg_rtx (inmode);
42972 e2 = gen_reg_rtx (inmode);
42973 res = gen_reg_rtx (outmode);
42975 half = const_double_from_real_value (dconsthalf, inmode);
42977 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
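/* E.g. for op1 = -2.5 this computes floor (2.5 + 0.5) = 3 and then
   negates because the sign bit of op1 is set, giving -3; halfway cases
   are thus rounded away from zero.  */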
42979 /* scratch = fxam(op1) */
42980 emit_insn (gen_rtx_SET (scratch,
42981 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
42982 UNSPEC_FXAM)));
42983 /* e1 = fabs(op1) */
42984 emit_insn (gen_abs (e1, op1));
42986 /* e2 = e1 + 0.5 */
42987 half = force_reg (inmode, half);
42988 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
42990 /* res = floor(e2) */
42991 if (inmode != XFmode)
42993 tmp1 = gen_reg_rtx (XFmode);
42995 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
42997 else
42998 tmp1 = e2;
43000 switch (outmode)
43002 case E_SFmode:
43003 case E_DFmode:
43005 rtx tmp0 = gen_reg_rtx (XFmode);
43007 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
43009 emit_insn (gen_rtx_SET (res,
43010 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
43011 UNSPEC_TRUNC_NOOP)));
43013 break;
43014 case E_XFmode:
43015 emit_insn (gen_frndintxf2_floor (res, tmp1));
43016 break;
43017 case E_HImode:
43018 emit_insn (gen_lfloorxfhi2 (res, tmp1));
43019 break;
43020 case E_SImode:
43021 emit_insn (gen_lfloorxfsi2 (res, tmp1));
43022 break;
43023 case E_DImode:
43024 emit_insn (gen_lfloorxfdi2 (res, tmp1));
43025 break;
43026 default:
43027 gcc_unreachable ();
43030 /* flags = signbit(a) */
43031 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
43033 /* if (flags) then res = -res */
43034 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
43035 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
43036 gen_rtx_LABEL_REF (VOIDmode, jump_label),
43037 pc_rtx);
43038 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43039 predict_jump (REG_BR_PROB_BASE * 50 / 100);
43040 JUMP_LABEL (insn) = jump_label;
43042 emit_insn (gen_neg (res, res));
43044 emit_label (jump_label);
43045 LABEL_NUSES (jump_label) = 1;
43047 emit_move_insn (op0, res);
43050 /* Output code to perform a Newton-Raphson approximation of a single precision
43051 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
43053 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
43055 rtx x0, x1, e0, e1;
43057 x0 = gen_reg_rtx (mode);
43058 e0 = gen_reg_rtx (mode);
43059 e1 = gen_reg_rtx (mode);
43060 x1 = gen_reg_rtx (mode);
43062 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
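/* This is one Newton-Raphson step for 1/b starting from x0 = rcp(b):
   x1 = x0 * (2 - b * x0) = 2*x0 - b*x0*x0, computed below as e1 - e0.  */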
43064 b = force_reg (mode, b);
43066 /* x0 = rcp(b) estimate */
43067 if (mode == V16SFmode || mode == V8DFmode)
43069 if (TARGET_AVX512ER)
43071 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43072 UNSPEC_RCP28)));
43073 /* res = a * x0 */
43074 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
43075 return;
43077 else
43078 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43079 UNSPEC_RCP14)));
43081 else
43082 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43083 UNSPEC_RCP)));
43085 /* e0 = x0 * b */
43086 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
43088 /* e0 = x0 * e0 */
43089 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
43091 /* e1 = x0 + x0 */
43092 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
43094 /* x1 = e1 - e0 */
43095 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
43097 /* res = a * x1 */
43098 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
43101 /* Output code to perform a Newton-Raphson approximation of a
43102 single precision floating point [reciprocal] square root. */
43104 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
43106 rtx x0, e0, e1, e2, e3, mthree, mhalf;
43107 REAL_VALUE_TYPE r;
43108 int unspec;
43110 x0 = gen_reg_rtx (mode);
43111 e0 = gen_reg_rtx (mode);
43112 e1 = gen_reg_rtx (mode);
43113 e2 = gen_reg_rtx (mode);
43114 e3 = gen_reg_rtx (mode);
43116 if (TARGET_AVX512ER && mode == V16SFmode)
43118 if (recip)
43119 /* res = rsqrt28(a) estimate */
43120 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43121 UNSPEC_RSQRT28)));
43122 else
43124 /* x0 = rsqrt28(a) estimate */
43125 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43126 UNSPEC_RSQRT28)));
43127 /* res = rcp28(x0) estimate */
43128 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
43129 UNSPEC_RCP28)));
43131 return;
43134 real_from_integer (&r, VOIDmode, -3, SIGNED);
43135 mthree = const_double_from_real_value (r, SFmode);
43137 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
43138 mhalf = const_double_from_real_value (r, SFmode);
43139 unspec = UNSPEC_RSQRT;
43141 if (VECTOR_MODE_P (mode))
43143 mthree = ix86_build_const_vector (mode, true, mthree);
43144 mhalf = ix86_build_const_vector (mode, true, mhalf);
43145 /* There is no 512-bit rsqrt. There is however rsqrt14. */
43146 if (GET_MODE_SIZE (mode) == 64)
43147 unspec = UNSPEC_RSQRT14;
43150 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
43151 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
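/* These are single Newton-Raphson steps: starting from x0 = rsqrt(a),
   x1 = 0.5 * x0 * (3 - a*x0*x0) refines 1/sqrt(a); using e0 = a*x0 in
   the final product instead of x0 multiplies the result by a and thus
   yields sqrt(a).  */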
43153 a = force_reg (mode, a);
43155 /* x0 = rsqrt(a) estimate */
43156 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43157 unspec)));
43159 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
43160 if (!recip)
43162 rtx zero = force_reg (mode, CONST0_RTX(mode));
43163 rtx mask;
43165 /* Handle masked compare. */
43166 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
43168 mask = gen_reg_rtx (HImode);
43169 /* Imm value 0x4 corresponds to not-equal comparison. */
43170 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
43171 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
43173 else
43175 mask = gen_reg_rtx (mode);
43176 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
43177 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
43181 /* e0 = x0 * a */
43182 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
43183 /* e1 = e0 * x0 */
43184 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
43186 /* e2 = e1 - 3. */
43187 mthree = force_reg (mode, mthree);
43188 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
43190 mhalf = force_reg (mode, mhalf);
43191 if (recip)
43192 /* e3 = -.5 * x0 */
43193 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
43194 else
43195 /* e3 = -.5 * e0 */
43196 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
43197 /* ret = e2 * e3 */
43198 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
43201 #ifdef TARGET_SOLARIS
43202 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
43204 static void
43205 i386_solaris_elf_named_section (const char *name, unsigned int flags,
43206 tree decl)
43208 /* With Binutils 2.15, the "@unwind" marker must be specified on
43209 every occurrence of the ".eh_frame" section, not just the first
43210 one. */
43211 if (TARGET_64BIT
43212 && strcmp (name, ".eh_frame") == 0)
43214 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
43215 flags & SECTION_WRITE ? "aw" : "a");
43216 return;
43219 #ifndef USE_GAS
43220 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
43222 solaris_elf_asm_comdat_section (name, flags, decl);
43223 return;
43225 #endif
43227 default_elf_asm_named_section (name, flags, decl);
43229 #endif /* TARGET_SOLARIS */
43231 /* Return the mangling of TYPE if it is an extended fundamental type. */
43233 static const char *
43234 ix86_mangle_type (const_tree type)
43236 type = TYPE_MAIN_VARIANT (type);
43238 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
43239 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
43240 return NULL;
43242 switch (TYPE_MODE (type))
43244 case E_TFmode:
43245 /* __float128 is "g". */
43246 return "g";
43247 case E_XFmode:
43248 /* "long double" or __float80 is "e". */
43249 return "e";
43250 default:
43251 return NULL;
43255 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
43257 static tree
43258 ix86_stack_protect_guard (void)
43260 if (TARGET_SSP_TLS_GUARD)
43262 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
43263 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
43264 tree type = build_qualified_type (type_node, qual);
43265 tree t;
43267 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
43269 t = ix86_tls_stack_chk_guard_decl;
43271 if (t == NULL)
43273 rtx x;
43275 t = build_decl
43276 (UNKNOWN_LOCATION, VAR_DECL,
43277 get_identifier (ix86_stack_protector_guard_symbol_str),
43278 type);
43279 TREE_STATIC (t) = 1;
43280 TREE_PUBLIC (t) = 1;
43281 DECL_EXTERNAL (t) = 1;
43282 TREE_USED (t) = 1;
43283 TREE_THIS_VOLATILE (t) = 1;
43284 DECL_ARTIFICIAL (t) = 1;
43285 DECL_IGNORED_P (t) = 1;
43287 /* Do not share RTL as the declaration is visible outside of
43288 current function. */
43289 x = DECL_RTL (t);
43290 RTX_FLAG (x, used) = 1;
43292 ix86_tls_stack_chk_guard_decl = t;
43295 else
43297 tree asptrtype = build_pointer_type (type);
43299 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
43300 t = build2 (MEM_REF, asptrtype, t,
43301 build_int_cst (asptrtype, 0));
43304 return t;
43307 return default_stack_protect_guard ();
43310 /* For 32-bit code we can save PIC register setup by using
43311 __stack_chk_fail_local hidden function instead of calling
43312 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
43313 register, so it is better to call __stack_chk_fail directly. */
43315 static tree ATTRIBUTE_UNUSED
43316 ix86_stack_protect_fail (void)
43318 return TARGET_64BIT
43319 ? default_external_stack_protect_fail ()
43320 : default_hidden_stack_protect_fail ();
43323 /* Select a format to encode pointers in exception handling data. CODE
43324 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
43325 true if the symbol may be affected by dynamic relocations.
43327 ??? All x86 object file formats are capable of representing this.
43328 After all, the relocation needed is the same as for the call insn.
43329 Whether or not a particular assembler allows us to enter such, I
43330 guess we'll have to see. */
43331 int
43332 asm_preferred_eh_data_format (int code, int global)
43334 if (flag_pic)
43336 int type = DW_EH_PE_sdata8;
43337 if (!TARGET_64BIT
43338 || ix86_cmodel == CM_SMALL_PIC
43339 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
43340 type = DW_EH_PE_sdata4;
43341 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
43343 if (ix86_cmodel == CM_SMALL
43344 || (ix86_cmodel == CM_MEDIUM && code))
43345 return DW_EH_PE_udata4;
43346 return DW_EH_PE_absptr;
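/* For example, 64-bit PIC code with the default small PIC code model
   gets DW_EH_PE_pcrel | DW_EH_PE_sdata4, with DW_EH_PE_indirect added
   when GLOBAL is set.  */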
43349 /* Expand copysign from SIGN to the positive value ABS_VALUE
43350 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
43351 the sign-bit. */
43352 static void
43353 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
43355 machine_mode mode = GET_MODE (sign);
43356 rtx sgn = gen_reg_rtx (mode);
43357 if (mask == NULL_RTX)
43359 machine_mode vmode;
43361 if (mode == SFmode)
43362 vmode = V4SFmode;
43363 else if (mode == DFmode)
43364 vmode = V2DFmode;
43365 else
43366 vmode = mode;
43368 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
43369 if (!VECTOR_MODE_P (mode))
43371 /* We need to generate a scalar mode mask in this case. */
43372 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
43373 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
43374 mask = gen_reg_rtx (mode);
43375 emit_insn (gen_rtx_SET (mask, tmp));
43378 else
43379 mask = gen_rtx_NOT (mode, mask);
43380 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
43381 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
43384 /* Expand fabs (OP0) and return a new rtx that holds the result. The
43385 mask for masking out the sign-bit is stored in *SMASK, if that is
43386 non-null. */
43387 static rtx
43388 ix86_expand_sse_fabs (rtx op0, rtx *smask)
43390 machine_mode vmode, mode = GET_MODE (op0);
43391 rtx xa, mask;
43393 xa = gen_reg_rtx (mode);
43394 if (mode == SFmode)
43395 vmode = V4SFmode;
43396 else if (mode == DFmode)
43397 vmode = V2DFmode;
43398 else
43399 vmode = mode;
43400 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
43401 if (!VECTOR_MODE_P (mode))
43403 /* We need to generate a scalar mode mask in this case. */
43404 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
43405 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
43406 mask = gen_reg_rtx (mode);
43407 emit_insn (gen_rtx_SET (mask, tmp));
43409 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
43411 if (smask)
43412 *smask = mask;
43414 return xa;
43417 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
43418 swapping the operands if SWAP_OPERANDS is true. The expanded
43419 code is a forward jump to a newly created label in case the
43420 comparison is true. The generated label rtx is returned. */
43421 static rtx_code_label *
43422 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
43423 bool swap_operands)
43425 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
43426 rtx_code_label *label;
43427 rtx tmp;
43429 if (swap_operands)
43430 std::swap (op0, op1);
43432 label = gen_label_rtx ();
43433 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
43434 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
43435 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
43436 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
43437 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
43438 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43439 JUMP_LABEL (tmp) = label;
43441 return label;
43444 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
43445 using comparison code CODE. Operands are swapped for the comparison if
43446 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
43447 static rtx
43448 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
43449 bool swap_operands)
43451 rtx (*insn)(rtx, rtx, rtx, rtx);
43452 machine_mode mode = GET_MODE (op0);
43453 rtx mask = gen_reg_rtx (mode);
43455 if (swap_operands)
43456 std::swap (op0, op1);
43458 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
43460 emit_insn (insn (mask, op0, op1,
43461 gen_rtx_fmt_ee (code, mode, op0, op1)));
43462 return mask;
43465 /* Generate and return a rtx of mode MODE for 2**n where n is the number
43466 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
43467 static rtx
43468 ix86_gen_TWO52 (machine_mode mode)
43470 REAL_VALUE_TYPE TWO52r;
43471 rtx TWO52;
43473 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
43474 TWO52 = const_double_from_real_value (TWO52r, mode);
43475 TWO52 = force_reg (mode, TWO52);
43477 return TWO52;
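/* Adding and then subtracting this constant rounds a nonnegative value
   smaller than 2**52 (2**23 for SFmode) to an integer in the current
   rounding mode, because after the addition the mantissa has no bits
   left for a fractional part.  */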
43480 /* Expand SSE sequence for computing lround from OP1 storing
43481 into OP0. */
43482 void
43483 ix86_expand_lround (rtx op0, rtx op1)
43485 /* C code for the stuff we're doing below:
43486 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
43487 return (long)tmp;
43489 machine_mode mode = GET_MODE (op1);
43490 const struct real_format *fmt;
43491 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
43492 rtx adj;
43494 /* load nextafter (0.5, 0.0) */
43495 fmt = REAL_MODE_FORMAT (mode);
43496 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43497 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
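/* nextafter (0.5, 0.0) is used instead of 0.5 so that inputs just below
   0.5 are not rounded up: for the largest double below 0.5, adding
   exactly 0.5 would round to 1.0 and truncate to 1, while adding
   pred_half keeps the sum below 1.0 and gives the correct result 0.  */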
43499 /* adj = copysign (0.5, op1) */
43500 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
43501 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
43503 /* adj = op1 + adj */
43504 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
43506 /* op0 = (imode)adj */
43507 expand_fix (op0, adj, 0);
43510 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
43511 into OP0. */
43512 void
43513 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
43515 /* C code for the stuff we're doing below (for do_floor):
43516 xi = (long)op1;
43517 xi -= (double)xi > op1 ? 1 : 0;
43518 return xi;
43520 machine_mode fmode = GET_MODE (op1);
43521 machine_mode imode = GET_MODE (op0);
43522 rtx ireg, freg, tmp;
43523 rtx_code_label *label;
43525 /* reg = (long)op1 */
43526 ireg = gen_reg_rtx (imode);
43527 expand_fix (ireg, op1, 0);
43529 /* freg = (double)reg */
43530 freg = gen_reg_rtx (fmode);
43531 expand_float (freg, ireg, 0);
43533 /* ireg = (freg > op1) ? ireg - 1 : ireg */
43534 label = ix86_expand_sse_compare_and_jump (UNLE,
43535 freg, op1, !do_floor);
43536 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
43537 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
43538 emit_move_insn (ireg, tmp);
43540 emit_label (label);
43541 LABEL_NUSES (label) = 1;
43543 emit_move_insn (op0, ireg);
43546 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
43547 result in OPERAND0. */
43548 void
43549 ix86_expand_rint (rtx operand0, rtx operand1)
43551 /* C code for the stuff we're doing below:
43552 xa = fabs (operand1);
43553 if (!isless (xa, 2**52))
43554 return operand1;
43555 xa = xa + 2**52 - 2**52;
43556 return copysign (xa, operand1);
43558 machine_mode mode = GET_MODE (operand0);
43559 rtx res, xa, TWO52, mask;
43560 rtx_code_label *label;
43562 res = gen_reg_rtx (mode);
43563 emit_move_insn (res, operand1);
43565 /* xa = abs (operand1) */
43566 xa = ix86_expand_sse_fabs (res, &mask);
43568 /* if (!isless (xa, TWO52)) goto label; */
43569 TWO52 = ix86_gen_TWO52 (mode);
43570 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43572 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
43573 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
43575 ix86_sse_copysign_to_positive (res, xa, res, mask);
43577 emit_label (label);
43578 LABEL_NUSES (label) = 1;
43580 emit_move_insn (operand0, res);
43583 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
43584 into OPERAND0. */
43585 void
43586 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
43588 /* C code for the stuff we expand below.
43589 double xa = fabs (x), x2;
43590 if (!isless (xa, TWO52))
43591 return x;
43592 xa = xa + TWO52 - TWO52;
43593 x2 = copysign (xa, x);
43594 Compensate. Floor:
43595 if (x2 > x)
43596 x2 -= 1;
43597 Compensate. Ceil:
43598 if (x2 < x)
43599 x2 -= -1;
43600 return x2;
43602 machine_mode mode = GET_MODE (operand0);
43603 rtx xa, TWO52, tmp, one, res, mask;
43604 rtx_code_label *label;
43606 TWO52 = ix86_gen_TWO52 (mode);
43608 /* Temporary for holding the result, initialized to the input
43609 operand to ease control flow. */
43610 res = gen_reg_rtx (mode);
43611 emit_move_insn (res, operand1);
43613 /* xa = abs (operand1) */
43614 xa = ix86_expand_sse_fabs (res, &mask);
43616 /* if (!isless (xa, TWO52)) goto label; */
43617 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43619 /* xa = xa + TWO52 - TWO52; */
43620 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
43621 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
43623 /* xa = copysign (xa, operand1) */
43624 ix86_sse_copysign_to_positive (xa, xa, res, mask);
43626 /* generate 1.0 or -1.0 */
43627 one = force_reg (mode,
43628 const_double_from_real_value (do_floor
43629 ? dconst1 : dconstm1, mode));
43631 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
43632 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
43633 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
43634 /* We always need to subtract here to preserve signed zero. */
43635 tmp = expand_simple_binop (mode, MINUS,
43636 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
43637 emit_move_insn (res, tmp);
43639 emit_label (label);
43640 LABEL_NUSES (label) = 1;
43642 emit_move_insn (operand0, res);
43645 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
43646 into OPERAND0. */
43647 void
43648 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
43650 /* C code for the stuff we expand below.
43651 double xa = fabs (x), x2;
43652 if (!isless (xa, TWO52))
43653 return x;
43654 x2 = (double)(long)x;
43655 Compensate. Floor:
43656 if (x2 > x)
43657 x2 -= 1;
43658 Compensate. Ceil:
43659 if (x2 < x)
43660 x2 += 1;
43661 if (HONOR_SIGNED_ZEROS (mode))
43662 return copysign (x2, x);
43663 return x2;
43665 machine_mode mode = GET_MODE (operand0);
43666 rtx xa, xi, TWO52, tmp, one, res, mask;
43667 rtx_code_label *label;
43669 TWO52 = ix86_gen_TWO52 (mode);
43671 /* Temporary for holding the result, initialized to the input
43672 operand to ease control flow. */
43673 res = gen_reg_rtx (mode);
43674 emit_move_insn (res, operand1);
43676 /* xa = abs (operand1) */
43677 xa = ix86_expand_sse_fabs (res, &mask);
43679 /* if (!isless (xa, TWO52)) goto label; */
43680 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43682 /* xa = (double)(long)x */
43683 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
43684 expand_fix (xi, res, 0);
43685 expand_float (xa, xi, 0);
43687 /* generate 1.0 */
43688 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
43690 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
43691 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
43692 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
43693 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
43694 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
43695 emit_move_insn (res, tmp);
43697 if (HONOR_SIGNED_ZEROS (mode))
43698 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
43700 emit_label (label);
43701 LABEL_NUSES (label) = 1;
43703 emit_move_insn (operand0, res);
43706 /* Expand SSE sequence for computing round from OPERAND1 storing
43707 into OPERAND0. Sequence that works without relying on DImode truncation
43708 via cvttsd2siq, which is only available on 64-bit targets. */
43709 void
43710 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
43712 /* C code for the stuff we expand below.
43713 double xa = fabs (x), xa2, x2;
43714 if (!isless (xa, TWO52))
43715 return x;
43716 Using the absolute value and copying back sign makes
43717 -0.0 -> -0.0 correct.
43718 xa2 = xa + TWO52 - TWO52;
43719 Compensate.
43720 dxa = xa2 - xa;
43721 if (dxa <= -0.5)
43722 xa2 += 1;
43723 else if (dxa > 0.5)
43724 xa2 -= 1;
43725 x2 = copysign (xa2, x);
43726 return x2;
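For example, assuming the default round-to-nearest-even mode: for
x = 2.5, xa2 = 2.5 + 2**52 - 2**52 = 2.0 and dxa = -0.5, so the
dxa <= -0.5 branch fires and xa2 becomes 3.0, giving round (2.5) = 3.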
43728 machine_mode mode = GET_MODE (operand0);
43729 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
43730 rtx_code_label *label;
43732 TWO52 = ix86_gen_TWO52 (mode);
43734 /* Temporary for holding the result, initialized to the input
43735 operand to ease control flow. */
43736 res = gen_reg_rtx (mode);
43737 emit_move_insn (res, operand1);
43739 /* xa = abs (operand1) */
43740 xa = ix86_expand_sse_fabs (res, &mask);
43742 /* if (!isless (xa, TWO52)) goto label; */
43743 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43745 /* xa2 = xa + TWO52 - TWO52; */
43746 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
43747 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
43749 /* dxa = xa2 - xa; */
43750 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
43752 /* generate 0.5, 1.0 and -0.5 */
43753 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
43754 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
43755 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
43756 0, OPTAB_DIRECT);
43758 /* Compensate. */
43759 tmp = gen_reg_rtx (mode);
43760 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
43761 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
43762 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
43763 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
43764 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
43765 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
43766 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
43767 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
43769 /* res = copysign (xa2, operand1) */
43770 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
43772 emit_label (label);
43773 LABEL_NUSES (label) = 1;
43775 emit_move_insn (operand0, res);
43778 /* Expand SSE sequence for computing trunc from OPERAND1 storing
43779 into OPERAND0. */
43780 void
43781 ix86_expand_trunc (rtx operand0, rtx operand1)
43783 /* C code for SSE variant we expand below.
43784 double xa = fabs (x), x2;
43785 if (!isless (xa, TWO52))
43786 return x;
43787 x2 = (double)(long)x;
43788 if (HONOR_SIGNED_ZEROS (mode))
43789 return copysign (x2, x);
43790 return x2;
43792 machine_mode mode = GET_MODE (operand0);
43793 rtx xa, xi, TWO52, res, mask;
43794 rtx_code_label *label;
43796 TWO52 = ix86_gen_TWO52 (mode);
43798 /* Temporary for holding the result, initialized to the input
43799 operand to ease control flow. */
43800 res = gen_reg_rtx (mode);
43801 emit_move_insn (res, operand1);
43803 /* xa = abs (operand1) */
43804 xa = ix86_expand_sse_fabs (res, &mask);
43806 /* if (!isless (xa, TWO52)) goto label; */
43807 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43809 /* x = (double)(long)x */
43810 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
43811 expand_fix (xi, res, 0);
43812 expand_float (res, xi, 0);
43814 if (HONOR_SIGNED_ZEROS (mode))
43815 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
43817 emit_label (label);
43818 LABEL_NUSES (label) = 1;
43820 emit_move_insn (operand0, res);
43823 /* Expand SSE sequence for computing trunc from OPERAND1 storing
43824 into OPERAND0. */
43825 void
43826 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
43828 machine_mode mode = GET_MODE (operand0);
43829 rtx xa, mask, TWO52, one, res, smask, tmp;
43830 rtx_code_label *label;
43832 /* C code for SSE variant we expand below.
43833 double xa = fabs (x), x2;
43834 if (!isless (xa, TWO52))
43835 return x;
43836 xa2 = xa + TWO52 - TWO52;
43837 Compensate:
43838 if (xa2 > xa)
43839 xa2 -= 1.0;
43840 x2 = copysign (xa2, x);
43841 return x2;
43844 TWO52 = ix86_gen_TWO52 (mode);
43846 /* Temporary for holding the result, initialized to the input
43847 operand to ease control flow. */
43848 res = gen_reg_rtx (mode);
43849 emit_move_insn (res, operand1);
43851 /* xa = abs (operand1) */
43852 xa = ix86_expand_sse_fabs (res, &smask);
43854 /* if (!isless (xa, TWO52)) goto label; */
43855 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43857 /* res = xa + TWO52 - TWO52; */
43858 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
43859 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
43860 emit_move_insn (res, tmp);
43862 /* generate 1.0 */
43863 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
43865 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
43866 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
43867 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
43868 tmp = expand_simple_binop (mode, MINUS,
43869 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
43870 emit_move_insn (res, tmp);
43872 /* res = copysign (res, operand1) */
43873 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
43875 emit_label (label);
43876 LABEL_NUSES (label) = 1;
43878 emit_move_insn (operand0, res);
43881 /* Expand SSE sequence for computing round from OPERAND1 storing
43882 into OPERAND0. */
43883 void
43884 ix86_expand_round (rtx operand0, rtx operand1)
43886 /* C code for the stuff we're doing below:
43887 double xa = fabs (x);
43888 if (!isless (xa, TWO52))
43889 return x;
43890 xa = (double)(long)(xa + nextafter (0.5, 0.0));
43891 return copysign (xa, x);
43893 machine_mode mode = GET_MODE (operand0);
43894 rtx res, TWO52, xa, xi, half, mask;
43895 rtx_code_label *label;
43896 const struct real_format *fmt;
43897 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
43899 /* Temporary for holding the result, initialized to the input
43900 operand to ease control flow. */
43901 res = gen_reg_rtx (mode);
43902 emit_move_insn (res, operand1);
43904 TWO52 = ix86_gen_TWO52 (mode);
43905 xa = ix86_expand_sse_fabs (res, &mask);
43906 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43908 /* load nextafter (0.5, 0.0) */
43909 fmt = REAL_MODE_FORMAT (mode);
43910 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43911 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
43913 /* xa = xa + 0.5 */
43914 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
43915 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
43917 /* xa = (double)(int64_t)xa */
43918 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
43919 expand_fix (xi, xa, 0);
43920 expand_float (xa, xi, 0);
43922 /* res = copysign (xa, operand1) */
43923 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
43925 emit_label (label);
43926 LABEL_NUSES (label) = 1;
43928 emit_move_insn (operand0, res);
43931 /* Expand SSE sequence for computing round
43932 from OP1 storing into OP0 using sse4 round insn. */
43933 void
43934 ix86_expand_round_sse4 (rtx op0, rtx op1)
43936 machine_mode mode = GET_MODE (op0);
43937 rtx e1, e2, res, half;
43938 const struct real_format *fmt;
43939 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
43940 rtx (*gen_copysign) (rtx, rtx, rtx);
43941 rtx (*gen_round) (rtx, rtx, rtx);
43943 switch (mode)
43945 case E_SFmode:
43946 gen_copysign = gen_copysignsf3;
43947 gen_round = gen_sse4_1_roundsf2;
43948 break;
43949 case E_DFmode:
43950 gen_copysign = gen_copysigndf3;
43951 gen_round = gen_sse4_1_rounddf2;
43952 break;
43953 default:
43954 gcc_unreachable ();
43957 /* round (a) = trunc (a + copysign (0.5, a)) */
43959 /* load nextafter (0.5, 0.0) */
43960 fmt = REAL_MODE_FORMAT (mode);
43961 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43962 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
43963 half = const_double_from_real_value (pred_half, mode);
43965 /* e1 = copysign (0.5, op1) */
43966 e1 = gen_reg_rtx (mode);
43967 emit_insn (gen_copysign (e1, half, op1));
43969 /* e2 = op1 + e1 */
43970 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
43972 /* res = trunc (e2) */
43973 res = gen_reg_rtx (mode);
43974 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
43976 emit_move_insn (op0, res);
43980 /* Table of valid machine attributes. */
43981 static const struct attribute_spec ix86_attribute_table[] =
43983 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
43984 affects_type_identity } */
43985 /* Stdcall attribute says callee is responsible for popping arguments
43986 if they are not variable. */
43987 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43988 true },
43989 /* Fastcall attribute says callee is responsible for popping arguments
43990 if they are not variable. */
43991 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43992 true },
43993 /* Thiscall attribute says callee is responsible for popping arguments
43994 if they are not variable. */
43995 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43996 true },
43997 /* Cdecl attribute says the callee is a normal C declaration */
43998 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
43999 true },
44000 /* Regparm attribute specifies how many integer arguments are to be
44001 passed in registers. */
44002 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
44003 true },
44004 /* Sseregparm attribute says we are using x86_64 calling conventions
44005 for FP arguments. */
44006 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44007 true },
44008 /* The transactional memory builtins are implicitly regparm or fastcall
44009 depending on the ABI. Override the generic do-nothing attribute that
44010 these builtins were declared with. */
44011 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
44012 true },
44013 /* force_align_arg_pointer says this function realigns the stack at entry. */
44014 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
44015 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
44016 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
44017 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
44018 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
44019 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
44020 false },
44021 #endif
44022 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
44023 false },
44024 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
44025 false },
44026 #ifdef SUBTARGET_ATTRIBUTE_TABLE
44027 SUBTARGET_ATTRIBUTE_TABLE,
44028 #endif
44029 /* ms_abi and sysv_abi calling convention function attributes. */
44030 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
44031 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
44032 { "ms_abi va_list", 0, 0, false, false, false, NULL, false },
44033 { "sysv_abi va_list", 0, 0, false, false, false, NULL, false },
44034 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
44035 false },
44036 { "callee_pop_aggregate_return", 1, 1, false, true, true,
44037 ix86_handle_callee_pop_aggregate_return, true },
44038 { "interrupt", 0, 0, false, true, true,
44039 ix86_handle_interrupt_attribute, false },
44040 { "no_caller_saved_registers", 0, 0, false, true, true,
44041 ix86_handle_no_caller_saved_registers_attribute, false },
44042 { "naked", 0, 0, true, false, false,
44043 ix86_handle_fndecl_attribute, false },
44045 /* End element. */
44046 { NULL, 0, 0, false, false, false, NULL, false }
44049 /* Implement targetm.vectorize.builtin_vectorization_cost. */
44050 static int
44051 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
44052 tree vectype, int)
44054 bool fp = false;
44055 machine_mode mode = TImode;
44056 if (vectype != NULL)
44058 fp = FLOAT_TYPE_P (vectype);
44059 mode = TYPE_MODE (vectype);
44062 switch (type_of_cost)
44064 case scalar_stmt:
44065 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
44067 case scalar_load:
44068 /* load/store costs are relative to register move which is 2. Recompute
44069 it to COSTS_N_INSNS so everything has the same base. */
44070 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
44071 : ix86_cost->int_load [2]) / 2;
44073 case scalar_store:
44074 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
44075 : ix86_cost->int_store [2]) / 2;
44077 case vector_stmt:
44078 return ix86_vec_cost (mode,
44079 fp ? ix86_cost->addss : ix86_cost->sse_op,
44080 true);
44082 case vector_load:
44083 return ix86_vec_cost (mode,
44084 COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
44085 true);
44087 case vector_store:
44088 return ix86_vec_cost (mode,
44089 COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
44090 true);
44092 case vec_to_scalar:
44093 case scalar_to_vec:
44094 return ix86_vec_cost (mode, ix86_cost->sse_op, true);
44096 /* We should have separate costs for unaligned loads and gather/scatter.
44097 Do that incrementally. */
44098 case unaligned_load:
44099 case vector_gather_load:
44100 return ix86_vec_cost (mode,
44101 COSTS_N_INSNS (ix86_cost->sse_load[2]),
44102 true);
44104 case unaligned_store:
44105 case vector_scatter_store:
44106 return ix86_vec_cost (mode,
44107 COSTS_N_INSNS (ix86_cost->sse_store[2]),
44108 true);
44110 case cond_branch_taken:
44111 return ix86_cost->cond_taken_branch_cost;
44113 case cond_branch_not_taken:
44114 return ix86_cost->cond_not_taken_branch_cost;
44116 case vec_perm:
44117 case vec_promote_demote:
44118 return ix86_vec_cost (mode,
44119 ix86_cost->sse_op, true);
44121 case vec_construct:
44122 return ix86_vec_cost (mode, ix86_cost->sse_op, false);
44124 default:
44125 gcc_unreachable ();
44129 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
44130 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
44131 insn every time. */
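/* expand_vselect below rewrites this cached insn in place and calls
   recog_memoized to check whether the requested permutation matches an
   existing define_insn; for instance a V4SFmode vec_select of a
   two-operand vec_concat with indices {0, 1, 4, 5} should match the
   movlhps pattern.  */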
44133 static GTY(()) rtx_insn *vselect_insn;
44135 /* Initialize vselect_insn. */
44137 static void
44138 init_vselect_insn (void)
44140 unsigned i;
44141 rtx x;
44143 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
44144 for (i = 0; i < MAX_VECT_LEN; ++i)
44145 XVECEXP (x, 0, i) = const0_rtx;
44146 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
44147 const0_rtx), x);
44148 x = gen_rtx_SET (const0_rtx, x);
44149 start_sequence ();
44150 vselect_insn = emit_insn (x);
44151 end_sequence ();
44154 /* Construct (set target (vec_select op0 (parallel perm))) and
44155 return true if that's a valid instruction in the active ISA. */
44157 static bool
44158 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
44159 unsigned nelt, bool testing_p)
44161 unsigned int i;
44162 rtx x, save_vconcat;
44163 int icode;
44165 if (vselect_insn == NULL_RTX)
44166 init_vselect_insn ();
44168 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
44169 PUT_NUM_ELEM (XVEC (x, 0), nelt);
44170 for (i = 0; i < nelt; ++i)
44171 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
44172 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44173 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
44174 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
44175 SET_DEST (PATTERN (vselect_insn)) = target;
44176 icode = recog_memoized (vselect_insn);
44178 if (icode >= 0 && !testing_p)
44179 emit_insn (copy_rtx (PATTERN (vselect_insn)));
44181 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
44182 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
44183 INSN_CODE (vselect_insn) = -1;
44185 return icode >= 0;
44188 /* Similar, but generate a vec_concat from op0 and op1 as well. */
44190 static bool
44191 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
44192 const unsigned char *perm, unsigned nelt,
44193 bool testing_p)
44195 machine_mode v2mode;
44196 rtx x;
44197 bool ok;
44199 if (vselect_insn == NULL_RTX)
44200 init_vselect_insn ();
44202 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
44203 return false;
44204 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44205 PUT_MODE (x, v2mode);
44206 XEXP (x, 0) = op0;
44207 XEXP (x, 1) = op1;
44208 ok = expand_vselect (target, x, perm, nelt, testing_p);
44209 XEXP (x, 0) = const0_rtx;
44210 XEXP (x, 1) = const0_rtx;
44211 return ok;
44214 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44215 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
44217 static bool
44218 expand_vec_perm_blend (struct expand_vec_perm_d *d)
44220 machine_mode mmode, vmode = d->vmode;
44221 unsigned i, mask, nelt = d->nelt;
44222 rtx target, op0, op1, maskop, x;
44223 rtx rperm[32], vperm;
44225 if (d->one_operand_p)
44226 return false;
44227 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
44228 && (TARGET_AVX512BW
44229 || GET_MODE_UNIT_SIZE (vmode) >= 4))
44231 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
44233 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
44235 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
44237 else
44238 return false;
44240 /* This is a blend, not a permute. Elements must stay in their
44241 respective lanes. */
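/* For example, with V4SImode (nelt == 4) the permutation { 0, 5, 2, 7 }
   is a valid blend, since each element i is either i or i + 4, whereas
   { 1, 5, 2, 7 } is rejected because element 0 would have to move.  */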
44242 for (i = 0; i < nelt; ++i)
44244 unsigned e = d->perm[i];
44245 if (!(e == i || e == i + nelt))
44246 return false;
44249 if (d->testing_p)
44250 return true;
44252 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
44253 decision should be extracted elsewhere, so that we only try that
44254 sequence once all budget==3 options have been tried. */
44255 target = d->target;
44256 op0 = d->op0;
44257 op1 = d->op1;
44258 mask = 0;
44260 switch (vmode)
44262 case E_V8DFmode:
44263 case E_V16SFmode:
44264 case E_V4DFmode:
44265 case E_V8SFmode:
44266 case E_V2DFmode:
44267 case E_V4SFmode:
44268 case E_V8HImode:
44269 case E_V8SImode:
44270 case E_V32HImode:
44271 case E_V64QImode:
44272 case E_V16SImode:
44273 case E_V8DImode:
44274 for (i = 0; i < nelt; ++i)
44275 mask |= (d->perm[i] >= nelt) << i;
44276 break;
44278 case E_V2DImode:
44279 for (i = 0; i < 2; ++i)
44280 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
44281 vmode = V8HImode;
44282 goto do_subreg;
44284 case E_V4SImode:
44285 for (i = 0; i < 4; ++i)
44286 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
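/* E.g. the V4SImode blend { 0, 5, 2, 7 } yields mask 0xCC, i.e. a pblendw
   immediate taking HImode words 2-3 and 6-7 from op1 once the operands
   are viewed as V8HImode.  */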
44287 vmode = V8HImode;
44288 goto do_subreg;
44290 case E_V16QImode:
44291 /* See if bytes move in pairs so we can use pblendw with
44292 an immediate argument, rather than pblendvb with a vector
44293 argument. */
44294 for (i = 0; i < 16; i += 2)
44295 if (d->perm[i] + 1 != d->perm[i + 1])
44297 use_pblendvb:
44298 for (i = 0; i < nelt; ++i)
44299 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
44301 finish_pblendvb:
44302 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
44303 vperm = force_reg (vmode, vperm);
44305 if (GET_MODE_SIZE (vmode) == 16)
44306 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
44307 else
44308 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
44309 if (target != d->target)
44310 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44311 return true;
44314 for (i = 0; i < 8; ++i)
44315 mask |= (d->perm[i * 2] >= 16) << i;
44316 vmode = V8HImode;
44317 /* FALLTHRU */
44319 do_subreg:
44320 target = gen_reg_rtx (vmode);
44321 op0 = gen_lowpart (vmode, op0);
44322 op1 = gen_lowpart (vmode, op1);
44323 break;
44325 case E_V32QImode:
44326 /* See if bytes move in pairs. If not, vpblendvb must be used. */
44327 for (i = 0; i < 32; i += 2)
44328 if (d->perm[i] + 1 != d->perm[i + 1])
44329 goto use_pblendvb;
44330 /* See if bytes move in quadruplets. If yes, vpblendd
44331 with immediate can be used. */
44332 for (i = 0; i < 32; i += 4)
44333 if (d->perm[i] + 2 != d->perm[i + 2])
44334 break;
44335 if (i < 32)
44337 /* See if bytes move the same in both lanes. If yes,
44338 vpblendw with immediate can be used. */
44339 for (i = 0; i < 16; i += 2)
44340 if (d->perm[i] + 16 != d->perm[i + 16])
44341 goto use_pblendvb;
44343 /* Use vpblendw. */
44344 for (i = 0; i < 16; ++i)
44345 mask |= (d->perm[i * 2] >= 32) << i;
44346 vmode = V16HImode;
44347 goto do_subreg;
44350 /* Use vpblendd. */
44351 for (i = 0; i < 8; ++i)
44352 mask |= (d->perm[i * 4] >= 32) << i;
44353 vmode = V8SImode;
44354 goto do_subreg;
44356 case E_V16HImode:
44357 /* See if words move in pairs. If yes, vpblendd can be used. */
44358 for (i = 0; i < 16; i += 2)
44359 if (d->perm[i] + 1 != d->perm[i + 1])
44360 break;
44361 if (i < 16)
44363 /* See if words move the same in both lanes. If not,
44364 vpblendvb must be used. */
44365 for (i = 0; i < 8; i++)
44366 if (d->perm[i] + 8 != d->perm[i + 8])
44368 /* Use vpblendvb. */
44369 for (i = 0; i < 32; ++i)
44370 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
44372 vmode = V32QImode;
44373 nelt = 32;
44374 target = gen_reg_rtx (vmode);
44375 op0 = gen_lowpart (vmode, op0);
44376 op1 = gen_lowpart (vmode, op1);
44377 goto finish_pblendvb;
44380 /* Use vpblendw. */
44381 for (i = 0; i < 16; ++i)
44382 mask |= (d->perm[i] >= 16) << i;
44383 break;
44386 /* Use vpblendd. */
44387 for (i = 0; i < 8; ++i)
44388 mask |= (d->perm[i * 2] >= 16) << i;
44389 vmode = V8SImode;
44390 goto do_subreg;
44392 case E_V4DImode:
44393 /* Use vpblendd. */
44394 for (i = 0; i < 4; ++i)
44395 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
44396 vmode = V8SImode;
44397 goto do_subreg;
44399 default:
44400 gcc_unreachable ();
44403 switch (vmode)
44405 case E_V8DFmode:
44406 case E_V8DImode:
44407 mmode = QImode;
44408 break;
44409 case E_V16SFmode:
44410 case E_V16SImode:
44411 mmode = HImode;
44412 break;
44413 case E_V32HImode:
44414 mmode = SImode;
44415 break;
44416 case E_V64QImode:
44417 mmode = DImode;
44418 break;
44419 default:
44420 mmode = VOIDmode;
44423 if (mmode != VOIDmode)
44424 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
44425 else
44426 maskop = GEN_INT (mask);
44428 /* This matches five different patterns with the different modes. */
44429 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
44430 x = gen_rtx_SET (target, x);
44431 emit_insn (x);
44432 if (target != d->target)
44433 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44435 return true;
44438 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44439 in terms of the variable form of vpermilps.
44441 Note that we will have already failed the immediate input vpermilps,
44442 which requires that the high and low part shuffle be identical; the
44443 variable form doesn't require that. */
44445 static bool
44446 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
44448 rtx rperm[8], vperm;
44449 unsigned i;
44451 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
44452 return false;
44454 /* We can only permute within the 128-bit lane. */
44455 for (i = 0; i < 8; ++i)
44457 unsigned e = d->perm[i];
44458 if (i < 4 ? e >= 4 : e < 4)
44459 return false;
44462 if (d->testing_p)
44463 return true;
44465 for (i = 0; i < 8; ++i)
44467 unsigned e = d->perm[i];
44469 /* Within each 128-bit lane, the elements of op0 are numbered
44470 from 0 and the elements of op1 are numbered from 4. */
44471 if (e >= 8 + 4)
44472 e -= 8;
44473 else if (e >= 4)
44474 e -= 4;
44476 rperm[i] = GEN_INT (e);
44479 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
44480 vperm = force_reg (V8SImode, vperm);
44481 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
44483 return true;
44486 /* Return true if permutation D can be performed as a VMODE permutation
44487 instead.  */
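/* For example, the V16QImode permutation { 4, 5, 6, 7, 0, 1, 2, 3,
   12, 13, 14, 15, 8, 9, 10, 11 } moves whole aligned 4-byte chunks and is
   therefore also expressible as the V4SImode permutation { 1, 0, 3, 2 },
   whereas a permutation starting { 1, 2, 3, 4, ... } is rejected because
   its first chunk does not start on a chunk boundary.  */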
44489 static bool
44490 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
44492 unsigned int i, j, chunk;
44494 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
44495 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
44496 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
44497 return false;
44499 if (GET_MODE_NUNITS (vmode) >= d->nelt)
44500 return true;
44502 chunk = d->nelt / GET_MODE_NUNITS (vmode);
44503 for (i = 0; i < d->nelt; i += chunk)
44504 if (d->perm[i] & (chunk - 1))
44505 return false;
44506 else
44507 for (j = 1; j < chunk; ++j)
44508 if (d->perm[i] + j != d->perm[i + j])
44509 return false;
44511 return true;
44514 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44515 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
44517 static bool
44518 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
44520 unsigned i, nelt, eltsz, mask;
44521 unsigned char perm[64];
44522 machine_mode vmode = V16QImode;
44523 rtx rperm[64], vperm, target, op0, op1;
44525 nelt = d->nelt;
44527 if (!d->one_operand_p)
44529 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
44531 if (TARGET_AVX2
44532 && valid_perm_using_mode_p (V2TImode, d))
44534 if (d->testing_p)
44535 return true;
44537 /* Use vperm2i128 insn. The pattern uses
44538 V4DImode instead of V2TImode. */
44539 target = d->target;
44540 if (d->vmode != V4DImode)
44541 target = gen_reg_rtx (V4DImode);
44542 op0 = gen_lowpart (V4DImode, d->op0);
44543 op1 = gen_lowpart (V4DImode, d->op1);
44544 rperm[0]
44545 = GEN_INT ((d->perm[0] / (nelt / 2))
44546 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
44547 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
44548 if (target != d->target)
44549 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44550 return true;
44552 return false;
44555 else
44557 if (GET_MODE_SIZE (d->vmode) == 16)
44559 if (!TARGET_SSSE3)
44560 return false;
44562 else if (GET_MODE_SIZE (d->vmode) == 32)
44564 if (!TARGET_AVX2)
44565 return false;
44567 /* V4DImode should already have been handled through
44568 expand_vselect by the vpermq instruction. */
44569 gcc_assert (d->vmode != V4DImode);
44571 vmode = V32QImode;
44572 if (d->vmode == V8SImode
44573 || d->vmode == V16HImode
44574 || d->vmode == V32QImode)
44576 /* First see if vpermq can be used for
44577 V8SImode/V16HImode/V32QImode. */
44578 if (valid_perm_using_mode_p (V4DImode, d))
44580 for (i = 0; i < 4; i++)
44581 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
44582 if (d->testing_p)
44583 return true;
44584 target = gen_reg_rtx (V4DImode);
44585 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
44586 perm, 4, false))
44588 emit_move_insn (d->target,
44589 gen_lowpart (d->vmode, target));
44590 return true;
44592 return false;
44595 /* Next see if vpermd can be used. */
44596 if (valid_perm_using_mode_p (V8SImode, d))
44597 vmode = V8SImode;
44599 /* Or if vpermps can be used. */
44600 else if (d->vmode == V8SFmode)
44601 vmode = V8SImode;
44603 if (vmode == V32QImode)
44605 /* vpshufb only works intra-lane; it is not
44606 possible to shuffle bytes between the lanes. */
44607 for (i = 0; i < nelt; ++i)
44608 if ((d->perm[i] ^ i) & (nelt / 2))
44609 return false;
44612 else if (GET_MODE_SIZE (d->vmode) == 64)
44614 if (!TARGET_AVX512BW)
44615 return false;
44617 /* If vpermq didn't work, vpshufb won't work either. */
44618 if (d->vmode == V8DFmode || d->vmode == V8DImode)
44619 return false;
44621 vmode = V64QImode;
44622 if (d->vmode == V16SImode
44623 || d->vmode == V32HImode
44624 || d->vmode == V64QImode)
44626 /* First see if vpermq can be used for
44627 V16SImode/V32HImode/V64QImode. */
44628 if (valid_perm_using_mode_p (V8DImode, d))
44630 for (i = 0; i < 8; i++)
44631 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
44632 if (d->testing_p)
44633 return true;
44634 target = gen_reg_rtx (V8DImode);
44635 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
44636 perm, 8, false))
44638 emit_move_insn (d->target,
44639 gen_lowpart (d->vmode, target));
44640 return true;
44642 return false;
44645 /* Next see if vpermd can be used. */
44646 if (valid_perm_using_mode_p (V16SImode, d))
44647 vmode = V16SImode;
44649 /* Or if vpermps can be used. */
44650 else if (d->vmode == V16SFmode)
44651 vmode = V16SImode;
44652 if (vmode == V64QImode)
44654 /* vpshufb only works intra-lane; it is not
44655 possible to shuffle bytes between the lanes. */
44656 for (i = 0; i < nelt; ++i)
44657 if ((d->perm[i] ^ i) & (nelt / 4))
44658 return false;
44661 else
44662 return false;
44665 if (d->testing_p)
44666 return true;
44668 if (vmode == V8SImode)
44669 for (i = 0; i < 8; ++i)
44670 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
44671 else if (vmode == V16SImode)
44672 for (i = 0; i < 16; ++i)
44673 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
44674 else
44676 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
44677 if (!d->one_operand_p)
44678 mask = 2 * nelt - 1;
44679 else if (vmode == V16QImode)
44680 mask = nelt - 1;
44681 else if (vmode == V64QImode)
44682 mask = nelt / 4 - 1;
44683 else
44684 mask = nelt / 2 - 1;
44686 for (i = 0; i < nelt; ++i)
44688 unsigned j, e = d->perm[i] & mask;
44689 for (j = 0; j < eltsz; ++j)
44690 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
44694 vperm = gen_rtx_CONST_VECTOR (vmode,
44695 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
44696 vperm = force_reg (vmode, vperm);
44698 target = d->target;
44699 if (d->vmode != vmode)
44700 target = gen_reg_rtx (vmode);
44701 op0 = gen_lowpart (vmode, d->op0);
44702 if (d->one_operand_p)
44704 if (vmode == V16QImode)
44705 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
44706 else if (vmode == V32QImode)
44707 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
44708 else if (vmode == V64QImode)
44709 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
44710 else if (vmode == V8SFmode)
44711 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
44712 else if (vmode == V8SImode)
44713 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
44714 else if (vmode == V16SFmode)
44715 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
44716 else if (vmode == V16SImode)
44717 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
44718 else
44719 gcc_unreachable ();
44721 else
44723 op1 = gen_lowpart (vmode, d->op1);
44724 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
44726 if (target != d->target)
44727 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44729 return true;
44732 /* For V*[QHS]Imode permutations, check whether the same permutation
44733 can instead be performed in a 2x, 4x or 8x wider inner mode. */
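/* For example, the V8HImode permutation { 2, 3, 0, 1, 6, 7, 4, 5 } moves
   halfwords in even-aligned pairs and is rewritten as the V4SImode
   permutation { 1, 0, 3, 2 }; the recursion then stops because that
   permutation no longer moves even-aligned pairs (1 is odd).  */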
44735 static bool
44736 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
44737 struct expand_vec_perm_d *nd)
44739 int i;
44740 machine_mode mode = VOIDmode;
44742 switch (d->vmode)
44744 case E_V16QImode: mode = V8HImode; break;
44745 case E_V32QImode: mode = V16HImode; break;
44746 case E_V64QImode: mode = V32HImode; break;
44747 case E_V8HImode: mode = V4SImode; break;
44748 case E_V16HImode: mode = V8SImode; break;
44749 case E_V32HImode: mode = V16SImode; break;
44750 case E_V4SImode: mode = V2DImode; break;
44751 case E_V8SImode: mode = V4DImode; break;
44752 case E_V16SImode: mode = V8DImode; break;
44753 default: return false;
44755 for (i = 0; i < d->nelt; i += 2)
44756 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
44757 return false;
44758 nd->vmode = mode;
44759 nd->nelt = d->nelt / 2;
44760 for (i = 0; i < nd->nelt; i++)
44761 nd->perm[i] = d->perm[2 * i] / 2;
44762 if (GET_MODE_INNER (mode) != DImode)
44763 canonicalize_vector_int_perm (nd, nd);
44764 if (nd != d)
44766 nd->one_operand_p = d->one_operand_p;
44767 nd->testing_p = d->testing_p;
44768 if (d->op0 == d->op1)
44769 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
44770 else
44772 nd->op0 = gen_lowpart (nd->vmode, d->op0);
44773 nd->op1 = gen_lowpart (nd->vmode, d->op1);
44775 if (d->testing_p)
44776 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
44777 else
44778 nd->target = gen_reg_rtx (nd->vmode);
44780 return true;
44783 /* Try to expand a one-operand permutation with a constant mask. */
44785 static bool
44786 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
44788 machine_mode mode = GET_MODE (d->op0);
44789 machine_mode maskmode = mode;
44790 rtx (*gen) (rtx, rtx, rtx) = NULL;
44791 rtx target, op0, mask;
44792 rtx vec[64];
44794 if (!rtx_equal_p (d->op0, d->op1))
44795 return false;
44797 if (!TARGET_AVX512F)
44798 return false;
44800 switch (mode)
44802 case E_V16SImode:
44803 gen = gen_avx512f_permvarv16si;
44804 break;
44805 case E_V16SFmode:
44806 gen = gen_avx512f_permvarv16sf;
44807 maskmode = V16SImode;
44808 break;
44809 case E_V8DImode:
44810 gen = gen_avx512f_permvarv8di;
44811 break;
44812 case E_V8DFmode:
44813 gen = gen_avx512f_permvarv8df;
44814 maskmode = V8DImode;
44815 break;
44816 default:
44817 return false;
44820 target = d->target;
44821 op0 = d->op0;
44822 for (int i = 0; i < d->nelt; ++i)
44823 vec[i] = GEN_INT (d->perm[i]);
44824 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
44825 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
44826 return true;
44829 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
44830 in a single instruction. */
44832 static bool
44833 expand_vec_perm_1 (struct expand_vec_perm_d *d)
44835 unsigned i, nelt = d->nelt;
44836 struct expand_vec_perm_d nd;
44838 /* Check plain VEC_SELECT first, because AVX has instructions that could
44839 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
44840 input where SEL+CONCAT may not. */
44841 if (d->one_operand_p)
44843 int mask = nelt - 1;
44844 bool identity_perm = true;
44845 bool broadcast_perm = true;
44847 for (i = 0; i < nelt; i++)
44849 nd.perm[i] = d->perm[i] & mask;
44850 if (nd.perm[i] != i)
44851 identity_perm = false;
44852 if (nd.perm[i])
44853 broadcast_perm = false;
44856 if (identity_perm)
44858 if (!d->testing_p)
44859 emit_move_insn (d->target, d->op0);
44860 return true;
44862 else if (broadcast_perm && TARGET_AVX2)
44864 /* Use vpbroadcast{b,w,d}. */
44865 rtx (*gen) (rtx, rtx) = NULL;
44866 switch (d->vmode)
44868 case E_V64QImode:
44869 if (TARGET_AVX512BW)
44870 gen = gen_avx512bw_vec_dupv64qi_1;
44871 break;
44872 case E_V32QImode:
44873 gen = gen_avx2_pbroadcastv32qi_1;
44874 break;
44875 case E_V32HImode:
44876 if (TARGET_AVX512BW)
44877 gen = gen_avx512bw_vec_dupv32hi_1;
44878 break;
44879 case E_V16HImode:
44880 gen = gen_avx2_pbroadcastv16hi_1;
44881 break;
44882 case E_V16SImode:
44883 if (TARGET_AVX512F)
44884 gen = gen_avx512f_vec_dupv16si_1;
44885 break;
44886 case E_V8SImode:
44887 gen = gen_avx2_pbroadcastv8si_1;
44888 break;
44889 case E_V16QImode:
44890 gen = gen_avx2_pbroadcastv16qi;
44891 break;
44892 case E_V8HImode:
44893 gen = gen_avx2_pbroadcastv8hi;
44894 break;
44895 case E_V16SFmode:
44896 if (TARGET_AVX512F)
44897 gen = gen_avx512f_vec_dupv16sf_1;
44898 break;
44899 case E_V8SFmode:
44900 gen = gen_avx2_vec_dupv8sf_1;
44901 break;
44902 case E_V8DFmode:
44903 if (TARGET_AVX512F)
44904 gen = gen_avx512f_vec_dupv8df_1;
44905 break;
44906 case E_V8DImode:
44907 if (TARGET_AVX512F)
44908 gen = gen_avx512f_vec_dupv8di_1;
44909 break;
44910 /* For other modes prefer other shuffles this function creates. */
44911 default: break;
44913 if (gen != NULL)
44915 if (!d->testing_p)
44916 emit_insn (gen (d->target, d->op0));
44917 return true;
44921 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
44922 return true;
44924 /* There are plenty of patterns in sse.md that are written for
44925 SEL+CONCAT and are not replicated for a single op. Perhaps
44926 that should be changed, to avoid the nastiness here. */
44928 /* Recognize interleave style patterns, which means incrementing
44929 every other permutation operand. */
44930 for (i = 0; i < nelt; i += 2)
44932 nd.perm[i] = d->perm[i] & mask;
44933 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
44935 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
44936 d->testing_p))
44937 return true;
44939 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
44940 if (nelt >= 4)
44942 for (i = 0; i < nelt; i += 4)
44944 nd.perm[i + 0] = d->perm[i + 0] & mask;
44945 nd.perm[i + 1] = d->perm[i + 1] & mask;
44946 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
44947 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
44950 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
44951 d->testing_p))
44952 return true;
44956 /* Finally, try the fully general two operand permute. */
44957 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
44958 d->testing_p))
44959 return true;
44961 /* Recognize interleave style patterns with reversed operands. */
44962 if (!d->one_operand_p)
44964 for (i = 0; i < nelt; ++i)
44966 unsigned e = d->perm[i];
44967 if (e >= nelt)
44968 e -= nelt;
44969 else
44970 e += nelt;
44971 nd.perm[i] = e;
44974 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
44975 d->testing_p))
44976 return true;
44979 /* Try the SSE4.1 blend variable merge instructions. */
44980 if (expand_vec_perm_blend (d))
44981 return true;
44983 /* Try one of the AVX vpermil variable permutations. */
44984 if (expand_vec_perm_vpermil (d))
44985 return true;
44987 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
44988 vpshufb, vpermd, vpermps or vpermq variable permutation. */
44989 if (expand_vec_perm_pshufb (d))
44990 return true;
44992 /* Try the AVX2 vpalignr instruction. */
44993 if (expand_vec_perm_palignr (d, true))
44994 return true;
44996 /* Try the AVX512F vperm{s,d} instructions. */
44997 if (ix86_expand_vec_one_operand_perm_avx512 (d))
44998 return true;
45000 /* Try the AVX512F vpermi2 instructions. */
45001 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
45002 return true;
45004 /* See if we can get the same permutation in different vector integer
45005 mode. */
45006 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
45008 if (!d->testing_p)
45009 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
45010 return true;
45012 return false;
45015 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45016 in terms of a pair of pshuflw + pshufhw instructions. */
45018 static bool
45019 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
45021 unsigned char perm2[MAX_VECT_LEN];
45022 unsigned i;
45023 bool ok;
45025 if (d->vmode != V8HImode || !d->one_operand_p)
45026 return false;
45028 /* The two permutations only operate in 64-bit lanes. */
45029 for (i = 0; i < 4; ++i)
45030 if (d->perm[i] >= 4)
45031 return false;
45032 for (i = 4; i < 8; ++i)
45033 if (d->perm[i] < 4)
45034 return false;
45036 if (d->testing_p)
45037 return true;
45039 /* Emit the pshuflw. */
45040 memcpy (perm2, d->perm, 4);
45041 for (i = 4; i < 8; ++i)
45042 perm2[i] = i;
45043 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
45044 gcc_assert (ok);
45046 /* Emit the pshufhw. */
45047 memcpy (perm2 + 4, d->perm + 4, 4);
45048 for (i = 0; i < 4; ++i)
45049 perm2[i] = i;
45050 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
45051 gcc_assert (ok);
45053 return true;
45056 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45057 the permutation using the SSSE3 palignr instruction. This succeeds
45058 when all of the elements in PERM fit within one vector and we merely
45059 need to shift them down so that a single vector permutation has a
45060 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
45061 the vpalignr instruction itself can perform the requested permutation. */
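/* For example, the V16QImode permutation with perm[i] == i + 3 selects 16
   consecutive bytes of the op1:op0 concatenation; min == 3, the residual
   one-operand permutation after the 3-byte palignr is the identity
   (in_order), so the palignr alone implements the shuffle.  */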
45063 static bool
45064 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
45066 unsigned i, nelt = d->nelt;
45067 unsigned min, max, minswap, maxswap;
45068 bool in_order, ok, swap = false;
45069 rtx shift, target;
45070 struct expand_vec_perm_d dcopy;
45072 /* Even with AVX, palignr only operates on 128-bit vectors;
45073 in AVX2 palignr operates on both 128-bit lanes independently. */
45074 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
45075 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
45076 return false;
45078 min = 2 * nelt;
45079 max = 0;
45080 minswap = 2 * nelt;
45081 maxswap = 0;
45082 for (i = 0; i < nelt; ++i)
45084 unsigned e = d->perm[i];
45085 unsigned eswap = d->perm[i] ^ nelt;
45086 if (GET_MODE_SIZE (d->vmode) == 32)
45088 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
45089 eswap = e ^ (nelt / 2);
45091 if (e < min)
45092 min = e;
45093 if (e > max)
45094 max = e;
45095 if (eswap < minswap)
45096 minswap = eswap;
45097 if (eswap > maxswap)
45098 maxswap = eswap;
45100 if (min == 0
45101 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
45103 if (d->one_operand_p
45104 || minswap == 0
45105 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
45106 ? nelt / 2 : nelt))
45107 return false;
45108 swap = true;
45109 min = minswap;
45110 max = maxswap;
45113 /* Given that we have SSSE3, we know we'll be able to implement the
45114 single operand permutation after the palignr with pshufb for
45115 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
45116 first. */
45117 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
45118 return true;
45120 dcopy = *d;
45121 if (swap)
45123 dcopy.op0 = d->op1;
45124 dcopy.op1 = d->op0;
45125 for (i = 0; i < nelt; ++i)
45126 dcopy.perm[i] ^= nelt;
45129 in_order = true;
45130 for (i = 0; i < nelt; ++i)
45132 unsigned e = dcopy.perm[i];
45133 if (GET_MODE_SIZE (d->vmode) == 32
45134 && e >= nelt
45135 && (e & (nelt / 2 - 1)) < min)
45136 e = e - min - (nelt / 2);
45137 else
45138 e = e - min;
45139 if (e != i)
45140 in_order = false;
45141 dcopy.perm[i] = e;
45143 dcopy.one_operand_p = true;
45145 if (single_insn_only_p && !in_order)
45146 return false;
45148 /* For AVX2, test whether we can permute the result in one instruction. */
45149 if (d->testing_p)
45151 if (in_order)
45152 return true;
45153 dcopy.op1 = dcopy.op0;
45154 return expand_vec_perm_1 (&dcopy);
45157 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
45158 if (GET_MODE_SIZE (d->vmode) == 16)
45160 target = gen_reg_rtx (TImode);
45161 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
45162 gen_lowpart (TImode, dcopy.op0), shift));
45164 else
45166 target = gen_reg_rtx (V2TImode);
45167 emit_insn (gen_avx2_palignrv2ti (target,
45168 gen_lowpart (V2TImode, dcopy.op1),
45169 gen_lowpart (V2TImode, dcopy.op0),
45170 shift));
45173 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
45175 /* Test for the degenerate case where the alignment by itself
45176 produces the desired permutation. */
45177 if (in_order)
45179 emit_move_insn (d->target, dcopy.op0);
45180 return true;
45183 ok = expand_vec_perm_1 (&dcopy);
45184 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
45186 return ok;
45189 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
45190 the permutation using the SSE4_1 pblendv instruction. Potentially
45191 reduces the permutation from 2 pshufb insns and an or to 1 pshufb and a pblendv. */
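/* E.g. a V16QImode permutation that is the identity except for
   perm[1] == 18 and perm[3] == 16: all out-of-place elements come from
   op1 (which == 2), so op1 is first permuted on its own with perm & 15
   and that result is then blended into op0 at positions 1 and 3.  */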
45193 static bool
45194 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
45196 unsigned i, which, nelt = d->nelt;
45197 struct expand_vec_perm_d dcopy, dcopy1;
45198 machine_mode vmode = d->vmode;
45199 bool ok;
45201 /* Use the same checks as in expand_vec_perm_blend. */
45202 if (d->one_operand_p)
45203 return false;
45204 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
45206 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
45208 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45210 else
45211 return false;
45213 /* Figure out which permutation elements do not stay in their
45214 respective lanes. */
45215 for (i = 0, which = 0; i < nelt; ++i)
45217 unsigned e = d->perm[i];
45218 if (e != i)
45219 which |= (e < nelt ? 1 : 2);
45221 /* We can pblend the part whose elements do not stay in their
45222 respective lanes only when these elements all come from one
45223 half of the permutation, i.e. all from op0 or all from op1.
45224 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
45225 lanes but both are >= 8.
45226 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
45227 respective lanes and 8 >= 8 but 2 is not. */
45228 if (which != 1 && which != 2)
45229 return false;
45230 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
45231 return true;
45233 /* First we apply a one-operand permutation to the part whose
45234 elements do not stay in their respective lanes. */
45235 dcopy = *d;
45236 if (which == 2)
45237 dcopy.op0 = dcopy.op1 = d->op1;
45238 else
45239 dcopy.op0 = dcopy.op1 = d->op0;
45240 if (!d->testing_p)
45241 dcopy.target = gen_reg_rtx (vmode);
45242 dcopy.one_operand_p = true;
45244 for (i = 0; i < nelt; ++i)
45245 dcopy.perm[i] = d->perm[i] & (nelt - 1);
45247 ok = expand_vec_perm_1 (&dcopy);
45248 if (GET_MODE_SIZE (vmode) != 16 && !ok)
45249 return false;
45250 else
45251 gcc_assert (ok);
45252 if (d->testing_p)
45253 return true;
45255 /* Next we put permuted elements into their positions. */
45256 dcopy1 = *d;
45257 if (which == 2)
45258 dcopy1.op1 = dcopy.target;
45259 else
45260 dcopy1.op0 = dcopy.target;
45262 for (i = 0; i < nelt; ++i)
45263 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
45265 ok = expand_vec_perm_blend (&dcopy1);
45266 gcc_assert (ok);
45268 return true;
45271 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
45273 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45274 a two vector permutation into a single vector permutation by using
45275 an interleave operation to merge the vectors. */
45277 static bool
45278 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
45280 struct expand_vec_perm_d dremap, dfinal;
45281 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
45282 unsigned HOST_WIDE_INT contents;
45283 unsigned char remap[2 * MAX_VECT_LEN];
45284 rtx_insn *seq;
45285 bool ok, same_halves = false;
45287 if (GET_MODE_SIZE (d->vmode) == 16)
45289 if (d->one_operand_p)
45290 return false;
45292 else if (GET_MODE_SIZE (d->vmode) == 32)
45294 if (!TARGET_AVX)
45295 return false;
45296 /* For 32-byte modes allow even d->one_operand_p.
45297 The lack of cross-lane shuffling in some instructions
45298 might prevent a single insn shuffle. */
45299 dfinal = *d;
45300 dfinal.testing_p = true;
45301 /* If expand_vec_perm_interleave3 can expand this into
45302 a 3 insn sequence, give up and let it be expanded as
45303 a 3 insn sequence. While that is one insn longer,
45304 it doesn't need a memory operand, and in the common
45305 case where both the interleave-low and interleave-high
45306 permutations with the same operands are adjacent, only
45307 4 insns are needed for both after CSE. */
45308 if (expand_vec_perm_interleave3 (&dfinal))
45309 return false;
45311 else
45312 return false;
45314 /* Examine from whence the elements come. */
45315 contents = 0;
45316 for (i = 0; i < nelt; ++i)
45317 contents |= HOST_WIDE_INT_1U << d->perm[i];
45319 memset (remap, 0xff, sizeof (remap));
45320 dremap = *d;
45322 if (GET_MODE_SIZE (d->vmode) == 16)
45324 unsigned HOST_WIDE_INT h1, h2, h3, h4;
45326 /* Split the two input vectors into 4 halves. */
45327 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
45328 h2 = h1 << nelt2;
45329 h3 = h2 << nelt2;
45330 h4 = h3 << nelt2;
45332 /* If the elements come only from the low halves, use interleave low;
45333 similarly for interleave high. If the elements are from mismatched
45334 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
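/* For example, the V4SImode permutation { 0, 4, 1, 5 } draws only on the
   low halves of both operands (contents is covered by h1 | h3), so
   punpckldq already produces the desired order and the final dfinal
   shuffle below becomes the identity.  */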
45335 if ((contents & (h1 | h3)) == contents)
45337 /* punpckl* */
45338 for (i = 0; i < nelt2; ++i)
45340 remap[i] = i * 2;
45341 remap[i + nelt] = i * 2 + 1;
45342 dremap.perm[i * 2] = i;
45343 dremap.perm[i * 2 + 1] = i + nelt;
45345 if (!TARGET_SSE2 && d->vmode == V4SImode)
45346 dremap.vmode = V4SFmode;
45348 else if ((contents & (h2 | h4)) == contents)
45350 /* punpckh* */
45351 for (i = 0; i < nelt2; ++i)
45353 remap[i + nelt2] = i * 2;
45354 remap[i + nelt + nelt2] = i * 2 + 1;
45355 dremap.perm[i * 2] = i + nelt2;
45356 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
45358 if (!TARGET_SSE2 && d->vmode == V4SImode)
45359 dremap.vmode = V4SFmode;
45361 else if ((contents & (h1 | h4)) == contents)
45363 /* shufps */
45364 for (i = 0; i < nelt2; ++i)
45366 remap[i] = i;
45367 remap[i + nelt + nelt2] = i + nelt2;
45368 dremap.perm[i] = i;
45369 dremap.perm[i + nelt2] = i + nelt + nelt2;
45371 if (nelt != 4)
45373 /* shufpd */
45374 dremap.vmode = V2DImode;
45375 dremap.nelt = 2;
45376 dremap.perm[0] = 0;
45377 dremap.perm[1] = 3;
45380 else if ((contents & (h2 | h3)) == contents)
45382 /* shufps */
45383 for (i = 0; i < nelt2; ++i)
45385 remap[i + nelt2] = i;
45386 remap[i + nelt] = i + nelt2;
45387 dremap.perm[i] = i + nelt2;
45388 dremap.perm[i + nelt2] = i + nelt;
45390 if (nelt != 4)
45392 /* shufpd */
45393 dremap.vmode = V2DImode;
45394 dremap.nelt = 2;
45395 dremap.perm[0] = 1;
45396 dremap.perm[1] = 2;
45399 else
45400 return false;
45402 else
45404 unsigned int nelt4 = nelt / 4, nzcnt = 0;
45405 unsigned HOST_WIDE_INT q[8];
45406 unsigned int nonzero_halves[4];
45408 /* Split the two input vectors into 8 quarters. */
45409 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
45410 for (i = 1; i < 8; ++i)
45411 q[i] = q[0] << (nelt4 * i);
45412 for (i = 0; i < 4; ++i)
45413 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
45415 nonzero_halves[nzcnt] = i;
45416 ++nzcnt;
45419 if (nzcnt == 1)
45421 gcc_assert (d->one_operand_p);
45422 nonzero_halves[1] = nonzero_halves[0];
45423 same_halves = true;
45425 else if (d->one_operand_p)
45427 gcc_assert (nonzero_halves[0] == 0);
45428 gcc_assert (nonzero_halves[1] == 1);
45431 if (nzcnt <= 2)
45433 if (d->perm[0] / nelt2 == nonzero_halves[1])
45435 /* Attempt to increase the likelihood that dfinal
45436 shuffle will be intra-lane. */
45437 std::swap (nonzero_halves[0], nonzero_halves[1]);
45440 /* vperm2f128 or vperm2i128. */
45441 for (i = 0; i < nelt2; ++i)
45443 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
45444 remap[i + nonzero_halves[0] * nelt2] = i;
45445 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
45446 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
45449 if (d->vmode != V8SFmode
45450 && d->vmode != V4DFmode
45451 && d->vmode != V8SImode)
45453 dremap.vmode = V8SImode;
45454 dremap.nelt = 8;
45455 for (i = 0; i < 4; ++i)
45457 dremap.perm[i] = i + nonzero_halves[0] * 4;
45458 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
45462 else if (d->one_operand_p)
45463 return false;
45464 else if (TARGET_AVX2
45465 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
45467 /* vpunpckl* */
45468 for (i = 0; i < nelt4; ++i)
45470 remap[i] = i * 2;
45471 remap[i + nelt] = i * 2 + 1;
45472 remap[i + nelt2] = i * 2 + nelt2;
45473 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
45474 dremap.perm[i * 2] = i;
45475 dremap.perm[i * 2 + 1] = i + nelt;
45476 dremap.perm[i * 2 + nelt2] = i + nelt2;
45477 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
45480 else if (TARGET_AVX2
45481 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
45483 /* vpunpckh* */
45484 for (i = 0; i < nelt4; ++i)
45486 remap[i + nelt4] = i * 2;
45487 remap[i + nelt + nelt4] = i * 2 + 1;
45488 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
45489 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
45490 dremap.perm[i * 2] = i + nelt4;
45491 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
45492 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
45493 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
45496 else
45497 return false;
45500 /* Use the remapping array set up above to move the elements from their
45501 swizzled locations into their final destinations. */
45502 dfinal = *d;
45503 for (i = 0; i < nelt; ++i)
45505 unsigned e = remap[d->perm[i]];
45506 gcc_assert (e < nelt);
45507 /* If same_halves is true, both halves of the remapped vector are the
45508 same. Avoid cross-lane accesses if possible. */
45509 if (same_halves && i >= nelt2)
45511 gcc_assert (e < nelt2);
45512 dfinal.perm[i] = e + nelt2;
45514 else
45515 dfinal.perm[i] = e;
45517 if (!d->testing_p)
45519 dremap.target = gen_reg_rtx (dremap.vmode);
45520 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
45522 dfinal.op1 = dfinal.op0;
45523 dfinal.one_operand_p = true;
45525 /* Test if the final remap can be done with a single insn. For V4SFmode or
45526 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
45527 start_sequence ();
45528 ok = expand_vec_perm_1 (&dfinal);
45529 seq = get_insns ();
45530 end_sequence ();
45532 if (!ok)
45533 return false;
45535 if (d->testing_p)
45536 return true;
45538 if (dremap.vmode != dfinal.vmode)
45540 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
45541 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
45544 ok = expand_vec_perm_1 (&dremap);
45545 gcc_assert (ok);
45547 emit_insn (seq);
45548 return true;
45551 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45552 a single vector cross-lane permutation into vpermq followed
45553 by any of the single insn permutations. */
45555 static bool
45556 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
45558 struct expand_vec_perm_d dremap, dfinal;
45559 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
45560 unsigned contents[2];
45561 bool ok;
45563 if (!(TARGET_AVX2
45564 && (d->vmode == V32QImode || d->vmode == V16HImode)
45565 && d->one_operand_p))
45566 return false;
45568 contents[0] = 0;
45569 contents[1] = 0;
45570 for (i = 0; i < nelt2; ++i)
45572 contents[0] |= 1u << (d->perm[i] / nelt4);
45573 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
45576 for (i = 0; i < 2; ++i)
45578 unsigned int cnt = 0;
45579 for (j = 0; j < 4; ++j)
45580 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
45581 return false;
45584 if (d->testing_p)
45585 return true;
45587 dremap = *d;
45588 dremap.vmode = V4DImode;
45589 dremap.nelt = 4;
45590 dremap.target = gen_reg_rtx (V4DImode);
45591 dremap.op0 = gen_lowpart (V4DImode, d->op0);
45592 dremap.op1 = dremap.op0;
45593 dremap.one_operand_p = true;
45594 for (i = 0; i < 2; ++i)
45596 unsigned int cnt = 0;
45597 for (j = 0; j < 4; ++j)
45598 if ((contents[i] & (1u << j)) != 0)
45599 dremap.perm[2 * i + cnt++] = j;
45600 for (; cnt < 2; ++cnt)
45601 dremap.perm[2 * i + cnt] = 0;
45604 dfinal = *d;
45605 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
45606 dfinal.op1 = dfinal.op0;
45607 dfinal.one_operand_p = true;
45608 for (i = 0, j = 0; i < nelt; ++i)
45610 if (i == nelt2)
45611 j = 2;
45612 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
45613 if ((d->perm[i] / nelt4) == dremap.perm[j])
45615 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
45616 dfinal.perm[i] |= nelt4;
45617 else
45618 gcc_unreachable ();
45621 ok = expand_vec_perm_1 (&dremap);
45622 gcc_assert (ok);
45624 ok = expand_vec_perm_1 (&dfinal);
45625 gcc_assert (ok);
45627 return true;
45630 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
45631 a vector permutation using two instructions, vperm2f128 resp.
45632 vperm2i128 followed by any single in-lane permutation. */
45634 static bool
45635 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
45637 struct expand_vec_perm_d dfirst, dsecond;
45638 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
45639 bool ok;
45641 if (!TARGET_AVX
45642 || GET_MODE_SIZE (d->vmode) != 32
45643 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
45644 return false;
45646 dsecond = *d;
45647 dsecond.one_operand_p = false;
45648 dsecond.testing_p = true;
45650 /* ((perm << 2) | perm) & 0x33 is the vperm2[fi]128
45651 immediate. For perm < 16 the second permutation uses
45652 d->op0 as its first operand; for perm >= 16 it uses d->op1
45653 as its first operand. The second operand is the result of
45654 vperm2[fi]128. */
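/* E.g. for perm == 6 (binary 0110) the result's low lane is half 2 (the
   low half of d->op1) and its high lane is half 1 (the high half of
   d->op0); the corresponding immediate is ((6 << 2) | 6) & 0x33 == 0x12.  */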
45655 for (perm = 0; perm < 32; perm++)
45657 /* Ignore permutations which do not move anything cross-lane. */
45658 if (perm < 16)
45660 /* The second shuffle for e.g. V4DFmode has
45661 0123 and ABCD operands.
45662 Ignore AB23, as 23 is already in the second lane
45663 of the first operand. */
45664 if ((perm & 0xc) == (1 << 2)) continue;
45665 /* And 01CD, as 01 is in the first lane of the first
45666 operand. */
45667 if ((perm & 3) == 0) continue;
45668 /* And 4567, as then the vperm2[fi]128 doesn't change
45669 anything on the original 4567 second operand. */
45670 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
45672 else
45674 /* The second shuffle for e.g. V4DFmode has
45675 4567 and ABCD operands.
45676 Ignore AB67, as 67 is already in the second lane
45677 of the first operand. */
45678 if ((perm & 0xc) == (3 << 2)) continue;
45679 /* And 45CD, as 45 is in the first lane of the first
45680 operand. */
45681 if ((perm & 3) == 2) continue;
45682 /* And 0123, as then the vperm2[fi]128 doesn't change
45683 anything on the original 0123 first operand. */
45684 if ((perm & 0xf) == (1 << 2)) continue;
45687 for (i = 0; i < nelt; i++)
45689 j = d->perm[i] / nelt2;
45690 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
45691 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
45692 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
45693 dsecond.perm[i] = d->perm[i] & (nelt - 1);
45694 else
45695 break;
45698 if (i == nelt)
45700 start_sequence ();
45701 ok = expand_vec_perm_1 (&dsecond);
45702 end_sequence ();
45704 else
45705 ok = false;
45707 if (ok)
45709 if (d->testing_p)
45710 return true;
45712 /* Found a usable second shuffle. dfirst will be
45713 vperm2f128 on d->op0 and d->op1. */
45714 dsecond.testing_p = false;
45715 dfirst = *d;
45716 dfirst.target = gen_reg_rtx (d->vmode);
45717 for (i = 0; i < nelt; i++)
45718 dfirst.perm[i] = (i & (nelt2 - 1))
45719 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
45721 canonicalize_perm (&dfirst);
45722 ok = expand_vec_perm_1 (&dfirst);
45723 gcc_assert (ok);
45725 /* And dsecond is some single insn shuffle, taking
45726 d->op0 and result of vperm2f128 (if perm < 16) or
45727 d->op1 and result of vperm2f128 (otherwise). */
45728 if (perm >= 16)
45729 dsecond.op0 = dsecond.op1;
45730 dsecond.op1 = dfirst.target;
45732 ok = expand_vec_perm_1 (&dsecond);
45733 gcc_assert (ok);
45735 return true;
45738 /* For one operand, the only useful vperm2f128 permutation is 0x01
45739 aka lanes swap. */
45740 if (d->one_operand_p)
45741 return false;
45744 return false;
45747 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45748 a two vector permutation using 2 intra-lane interleave insns
45749 and cross-lane shuffle for 32-byte vectors. */
45751 static bool
45752 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
45754 unsigned i, nelt;
45755 rtx (*gen) (rtx, rtx, rtx);
45757 if (d->one_operand_p)
45758 return false;
45759 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
45761 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
45763 else
45764 return false;
45766 nelt = d->nelt;
45767 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
45768 return false;
45769 for (i = 0; i < nelt; i += 2)
45770 if (d->perm[i] != d->perm[0] + i / 2
45771 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
45772 return false;
45774 if (d->testing_p)
45775 return true;
45777 switch (d->vmode)
45779 case E_V32QImode:
45780 if (d->perm[0])
45781 gen = gen_vec_interleave_highv32qi;
45782 else
45783 gen = gen_vec_interleave_lowv32qi;
45784 break;
45785 case E_V16HImode:
45786 if (d->perm[0])
45787 gen = gen_vec_interleave_highv16hi;
45788 else
45789 gen = gen_vec_interleave_lowv16hi;
45790 break;
45791 case E_V8SImode:
45792 if (d->perm[0])
45793 gen = gen_vec_interleave_highv8si;
45794 else
45795 gen = gen_vec_interleave_lowv8si;
45796 break;
45797 case E_V4DImode:
45798 if (d->perm[0])
45799 gen = gen_vec_interleave_highv4di;
45800 else
45801 gen = gen_vec_interleave_lowv4di;
45802 break;
45803 case E_V8SFmode:
45804 if (d->perm[0])
45805 gen = gen_vec_interleave_highv8sf;
45806 else
45807 gen = gen_vec_interleave_lowv8sf;
45808 break;
45809 case E_V4DFmode:
45810 if (d->perm[0])
45811 gen = gen_vec_interleave_highv4df;
45812 else
45813 gen = gen_vec_interleave_lowv4df;
45814 break;
45815 default:
45816 gcc_unreachable ();
45819 emit_insn (gen (d->target, d->op0, d->op1));
45820 return true;
45823 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
45824 a single vector permutation using a single intra-lane vector
45825 permutation, vperm2f128 swapping the lanes and vblend* insn blending
45826 the non-swapped and swapped vectors together. */
45828 static bool
45829 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
45831 struct expand_vec_perm_d dfirst, dsecond;
45832 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
45833 rtx_insn *seq;
45834 bool ok;
45835 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
45837 if (!TARGET_AVX
45838 || TARGET_AVX2
45839 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
45840 || !d->one_operand_p)
45841 return false;
45843 dfirst = *d;
45844 for (i = 0; i < nelt; i++)
45845 dfirst.perm[i] = 0xff;
45846 for (i = 0, msk = 0; i < nelt; i++)
45848 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
45849 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
45850 return false;
45851 dfirst.perm[j] = d->perm[i];
45852 if (j != i)
45853 msk |= (1 << i);
45855 for (i = 0; i < nelt; i++)
45856 if (dfirst.perm[i] == 0xff)
45857 dfirst.perm[i] = i;
45859 if (!d->testing_p)
45860 dfirst.target = gen_reg_rtx (dfirst.vmode);
45862 start_sequence ();
45863 ok = expand_vec_perm_1 (&dfirst);
45864 seq = get_insns ();
45865 end_sequence ();
45867 if (!ok)
45868 return false;
45870 if (d->testing_p)
45871 return true;
45873 emit_insn (seq);
45875 dsecond = *d;
45876 dsecond.op0 = dfirst.target;
45877 dsecond.op1 = dfirst.target;
45878 dsecond.one_operand_p = true;
45879 dsecond.target = gen_reg_rtx (dsecond.vmode);
45880 for (i = 0; i < nelt; i++)
45881 dsecond.perm[i] = i ^ nelt2;
45883 ok = expand_vec_perm_1 (&dsecond);
45884 gcc_assert (ok);
45886 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
45887 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
45888 return true;
45891 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
45892 permutation using two vperm2f128, followed by a vshufpd insn blending
45893 the two vectors together. */
45895 static bool
45896 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
45898 struct expand_vec_perm_d dfirst, dsecond, dthird;
45899 bool ok;
45901 if (!TARGET_AVX || (d->vmode != V4DFmode))
45902 return false;
45904 if (d->testing_p)
45905 return true;
45907 dfirst = *d;
45908 dsecond = *d;
45909 dthird = *d;
45911 dfirst.perm[0] = (d->perm[0] & ~1);
45912 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
45913 dfirst.perm[2] = (d->perm[2] & ~1);
45914 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
45915 dsecond.perm[0] = (d->perm[1] & ~1);
45916 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
45917 dsecond.perm[2] = (d->perm[3] & ~1);
45918 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
45919 dthird.perm[0] = (d->perm[0] % 2);
45920 dthird.perm[1] = (d->perm[1] % 2) + 4;
45921 dthird.perm[2] = (d->perm[2] % 2) + 2;
45922 dthird.perm[3] = (d->perm[3] % 2) + 6;
45924 dfirst.target = gen_reg_rtx (dfirst.vmode);
45925 dsecond.target = gen_reg_rtx (dsecond.vmode);
45926 dthird.op0 = dfirst.target;
45927 dthird.op1 = dsecond.target;
45928 dthird.one_operand_p = false;
45930 canonicalize_perm (&dfirst);
45931 canonicalize_perm (&dsecond);
45933 ok = expand_vec_perm_1 (&dfirst)
45934 && expand_vec_perm_1 (&dsecond)
45935 && expand_vec_perm_1 (&dthird);
45937 gcc_assert (ok);
45939 return true;
45942 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
45943 permutation with two pshufb insns and an ior. We should have already
45944 failed all two instruction sequences. */
45946 static bool
45947 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
45949 rtx rperm[2][16], vperm, l, h, op, m128;
45950 unsigned int i, nelt, eltsz;
45952 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
45953 return false;
45954 gcc_assert (!d->one_operand_p);
45956 if (d->testing_p)
45957 return true;
45959 nelt = d->nelt;
45960 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
45962 /* Generate two permutation masks. If the required element is within
45963 the given vector it is shuffled into the proper lane. If the required
45964 element is in the other vector, force a zero into the lane by setting
45965 bit 7 in the permutation mask. */
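/* E.g. for the V16QImode extract-even permutation { 0, 2, ..., 30 } the
   first mask is { 0, 2, ..., 14, -128, ..., -128 } and the second is
   { -128, ..., -128, 0, 2, ..., 14 }; pshufb zeroes every byte whose mask
   byte has bit 7 set, so the final ior merges the two half results.  */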
45966 m128 = GEN_INT (-128);
45967 for (i = 0; i < nelt; ++i)
45969 unsigned j, e = d->perm[i];
45970 unsigned which = (e >= nelt);
45971 if (e >= nelt)
45972 e -= nelt;
45974 for (j = 0; j < eltsz; ++j)
45976 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
45977 rperm[1-which][i*eltsz + j] = m128;
45981 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
45982 vperm = force_reg (V16QImode, vperm);
45984 l = gen_reg_rtx (V16QImode);
45985 op = gen_lowpart (V16QImode, d->op0);
45986 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
45988 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
45989 vperm = force_reg (V16QImode, vperm);
45991 h = gen_reg_rtx (V16QImode);
45992 op = gen_lowpart (V16QImode, d->op1);
45993 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
45995 op = d->target;
45996 if (d->vmode != V16QImode)
45997 op = gen_reg_rtx (V16QImode);
45998 emit_insn (gen_iorv16qi3 (op, l, h));
45999 if (op != d->target)
46000 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46002 return true;
46005 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
46006 with two vpshufb insns, vpermq and vpor. We should have already failed
46007 all two or three instruction sequences. */
46009 static bool
46010 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
46012 rtx rperm[2][32], vperm, l, h, hp, op, m128;
46013 unsigned int i, nelt, eltsz;
46015 if (!TARGET_AVX2
46016 || !d->one_operand_p
46017 || (d->vmode != V32QImode && d->vmode != V16HImode))
46018 return false;
46020 if (d->testing_p)
46021 return true;
46023 nelt = d->nelt;
46024 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46026 /* Generate two permutation masks. If the required element is within
46027 the same lane, it is shuffled in. If the required element is from the
46028 other lane, force a zero by setting bit 7 in the permutation mask.
46029 The other mask has a non-negative element whenever an element
46030 is requested from the other lane; that element is also moved to the
46031 other lane, so that the result of vpshufb can have its two V2TImode
46032 halves swapped. */
46033 m128 = GEN_INT (-128);
46034 for (i = 0; i < nelt; ++i)
46036 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46037 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
46039 for (j = 0; j < eltsz; ++j)
46041 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
46042 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
46046 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46047 vperm = force_reg (V32QImode, vperm);
46049 h = gen_reg_rtx (V32QImode);
46050 op = gen_lowpart (V32QImode, d->op0);
46051 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46053 /* Swap the 128-bit lanes of h into hp. */
46054 hp = gen_reg_rtx (V4DImode);
46055 op = gen_lowpart (V4DImode, h);
46056 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
46057 const1_rtx));
46059 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46060 vperm = force_reg (V32QImode, vperm);
46062 l = gen_reg_rtx (V32QImode);
46063 op = gen_lowpart (V32QImode, d->op0);
46064 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46066 op = d->target;
46067 if (d->vmode != V32QImode)
46068 op = gen_reg_rtx (V32QImode);
46069 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
46070 if (op != d->target)
46071 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46073 return true;
46076 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46077 and extract-odd permutations of two V32QImode or V16HImode operands
46078 with two vpshufb insns, vpor and vpermq. We should have already
46079 failed all two or three instruction sequences. */
46081 static bool
46082 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
46084 rtx rperm[2][32], vperm, l, h, ior, op, m128;
46085 unsigned int i, nelt, eltsz;
46087 if (!TARGET_AVX2
46088 || d->one_operand_p
46089 || (d->vmode != V32QImode && d->vmode != V16HImode))
46090 return false;
46092 for (i = 0; i < d->nelt; ++i)
46093 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
46094 return false;
46096 if (d->testing_p)
46097 return true;
46099 nelt = d->nelt;
46100 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46102 /* Generate two permutation masks. In the first permutation mask
46103 the first quarter will contain indexes for the first half
46104 of the op0, the second quarter will contain bit 7 set, third quarter
46105 will contain indexes for the second half of the op0 and the
46106 last quarter bit 7 set. In the second permutation mask
46107 the first quarter will contain bit 7 set, the second quarter
46108 indexes for the first half of the op1, the third quarter bit 7 set
46109 and last quarter indexes for the second half of the op1.
46110 I.e. the first mask e.g. for V32QImode extract even will be:
46111 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
46112 (all values masked with 0xf except for -128) and second mask
46113 for extract even will be
46114 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
46115 m128 = GEN_INT (-128);
46116 for (i = 0; i < nelt; ++i)
46118 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46119 unsigned which = d->perm[i] >= nelt;
46120 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
46122 for (j = 0; j < eltsz; ++j)
46124 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
46125 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
46129 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46130 vperm = force_reg (V32QImode, vperm);
46132 l = gen_reg_rtx (V32QImode);
46133 op = gen_lowpart (V32QImode, d->op0);
46134 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46136 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46137 vperm = force_reg (V32QImode, vperm);
46139 h = gen_reg_rtx (V32QImode);
46140 op = gen_lowpart (V32QImode, d->op1);
46141 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46143 ior = gen_reg_rtx (V32QImode);
46144 emit_insn (gen_iorv32qi3 (ior, l, h));
46146 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
46147 op = gen_reg_rtx (V4DImode);
46148 ior = gen_lowpart (V4DImode, ior);
46149 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
46150 const1_rtx, GEN_INT (3)));
46151 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46153 return true;
46156 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46157 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
46158 with two "and" and "pack" or two "shift" and "pack" insns. We should
46159 have already failed all two instruction sequences. */
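/* For example, for the V16QImode extract-even permutation both operands
   are viewed as V8HImode and "and"ed with 0x00ff so only the even bytes
   survive in each halfword, and packuswb then narrows and concatenates
   them; for extract-odd a logical right shift by 8 replaces the "and".
   For the 256-bit modes the pack result additionally needs the
   { 0, 2, 1, 3 } vpermq fixup because the packs work per 128-bit lane.  */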
46161 static bool
46162 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
46164 rtx op, dop0, dop1, t, rperm[16];
46165 unsigned i, odd, c, s, nelt = d->nelt;
46166 bool end_perm = false;
46167 machine_mode half_mode;
46168 rtx (*gen_and) (rtx, rtx, rtx);
46169 rtx (*gen_pack) (rtx, rtx, rtx);
46170 rtx (*gen_shift) (rtx, rtx, rtx);
46172 if (d->one_operand_p)
46173 return false;
46175 switch (d->vmode)
46177 case E_V8HImode:
46178 /* Required for "pack". */
46179 if (!TARGET_SSE4_1)
46180 return false;
46181 c = 0xffff;
46182 s = 16;
46183 half_mode = V4SImode;
46184 gen_and = gen_andv4si3;
46185 gen_pack = gen_sse4_1_packusdw;
46186 gen_shift = gen_lshrv4si3;
46187 break;
46188 case E_V16QImode:
46189 /* No check as all instructions are SSE2. */
46190 c = 0xff;
46191 s = 8;
46192 half_mode = V8HImode;
46193 gen_and = gen_andv8hi3;
46194 gen_pack = gen_sse2_packuswb;
46195 gen_shift = gen_lshrv8hi3;
46196 break;
46197 case E_V16HImode:
46198 if (!TARGET_AVX2)
46199 return false;
46200 c = 0xffff;
46201 s = 16;
46202 half_mode = V8SImode;
46203 gen_and = gen_andv8si3;
46204 gen_pack = gen_avx2_packusdw;
46205 gen_shift = gen_lshrv8si3;
46206 end_perm = true;
46207 break;
46208 case E_V32QImode:
46209 if (!TARGET_AVX2)
46210 return false;
46211 c = 0xff;
46212 s = 8;
46213 half_mode = V16HImode;
46214 gen_and = gen_andv16hi3;
46215 gen_pack = gen_avx2_packuswb;
46216 gen_shift = gen_lshrv16hi3;
46217 end_perm = true;
46218 break;
46219 default:
46220 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
46221 general shuffles. */
46222 return false;
46225 /* Check that permutation is even or odd. */
46226 odd = d->perm[0];
46227 if (odd > 1)
46228 return false;
46230 for (i = 1; i < nelt; ++i)
46231 if (d->perm[i] != 2 * i + odd)
46232 return false;
46234 if (d->testing_p)
46235 return true;
46237 dop0 = gen_reg_rtx (half_mode);
46238 dop1 = gen_reg_rtx (half_mode);
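/* For the even permutation, masking each wide element with C clears the
   odd-position narrow elements, so the unsigned-saturating pack simply
   concatenates the even ones; for the odd permutation a logical right
   shift by S moves the odd elements into the even positions first.  */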
46239 if (odd == 0)
46241 for (i = 0; i < nelt / 2; i++)
46242 rperm[i] = GEN_INT (c);
46243 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
46244 t = force_reg (half_mode, t);
46245 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
46246 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
46248 else
46250 emit_insn (gen_shift (dop0,
46251 gen_lowpart (half_mode, d->op0),
46252 GEN_INT (s)));
46253 emit_insn (gen_shift (dop1,
46254 gen_lowpart (half_mode, d->op1),
46255 GEN_INT (s)));
46257 /* In the AVX2 256-bit case we need to permute the pack result. */
46258 if (TARGET_AVX2 && end_perm)
46260 op = gen_reg_rtx (d->vmode);
46261 t = gen_reg_rtx (V4DImode);
46262 emit_insn (gen_pack (op, dop0, dop1));
46263 emit_insn (gen_avx2_permv4di_1 (t,
46264 gen_lowpart (V4DImode, op),
46265 const0_rtx,
46266 const2_rtx,
46267 const1_rtx,
46268 GEN_INT (3)));
46269 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
46271 else
46272 emit_insn (gen_pack (d->target, dop0, dop1));
46274 return true;
46277 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46278 and extract-odd permutations of two V64QI operands
46279 with two "shifts", two "truncs" and one "concat" insn for "odd"
46280 and two "truncs" and one "concat" insn for "even".
46281 We should have already failed all two-instruction sequences. */
46283 static bool
46284 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
46286 rtx t1, t2, t3, t4;
46287 unsigned i, odd, nelt = d->nelt;
46289 if (!TARGET_AVX512BW
46290 || d->one_operand_p
46291 || d->vmode != V64QImode)
46292 return false;
46294 /* Check that permutation is even or odd. */
46295 odd = d->perm[0];
46296 if (odd > 1)
46297 return false;
46299 for (i = 1; i < nelt; ++i)
46300 if (d->perm[i] != 2 * i + odd)
46301 return false;
46303 if (d->testing_p)
46304 return true;
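/* Each V32HImode element holds an even byte in its low half and an odd
   byte in its high half.  For the odd permutation, shift right by 8 so
   the odd bytes land in the low halves; the vpmovwb truncations below
   then keep exactly those bytes and the two halves are concatenated.  */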
46307 if (odd)
46309 t1 = gen_reg_rtx (V32HImode);
46310 t2 = gen_reg_rtx (V32HImode);
46311 emit_insn (gen_lshrv32hi3 (t1,
46312 gen_lowpart (V32HImode, d->op0),
46313 GEN_INT (8)));
46314 emit_insn (gen_lshrv32hi3 (t2,
46315 gen_lowpart (V32HImode, d->op1),
46316 GEN_INT (8)));
46318 else
46320 t1 = gen_lowpart (V32HImode, d->op0);
46321 t2 = gen_lowpart (V32HImode, d->op1);
46324 t3 = gen_reg_rtx (V32QImode);
46325 t4 = gen_reg_rtx (V32QImode);
46326 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
46327 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
46328 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
46330 return true;
46333 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
46334 and extract-odd permutations. */
46336 static bool
46337 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
46339 rtx t1, t2, t3, t4, t5;
46341 switch (d->vmode)
46343 case E_V4DFmode:
46344 if (d->testing_p)
46345 break;
46346 t1 = gen_reg_rtx (V4DFmode);
46347 t2 = gen_reg_rtx (V4DFmode);
46349 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
46350 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
46351 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
46353 /* Now an unpck[lh]pd will produce the result required. */
46354 if (odd)
46355 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
46356 else
46357 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
46358 emit_insn (t3);
46359 break;
46361 case E_V8SFmode:
46363 int mask = odd ? 0xdd : 0x88;
46365 if (d->testing_p)
46366 break;
46367 t1 = gen_reg_rtx (V8SFmode);
46368 t2 = gen_reg_rtx (V8SFmode);
46369 t3 = gen_reg_rtx (V8SFmode);
46371 /* Shuffle within the 128-bit lanes to produce:
46372 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
46373 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
46374 GEN_INT (mask)));
46376 /* Shuffle the lanes around to produce:
46377 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
46378 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
46379 GEN_INT (0x3)));
46381 /* Shuffle within the 128-bit lanes to produce:
46382 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
46383 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
46385 /* Shuffle within the 128-bit lanes to produce:
46386 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
46387 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
46389 /* Shuffle the lanes around to produce:
46390 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
46391 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
46392 GEN_INT (0x20)));
46394 break;
46396 case E_V2DFmode:
46397 case E_V4SFmode:
46398 case E_V2DImode:
46399 case E_V4SImode:
46400 /* These are always directly implementable by expand_vec_perm_1. */
46401 gcc_unreachable ();
46403 case E_V8HImode:
46404 if (TARGET_SSE4_1)
46405 return expand_vec_perm_even_odd_pack (d);
46406 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
46407 return expand_vec_perm_pshufb2 (d);
46408 else
46410 if (d->testing_p)
46411 break;
46412 /* We need 2*log2(N)-1 operations to achieve odd/even
46413 with interleave. */
46414 t1 = gen_reg_rtx (V8HImode);
46415 t2 = gen_reg_rtx (V8HImode);
46416 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
46417 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
46418 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
46419 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
46420 if (odd)
46421 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
46422 else
46423 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
46424 emit_insn (t3);
46426 break;
46428 case E_V16QImode:
46429 return expand_vec_perm_even_odd_pack (d);
46431 case E_V16HImode:
46432 case E_V32QImode:
46433 return expand_vec_perm_even_odd_pack (d);
46435 case E_V64QImode:
46436 return expand_vec_perm_even_odd_trunc (d);
46438 case E_V4DImode:
46439 if (!TARGET_AVX2)
46441 struct expand_vec_perm_d d_copy = *d;
46442 d_copy.vmode = V4DFmode;
46443 if (d->testing_p)
46444 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
46445 else
46446 d_copy.target = gen_reg_rtx (V4DFmode);
46447 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
46448 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
46449 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
46451 if (!d->testing_p)
46452 emit_move_insn (d->target,
46453 gen_lowpart (V4DImode, d_copy.target));
46454 return true;
46456 return false;
46459 if (d->testing_p)
46460 break;
46462 t1 = gen_reg_rtx (V4DImode);
46463 t2 = gen_reg_rtx (V4DImode);
46465 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
46466 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
46467 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
46469 /* Now a vpunpck[lh]qdq will produce the result required. */
46470 if (odd)
46471 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
46472 else
46473 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
46474 emit_insn (t3);
46475 break;
46477 case E_V8SImode:
46478 if (!TARGET_AVX2)
46480 struct expand_vec_perm_d d_copy = *d;
46481 d_copy.vmode = V8SFmode;
46482 if (d->testing_p)
46483 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
46484 else
46485 d_copy.target = gen_reg_rtx (V8SFmode);
46486 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
46487 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
46488 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
46490 if (!d->testing_p)
46491 emit_move_insn (d->target,
46492 gen_lowpart (V8SImode, d_copy.target));
46493 return true;
46495 return false;
46498 if (d->testing_p)
46499 break;
46501 t1 = gen_reg_rtx (V8SImode);
46502 t2 = gen_reg_rtx (V8SImode);
46503 t3 = gen_reg_rtx (V4DImode);
46504 t4 = gen_reg_rtx (V4DImode);
46505 t5 = gen_reg_rtx (V4DImode);
46507 /* Shuffle the lanes around into
46508 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
46509 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
46510 gen_lowpart (V4DImode, d->op1),
46511 GEN_INT (0x20)));
46512 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
46513 gen_lowpart (V4DImode, d->op1),
46514 GEN_INT (0x31)));
46516 /* Swap the 2nd and 3rd position in each lane into
46517 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
46518 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
46519 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
46520 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
46521 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
46523 /* Now a vpunpck[lh]qdq will produce
46524 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
46525 if (odd)
46526 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
46527 gen_lowpart (V4DImode, t2));
46528 else
46529 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
46530 gen_lowpart (V4DImode, t2));
46531 emit_insn (t3);
46532 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
46533 break;
46535 default:
46536 gcc_unreachable ();
46539 return true;
46542 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
46543 extract-even and extract-odd permutations. */
46545 static bool
46546 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
46548 unsigned i, odd, nelt = d->nelt;
46550 odd = d->perm[0];
46551 if (odd != 0 && odd != 1)
46552 return false;
46554 for (i = 1; i < nelt; ++i)
46555 if (d->perm[i] != 2 * i + odd)
46556 return false;
46558 return expand_vec_perm_even_odd_1 (d, odd);
46561 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
46562 permutations. We assume that expand_vec_perm_1 has already failed. */
46564 static bool
46565 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
46567 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
46568 machine_mode vmode = d->vmode;
46569 unsigned char perm2[4];
46570 rtx op0 = d->op0, dest;
46571 bool ok;
46573 switch (vmode)
46575 case E_V4DFmode:
46576 case E_V8SFmode:
46577 /* These are special-cased in sse.md so that we can optionally
46578 use the vbroadcast instruction. They expand to two insns
46579 if the input happens to be in a register. */
46580 gcc_unreachable ();
46582 case E_V2DFmode:
46583 case E_V2DImode:
46584 case E_V4SFmode:
46585 case E_V4SImode:
46586 /* These are always implementable using standard shuffle patterns. */
46587 gcc_unreachable ();
46589 case E_V8HImode:
46590 case E_V16QImode:
46591 /* These can be implemented via interleave. We save one insn by
46592 stopping once we have promoted to V4SImode and then use pshufd. */
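/* E.g. to broadcast element 3 of a V8HImode vector, one interleave-low
   of the vector with itself yields { 0 0 1 1 2 2 3 3 }, after which the
   desired value occupies V4SImode element 3 and a single pshufd
   replicates it.  */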
46593 if (d->testing_p)
46594 return true;
46597 rtx dest;
46598 rtx (*gen) (rtx, rtx, rtx)
46599 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
46600 : gen_vec_interleave_lowv8hi;
46602 if (elt >= nelt2)
46604 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
46605 : gen_vec_interleave_highv8hi;
46606 elt -= nelt2;
46608 nelt2 /= 2;
46610 dest = gen_reg_rtx (vmode);
46611 emit_insn (gen (dest, op0, op0));
46612 vmode = get_mode_wider_vector (vmode);
46613 op0 = gen_lowpart (vmode, dest);
46615 while (vmode != V4SImode);
46617 memset (perm2, elt, 4);
46618 dest = gen_reg_rtx (V4SImode);
46619 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
46620 gcc_assert (ok);
46621 if (!d->testing_p)
46622 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
46623 return true;
46625 case E_V64QImode:
46626 case E_V32QImode:
46627 case E_V16HImode:
46628 case E_V8SImode:
46629 case E_V4DImode:
46630 /* For AVX2 broadcasts of the first element vpbroadcast* or
46631 vpermq should be used by expand_vec_perm_1. */
46632 gcc_assert (!TARGET_AVX2 || d->perm[0]);
46633 return false;
46635 default:
46636 gcc_unreachable ();
46640 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
46641 broadcast permutations. */
46643 static bool
46644 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
46646 unsigned i, elt, nelt = d->nelt;
46648 if (!d->one_operand_p)
46649 return false;
46651 elt = d->perm[0];
46652 for (i = 1; i < nelt; ++i)
46653 if (d->perm[i] != elt)
46654 return false;
46656 return expand_vec_perm_broadcast_1 (d);
46659 /* Implement arbitrary permutations of two V64QImode operands
46660 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
46661 static bool
46662 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
46664 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
46665 return false;
46667 if (d->testing_p)
46668 return true;
46670 struct expand_vec_perm_d ds[2];
46671 rtx rperm[128], vperm, target0, target1;
46672 unsigned int i, nelt;
46673 machine_mode vmode;
46675 nelt = d->nelt;
46676 vmode = V64QImode;
46678 for (i = 0; i < 2; i++)
46680 ds[i] = *d;
46681 ds[i].vmode = V32HImode;
46682 ds[i].nelt = 32;
46683 ds[i].target = gen_reg_rtx (V32HImode);
46684 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
46685 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
46688 /* Prepare permutations such that the first one takes care of
46689 putting the even bytes into the right positions or one higher
46690 positions (ds[0]) and the second one takes care of
46691 putting the odd bytes into the right positions or one below
46692 (ds[1]). */
46694 for (i = 0; i < nelt; i++)
46696 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
46697 if (i & 1)
46699 rperm[i] = constm1_rtx;
46700 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
46702 else
46704 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
46705 rperm[i + 64] = constm1_rtx;
46709 bool ok = expand_vec_perm_1 (&ds[0]);
46710 gcc_assert (ok);
46711 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
46713 ok = expand_vec_perm_1 (&ds[1]);
46714 gcc_assert (ok);
46715 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
46717 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
46718 vperm = force_reg (vmode, vperm);
46719 target0 = gen_reg_rtx (V64QImode);
46720 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
46722 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
46723 vperm = force_reg (vmode, vperm);
46724 target1 = gen_reg_rtx (V64QImode);
46725 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
46727 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
46728 return true;
46731 /* Implement arbitrary permutation of two V32QImode and V16HImode operands
46732 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
46733 all the shorter instruction sequences. */
46735 static bool
46736 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
46738 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
46739 unsigned int i, nelt, eltsz;
46740 bool used[4];
46742 if (!TARGET_AVX2
46743 || d->one_operand_p
46744 || (d->vmode != V32QImode && d->vmode != V16HImode))
46745 return false;
46747 if (d->testing_p)
46748 return true;
46750 nelt = d->nelt;
46751 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46753 /* Generate 4 permutation masks. If the required element is within
46754 the same lane, it is shuffled in. If the required element is from the
46755 other lane, force a zero by setting bit 7 in the permutation mask.
46756 The other mask has non-negative elements if the element
46757 is requested from the other lane, but it is also moved to the other lane,
46758 so that the result of vpshufb can have the two V2TImode halves
46759 swapped. */
46760 m128 = GEN_INT (-128);
46761 for (i = 0; i < 32; ++i)
46763 rperm[0][i] = m128;
46764 rperm[1][i] = m128;
46765 rperm[2][i] = m128;
46766 rperm[3][i] = m128;
46768 used[0] = false;
46769 used[1] = false;
46770 used[2] = false;
46771 used[3] = false;
46772 for (i = 0; i < nelt; ++i)
46774 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46775 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
46776 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
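/* Bit 1 of WHICH selects op1 as the source (permutation index >= nelt)
   and bit 0 is set when the element must cross a 128-bit lane; masks 0
   and 2 handle the in-lane bytes, masks 1 and 3 the bytes whose lanes
   are swapped afterwards.  */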
46778 for (j = 0; j < eltsz; ++j)
46779 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
46780 used[which] = true;
46783 for (i = 0; i < 2; ++i)
46785 if (!used[2 * i + 1])
46787 h[i] = NULL_RTX;
46788 continue;
46790 vperm = gen_rtx_CONST_VECTOR (V32QImode,
46791 gen_rtvec_v (32, rperm[2 * i + 1]));
46792 vperm = force_reg (V32QImode, vperm);
46793 h[i] = gen_reg_rtx (V32QImode);
46794 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
46795 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
46798 /* Swap the 128-bit lanes of h[X]. */
46799 for (i = 0; i < 2; ++i)
46801 if (h[i] == NULL_RTX)
46802 continue;
46803 op = gen_reg_rtx (V4DImode);
46804 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
46805 const2_rtx, GEN_INT (3), const0_rtx,
46806 const1_rtx));
46807 h[i] = gen_lowpart (V32QImode, op);
46810 for (i = 0; i < 2; ++i)
46812 if (!used[2 * i])
46814 l[i] = NULL_RTX;
46815 continue;
46817 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
46818 vperm = force_reg (V32QImode, vperm);
46819 l[i] = gen_reg_rtx (V32QImode);
46820 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
46821 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
46824 for (i = 0; i < 2; ++i)
46826 if (h[i] && l[i])
46828 op = gen_reg_rtx (V32QImode);
46829 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
46830 l[i] = op;
46832 else if (h[i])
46833 l[i] = h[i];
46836 gcc_assert (l[0] && l[1]);
46837 op = d->target;
46838 if (d->vmode != V32QImode)
46839 op = gen_reg_rtx (V32QImode);
46840 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
46841 if (op != d->target)
46842 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46843 return true;
46846 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
46847 With all of the interface bits taken care of, perform the expansion
46848 in D and return true on success. */
46850 static bool
46851 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
46853 /* Try a single instruction expansion. */
46854 if (expand_vec_perm_1 (d))
46855 return true;
46857 /* Try sequences of two instructions. */
46859 if (expand_vec_perm_pshuflw_pshufhw (d))
46860 return true;
46862 if (expand_vec_perm_palignr (d, false))
46863 return true;
46865 if (expand_vec_perm_interleave2 (d))
46866 return true;
46868 if (expand_vec_perm_broadcast (d))
46869 return true;
46871 if (expand_vec_perm_vpermq_perm_1 (d))
46872 return true;
46874 if (expand_vec_perm_vperm2f128 (d))
46875 return true;
46877 if (expand_vec_perm_pblendv (d))
46878 return true;
46880 /* Try sequences of three instructions. */
46882 if (expand_vec_perm_even_odd_pack (d))
46883 return true;
46885 if (expand_vec_perm_2vperm2f128_vshuf (d))
46886 return true;
46888 if (expand_vec_perm_pshufb2 (d))
46889 return true;
46891 if (expand_vec_perm_interleave3 (d))
46892 return true;
46894 if (expand_vec_perm_vperm2f128_vblend (d))
46895 return true;
46897 /* Try sequences of four instructions. */
46899 if (expand_vec_perm_even_odd_trunc (d))
46900 return true;
46901 if (expand_vec_perm_vpshufb2_vpermq (d))
46902 return true;
46904 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
46905 return true;
46907 if (expand_vec_perm_vpermi2_vpshub2 (d))
46908 return true;
46910 /* ??? Look for narrow permutations whose element orderings would
46911 allow the promotion to a wider mode. */
46913 /* ??? Look for sequences of interleave or a wider permute that place
46914 the data into the correct lanes for a half-vector shuffle like
46915 pshuf[lh]w or vpermilps. */
46917 /* ??? Look for sequences of interleave that produce the desired results.
46918 The combinatorics of punpck[lh] get pretty ugly... */
46920 if (expand_vec_perm_even_odd (d))
46921 return true;
46923 /* Even longer sequences. */
46924 if (expand_vec_perm_vpshufb4_vpermq2 (d))
46925 return true;
46927 /* See if we can get the same permutation in a different vector integer
46928 mode. */
46929 struct expand_vec_perm_d nd;
46930 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
46932 if (!d->testing_p)
46933 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
46934 return true;
46937 return false;
46940 /* If a permutation only uses one operand, make it clear. Returns true
46941 if the permutation references both operands. */
46943 static bool
46944 canonicalize_perm (struct expand_vec_perm_d *d)
46946 int i, which, nelt = d->nelt;
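/* Bit 0 of WHICH records that some element comes from op0, bit 1 that
   some element comes from op1.  */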
46948 for (i = which = 0; i < nelt; ++i)
46949 which |= (d->perm[i] < nelt ? 1 : 2);
46951 d->one_operand_p = true;
46952 switch (which)
46954 default:
46955 gcc_unreachable();
46957 case 3:
46958 if (!rtx_equal_p (d->op0, d->op1))
46960 d->one_operand_p = false;
46961 break;
46963 /* The elements of PERM do not suggest that only the first operand
46964 is used, but both operands are identical. Allow easier matching
46965 of the permutation by folding the permutation into the single
46966 input vector. */
46967 /* FALLTHRU */
46969 case 2:
46970 for (i = 0; i < nelt; ++i)
46971 d->perm[i] &= nelt - 1;
46972 d->op0 = d->op1;
46973 break;
46975 case 1:
46976 d->op1 = d->op0;
46977 break;
46980 return (which == 3);
46983 bool
46984 ix86_expand_vec_perm_const (rtx operands[4])
46986 struct expand_vec_perm_d d;
46987 unsigned char perm[MAX_VECT_LEN];
46988 int i, nelt;
46989 bool two_args;
46990 rtx sel;
46992 d.target = operands[0];
46993 d.op0 = operands[1];
46994 d.op1 = operands[2];
46995 sel = operands[3];
46997 d.vmode = GET_MODE (d.target);
46998 gcc_assert (VECTOR_MODE_P (d.vmode));
46999 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47000 d.testing_p = false;
47002 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
47003 gcc_assert (XVECLEN (sel, 0) == nelt);
47004 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
47006 for (i = 0; i < nelt; ++i)
47008 rtx e = XVECEXP (sel, 0, i);
47009 int ei = INTVAL (e) & (2 * nelt - 1);
47010 d.perm[i] = ei;
47011 perm[i] = ei;
47014 two_args = canonicalize_perm (&d);
47016 if (ix86_expand_vec_perm_const_1 (&d))
47017 return true;
47019 /* If the selector says both arguments are needed, but the operands are the
47020 same, the above tried to expand with one_operand_p and flattened selector.
47021 If that didn't work, retry without one_operand_p; we succeeded with that
47022 during testing. */
47023 if (two_args && d.one_operand_p)
47025 d.one_operand_p = false;
47026 memcpy (d.perm, perm, sizeof (perm));
47027 return ix86_expand_vec_perm_const_1 (&d);
47030 return false;
47033 /* Implement targetm.vectorize.vec_perm_const_ok. */
47035 static bool
47036 ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
47038 struct expand_vec_perm_d d;
47039 unsigned int i, nelt, which;
47040 bool ret;
47042 d.vmode = vmode;
47043 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47044 d.testing_p = true;
47046 /* Given sufficient ISA support we can just return true here
47047 for selected vector modes. */
47048 switch (d.vmode)
47050 case E_V16SFmode:
47051 case E_V16SImode:
47052 case E_V8DImode:
47053 case E_V8DFmode:
47054 if (TARGET_AVX512F)
47055 /* All implementable with a single vpermi2 insn. */
47056 return true;
47057 break;
47058 case E_V32HImode:
47059 if (TARGET_AVX512BW)
47060 /* All implementable with a single vpermi2 insn. */
47061 return true;
47062 break;
47063 case E_V64QImode:
47064 if (TARGET_AVX512BW)
47065 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
47066 return true;
47067 break;
47068 case E_V8SImode:
47069 case E_V8SFmode:
47070 case E_V4DFmode:
47071 case E_V4DImode:
47072 if (TARGET_AVX512VL)
47073 /* All implementable with a single vpermi2 insn. */
47074 return true;
47075 break;
47076 case E_V16HImode:
47077 if (TARGET_AVX2)
47078 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47079 return true;
47080 break;
47081 case E_V32QImode:
47082 if (TARGET_AVX2)
47083 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47084 return true;
47085 break;
47086 case E_V4SImode:
47087 case E_V4SFmode:
47088 case E_V8HImode:
47089 case E_V16QImode:
47090 /* All implementable with a single vpperm insn. */
47091 if (TARGET_XOP)
47092 return true;
47093 /* All implementable with 2 pshufb + 1 ior. */
47094 if (TARGET_SSSE3)
47095 return true;
47096 break;
47097 case E_V2DImode:
47098 case E_V2DFmode:
47099 /* All implementable with shufpd or unpck[lh]pd. */
47100 return true;
47101 default:
47102 return false;
47105 /* Extract the values from the vector CST into the permutation
47106 array in D. */
47107 for (i = which = 0; i < nelt; ++i)
47109 unsigned char e = sel[i];
47110 gcc_assert (e < 2 * nelt);
47111 d.perm[i] = e;
47112 which |= (e < nelt ? 1 : 2);
47115 /* For all elements from the second vector, fold the elements to the first. */
47116 if (which == 2)
47117 for (i = 0; i < nelt; ++i)
47118 d.perm[i] -= nelt;
47120 /* Check whether the mask can be applied to the vector type. */
47121 d.one_operand_p = (which != 3);
47123 /* Implementable with shufps or pshufd. */
47124 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
47125 return true;
47127 /* Otherwise we have to go through the motions and see if we can
47128 figure out how to generate the requested permutation. */
47129 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
47130 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
47131 if (!d.one_operand_p)
47132 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
47134 start_sequence ();
47135 ret = ix86_expand_vec_perm_const_1 (&d);
47136 end_sequence ();
47138 return ret;
47141 void
47142 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
47144 struct expand_vec_perm_d d;
47145 unsigned i, nelt;
47147 d.target = targ;
47148 d.op0 = op0;
47149 d.op1 = op1;
47150 d.vmode = GET_MODE (targ);
47151 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47152 d.one_operand_p = false;
47153 d.testing_p = false;
47155 for (i = 0; i < nelt; ++i)
47156 d.perm[i] = i * 2 + odd;
47158 /* We'll either be able to implement the permutation directly... */
47159 if (expand_vec_perm_1 (&d))
47160 return;
47162 /* ... or we use the special-case patterns. */
47163 expand_vec_perm_even_odd_1 (&d, odd);
47166 static void
47167 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
47169 struct expand_vec_perm_d d;
47170 unsigned i, nelt, base;
47171 bool ok;
47173 d.target = targ;
47174 d.op0 = op0;
47175 d.op1 = op1;
47176 d.vmode = GET_MODE (targ);
47177 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47178 d.one_operand_p = false;
47179 d.testing_p = false;
47181 base = high_p ? nelt / 2 : 0;
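/* The permutation interleaves the selected halves of the two operands;
   e.g. for nelt == 4 and high_p the permutation is { 2, 6, 3, 7 }.  */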
47182 for (i = 0; i < nelt / 2; ++i)
47184 d.perm[i * 2] = i + base;
47185 d.perm[i * 2 + 1] = i + base + nelt;
47188 /* Note that for AVX this isn't one instruction. */
47189 ok = ix86_expand_vec_perm_const_1 (&d);
47190 gcc_assert (ok);
47194 /* Expand a vector operation CODE for a V*QImode in terms of the
47195 same operation on V*HImode. */
47197 void
47198 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
47200 machine_mode qimode = GET_MODE (dest);
47201 machine_mode himode;
47202 rtx (*gen_il) (rtx, rtx, rtx);
47203 rtx (*gen_ih) (rtx, rtx, rtx);
47204 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
47205 struct expand_vec_perm_d d;
47206 bool ok, full_interleave;
47207 bool uns_p = false;
47208 int i;
47210 switch (qimode)
47212 case E_V16QImode:
47213 himode = V8HImode;
47214 gen_il = gen_vec_interleave_lowv16qi;
47215 gen_ih = gen_vec_interleave_highv16qi;
47216 break;
47217 case E_V32QImode:
47218 himode = V16HImode;
47219 gen_il = gen_avx2_interleave_lowv32qi;
47220 gen_ih = gen_avx2_interleave_highv32qi;
47221 break;
47222 case E_V64QImode:
47223 himode = V32HImode;
47224 gen_il = gen_avx512bw_interleave_lowv64qi;
47225 gen_ih = gen_avx512bw_interleave_highv64qi;
47226 break;
47227 default:
47228 gcc_unreachable ();
47231 op2_l = op2_h = op2;
47232 switch (code)
47234 case MULT:
47235 /* Unpack data such that we've got a source byte in each low byte of
47236 each word. We don't care what goes into the high byte of each word.
47237 Rather than trying to get zero in there, most convenient is to let
47238 it be a copy of the low byte. */
47239 op2_l = gen_reg_rtx (qimode);
47240 op2_h = gen_reg_rtx (qimode);
47241 emit_insn (gen_il (op2_l, op2, op2));
47242 emit_insn (gen_ih (op2_h, op2, op2));
47243 /* FALLTHRU */
47245 op1_l = gen_reg_rtx (qimode);
47246 op1_h = gen_reg_rtx (qimode);
47247 emit_insn (gen_il (op1_l, op1, op1));
47248 emit_insn (gen_ih (op1_h, op1, op1));
47249 full_interleave = qimode == V16QImode;
47250 break;
47252 case ASHIFT:
47253 case LSHIFTRT:
47254 uns_p = true;
47255 /* FALLTHRU */
47256 case ASHIFTRT:
47257 op1_l = gen_reg_rtx (himode);
47258 op1_h = gen_reg_rtx (himode);
47259 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
47260 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
47261 full_interleave = true;
47262 break;
47263 default:
47264 gcc_unreachable ();
47267 /* Perform the operation. */
47268 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
47269 1, OPTAB_DIRECT);
47270 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
47271 1, OPTAB_DIRECT);
47272 gcc_assert (res_l && res_h);
47274 /* Merge the data back into the right place. */
47275 d.target = dest;
47276 d.op0 = gen_lowpart (qimode, res_l);
47277 d.op1 = gen_lowpart (qimode, res_h);
47278 d.vmode = qimode;
47279 d.nelt = GET_MODE_NUNITS (qimode);
47280 d.one_operand_p = false;
47281 d.testing_p = false;
47283 if (full_interleave)
47285 /* For SSE2, we used a full interleave, so the desired
47286 results are in the even elements. */
47287 for (i = 0; i < d.nelt; ++i)
47288 d.perm[i] = i * 2;
47290 else
47292 /* For AVX, the interleave used above was not cross-lane. So the
47293 extraction is evens but with the second and third quarter swapped.
47294 Happily, that is even one insn shorter than even extraction.
47295 For AVX512BW we have 4 lanes. We extract evens from within a lane,
47296 always first from the first and then from the second source operand,
47297 the index bits above the low 4 bits remain the same.
47298 Thus, for d.nelt == 32 we want permutation
47299 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
47300 and for d.nelt == 64 we want permutation
47301 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
47302 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
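/* In the formula below, (i * 2) & 14 cycles through the even positions
   within a 16-element lane, (i & 8) ? d.nelt : 0 switches to the second
   source operand for every other group of eight results, and i & ~15
   keeps the lane offset.  */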
47303 for (i = 0; i < d.nelt; ++i)
47304 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
47307 ok = ix86_expand_vec_perm_const_1 (&d);
47308 gcc_assert (ok);
47310 set_unique_reg_note (get_last_insn (), REG_EQUAL,
47311 gen_rtx_fmt_ee (code, qimode, op1, op2));
47314 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
47315 if op is CONST_VECTOR with all odd elements equal to their
47316 preceding element. */
47318 static bool
47319 const_vector_equal_evenodd_p (rtx op)
47321 machine_mode mode = GET_MODE (op);
47322 int i, nunits = GET_MODE_NUNITS (mode);
47323 if (GET_CODE (op) != CONST_VECTOR
47324 || nunits != CONST_VECTOR_NUNITS (op))
47325 return false;
47326 for (i = 0; i < nunits; i += 2)
47327 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
47328 return false;
47329 return true;
47332 void
47333 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
47334 bool uns_p, bool odd_p)
47336 machine_mode mode = GET_MODE (op1);
47337 machine_mode wmode = GET_MODE (dest);
47338 rtx x;
47339 rtx orig_op1 = op1, orig_op2 = op2;
47341 if (!nonimmediate_operand (op1, mode))
47342 op1 = force_reg (mode, op1);
47343 if (!nonimmediate_operand (op2, mode))
47344 op2 = force_reg (mode, op2);
47346 /* We only play even/odd games with vectors of SImode. */
47347 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
47349 /* If we're looking for the odd results, shift those members down to
47350 the even slots. For some cpus this is faster than a PSHUFD. */
47351 if (odd_p)
47353 /* For XOP use vpmacsdqh, but only for smult, as it is only
47354 signed. */
47355 if (TARGET_XOP && mode == V4SImode && !uns_p)
47357 x = force_reg (wmode, CONST0_RTX (wmode));
47358 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
47359 return;
47362 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
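/* If a constant operand already has each odd element equal to the
   preceding even element, the shift would not change the values seen by
   the even-element multiply, so it can be skipped.  */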
47363 if (!const_vector_equal_evenodd_p (orig_op1))
47364 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
47365 x, NULL, 1, OPTAB_DIRECT);
47366 if (!const_vector_equal_evenodd_p (orig_op2))
47367 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
47368 x, NULL, 1, OPTAB_DIRECT);
47369 op1 = gen_lowpart (mode, op1);
47370 op2 = gen_lowpart (mode, op2);
47373 if (mode == V16SImode)
47375 if (uns_p)
47376 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
47377 else
47378 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
47380 else if (mode == V8SImode)
47382 if (uns_p)
47383 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
47384 else
47385 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
47387 else if (uns_p)
47388 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
47389 else if (TARGET_SSE4_1)
47390 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
47391 else
47393 rtx s1, s2, t0, t1, t2;
47395 /* The easiest way to implement this without PMULDQ is to go through
47396 the motions as if we were performing a full 64-bit multiply, with
47397 the exception that we need to do less shuffling of the elements. */
47399 /* Compute the sign-extension, aka highparts, of the two operands. */
47400 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
47401 op1, pc_rtx, pc_rtx);
47402 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
47403 op2, pc_rtx, pc_rtx);
47405 /* Multiply LO(A) * HI(B), and vice-versa. */
47406 t1 = gen_reg_rtx (wmode);
47407 t2 = gen_reg_rtx (wmode);
47408 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
47409 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
47411 /* Multiply LO(A) * LO(B). */
47412 t0 = gen_reg_rtx (wmode);
47413 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
47415 /* Combine and shift the highparts into place. */
47416 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
47417 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
47418 1, OPTAB_DIRECT);
47420 /* Combine high and low parts. */
47421 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
47422 return;
47424 emit_insn (x);
47427 void
47428 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
47429 bool uns_p, bool high_p)
47431 machine_mode wmode = GET_MODE (dest);
47432 machine_mode mode = GET_MODE (op1);
47433 rtx t1, t2, t3, t4, mask;
47435 switch (mode)
47437 case E_V4SImode:
47438 t1 = gen_reg_rtx (mode);
47439 t2 = gen_reg_rtx (mode);
47440 if (TARGET_XOP && !uns_p)
47442 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
47443 shuffle the elements once so that all elements are in the right
47444 place for immediate use: { A C B D }. */
47445 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
47446 const1_rtx, GEN_INT (3)));
47447 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
47448 const1_rtx, GEN_INT (3)));
47450 else
47452 /* Put the elements into place for the multiply. */
47453 ix86_expand_vec_interleave (t1, op1, op1, high_p);
47454 ix86_expand_vec_interleave (t2, op2, op2, high_p);
47455 high_p = false;
47457 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
47458 break;
47460 case E_V8SImode:
47461 /* Shuffle the elements between the lanes. After this we
47462 have { A B E F | C D G H } for each operand. */
47463 t1 = gen_reg_rtx (V4DImode);
47464 t2 = gen_reg_rtx (V4DImode);
47465 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
47466 const0_rtx, const2_rtx,
47467 const1_rtx, GEN_INT (3)));
47468 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
47469 const0_rtx, const2_rtx,
47470 const1_rtx, GEN_INT (3)));
47472 /* Shuffle the elements within the lanes. After this we
47473 have { A A B B | C C D D } or { E E F F | G G H H }. */
47474 t3 = gen_reg_rtx (V8SImode);
47475 t4 = gen_reg_rtx (V8SImode);
47476 mask = GEN_INT (high_p
47477 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
47478 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
47479 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
47480 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
47482 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
47483 break;
47485 case E_V8HImode:
47486 case E_V16HImode:
47487 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
47488 uns_p, OPTAB_DIRECT);
47489 t2 = expand_binop (mode,
47490 uns_p ? umul_highpart_optab : smul_highpart_optab,
47491 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
47492 gcc_assert (t1 && t2);
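/* T1 holds the low halves and T2 the high halves of each product;
   interleaving the selected halves pairs them into the widened
   results.  */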
47494 t3 = gen_reg_rtx (mode);
47495 ix86_expand_vec_interleave (t3, t1, t2, high_p);
47496 emit_move_insn (dest, gen_lowpart (wmode, t3));
47497 break;
47499 case E_V16QImode:
47500 case E_V32QImode:
47501 case E_V32HImode:
47502 case E_V16SImode:
47503 case E_V64QImode:
47504 t1 = gen_reg_rtx (wmode);
47505 t2 = gen_reg_rtx (wmode);
47506 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
47507 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
47509 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
47510 break;
47512 default:
47513 gcc_unreachable ();
47517 void
47518 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
47520 rtx res_1, res_2, res_3, res_4;
47522 res_1 = gen_reg_rtx (V4SImode);
47523 res_2 = gen_reg_rtx (V4SImode);
47524 res_3 = gen_reg_rtx (V2DImode);
47525 res_4 = gen_reg_rtx (V2DImode);
47526 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
47527 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
47529 /* Move the results in element 2 down to element 1; we don't care
47530 what goes in elements 2 and 3. Then we can merge the parts
47531 back together with an interleave.
47533 Note that two other sequences were tried:
47534 (1) Use interleaves at the start instead of psrldq, which allows
47535 us to use a single shufps to merge things back at the end.
47536 (2) Use shufps here to combine the two vectors, then pshufd to
47537 put the elements in the correct order.
47538 In both cases the cost of the reformatting stall was too high
47539 and the overall sequence slower. */
47541 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
47542 const0_rtx, const2_rtx,
47543 const0_rtx, const0_rtx));
47544 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
47545 const0_rtx, const2_rtx,
47546 const0_rtx, const0_rtx));
47547 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
47549 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
47552 void
47553 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
47555 machine_mode mode = GET_MODE (op0);
47556 rtx t1, t2, t3, t4, t5, t6;
47558 if (TARGET_AVX512DQ && mode == V8DImode)
47559 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
47560 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
47561 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
47562 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
47563 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
47564 else if (TARGET_XOP && mode == V2DImode)
47566 /* op1: A,B,C,D, op2: E,F,G,H */
47567 op1 = gen_lowpart (V4SImode, op1);
47568 op2 = gen_lowpart (V4SImode, op2);
47570 t1 = gen_reg_rtx (V4SImode);
47571 t2 = gen_reg_rtx (V4SImode);
47572 t3 = gen_reg_rtx (V2DImode);
47573 t4 = gen_reg_rtx (V2DImode);
47575 /* t1: B,A,D,C */
47576 emit_insn (gen_sse2_pshufd_1 (t1, op1,
47577 GEN_INT (1),
47578 GEN_INT (0),
47579 GEN_INT (3),
47580 GEN_INT (2)));
47582 /* t2: (B*E),(A*F),(D*G),(C*H) */
47583 emit_insn (gen_mulv4si3 (t2, t1, op2));
47585 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
47586 emit_insn (gen_xop_phadddq (t3, t2));
47588 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
47589 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
47591 /* Multiply the lower parts and add everything together. */
47592 t5 = gen_reg_rtx (V2DImode);
47593 emit_insn (gen_vec_widen_umult_even_v4si (t5,
47594 gen_lowpart (V4SImode, op1),
47595 gen_lowpart (V4SImode, op2)));
47596 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
47599 else
47601 machine_mode nmode;
47602 rtx (*umul) (rtx, rtx, rtx);
47604 if (mode == V2DImode)
47606 umul = gen_vec_widen_umult_even_v4si;
47607 nmode = V4SImode;
47609 else if (mode == V4DImode)
47611 umul = gen_vec_widen_umult_even_v8si;
47612 nmode = V8SImode;
47614 else if (mode == V8DImode)
47616 umul = gen_vec_widen_umult_even_v16si;
47617 nmode = V16SImode;
47619 else
47620 gcc_unreachable ();
47623 /* Multiply low parts. */
47624 t1 = gen_reg_rtx (mode);
47625 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
47627 /* Shift input vectors right 32 bits so we can multiply high parts. */
47628 t6 = GEN_INT (32);
47629 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
47630 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
47632 /* Multiply high parts by low parts. */
47633 t4 = gen_reg_rtx (mode);
47634 t5 = gen_reg_rtx (mode);
47635 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
47636 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
47638 /* Combine and shift the highparts back. */
47639 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
47640 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
47642 /* Combine high and low parts. */
47643 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
47646 set_unique_reg_note (get_last_insn (), REG_EQUAL,
47647 gen_rtx_MULT (mode, op1, op2));
47650 /* Return true if control transfer instruction INSN
47651 should be encoded with the bnd prefix.
47652 If INSN is NULL then return true when control
47653 transfer instructions should be prefixed with
47654 bnd by default for the current function. */
47656 bool
47657 ix86_bnd_prefixed_insn_p (rtx insn)
47659 /* For call insns check special flag. */
47660 if (insn && CALL_P (insn))
47662 rtx call = get_call_rtx_from (insn);
47663 if (call)
47664 return CALL_EXPR_WITH_BOUNDS_P (call);
47667 /* All other insns are prefixed only if function is instrumented. */
47668 return chkp_function_instrumented_p (current_function_decl);
47671 /* Calculate integer abs() using only SSE2 instructions. */
47673 void
47674 ix86_expand_sse2_abs (rtx target, rtx input)
47676 machine_mode mode = GET_MODE (target);
47677 rtx tmp0, tmp1, x;
47679 switch (mode)
47681 /* For 32-bit signed integer X, the best way to calculate the absolute
47682 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
47683 case E_V4SImode:
47684 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
47685 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
47686 NULL, 0, OPTAB_DIRECT);
47687 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
47688 NULL, 0, OPTAB_DIRECT);
47689 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
47690 target, 0, OPTAB_DIRECT);
47691 break;
47693 /* For 16-bit signed integer X, the best way to calculate the absolute
47694 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
47695 case E_V8HImode:
47696 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
47698 x = expand_simple_binop (mode, SMAX, tmp0, input,
47699 target, 0, OPTAB_DIRECT);
47700 break;
47702 /* For 8-bit signed integer X, the best way to calculate the absolute
47703 value of X is min ((unsigned char) X, (unsigned char) (-X)),
47704 as SSE2 provides the PMINUB insn. */
47705 case E_V16QImode:
47706 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
47708 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
47709 target, 0, OPTAB_DIRECT);
47710 break;
47712 default:
47713 gcc_unreachable ();
47716 if (x != target)
47717 emit_move_insn (target, x);
47720 /* Expand an extract from a vector register through pextr insn.
47721 Return true if successful. */
47723 bool
47724 ix86_expand_pextr (rtx *operands)
47726 rtx dst = operands[0];
47727 rtx src = operands[1];
47729 unsigned int size = INTVAL (operands[2]);
47730 unsigned int pos = INTVAL (operands[3]);
47732 if (SUBREG_P (dst))
47734 /* Reject non-lowpart subregs. */
47735 if (SUBREG_BYTE (dst) > 0)
47736 return false;
47737 dst = SUBREG_REG (dst);
47740 if (SUBREG_P (src))
47742 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
47743 src = SUBREG_REG (src);
47746 switch (GET_MODE (src))
47748 case E_V16QImode:
47749 case E_V8HImode:
47750 case E_V4SImode:
47751 case E_V2DImode:
47752 case E_V1TImode:
47753 case E_TImode:
47755 machine_mode srcmode, dstmode;
47756 rtx d, pat;
47758 if (!int_mode_for_size (size, 0).exists (&dstmode))
47759 return false;
47761 switch (dstmode)
47763 case E_QImode:
47764 if (!TARGET_SSE4_1)
47765 return false;
47766 srcmode = V16QImode;
47767 break;
47769 case E_HImode:
47770 if (!TARGET_SSE2)
47771 return false;
47772 srcmode = V8HImode;
47773 break;
47775 case E_SImode:
47776 if (!TARGET_SSE4_1)
47777 return false;
47778 srcmode = V4SImode;
47779 break;
47781 case E_DImode:
47782 gcc_assert (TARGET_64BIT);
47783 if (!TARGET_SSE4_1)
47784 return false;
47785 srcmode = V2DImode;
47786 break;
47788 default:
47789 return false;
47792 /* Reject extractions from misaligned positions. */
47793 if (pos & (size-1))
47794 return false;
47796 if (GET_MODE (dst) == dstmode)
47797 d = dst;
47798 else
47799 d = gen_reg_rtx (dstmode);
47801 /* Construct insn pattern. */
47802 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
47803 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
47805 /* Let the rtl optimizers know about the zero extension performed. */
47806 if (dstmode == QImode || dstmode == HImode)
47808 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
47809 d = gen_lowpart (SImode, d);
47812 emit_insn (gen_rtx_SET (d, pat));
47814 if (d != dst)
47815 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
47816 return true;
47819 default:
47820 return false;
47824 /* Expand an insert into a vector register through pinsr insn.
47825 Return true if successful. */
47827 bool
47828 ix86_expand_pinsr (rtx *operands)
47830 rtx dst = operands[0];
47831 rtx src = operands[3];
47833 unsigned int size = INTVAL (operands[1]);
47834 unsigned int pos = INTVAL (operands[2]);
47836 if (SUBREG_P (dst))
47838 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
47839 dst = SUBREG_REG (dst);
47842 switch (GET_MODE (dst))
47844 case E_V16QImode:
47845 case E_V8HImode:
47846 case E_V4SImode:
47847 case E_V2DImode:
47848 case E_V1TImode:
47849 case E_TImode:
47851 machine_mode srcmode, dstmode;
47852 rtx (*pinsr)(rtx, rtx, rtx, rtx);
47853 rtx d;
47855 if (!int_mode_for_size (size, 0).exists (&srcmode))
47856 return false;
47858 switch (srcmode)
47860 case E_QImode:
47861 if (!TARGET_SSE4_1)
47862 return false;
47863 dstmode = V16QImode;
47864 pinsr = gen_sse4_1_pinsrb;
47865 break;
47867 case E_HImode:
47868 if (!TARGET_SSE2)
47869 return false;
47870 dstmode = V8HImode;
47871 pinsr = gen_sse2_pinsrw;
47872 break;
47874 case E_SImode:
47875 if (!TARGET_SSE4_1)
47876 return false;
47877 dstmode = V4SImode;
47878 pinsr = gen_sse4_1_pinsrd;
47879 break;
47881 case E_DImode:
47882 gcc_assert (TARGET_64BIT);
47883 if (!TARGET_SSE4_1)
47884 return false;
47885 dstmode = V2DImode;
47886 pinsr = gen_sse4_1_pinsrq;
47887 break;
47889 default:
47890 return false;
47893 /* Reject insertions to misaligned positions. */
47894 if (pos & (size-1))
47895 return false;
47897 if (SUBREG_P (src))
47899 unsigned int srcpos = SUBREG_BYTE (src);
47901 if (srcpos > 0)
47903 rtx extr_ops[4];
47905 extr_ops[0] = gen_reg_rtx (srcmode);
47906 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
47907 extr_ops[2] = GEN_INT (size);
47908 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
47910 if (!ix86_expand_pextr (extr_ops))
47911 return false;
47913 src = extr_ops[0];
47915 else
47916 src = gen_lowpart (srcmode, SUBREG_REG (src));
47919 if (GET_MODE (dst) == dstmode)
47920 d = dst;
47921 else
47922 d = gen_reg_rtx (dstmode);
47924 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
47925 gen_lowpart (srcmode, src),
47926 GEN_INT (1 << (pos / size))));
47927 if (d != dst)
47928 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
47929 return true;
47932 default:
47933 return false;
47937 /* This function returns the calling-ABI-specific va_list type node.
47938 It returns the FNDECL-specific va_list type. */
47940 static tree
47941 ix86_fn_abi_va_list (tree fndecl)
47943 if (!TARGET_64BIT)
47944 return va_list_type_node;
47945 gcc_assert (fndecl != NULL_TREE);
47947 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
47948 return ms_va_list_type_node;
47949 else
47950 return sysv_va_list_type_node;
47953 /* Returns the canonical va_list type specified by TYPE. If there
47954 is no valid TYPE provided, it returns NULL_TREE. */
47956 static tree
47957 ix86_canonical_va_list_type (tree type)
47959 if (TARGET_64BIT)
47961 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
47962 return ms_va_list_type_node;
47964 if ((TREE_CODE (type) == ARRAY_TYPE
47965 && integer_zerop (array_type_nelts (type)))
47966 || POINTER_TYPE_P (type))
47968 tree elem_type = TREE_TYPE (type);
47969 if (TREE_CODE (elem_type) == RECORD_TYPE
47970 && lookup_attribute ("sysv_abi va_list",
47971 TYPE_ATTRIBUTES (elem_type)))
47972 return sysv_va_list_type_node;
47975 return NULL_TREE;
47978 return std_canonical_va_list_type (type);
47981 /* Iterate through the target-specific builtin types for va_list.
47982 IDX denotes the iterator, *PTREE is set to the result type of
47983 the va_list builtin, and *PNAME to its internal type.
47984 Returns zero if there is no element for this index, otherwise
47985 IDX should be increased upon the next call.
47986 Note, do not iterate a base builtin's name like __builtin_va_list.
47987 Used from c_common_nodes_and_builtins. */
47989 static int
47990 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
47992 if (TARGET_64BIT)
47994 switch (idx)
47996 default:
47997 break;
47999 case 0:
48000 *ptree = ms_va_list_type_node;
48001 *pname = "__builtin_ms_va_list";
48002 return 1;
48004 case 1:
48005 *ptree = sysv_va_list_type_node;
48006 *pname = "__builtin_sysv_va_list";
48007 return 1;
48011 return 0;
48014 #undef TARGET_SCHED_DISPATCH
48015 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
48016 #undef TARGET_SCHED_DISPATCH_DO
48017 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
48018 #undef TARGET_SCHED_REASSOCIATION_WIDTH
48019 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
48020 #undef TARGET_SCHED_REORDER
48021 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
48022 #undef TARGET_SCHED_ADJUST_PRIORITY
48023 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
48024 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
48025 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
48026 ix86_dependencies_evaluation_hook
48029 /* Implementation of the reassociation_width target hook used by
48030 the reassoc phase to identify the parallelism level in a reassociated
48031 tree. The statement's tree_code is passed in OP. The arguments'
48032 type is passed in MODE. */
48034 static int
48035 ix86_reassociation_width (unsigned int op, machine_mode mode)
48037 int width = 1;
48038 /* Vector part. */
48039 if (VECTOR_MODE_P (mode))
48041 int div = 1;
48042 if (INTEGRAL_MODE_P (mode))
48043 width = ix86_cost->reassoc_vec_int;
48044 else if (FLOAT_MODE_P (mode))
48045 width = ix86_cost->reassoc_vec_fp;
48047 if (width == 1)
48048 return 1;
48050 /* Integer vector instructions execute in FP unit
48051 and can execute 3 additions and one multiplication per cycle. */
48052 if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
48053 && op != PLUS && op != MINUS)
48054 return 1;
48056 /* Account for targets that split wide vectors into multiple parts. */
48057 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
48058 div = GET_MODE_BITSIZE (mode) / 128;
48059 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
48060 div = GET_MODE_BITSIZE (mode) / 64;
48061 width = (width + div - 1) / div;
48063 /* Scalar part. */
48064 else if (INTEGRAL_MODE_P (mode))
48065 width = ix86_cost->reassoc_int;
48066 else if (FLOAT_MODE_P (mode))
48067 width = ix86_cost->reassoc_fp;
48069 /* Avoid using too many registers in 32-bit mode. */
48070 if (!TARGET_64BIT && width > 2)
48071 width = 2;
48072 return width;
48075 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
48076 place emms and femms instructions. */
48078 static machine_mode
48079 ix86_preferred_simd_mode (scalar_mode mode)
48081 if (!TARGET_SSE)
48082 return word_mode;
48084 switch (mode)
48086 case E_QImode:
48087 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48088 return V64QImode;
48089 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48090 return V32QImode;
48091 else
48092 return V16QImode;
48094 case E_HImode:
48095 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48096 return V32HImode;
48097 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48098 return V16HImode;
48099 else
48100 return V8HImode;
48102 case E_SImode:
48103 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48104 return V16SImode;
48105 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48106 return V8SImode;
48107 else
48108 return V4SImode;
48110 case E_DImode:
48111 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48112 return V8DImode;
48113 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48114 return V4DImode;
48115 else
48116 return V2DImode;
48118 case E_SFmode:
48119 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48120 return V16SFmode;
48121 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48122 return V8SFmode;
48123 else
48124 return V4SFmode;
48126 case E_DFmode:
48127 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48128 return V8DFmode;
48129 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48130 return V4DFmode;
48131 else if (TARGET_SSE2)
48132 return V2DFmode;
48133 /* FALLTHRU */
48135 default:
48136 return word_mode;
48140 /* If AVX is enabled then try vectorizing with both 256-bit and 128-bit
48141 vectors. If AVX512F is enabled then try vectorizing with 512-bit,
48142 256-bit and 128-bit vectors. */
48144 static unsigned int
48145 ix86_autovectorize_vector_sizes (void)
48147 unsigned int bytesizes = 0;
48149 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48150 bytesizes |= (64 | 32 | 16);
48151 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48152 bytesizes |= (32 | 16);
48154 return bytesizes;
48157 /* Implementation of targetm.vectorize.get_mask_mode. */
48159 static opt_machine_mode
48160 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
48162 unsigned elem_size = vector_size / nunits;
48164 /* Scalar mask case. */
48165 if ((TARGET_AVX512F && vector_size == 64)
48166 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
48168 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
48169 return smallest_int_mode_for_size (nunits);
48172 scalar_int_mode elem_mode
48173 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
48175 gcc_assert (elem_size * nunits == vector_size);
48177 return mode_for_vector (elem_mode, nunits);
48182 /* Return class of registers which could be used for pseudo of MODE
48183 and of class RCLASS for spilling instead of memory. Return NO_REGS
48184 if it is not possible or non-profitable. */
48186 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
48188 static reg_class_t
48189 ix86_spill_class (reg_class_t rclass, machine_mode mode)
48191 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
48192 && TARGET_SSE2
48193 && TARGET_INTER_UNIT_MOVES_TO_VEC
48194 && TARGET_INTER_UNIT_MOVES_FROM_VEC
48195 && (mode == SImode || (TARGET_64BIT && mode == DImode))
48196 && INTEGER_CLASS_P (rclass))
48197 return ALL_SSE_REGS;
48198 return NO_REGS;
48201 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
48202 but returns a lower bound. */
48204 static unsigned int
48205 ix86_max_noce_ifcvt_seq_cost (edge e)
48207 bool predictable_p = predictable_edge_p (e);
48209 enum compiler_param param
48210 = (predictable_p
48211 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
48212 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
48214 /* If we have a parameter set, use it; otherwise take a guess using
48215 BRANCH_COST. */
48216 if (global_options_set.x_param_values[param])
48217 return PARAM_VALUE (param);
48218 else
48219 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
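/* For example (a rough sketch of the fallback): COSTS_N_INSNS (2) is 8, so
   a branch cost of 3 caps the if-converted sequence at 24, i.e. roughly the
   cost of six average instructions.  */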
48222 /* Return true if SEQ is a good candidate as a replacement for the
48223 if-convertible sequence described in IF_INFO. */
48225 static bool
48226 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
48228 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
48230 int cmov_cnt = 0;
48231 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
48232 Maybe we should allow even more conditional moves as long as they
48233 are used far enough not to stall the CPU, or also consider
48234 IF_INFO->TEST_BB succ edge probabilities. */
48235 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
48237 rtx set = single_set (insn);
48238 if (!set)
48239 continue;
48240 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
48241 continue;
48242 rtx src = SET_SRC (set);
48243 machine_mode mode = GET_MODE (src);
48244 if (GET_MODE_CLASS (mode) != MODE_INT
48245 && GET_MODE_CLASS (mode) != MODE_FLOAT)
48246 continue;
48247 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
48248 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
48249 continue;
48250 /* INSN is a CMOV or FCMOV. */
48251 if (++cmov_cnt > 1)
48252 return false;
48255 return default_noce_conversion_profitable_p (seq, if_info);
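/* In other words (an illustration of the filter above): on a tuning with
   TARGET_ONE_IF_CONV_INSN, a speed-optimized candidate containing two
   IF_THEN_ELSE sets whose arms are registers or memory (i.e. two cmov or
   fcmov instructions) is rejected outright, while a single-cmov sequence is
   still handed to the generic cost comparison.  */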
48258 /* Implement targetm.vectorize.init_cost. */
48260 static void *
48261 ix86_init_cost (struct loop *)
48263 unsigned *cost = XNEWVEC (unsigned, 3);
48264 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
48265 return cost;
48268 /* Implement targetm.vectorize.add_stmt_cost. */
48270 static unsigned
48271 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
48272 struct _stmt_vec_info *stmt_info, int misalign,
48273 enum vect_cost_model_location where)
48275 unsigned *cost = (unsigned *) data;
48276 unsigned retval = 0;
48278 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
48279 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
48281 /* Penalize DFmode vector operations for Bonnell. */
48282 if (TARGET_BONNELL && kind == vector_stmt
48283 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
48284 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
48286 /* Statements in an inner loop relative to the loop being
48287 vectorized are weighted more heavily. The value here is
48288 arbitrary and could potentially be improved with analysis. */
48289 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
48290 count *= 50; /* FIXME. */
48292 retval = (unsigned) (count * stmt_cost);
48294 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
48295 for Silvermont, as it has an out-of-order integer pipeline and can execute
48296 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
48297 if ((TARGET_SILVERMONT || TARGET_INTEL)
48298 && stmt_info && stmt_info->stmt)
48300 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
48301 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
48302 retval = (retval * 17) / 10;
48305 cost[where] += retval;
48307 return retval;
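/* Worked example (illustrative): a vector_stmt with per-statement cost 1
   counted 4 times in the loop body adds 4 to cost[vect_body]; on Silvermont
   or other TARGET_INTEL tunings, an integer statement is instead accounted
   as 4 * 17 / 10 = 6, reflecting the 1.7 factor described above.  */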
48310 /* Implement targetm.vectorize.finish_cost. */
48312 static void
48313 ix86_finish_cost (void *data, unsigned *prologue_cost,
48314 unsigned *body_cost, unsigned *epilogue_cost)
48316 unsigned *cost = (unsigned *) data;
48317 *prologue_cost = cost[vect_prologue];
48318 *body_cost = cost[vect_body];
48319 *epilogue_cost = cost[vect_epilogue];
48322 /* Implement targetm.vectorize.destroy_cost_data. */
48324 static void
48325 ix86_destroy_cost_data (void *data)
48327 free (data);
48330 /* Validate target specific memory model bits in VAL. */
48332 static unsigned HOST_WIDE_INT
48333 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
48335 enum memmodel model = memmodel_from_int (val);
48336 bool strong;
48338 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
48339 |MEMMODEL_MASK)
48340 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
48342 warning (OPT_Winvalid_memory_model,
48343 "Unknown architecture specific memory model");
48344 return MEMMODEL_SEQ_CST;
48346 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
48347 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
48349 warning (OPT_Winvalid_memory_model,
48350 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
48351 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
48353 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
48355 warning (OPT_Winvalid_memory_model,
48356 "HLE_RELEASE not used with RELEASE or stronger memory model");
48357 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
48359 return val;
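/* The target-specific bits validated here are the HLE prefix hints, e.g.
   (a user-level sketch, assuming the __ATOMIC_HLE_* macros exposed for x86):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Setting both HLE bits at once is rejected as an unknown model and demoted
   to MEMMODEL_SEQ_CST, while pairing an HLE bit with a weaker base model
   than it requires is diagnosed and upgraded to SEQ_CST with the HLE bit
   preserved.  */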
48362 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
48363 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
48364 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
48365 or number of vecsize_mangle variants that should be emitted. */
48367 static int
48368 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
48369 struct cgraph_simd_clone *clonei,
48370 tree base_type, int num)
48372 int ret = 1;
48374 if (clonei->simdlen
48375 && (clonei->simdlen < 2
48376 || clonei->simdlen > 1024
48377 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
48379 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48380 "unsupported simdlen %d", clonei->simdlen);
48381 return 0;
48384 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
48385 if (TREE_CODE (ret_type) != VOID_TYPE)
48386 switch (TYPE_MODE (ret_type))
48388 case E_QImode:
48389 case E_HImode:
48390 case E_SImode:
48391 case E_DImode:
48392 case E_SFmode:
48393 case E_DFmode:
48394 /* case E_SCmode: */
48395 /* case E_DCmode: */
48396 break;
48397 default:
48398 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48399 "unsupported return type %qT for simd\n", ret_type);
48400 return 0;
48403 tree t;
48404 int i;
48406 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
48407 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
48408 switch (TYPE_MODE (TREE_TYPE (t)))
48410 case E_QImode:
48411 case E_HImode:
48412 case E_SImode:
48413 case E_DImode:
48414 case E_SFmode:
48415 case E_DFmode:
48416 /* case E_SCmode: */
48417 /* case E_DCmode: */
48418 break;
48419 default:
48420 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48421 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
48422 return 0;
48425 if (clonei->cilk_elemental)
48427 /* Parse the processor clause here. If not present, default to 'b'. */
48428 clonei->vecsize_mangle = 'b';
48430 else if (!TREE_PUBLIC (node->decl))
48432 /* If the function isn't exported, we can pick up just one ISA
48433 for the clones. */
48434 if (TARGET_AVX512F)
48435 clonei->vecsize_mangle = 'e';
48436 else if (TARGET_AVX2)
48437 clonei->vecsize_mangle = 'd';
48438 else if (TARGET_AVX)
48439 clonei->vecsize_mangle = 'c';
48440 else
48441 clonei->vecsize_mangle = 'b';
48442 ret = 1;
48444 else
48446 clonei->vecsize_mangle = "bcde"[num];
48447 ret = 4;
48449 clonei->mask_mode = VOIDmode;
48450 switch (clonei->vecsize_mangle)
48452 case 'b':
48453 clonei->vecsize_int = 128;
48454 clonei->vecsize_float = 128;
48455 break;
48456 case 'c':
48457 clonei->vecsize_int = 128;
48458 clonei->vecsize_float = 256;
48459 break;
48460 case 'd':
48461 clonei->vecsize_int = 256;
48462 clonei->vecsize_float = 256;
48463 break;
48464 case 'e':
48465 clonei->vecsize_int = 512;
48466 clonei->vecsize_float = 512;
48467 if (TYPE_MODE (base_type) == QImode)
48468 clonei->mask_mode = DImode;
48469 else
48470 clonei->mask_mode = SImode;
48471 break;
48473 if (clonei->simdlen == 0)
48475 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
48476 clonei->simdlen = clonei->vecsize_int;
48477 else
48478 clonei->simdlen = clonei->vecsize_float;
48479 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
48481 else if (clonei->simdlen > 16)
48483 /* For compatibility with ICC, use the same upper bounds
48484 for simdlen. In particular, for CTYPE below, use the return type,
48485 unless the function returns void, in which case use the characteristic
48486 type. If it is possible for the given SIMDLEN to pass a CTYPE value
48487 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
48488 for 64-bit code), accept that SIMDLEN; otherwise warn and don't
48489 emit the corresponding clone. */
48490 tree ctype = ret_type;
48491 if (TREE_CODE (ret_type) == VOID_TYPE)
48492 ctype = base_type;
48493 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
48494 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
48495 cnt /= clonei->vecsize_int;
48496 else
48497 cnt /= clonei->vecsize_float;
48498 if (cnt > (TARGET_64BIT ? 16 : 8))
48500 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48501 "unsupported simdlen %d", clonei->simdlen);
48502 return 0;
48505 return ret;
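/* Illustrative example (a sketch): for

     #pragma omp declare simd
     int f (int x);

   with external linkage, four clones 'b', 'c', 'd' and 'e' are emitted
   (SSE2, AVX, AVX2 and AVX512F mangling).  For the 'd' variant vecsize_int
   is 256, so the default simdlen for the int characteristic type is
   256 / 32 = 8.  */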
48508 /* Add target attribute to SIMD clone NODE if needed. */
48510 static void
48511 ix86_simd_clone_adjust (struct cgraph_node *node)
48513 const char *str = NULL;
48514 gcc_assert (node->decl == cfun->decl);
48515 switch (node->simdclone->vecsize_mangle)
48517 case 'b':
48518 if (!TARGET_SSE2)
48519 str = "sse2";
48520 break;
48521 case 'c':
48522 if (!TARGET_AVX)
48523 str = "avx";
48524 break;
48525 case 'd':
48526 if (!TARGET_AVX2)
48527 str = "avx2";
48528 break;
48529 case 'e':
48530 if (!TARGET_AVX512F)
48531 str = "avx512f";
48532 break;
48533 default:
48534 gcc_unreachable ();
48536 if (str == NULL)
48537 return;
48538 push_cfun (NULL);
48539 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
48540 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
48541 gcc_assert (ok);
48542 pop_cfun ();
48543 ix86_reset_previous_fndecl ();
48544 ix86_set_current_function (node->decl);
48547 /* If SIMD clone NODE can't be used in a vectorized loop
48548 in the current function, return -1; otherwise return the badness of using it
48549 (0 if it is most desirable from the vecsize_mangle point of view, 1 if
48550 slightly less desirable, etc.). */
48552 static int
48553 ix86_simd_clone_usable (struct cgraph_node *node)
48555 switch (node->simdclone->vecsize_mangle)
48557 case 'b':
48558 if (!TARGET_SSE2)
48559 return -1;
48560 if (!TARGET_AVX)
48561 return 0;
48562 return TARGET_AVX2 ? 2 : 1;
48563 case 'c':
48564 if (!TARGET_AVX)
48565 return -1;
48566 return TARGET_AVX2 ? 1 : 0;
48567 case 'd':
48568 if (!TARGET_AVX2)
48569 return -1;
48570 return 0;
48571 case 'e':
48572 if (!TARGET_AVX512F)
48573 return -1;
48574 return 0;
48575 default:
48576 gcc_unreachable ();
48580 /* This function adjusts the unroll factor based on
48581 the hardware capabilities. For example, bdver3 has
48582 a loop buffer which makes unrolling of smaller
48583 loops less important. This function decides the
48584 unroll factor using the number of memory references
48585 (the value 32 is used) as a heuristic. */
48587 static unsigned
48588 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
48590 basic_block *bbs;
48591 rtx_insn *insn;
48592 unsigned i;
48593 unsigned mem_count = 0;
48595 if (!TARGET_ADJUST_UNROLL)
48596 return nunroll;
48598 /* Count the number of memory references within the loop body.
48599 This value determines the unrolling factor for bdver3 and bdver4
48600 architectures. */
48601 subrtx_iterator::array_type array;
48602 bbs = get_loop_body (loop);
48603 for (i = 0; i < loop->num_nodes; i++)
48604 FOR_BB_INSNS (bbs[i], insn)
48605 if (NONDEBUG_INSN_P (insn))
48606 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
48607 if (const_rtx x = *iter)
48608 if (MEM_P (x))
48610 machine_mode mode = GET_MODE (x);
48611 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
48612 if (n_words > 4)
48613 mem_count += 2;
48614 else
48615 mem_count += 1;
48617 free (bbs);
48619 if (mem_count && mem_count <= 32)
48620 return 32 / mem_count;
48622 return nunroll;
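/* For instance (following the heuristic above): a loop body with 8 word-sized
   memory references is unrolled 32 / 8 = 4 times, and each reference wider
   than four words counts double, further reducing the factor for loops
   dominated by such accesses.  */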
48626 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
48628 static bool
48629 ix86_float_exceptions_rounding_supported_p (void)
48631 /* For x87 floating point with standard excess precision handling,
48632 there is no adddf3 pattern (since x87 floating point only has
48633 XFmode operations) so the default hook implementation gets this
48634 wrong. */
48635 return TARGET_80387 || TARGET_SSE_MATH;
48638 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
48640 static void
48641 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
48643 if (!TARGET_80387 && !TARGET_SSE_MATH)
48644 return;
48645 tree exceptions_var = create_tmp_var_raw (integer_type_node);
48646 if (TARGET_80387)
48648 tree fenv_index_type = build_index_type (size_int (6));
48649 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
48650 tree fenv_var = create_tmp_var_raw (fenv_type);
48651 TREE_ADDRESSABLE (fenv_var) = 1;
48652 tree fenv_ptr = build_pointer_type (fenv_type);
48653 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
48654 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
48655 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
48656 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
48657 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
48658 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
48659 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
48660 tree hold_fnclex = build_call_expr (fnclex, 0);
48661 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
48662 NULL_TREE, NULL_TREE);
48663 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
48664 hold_fnclex);
48665 *clear = build_call_expr (fnclex, 0);
48666 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
48667 tree fnstsw_call = build_call_expr (fnstsw, 0);
48668 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
48669 sw_var, fnstsw_call);
48670 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
48671 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
48672 exceptions_var, exceptions_x87);
48673 *update = build2 (COMPOUND_EXPR, integer_type_node,
48674 sw_mod, update_mod);
48675 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
48676 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
48678 if (TARGET_SSE_MATH)
48680 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
48681 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
48682 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
48683 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
48684 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
48685 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
48686 mxcsr_orig_var, stmxcsr_hold_call);
48687 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
48688 mxcsr_orig_var,
48689 build_int_cst (unsigned_type_node, 0x1f80));
48690 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
48691 build_int_cst (unsigned_type_node, 0xffffffc0));
48692 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
48693 mxcsr_mod_var, hold_mod_val);
48694 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
48695 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
48696 hold_assign_orig, hold_assign_mod);
48697 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
48698 ldmxcsr_hold_call);
48699 if (*hold)
48700 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
48701 else
48702 *hold = hold_all;
48703 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
48704 if (*clear)
48705 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
48706 ldmxcsr_clear_call);
48707 else
48708 *clear = ldmxcsr_clear_call;
48709 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
48710 tree exceptions_sse = fold_convert (integer_type_node,
48711 stmxcsr_update_call);
48712 if (*update)
48714 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
48715 exceptions_var, exceptions_sse);
48716 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
48717 exceptions_var, exceptions_mod);
48718 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
48719 exceptions_assign);
48721 else
48722 *update = build2 (MODIFY_EXPR, integer_type_node,
48723 exceptions_var, exceptions_sse);
48724 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
48725 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
48726 ldmxcsr_update_call);
48728 tree atomic_feraiseexcept
48729 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
48730 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
48731 1, exceptions_var);
48732 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
48733 atomic_feraiseexcept_call);
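/* Roughly, the trees built above implement the following (a pseudo-code
   sketch for the combined x87 + SSE case): HOLD saves the x87 environment
   with fnstenv and clears its exceptions with fnclex, then saves MXCSR and
   reloads it as (mxcsr | 0x1f80) & 0xffffffc0, i.e. all exceptions masked
   and the sticky flags cleared; CLEAR reruns fnclex and reloads the masked
   MXCSR; UPDATE collects the raised exceptions from fnstsw and stmxcsr into
   one integer, restores both saved environments and passes the bits to
   __atomic_feraiseexcept.  */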
48736 /* Return mode to be used for bounds or VOIDmode
48737 if bounds are not supported. */
48739 static machine_mode
48740 ix86_mpx_bound_mode ()
48742 /* Do not support pointer checker if MPX
48743 is not enabled. */
48744 if (!TARGET_MPX)
48746 if (flag_check_pointer_bounds)
48747 warning (0, "Pointer Checker requires MPX support on this target."
48748 " Use -mmpx options to enable MPX.");
48749 return VOIDmode;
48752 return BNDmode;
48755 /* Return constant used to statically initialize constant bounds.
48757 This function is used to create special bound values. For now
48758 only INIT bounds and NONE bounds are expected. More special
48759 values may be added later. */
48761 static tree
48762 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
48764 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
48765 : build_zero_cst (pointer_sized_int_node);
48766 tree high = ub ? build_zero_cst (pointer_sized_int_node)
48767 : build_minus_one_cst (pointer_sized_int_node);
48769 /* This function is supposed to be used to create INIT and
48770 NONE bounds only. */
48771 gcc_assert ((lb == 0 && ub == -1)
48772 || (lb == -1 && ub == 0));
48774 return build_complex (NULL, low, high);
48777 /* Generate a list of statements STMTS to initialize pointer bounds
48778 variable VAR with bounds LB and UB. Return the number of generated
48779 statements. */
48781 static int
48782 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
48784 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
48785 tree lhs, modify, var_p;
48787 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
48788 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
48790 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
48791 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
48792 append_to_statement_list (modify, stmts);
48794 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
48795 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
48796 TYPE_SIZE_UNIT (pointer_sized_int_node)));
48797 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
48798 append_to_statement_list (modify, stmts);
48800 return 2;
48803 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
48804 /* For i386, a common symbol is local only for non-PIE binaries. For
48805 x86-64, a common symbol is local only for non-PIE binaries or if the
48806 linker supports copy relocations in PIE binaries. */
48808 static bool
48809 ix86_binds_local_p (const_tree exp)
48811 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
48812 (!flag_pic
48813 || (TARGET_64BIT
48814 && HAVE_LD_PIE_COPYRELOC != 0)));
48816 #endif
48818 /* If MEM is in the form of [base+offset], extract the two parts
48819 of the address into BASE and OFFSET; otherwise return false. */
48821 static bool
48822 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
48824 rtx addr;
48826 gcc_assert (MEM_P (mem));
48828 addr = XEXP (mem, 0);
48830 if (GET_CODE (addr) == CONST)
48831 addr = XEXP (addr, 0);
48833 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
48835 *base = addr;
48836 *offset = const0_rtx;
48837 return true;
48840 if (GET_CODE (addr) == PLUS
48841 && (REG_P (XEXP (addr, 0))
48842 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
48843 && CONST_INT_P (XEXP (addr, 1)))
48845 *base = XEXP (addr, 0);
48846 *offset = XEXP (addr, 1);
48847 return true;
48850 return false;
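/* Example (mirroring the cases above): for (mem (plus (reg R) (const_int 8)))
   *BASE becomes R and *OFFSET becomes (const_int 8); for a bare
   (mem (symbol_ref "x")) the offset defaults to const0_rtx.  */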
48853 /* Given OPERANDS of consecutive load/store, check if we can merge
48854 them into a move-multiple. LOAD is true if they are load instructions.
48855 MODE is the mode of the memory operands. */
48857 bool
48858 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
48859 machine_mode mode)
48861 HOST_WIDE_INT offval_1, offval_2, msize;
48862 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
48864 if (load)
48866 mem_1 = operands[1];
48867 mem_2 = operands[3];
48868 reg_1 = operands[0];
48869 reg_2 = operands[2];
48871 else
48873 mem_1 = operands[0];
48874 mem_2 = operands[2];
48875 reg_1 = operands[1];
48876 reg_2 = operands[3];
48879 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
48881 if (REGNO (reg_1) != REGNO (reg_2))
48882 return false;
48884 /* Check if the addresses are in the form of [base+offset]. */
48885 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
48886 return false;
48887 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
48888 return false;
48890 /* Check if the bases are the same. */
48891 if (!rtx_equal_p (base_1, base_2))
48892 return false;
48894 offval_1 = INTVAL (offset_1);
48895 offval_2 = INTVAL (offset_2);
48896 msize = GET_MODE_SIZE (mode);
48897 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
48898 if (offval_1 + msize != offval_2)
48899 return false;
48901 return true;
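/* Sketch of a positive match: two loads of MODE into the same register
   number from [base] and [base + GET_MODE_SIZE (MODE)] (same base, adjacent,
   ascending addresses) pass every check above and may be fused into a single
   wider move by the caller.  */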
48904 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
48906 static bool
48907 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
48908 optimization_type opt_type)
48910 switch (op)
48912 case asin_optab:
48913 case acos_optab:
48914 case log1p_optab:
48915 case exp_optab:
48916 case exp10_optab:
48917 case exp2_optab:
48918 case expm1_optab:
48919 case ldexp_optab:
48920 case scalb_optab:
48921 case round_optab:
48922 return opt_type == OPTIMIZE_FOR_SPEED;
48924 case rint_optab:
48925 if (SSE_FLOAT_MODE_P (mode1)
48926 && TARGET_SSE_MATH
48927 && !flag_trapping_math
48928 && !TARGET_SSE4_1)
48929 return opt_type == OPTIMIZE_FOR_SPEED;
48930 return true;
48932 case floor_optab:
48933 case ceil_optab:
48934 case btrunc_optab:
48935 if (SSE_FLOAT_MODE_P (mode1)
48936 && TARGET_SSE_MATH
48937 && !flag_trapping_math
48938 && TARGET_SSE4_1)
48939 return true;
48940 return opt_type == OPTIMIZE_FOR_SPEED;
48942 case rsqrt_optab:
48943 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
48945 default:
48946 return true;
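/* For example (tracing the cases above): with SSE math, -fno-trapping-math
   and no SSE4.1, rint is expanded inline only when optimizing for speed,
   whereas with SSE4.1 the single ROUNDSS/ROUNDSD instruction makes floor,
   ceil and trunc worthwhile even when optimizing for size.  */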
48950 /* Address space support.
48952 This is not "far pointers" in the 16-bit sense, but an easy way
48953 to use %fs and %gs segment prefixes. Therefore:
48955 (a) All address spaces have the same modes,
48956 (b) All address spaces have the same address forms,
48957 (c) While %fs and %gs are technically subsets of the generic
48958 address space, they are probably not subsets of each other.
48959 (d) Since we have no access to the segment base register values
48960 without resorting to a system call, we cannot convert a
48961 non-default address space to a default address space.
48962 Therefore we do not claim %fs or %gs are subsets of generic.
48964 Therefore we can (mostly) use the default hooks. */
48966 /* All use of segmentation is assumed to make address 0 valid. */
48968 static bool
48969 ix86_addr_space_zero_address_valid (addr_space_t as)
48971 return as != ADDR_SPACE_GENERIC;
48974 static void
48975 ix86_init_libfuncs (void)
48977 if (TARGET_64BIT)
48979 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
48980 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
48982 else
48984 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
48985 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
48988 #if TARGET_MACHO
48989 darwin_rename_builtins ();
48990 #endif
48993 /* Generate a call to a divmod libfunc (e.g. __divmoddi4). */
48995 static void
48996 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
48997 rtx op0, rtx op1,
48998 rtx *quot_p, rtx *rem_p)
49000 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
49002 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
49003 mode,
49004 op0, GET_MODE (op0),
49005 op1, GET_MODE (op1),
49006 XEXP (rem, 0), Pmode);
49007 *quot_p = quot;
49008 *rem_p = rem;
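/* A sketch of the resulting expansion: the remainder lives in a stack
   temporary passed by address, so a DImode division on a 32-bit target
   becomes roughly quot = __divmoddi4 (op0, op1, &rem_slot), with the
   remainder read back from the slot through *REM_P.  */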
49011 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
49012 FPU, assume that the fpcw is set to extended precision; when using
49013 only SSE, rounding is correct; when using both SSE and the FPU,
49014 the rounding precision is indeterminate, since either may be chosen
49015 apparently at random. */
49017 static enum flt_eval_method
49018 ix86_excess_precision (enum excess_precision_type type)
49020 switch (type)
49022 case EXCESS_PRECISION_TYPE_FAST:
49023 /* The fastest type to promote to will always be the native type,
49024 whether that occurs with implicit excess precision or
49025 otherwise. */
49026 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49027 case EXCESS_PRECISION_TYPE_STANDARD:
49028 case EXCESS_PRECISION_TYPE_IMPLICIT:
49029 /* Otherwise, the excess precision we want when we are
49030 in a standards-compliant mode, and the implicit precision we
49031 provide, would be identical were it not for the unpredictable
49032 cases. */
49033 if (!TARGET_80387)
49034 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49035 else if (!TARGET_MIX_SSE_I387)
49037 if (!TARGET_SSE_MATH)
49038 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
49039 else if (TARGET_SSE2)
49040 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49043 /* If we are in a standards-compliant mode, but we know we will
49044 calculate in unpredictable precision, return
49045 FLT_EVAL_METHOD_PROMOTE_TO_FLOAT. There is no reason to introduce
49046 explicit excess precision if the target can't guarantee it will
49047 honor it. */
49048 return (type == EXCESS_PRECISION_TYPE_STANDARD
49049 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
49050 : FLT_EVAL_METHOD_UNPREDICTABLE);
49051 default:
49052 gcc_unreachable ();
49055 return FLT_EVAL_METHOD_UNPREDICTABLE;
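/* Summarizing the mapping above (an informal sketch): x87-only math gives
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE, i.e. float and double expressions
   are evaluated in the 80-bit format; SSE2 math gives
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT, i.e. no excess precision; mixing the two
   units is unpredictable for implicit excess precision, while standards mode
   settles on the no-excess-precision answer.  */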
49058 /* Target-specific selftests. */
49060 #if CHECKING_P
49062 namespace selftest {
49064 /* Verify that hard regs are dumped as expected (in compact mode). */
49066 static void
49067 ix86_test_dumping_hard_regs ()
49069 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
49070 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
49073 /* Test dumping an insn with repeated references to the same SCRATCH,
49074 to verify the rtx_reuse code. */
49076 static void
49077 ix86_test_dumping_memory_blockage ()
49079 set_new_first_and_last_insn (NULL, NULL);
49081 rtx pat = gen_memory_blockage ();
49082 rtx_reuse_manager r;
49083 r.preprocess (pat);
49085 /* Verify that the repeated references to the SCRATCH show the use
49086 of reuse IDs. The first should be prefixed with a reuse ID,
49087 and the second should be dumped as a "reuse_rtx" of that ID.
49088 The expected string assumes Pmode == DImode. */
49089 if (Pmode == DImode)
49090 ASSERT_RTL_DUMP_EQ_WITH_REUSE
49091 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
49092 " (unspec:BLK [\n"
49093 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
49094 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
49097 /* Verify loading an RTL dump; specifically a dump of copying
49098 a param on x86_64 from a hard reg into the frame.
49099 This test is target-specific since the dump contains target-specific
49100 hard reg names. */
49102 static void
49103 ix86_test_loading_dump_fragment_1 ()
49105 rtl_dump_test t (SELFTEST_LOCATION,
49106 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
49108 rtx_insn *insn = get_insn_by_uid (1);
49110 /* The block structure and indentation here are purely for
49111 readability; they mirror the structure of the rtx. */
49112 tree mem_expr;
49114 rtx pat = PATTERN (insn);
49115 ASSERT_EQ (SET, GET_CODE (pat));
49117 rtx dest = SET_DEST (pat);
49118 ASSERT_EQ (MEM, GET_CODE (dest));
49119 /* Verify the "/c" was parsed. */
49120 ASSERT_TRUE (RTX_FLAG (dest, call));
49121 ASSERT_EQ (SImode, GET_MODE (dest));
49123 rtx addr = XEXP (dest, 0);
49124 ASSERT_EQ (PLUS, GET_CODE (addr));
49125 ASSERT_EQ (DImode, GET_MODE (addr));
49127 rtx lhs = XEXP (addr, 0);
49128 /* Verify that the "frame" REG was consolidated. */
49129 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
49132 rtx rhs = XEXP (addr, 1);
49133 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
49134 ASSERT_EQ (-4, INTVAL (rhs));
49137 /* Verify the "[1 i+0 S4 A32]" was parsed. */
49138 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
49139 /* "i" should have been handled by synthesizing a global int
49140 variable named "i". */
49141 mem_expr = MEM_EXPR (dest);
49142 ASSERT_NE (mem_expr, NULL);
49143 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
49144 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
49145 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
49146 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
49147 /* "+0". */
49148 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
49149 ASSERT_EQ (0, MEM_OFFSET (dest));
49150 /* "S4". */
49151 ASSERT_EQ (4, MEM_SIZE (dest));
49152 /* "A32. */
49153 ASSERT_EQ (32, MEM_ALIGN (dest));
49156 rtx src = SET_SRC (pat);
49157 ASSERT_EQ (REG, GET_CODE (src));
49158 ASSERT_EQ (SImode, GET_MODE (src));
49159 ASSERT_EQ (5, REGNO (src));
49160 tree reg_expr = REG_EXPR (src);
49161 /* "i" here should point to the same var as for the MEM_EXPR. */
49162 ASSERT_EQ (reg_expr, mem_expr);
49167 /* Verify that the RTL loader copes with a call_insn dump.
49168 This test is target-specific since the dump contains a target-specific
49169 hard reg name. */
49171 static void
49172 ix86_test_loading_call_insn ()
49174 /* The test dump includes register "xmm0", which requires TARGET_SSE
49175 to exist. */
49176 if (!TARGET_SSE)
49177 return;
49179 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
49181 rtx_insn *insn = get_insns ();
49182 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
49184 /* "/j". */
49185 ASSERT_TRUE (RTX_FLAG (insn, jump));
49187 rtx pat = PATTERN (insn);
49188 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
49190 /* Verify REG_NOTES. */
49192 /* "(expr_list:REG_CALL_DECL". */
49193 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
49194 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
49195 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
49197 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
49198 rtx_expr_list *note1 = note0->next ();
49199 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
49201 ASSERT_EQ (NULL, note1->next ());
49204 /* Verify CALL_INSN_FUNCTION_USAGE. */
49206 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
49207 rtx_expr_list *usage
49208 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
49209 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
49210 ASSERT_EQ (DFmode, GET_MODE (usage));
49211 ASSERT_EQ (USE, GET_CODE (usage->element ()));
49212 ASSERT_EQ (NULL, usage->next ());
49216 /* Verify that the RTL loader copes with a dump from print_rtx_function.
49217 This test is target-specific since the dump contains target-specific
49218 hard reg names. */
49220 static void
49221 ix86_test_loading_full_dump ()
49223 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
49225 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
49227 rtx_insn *insn_1 = get_insn_by_uid (1);
49228 ASSERT_EQ (NOTE, GET_CODE (insn_1));
49230 rtx_insn *insn_7 = get_insn_by_uid (7);
49231 ASSERT_EQ (INSN, GET_CODE (insn_7));
49232 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
49234 rtx_insn *insn_15 = get_insn_by_uid (15);
49235 ASSERT_EQ (INSN, GET_CODE (insn_15));
49236 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
49238 /* Verify crtl->return_rtx. */
49239 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
49240 ASSERT_EQ (0, REGNO (crtl->return_rtx));
49241 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
49244 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
49245 In particular, verify that it correctly loads the 2nd operand.
49246 This test is target-specific since these are machine-specific
49247 operands (and enums). */
49249 static void
49250 ix86_test_loading_unspec ()
49252 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
49254 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
49256 ASSERT_TRUE (cfun);
49258 /* Test of an UNSPEC. */
49259 rtx_insn *insn = get_insns ();
49260 ASSERT_EQ (INSN, GET_CODE (insn));
49261 rtx set = single_set (insn);
49262 ASSERT_NE (NULL, set);
49263 rtx dst = SET_DEST (set);
49264 ASSERT_EQ (MEM, GET_CODE (dst));
49265 rtx src = SET_SRC (set);
49266 ASSERT_EQ (UNSPEC, GET_CODE (src));
49267 ASSERT_EQ (BLKmode, GET_MODE (src));
49268 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
49270 rtx v0 = XVECEXP (src, 0, 0);
49272 /* Verify that the two uses of the first SCRATCH have pointer
49273 equality. */
49274 rtx scratch_a = XEXP (dst, 0);
49275 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
49277 rtx scratch_b = XEXP (v0, 0);
49278 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
49280 ASSERT_EQ (scratch_a, scratch_b);
49282 /* Verify that the two mems are thus treated as equal. */
49283 ASSERT_TRUE (rtx_equal_p (dst, v0));
49285 /* Verify that the insn is recognized. */
49286 ASSERT_NE (-1, recog_memoized (insn));
49288 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
49289 insn = NEXT_INSN (insn);
49290 ASSERT_EQ (INSN, GET_CODE (insn));
49292 set = single_set (insn);
49293 ASSERT_NE (NULL, set);
49295 src = SET_SRC (set);
49296 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
49297 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
49300 /* Run all target-specific selftests. */
49302 static void
49303 ix86_run_selftests (void)
49305 ix86_test_dumping_hard_regs ();
49306 ix86_test_dumping_memory_blockage ();
49308 /* Various tests of loading RTL dumps, here because they contain
49309 ix86-isms (e.g. names of hard regs). */
49310 ix86_test_loading_dump_fragment_1 ();
49311 ix86_test_loading_call_insn ();
49312 ix86_test_loading_full_dump ();
49313 ix86_test_loading_unspec ();
49316 } // namespace selftest
49318 #endif /* CHECKING_P */
49320 /* Initialize the GCC target structure. */
49321 #undef TARGET_RETURN_IN_MEMORY
49322 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
49324 #undef TARGET_LEGITIMIZE_ADDRESS
49325 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
49327 #undef TARGET_ATTRIBUTE_TABLE
49328 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
49329 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
49330 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
49331 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49332 # undef TARGET_MERGE_DECL_ATTRIBUTES
49333 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
49334 #endif
49336 #undef TARGET_COMP_TYPE_ATTRIBUTES
49337 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
49339 #undef TARGET_INIT_BUILTINS
49340 #define TARGET_INIT_BUILTINS ix86_init_builtins
49341 #undef TARGET_BUILTIN_DECL
49342 #define TARGET_BUILTIN_DECL ix86_builtin_decl
49343 #undef TARGET_EXPAND_BUILTIN
49344 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
49346 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
49347 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
49348 ix86_builtin_vectorized_function
49350 #undef TARGET_VECTORIZE_BUILTIN_GATHER
49351 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
49353 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
49354 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
49356 #undef TARGET_BUILTIN_RECIPROCAL
49357 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
49359 #undef TARGET_ASM_FUNCTION_EPILOGUE
49360 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
49362 #undef TARGET_ENCODE_SECTION_INFO
49363 #ifndef SUBTARGET_ENCODE_SECTION_INFO
49364 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
49365 #else
49366 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
49367 #endif
49369 #undef TARGET_ASM_OPEN_PAREN
49370 #define TARGET_ASM_OPEN_PAREN ""
49371 #undef TARGET_ASM_CLOSE_PAREN
49372 #define TARGET_ASM_CLOSE_PAREN ""
49374 #undef TARGET_ASM_BYTE_OP
49375 #define TARGET_ASM_BYTE_OP ASM_BYTE
49377 #undef TARGET_ASM_ALIGNED_HI_OP
49378 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
49379 #undef TARGET_ASM_ALIGNED_SI_OP
49380 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
49381 #ifdef ASM_QUAD
49382 #undef TARGET_ASM_ALIGNED_DI_OP
49383 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
49384 #endif
49386 #undef TARGET_PROFILE_BEFORE_PROLOGUE
49387 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
49389 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
49390 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
49392 #undef TARGET_ASM_UNALIGNED_HI_OP
49393 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
49394 #undef TARGET_ASM_UNALIGNED_SI_OP
49395 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
49396 #undef TARGET_ASM_UNALIGNED_DI_OP
49397 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
49399 #undef TARGET_PRINT_OPERAND
49400 #define TARGET_PRINT_OPERAND ix86_print_operand
49401 #undef TARGET_PRINT_OPERAND_ADDRESS
49402 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
49403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
49404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
49405 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
49406 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
49408 #undef TARGET_SCHED_INIT_GLOBAL
49409 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
49410 #undef TARGET_SCHED_ADJUST_COST
49411 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
49412 #undef TARGET_SCHED_ISSUE_RATE
49413 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
49414 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
49415 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
49416 ia32_multipass_dfa_lookahead
49417 #undef TARGET_SCHED_MACRO_FUSION_P
49418 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
49419 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
49420 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
49422 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
49423 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
49425 #undef TARGET_MEMMODEL_CHECK
49426 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
49428 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
49429 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
49431 #ifdef HAVE_AS_TLS
49432 #undef TARGET_HAVE_TLS
49433 #define TARGET_HAVE_TLS true
49434 #endif
49435 #undef TARGET_CANNOT_FORCE_CONST_MEM
49436 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
49437 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
49438 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
49440 #undef TARGET_DELEGITIMIZE_ADDRESS
49441 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
49443 #undef TARGET_MS_BITFIELD_LAYOUT_P
49444 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
49446 #if TARGET_MACHO
49447 #undef TARGET_BINDS_LOCAL_P
49448 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
49449 #else
49450 #undef TARGET_BINDS_LOCAL_P
49451 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
49452 #endif
49453 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49454 #undef TARGET_BINDS_LOCAL_P
49455 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
49456 #endif
49458 #undef TARGET_ASM_OUTPUT_MI_THUNK
49459 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
49460 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
49461 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
49463 #undef TARGET_ASM_FILE_START
49464 #define TARGET_ASM_FILE_START x86_file_start
49466 #undef TARGET_OPTION_OVERRIDE
49467 #define TARGET_OPTION_OVERRIDE ix86_option_override
49469 #undef TARGET_REGISTER_MOVE_COST
49470 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
49471 #undef TARGET_MEMORY_MOVE_COST
49472 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
49473 #undef TARGET_RTX_COSTS
49474 #define TARGET_RTX_COSTS ix86_rtx_costs
49475 #undef TARGET_ADDRESS_COST
49476 #define TARGET_ADDRESS_COST ix86_address_cost
49478 #undef TARGET_FLAGS_REGNUM
49479 #define TARGET_FLAGS_REGNUM FLAGS_REG
49480 #undef TARGET_FIXED_CONDITION_CODE_REGS
49481 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
49482 #undef TARGET_CC_MODES_COMPATIBLE
49483 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
49485 #undef TARGET_MACHINE_DEPENDENT_REORG
49486 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
49488 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
49489 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
49491 #undef TARGET_BUILD_BUILTIN_VA_LIST
49492 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
49494 #undef TARGET_FOLD_BUILTIN
49495 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
49497 #undef TARGET_GIMPLE_FOLD_BUILTIN
49498 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
49500 #undef TARGET_COMPARE_VERSION_PRIORITY
49501 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
49503 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
49504 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
49505 ix86_generate_version_dispatcher_body
49507 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
49508 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
49509 ix86_get_function_versions_dispatcher
49511 #undef TARGET_ENUM_VA_LIST_P
49512 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
49514 #undef TARGET_FN_ABI_VA_LIST
49515 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
49517 #undef TARGET_CANONICAL_VA_LIST_TYPE
49518 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
49520 #undef TARGET_EXPAND_BUILTIN_VA_START
49521 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
49523 #undef TARGET_MD_ASM_ADJUST
49524 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
49526 #undef TARGET_C_EXCESS_PRECISION
49527 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
49528 #undef TARGET_PROMOTE_PROTOTYPES
49529 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
49530 #undef TARGET_SETUP_INCOMING_VARARGS
49531 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
49532 #undef TARGET_MUST_PASS_IN_STACK
49533 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
49534 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
49535 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
49536 #undef TARGET_FUNCTION_ARG_ADVANCE
49537 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
49538 #undef TARGET_FUNCTION_ARG
49539 #define TARGET_FUNCTION_ARG ix86_function_arg
49540 #undef TARGET_INIT_PIC_REG
49541 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
49542 #undef TARGET_USE_PSEUDO_PIC_REG
49543 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
49544 #undef TARGET_FUNCTION_ARG_BOUNDARY
49545 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
49546 #undef TARGET_PASS_BY_REFERENCE
49547 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
49548 #undef TARGET_INTERNAL_ARG_POINTER
49549 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
49550 #undef TARGET_UPDATE_STACK_BOUNDARY
49551 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
49552 #undef TARGET_GET_DRAP_RTX
49553 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
49554 #undef TARGET_STRICT_ARGUMENT_NAMING
49555 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
49556 #undef TARGET_STATIC_CHAIN
49557 #define TARGET_STATIC_CHAIN ix86_static_chain
49558 #undef TARGET_TRAMPOLINE_INIT
49559 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
49560 #undef TARGET_RETURN_POPS_ARGS
49561 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
49563 #undef TARGET_WARN_FUNC_RETURN
49564 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
49566 #undef TARGET_LEGITIMATE_COMBINED_INSN
49567 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
49569 #undef TARGET_ASAN_SHADOW_OFFSET
49570 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
49572 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
49573 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
49575 #undef TARGET_SCALAR_MODE_SUPPORTED_P
49576 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
49578 #undef TARGET_VECTOR_MODE_SUPPORTED_P
49579 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
49581 #undef TARGET_C_MODE_FOR_SUFFIX
49582 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
49584 #ifdef HAVE_AS_TLS
49585 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
49586 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
49587 #endif
49589 #ifdef SUBTARGET_INSERT_ATTRIBUTES
49590 #undef TARGET_INSERT_ATTRIBUTES
49591 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
49592 #endif
49594 #undef TARGET_MANGLE_TYPE
49595 #define TARGET_MANGLE_TYPE ix86_mangle_type
49597 #undef TARGET_STACK_PROTECT_GUARD
49598 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
49600 #if !TARGET_MACHO
49601 #undef TARGET_STACK_PROTECT_FAIL
49602 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
49603 #endif
49605 #undef TARGET_FUNCTION_VALUE
49606 #define TARGET_FUNCTION_VALUE ix86_function_value
49608 #undef TARGET_FUNCTION_VALUE_REGNO_P
49609 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
49611 #undef TARGET_PROMOTE_FUNCTION_MODE
49612 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
49614 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
49615 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
49617 #undef TARGET_MEMBER_TYPE_FORCES_BLK
49618 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
49620 #undef TARGET_INSTANTIATE_DECLS
49621 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
49623 #undef TARGET_SECONDARY_RELOAD
49624 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
49625 #undef TARGET_SECONDARY_MEMORY_NEEDED
49626 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
49627 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
49628 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
49630 #undef TARGET_CLASS_MAX_NREGS
49631 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
49633 #undef TARGET_PREFERRED_RELOAD_CLASS
49634 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
49635 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
49636 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
49637 #undef TARGET_CLASS_LIKELY_SPILLED_P
49638 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
49640 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
49641 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
49642 ix86_builtin_vectorization_cost
49643 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
49644 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
49645 ix86_vectorize_vec_perm_const_ok
49646 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
49647 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
49648 ix86_preferred_simd_mode
49649 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
49650 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
49651 ix86_autovectorize_vector_sizes
49652 #undef TARGET_VECTORIZE_GET_MASK_MODE
49653 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
49654 #undef TARGET_VECTORIZE_INIT_COST
49655 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
49656 #undef TARGET_VECTORIZE_ADD_STMT_COST
49657 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
49658 #undef TARGET_VECTORIZE_FINISH_COST
49659 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
49660 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
49661 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
49663 #undef TARGET_SET_CURRENT_FUNCTION
49664 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
49666 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
49667 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
49669 #undef TARGET_OPTION_SAVE
49670 #define TARGET_OPTION_SAVE ix86_function_specific_save
49672 #undef TARGET_OPTION_RESTORE
49673 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
49675 #undef TARGET_OPTION_POST_STREAM_IN
49676 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
49678 #undef TARGET_OPTION_PRINT
49679 #define TARGET_OPTION_PRINT ix86_function_specific_print
49681 #undef TARGET_OPTION_FUNCTION_VERSIONS
49682 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
49684 #undef TARGET_CAN_INLINE_P
49685 #define TARGET_CAN_INLINE_P ix86_can_inline_p
49687 #undef TARGET_LEGITIMATE_ADDRESS_P
49688 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
49690 #undef TARGET_REGISTER_PRIORITY
49691 #define TARGET_REGISTER_PRIORITY ix86_register_priority
49693 #undef TARGET_REGISTER_USAGE_LEVELING_P
49694 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
49696 #undef TARGET_LEGITIMATE_CONSTANT_P
49697 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
49699 #undef TARGET_COMPUTE_FRAME_LAYOUT
49700 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
49702 #undef TARGET_FRAME_POINTER_REQUIRED
49703 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
49705 #undef TARGET_CAN_ELIMINATE
49706 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
49708 #undef TARGET_EXTRA_LIVE_ON_ENTRY
49709 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
49711 #undef TARGET_ASM_CODE_END
49712 #define TARGET_ASM_CODE_END ix86_code_end
49714 #undef TARGET_CONDITIONAL_REGISTER_USAGE
49715 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
49717 #undef TARGET_CANONICALIZE_COMPARISON
49718 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
49720 #undef TARGET_LOOP_UNROLL_ADJUST
49721 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
49723 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
49724 #undef TARGET_SPILL_CLASS
49725 #define TARGET_SPILL_CLASS ix86_spill_class
49727 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
49728 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
49729 ix86_simd_clone_compute_vecsize_and_simdlen
49731 #undef TARGET_SIMD_CLONE_ADJUST
49732 #define TARGET_SIMD_CLONE_ADJUST \
49733 ix86_simd_clone_adjust
49735 #undef TARGET_SIMD_CLONE_USABLE
49736 #define TARGET_SIMD_CLONE_USABLE \
49737 ix86_simd_clone_usable
49739 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
49740 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
49741 ix86_float_exceptions_rounding_supported_p
49743 #undef TARGET_MODE_EMIT
49744 #define TARGET_MODE_EMIT ix86_emit_mode_set
49746 #undef TARGET_MODE_NEEDED
49747 #define TARGET_MODE_NEEDED ix86_mode_needed
49749 #undef TARGET_MODE_AFTER
49750 #define TARGET_MODE_AFTER ix86_mode_after
49752 #undef TARGET_MODE_ENTRY
49753 #define TARGET_MODE_ENTRY ix86_mode_entry
49755 #undef TARGET_MODE_EXIT
49756 #define TARGET_MODE_EXIT ix86_mode_exit
49758 #undef TARGET_MODE_PRIORITY
49759 #define TARGET_MODE_PRIORITY ix86_mode_priority
49761 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
49762 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
49764 #undef TARGET_LOAD_BOUNDS_FOR_ARG
49765 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
49767 #undef TARGET_STORE_BOUNDS_FOR_ARG
49768 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
49770 #undef TARGET_LOAD_RETURNED_BOUNDS
49771 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
49773 #undef TARGET_STORE_RETURNED_BOUNDS
49774 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
49776 #undef TARGET_CHKP_BOUND_MODE
49777 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
49779 #undef TARGET_BUILTIN_CHKP_FUNCTION
49780 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
49782 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
49783 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
49785 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
49786 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
49788 #undef TARGET_CHKP_INITIALIZE_BOUNDS
49789 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
49791 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
49792 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
49794 #undef TARGET_OFFLOAD_OPTIONS
49795 #define TARGET_OFFLOAD_OPTIONS \
49796 ix86_offload_options
49798 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
49799 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
49801 #undef TARGET_OPTAB_SUPPORTED_P
49802 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
49804 #undef TARGET_HARD_REGNO_SCRATCH_OK
49805 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
49807 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
49808 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
49810 #undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
49811 #define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
49813 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
49814 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
49816 #undef TARGET_INIT_LIBFUNCS
49817 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
49819 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
49820 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
49822 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
49823 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
49825 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
49826 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
49828 #undef TARGET_HARD_REGNO_NREGS
49829 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
49830 #undef TARGET_HARD_REGNO_MODE_OK
49831 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
49833 #undef TARGET_MODES_TIEABLE_P
49834 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
49836 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
49837 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
49838 ix86_hard_regno_call_part_clobbered
49840 #undef TARGET_CAN_CHANGE_MODE_CLASS
49841 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
49843 #undef TARGET_CONSTANT_ALIGNMENT
49844 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
49846 #if CHECKING_P
49847 #undef TARGET_RUN_TARGET_SELFTESTS
49848 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
49849 #endif /* #if CHECKING_P */
49851 struct gcc_target targetm = TARGET_INITIALIZER;
49853 #include "gt-i386.h"