i386: Update -mfunction-return= for return with pop
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "params.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "tree-chkp.h"
77 #include "rtl-chkp.h"
78 #include "dbgcnt.h"
79 #include "case-cfn-macros.h"
80 #include "regrename.h"
81 #include "dojump.h"
82 #include "fold-const-call.h"
83 #include "tree-vrp.h"
84 #include "tree-ssanames.h"
85 #include "selftest.h"
86 #include "selftest-rtl.h"
87 #include "print-rtl.h"
88 #include "intl.h"
89 #include "ifcvt.h"
90 #include "symbol-summary.h"
91 #include "ipa-prop.h"
92 #include "ipa-fnsummary.h"
93 #include "wide-int-bitmask.h"
95 /* This file should be included last. */
96 #include "target-def.h"
98 #include "x86-tune-costs.h"
100 static rtx legitimize_dllimport_symbol (rtx, bool);
101 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
102 static rtx legitimize_pe_coff_symbol (rtx, bool);
103 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
104 static bool ix86_save_reg (unsigned int, bool, bool);
105 static bool ix86_function_naked (const_tree);
106 static bool ix86_notrack_prefixed_insn_p (rtx);
107 static void ix86_emit_restore_reg_using_pop (rtx);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
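/* Illustrative note, not part of the original source: MODE_INDEX selects the
   per-mode entry of cost arrays such as ix86_cost->mult_init[] and
   ix86_cost->divide[].  A minimal usage sketch, assuming the usual
   processor_costs layout:

     int simode_mul_setup_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   i.e. index 2, the SImode slot, on the current -mtune target.  */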
123 /* Set by -mtune. */
124 const struct processor_costs *ix86_tune_cost = NULL;
126 /* Set by -mtune or -Os. */
127 const struct processor_costs *ix86_cost = NULL;
129 /* Processor feature/optimization bitmasks. */
130 #define m_386 (1U<<PROCESSOR_I386)
131 #define m_486 (1U<<PROCESSOR_I486)
132 #define m_PENT (1U<<PROCESSOR_PENTIUM)
133 #define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
134 #define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
135 #define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
136 #define m_NOCONA (1U<<PROCESSOR_NOCONA)
137 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
138 #define m_CORE2 (1U<<PROCESSOR_CORE2)
139 #define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
140 #define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
141 #define m_HASWELL (1U<<PROCESSOR_HASWELL)
142 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
143 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
144 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
145 #define m_KNL (1U<<PROCESSOR_KNL)
146 #define m_KNM (1U<<PROCESSOR_KNM)
147 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
148 #define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
149 #define m_ICELAKE (1U<<PROCESSOR_ICELAKE)
150 #define m_INTEL (1U<<PROCESSOR_INTEL)
152 #define m_GEODE (1U<<PROCESSOR_GEODE)
153 #define m_K6 (1U<<PROCESSOR_K6)
154 #define m_K6_GEODE (m_K6 | m_GEODE)
155 #define m_K8 (1U<<PROCESSOR_K8)
156 #define m_ATHLON (1U<<PROCESSOR_ATHLON)
157 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
158 #define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
159 #define m_BDVER1 (1U<<PROCESSOR_BDVER1)
160 #define m_BDVER2 (1U<<PROCESSOR_BDVER2)
161 #define m_BDVER3 (1U<<PROCESSOR_BDVER3)
162 #define m_BDVER4 (1U<<PROCESSOR_BDVER4)
163 #define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
164 #define m_BTVER1 (1U<<PROCESSOR_BTVER1)
165 #define m_BTVER2 (1U<<PROCESSOR_BTVER2)
166 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
167 #define m_BTVER (m_BTVER1 | m_BTVER2)
168 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
169 | m_ZNVER1)
171 #define m_GENERIC (1U<<PROCESSOR_GENERIC)
173 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
174 #undef DEF_TUNE
175 #define DEF_TUNE(tune, name, selector) name,
176 #include "x86-tune.def"
177 #undef DEF_TUNE
180 /* Feature tests against the various tunings. */
181 unsigned char ix86_tune_features[X86_TUNE_LAST];
183 /* Feature tests against the various tunings used to create ix86_tune_features
184 based on the processor mask. */
185 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
186 #undef DEF_TUNE
187 #define DEF_TUNE(tune, name, selector) selector,
188 #include "x86-tune.def"
189 #undef DEF_TUNE
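/* Illustrative note, not part of the original source: x86-tune.def is an
   X-macro file whose entries have the shape

     DEF_TUNE (X86_TUNE_<NAME>, "<name>", <processor mask>)

   so the two DEF_TUNE definitions above expand each entry once into a name
   string for ix86_tune_feature_names[] and once into a processor-mask
   selector for initial_ix86_tune_features[].  A hedged sketch of one such
   entry (the exact feature names and masks live in x86-tune.def):

     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
               m_386 | m_CORE_ALL | m_K6_GEODE | m_GENERIC)  */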
192 /* Feature tests against the various architecture variations. */
193 unsigned char ix86_arch_features[X86_ARCH_LAST];
195 /* Feature tests against the various architecture variations, used to create
196 ix86_arch_features based on the processor mask. */
197 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
198 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
199 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
201 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
202 ~m_386,
204 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
205 ~(m_386 | m_486),
207 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
208 ~m_386,
210 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
211 ~m_386,
214 /* In case the average insn count for a single function invocation is
215 lower than this constant, emit fast (but longer) prologue and
216 epilogue code. */
217 #define FAST_PROLOGUE_INSN_COUNT 20
219 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
220 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
221 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
222 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
224 /* Array of the smallest class containing reg number REGNO, indexed by
225 REGNO. Used by REGNO_REG_CLASS in i386.h. */
227 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
229 /* ax, dx, cx, bx */
230 AREG, DREG, CREG, BREG,
231 /* si, di, bp, sp */
232 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
233 /* FP registers */
234 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
235 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
236 /* arg pointer */
237 NON_Q_REGS,
238 /* flags, fpsr, fpcr, frame */
239 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
240 /* SSE registers */
241 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
242 SSE_REGS, SSE_REGS,
243 /* MMX registers */
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
245 MMX_REGS, MMX_REGS,
246 /* REX registers */
247 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
248 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
249 /* SSE REX registers */
250 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
251 SSE_REGS, SSE_REGS,
252 /* AVX-512 SSE registers */
253 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
254 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
255 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
256 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
257 /* Mask registers. */
258 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
259 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
260 /* MPX bound registers */
261 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
264 /* The "default" register map used in 32bit mode. */
266 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
268 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
269 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
270 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
271 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
272 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
273 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
274 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
275 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
276 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
277 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
278 101, 102, 103, 104, /* bound registers */
281 /* The "default" register map used in 64bit mode. */
283 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
285 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
286 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
287 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
288 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
289 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
290 8,9,10,11,12,13,14,15, /* extended integer registers */
291 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
292 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
293 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
294 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
295 126, 127, 128, 129, /* bound registers */
298 /* Define the register numbers to be used in Dwarf debugging information.
299 The SVR4 reference port C compiler uses the following register numbers
300 in its Dwarf output code:
301 0 for %eax (gcc regno = 0)
302 1 for %ecx (gcc regno = 2)
303 2 for %edx (gcc regno = 1)
304 3 for %ebx (gcc regno = 3)
305 4 for %esp (gcc regno = 7)
306 5 for %ebp (gcc regno = 6)
307 6 for %esi (gcc regno = 4)
308 7 for %edi (gcc regno = 5)
309 The following three DWARF register numbers are never generated by
310 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
311 believes these numbers have these meanings.
312 8 for %eip (no gcc equivalent)
313 9 for %eflags (gcc regno = 17)
314 10 for %trapno (no gcc equivalent)
315 It is not at all clear how we should number the FP stack registers
316 for the x86 architecture. If the version of SDB on x86/svr4 were
317 a bit less brain dead with respect to floating-point then we would
318 have a precedent to follow with respect to DWARF register numbers
319 for x86 FP registers, but the SDB on x86/svr4 was so completely
320 broken with respect to FP registers that it is hardly worth thinking
321 of it as something to strive for compatibility with.
322 The version of x86/svr4 SDB I had does (partially)
323 seem to believe that DWARF register number 11 is associated with
324 the x86 register %st(0), but that's about all. Higher DWARF
325 register numbers don't seem to be associated with anything in
326 particular, and even for DWARF regno 11, SDB only seemed to under-
327 stand that it should say that a variable lives in %st(0) (when
328 asked via an `=' command) if we said it was in DWARF regno 11,
329 but SDB still printed garbage when asked for the value of the
330 variable in question (via a `/' command).
331 (Also note that the labels SDB printed for various FP stack regs
332 when doing an `x' command were all wrong.)
333 Note that these problems generally don't affect the native SVR4
334 C compiler because it doesn't allow the use of -O with -g and
335 because when it is *not* optimizing, it allocates a memory
336 location for each floating-point variable, and the memory
337 location is what gets described in the DWARF AT_location
338 attribute for the variable in question.
339 Regardless of the severe mental illness of the x86/svr4 SDB, we
340 do something sensible here and we use the following DWARF
341 register numbers. Note that these are all stack-top-relative
342 numbers.
343 11 for %st(0) (gcc regno = 8)
344 12 for %st(1) (gcc regno = 9)
345 13 for %st(2) (gcc regno = 10)
346 14 for %st(3) (gcc regno = 11)
347 15 for %st(4) (gcc regno = 12)
348 16 for %st(5) (gcc regno = 13)
349 17 for %st(6) (gcc regno = 14)
350 18 for %st(7) (gcc regno = 15)
352 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
354 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
355 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
356 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
357 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
358 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
360 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
361 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
362 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
363 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
364 101, 102, 103, 104, /* bound registers */
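/* Worked example, not part of the original source: the table above is indexed
   by GCC register number and yields the SVR4 DWARF number, so

     svr4_dbx_register_map[2] == 1

   because GCC regno 2 is %ecx, which the SVR4 numbering described above
   assigns DWARF register 1.  */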
367 /* Define parameter passing and return registers. */
369 static int const x86_64_int_parameter_registers[6] =
371 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
374 static int const x86_64_ms_abi_int_parameter_registers[4] =
376 CX_REG, DX_REG, R8_REG, R9_REG
379 static int const x86_64_int_return_registers[4] =
381 AX_REG, DX_REG, DI_REG, SI_REG
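/* Illustrative note, not part of the original source: these arrays encode the
   SysV and MS x86-64 calling conventions.  For a hypothetical SysV call

     long f (long a, long b);

   A arrives in %rdi and B in %rsi (x86_64_int_parameter_registers[0..1]) and
   the result comes back in %rax (x86_64_int_return_registers[0]); under the
   MS ABI the same arguments would use %rcx and %rdx instead.  */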
384 /* Additional registers that are clobbered by SYSV calls. */
386 #define NUM_X86_64_MS_CLOBBERED_REGS 12
387 static int const x86_64_ms_sysv_extra_clobbered_registers
388 [NUM_X86_64_MS_CLOBBERED_REGS] =
390 SI_REG, DI_REG,
391 XMM6_REG, XMM7_REG,
392 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
393 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
396 enum xlogue_stub {
397 XLOGUE_STUB_SAVE,
398 XLOGUE_STUB_RESTORE,
399 XLOGUE_STUB_RESTORE_TAIL,
400 XLOGUE_STUB_SAVE_HFP,
401 XLOGUE_STUB_RESTORE_HFP,
402 XLOGUE_STUB_RESTORE_HFP_TAIL,
404 XLOGUE_STUB_COUNT
407 enum xlogue_stub_sets {
408 XLOGUE_SET_ALIGNED,
409 XLOGUE_SET_ALIGNED_PLUS_8,
410 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
411 XLOGUE_SET_HFP_ALIGNED_PLUS_8,
413 XLOGUE_SET_COUNT
416 /* Register save/restore layout used by out-of-line stubs. */
417 class xlogue_layout {
418 public:
419 struct reginfo
421 unsigned regno;
422 HOST_WIDE_INT offset; /* Offset from the stub base pointer (rax or
423 rsi) to where each register is stored. */
426 unsigned get_nregs () const {return m_nregs;}
427 HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}
429 const reginfo &get_reginfo (unsigned reg) const
431 gcc_assert (reg < m_nregs);
432 return m_regs[reg];
435 static const char *get_stub_name (enum xlogue_stub stub,
436 unsigned n_extra_args);
438 /* Returns an rtx for the stub's symbol based upon
439 1.) the specified stub (save, restore or restore_ret) and
440 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
442 3.) whether or not stack alignment is being performed. */
442 static rtx get_stub_rtx (enum xlogue_stub stub);
444 /* Returns the amount of stack space (including padding) that the stub
445 needs to store registers based upon data in the machine_function. */
446 HOST_WIDE_INT get_stack_space_used () const
448 const struct machine_function *m = cfun->machine;
449 unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
451 gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
452 return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
455 /* Returns the offset for the base pointer used by the stub. */
456 HOST_WIDE_INT get_stub_ptr_offset () const
458 return STUB_INDEX_OFFSET + m_stack_align_off_in;
461 static const struct xlogue_layout &get_instance ();
462 static unsigned count_stub_managed_regs ();
463 static bool is_stub_managed_reg (unsigned regno, unsigned count);
465 static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
466 static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
467 static const unsigned MAX_REGS = 18;
468 static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
469 static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
470 static const unsigned STUB_NAME_MAX_LEN = 20;
471 static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
472 static const unsigned REG_ORDER[MAX_REGS];
473 static const unsigned REG_ORDER_REALIGN[MAX_REGS];
475 private:
476 xlogue_layout ();
477 xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
478 xlogue_layout (const xlogue_layout &);
480 /* True if hard frame pointer is used. */
481 bool m_hfp;
483 /* Max number of registers this layout manages. */
484 unsigned m_nregs;
486 /* Incoming offset from 16-byte alignment. */
487 HOST_WIDE_INT m_stack_align_off_in;
489 /* Register order and offsets. */
490 struct reginfo m_regs[MAX_REGS];
492 /* Lazy-inited cache of symbol names for stubs. */
493 static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
494 [STUB_NAME_MAX_LEN];
496 static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
499 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
500 "savms64",
501 "resms64",
502 "resms64x",
503 "savms64f",
504 "resms64f",
505 "resms64fx"
508 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
509 /* The below offset values are where each register is stored for the layout
510 relative to incoming stack pointer. The value of each m_regs[].offset will
511 be relative to the incoming base pointer (rax or rsi) used by the stub.
513 s_instances: 0 1 2 3
514 Offset: realigned or aligned + 8
515 Register aligned aligned + 8 aligned w/HFP w/HFP */
516 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
517 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
518 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
519 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
520 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
521 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
522 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
523 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
524 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
525 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
526 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
527 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
528 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
529 BP_REG, /* 0xc0 0xc8 N/A N/A */
530 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
531 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
532 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
533 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
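/* Worked example, not part of the original source: for the fully aligned,
   no-hard-frame-pointer layout (s_instances[0]) the constructor below starts
   OFFSET at 0 and adds 16 per SSE register, so XMM15 is stored at incoming-SP
   offset 0x10 as annotated above, and its recorded slot is

     m_regs[0].offset == 0x10 - STUB_INDEX_OFFSET == -0x60

   i.e. relative to the stub base pointer (rax or rsi) rather than to the
   incoming stack pointer.  */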
536 /* Instantiate static const values. */
537 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
538 const unsigned xlogue_layout::MIN_REGS;
539 const unsigned xlogue_layout::MAX_REGS;
540 const unsigned xlogue_layout::MAX_EXTRA_REGS;
541 const unsigned xlogue_layout::VARIANT_COUNT;
542 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
544 /* Initialize xlogue_layout::s_stub_names to zero. */
545 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
546 [STUB_NAME_MAX_LEN];
548 /* Instantiates all xlogue_layout instances. */
549 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
550 xlogue_layout (0, false),
551 xlogue_layout (8, false),
552 xlogue_layout (0, true),
553 xlogue_layout (8, true)
556 /* Return an appropriate const instance of xlogue_layout based upon values
557 in cfun->machine and crtl. */
558 const struct xlogue_layout &
559 xlogue_layout::get_instance ()
561 enum xlogue_stub_sets stub_set;
562 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
564 if (stack_realign_fp)
565 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
566 else if (frame_pointer_needed)
567 stub_set = aligned_plus_8
568 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
569 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
570 else
571 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
573 return s_instances[stub_set];
576 /* Determine how many clobbered registers can be saved by the stub.
577 Returns the count of registers the stub will save and restore. */
578 unsigned
579 xlogue_layout::count_stub_managed_regs ()
581 bool hfp = frame_pointer_needed || stack_realign_fp;
582 unsigned i, count;
583 unsigned regno;
585 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
587 regno = REG_ORDER[i];
588 if (regno == BP_REG && hfp)
589 continue;
590 if (!ix86_save_reg (regno, false, false))
591 break;
592 ++count;
594 return count;
597 /* Determine if register REGNO is a stub managed register given the
598 total COUNT of stub managed registers. */
599 bool
600 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
602 bool hfp = frame_pointer_needed || stack_realign_fp;
603 unsigned i;
605 for (i = 0; i < count; ++i)
607 gcc_assert (i < MAX_REGS);
608 if (REG_ORDER[i] == BP_REG && hfp)
609 ++count;
610 else if (REG_ORDER[i] == regno)
611 return true;
613 return false;
616 /* Constructor for xlogue_layout. */
617 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
618 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
619 m_stack_align_off_in (stack_align_off_in)
621 HOST_WIDE_INT offset = stack_align_off_in;
622 unsigned i, j;
624 for (i = j = 0; i < MAX_REGS; ++i)
626 unsigned regno = REG_ORDER[i];
628 if (regno == BP_REG && hfp)
629 continue;
630 if (SSE_REGNO_P (regno))
632 offset += 16;
633 /* Verify that SSE regs are always aligned. */
634 gcc_assert (!((stack_align_off_in + offset) & 15));
636 else
637 offset += 8;
639 m_regs[j].regno = regno;
640 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
642 gcc_assert (j == m_nregs);
645 const char *
646 xlogue_layout::get_stub_name (enum xlogue_stub stub,
647 unsigned n_extra_regs)
649 const int have_avx = TARGET_AVX;
650 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
652 /* Lazy init */
653 if (!*name)
655 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
656 (have_avx ? "avx" : "sse"),
657 STUB_BASE_NAMES[stub],
658 MIN_REGS + n_extra_regs);
659 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
662 return name;
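/* Illustrative example, not part of the original source: with TARGET_AVX set
   and cfun->machine->call_ms2sysv_extra_regs == 2, the save stub symbol built
   by the snprintf above is

     "__avx_savms64_14"

   i.e. "avx" (otherwise "sse"), STUB_BASE_NAMES[XLOGUE_STUB_SAVE] == "savms64",
   and MIN_REGS + 2 == 14 registers handled by the stub.  */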
665 /* Return rtx of a symbol ref for the entry point (based upon
666 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
667 rtx
668 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
670 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
671 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
672 gcc_assert (stub < XLOGUE_STUB_COUNT);
673 gcc_assert (crtl->stack_realign_finalized);
675 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
678 /* Define the structure for the machine field in struct function. */
680 struct GTY(()) stack_local_entry {
681 unsigned short mode;
682 unsigned short n;
683 rtx rtl;
684 struct stack_local_entry *next;
687 /* Which cpu are we scheduling for. */
688 enum attr_cpu ix86_schedule;
690 /* Which cpu are we optimizing for. */
691 enum processor_type ix86_tune;
693 /* Which instruction set architecture to use. */
694 enum processor_type ix86_arch;
696 /* True if processor has SSE prefetch instruction. */
697 unsigned char x86_prefetch_sse;
699 /* -mstackrealign option */
700 static const char ix86_force_align_arg_pointer_string[]
701 = "force_align_arg_pointer";
703 static rtx (*ix86_gen_leave) (void);
704 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
705 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
706 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
707 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
708 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
709 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
710 static rtx (*ix86_gen_clzero) (rtx);
711 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
712 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
713 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
714 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
715 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
716 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
718 /* Preferred alignment for stack boundary in bits. */
719 unsigned int ix86_preferred_stack_boundary;
721 /* Alignment for incoming stack boundary in bits specified at
722 command line. */
723 static unsigned int ix86_user_incoming_stack_boundary;
725 /* Default alignment for incoming stack boundary in bits. */
726 static unsigned int ix86_default_incoming_stack_boundary;
728 /* Alignment for incoming stack boundary in bits. */
729 unsigned int ix86_incoming_stack_boundary;
731 /* Calling abi specific va_list type nodes. */
732 static GTY(()) tree sysv_va_list_type_node;
733 static GTY(()) tree ms_va_list_type_node;
735 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
736 char internal_label_prefix[16];
737 int internal_label_prefix_len;
739 /* Fence to use after loop using movnt. */
740 tree x86_mfence;
742 /* Register class used for passing a given 64-bit part of the argument.
743 These represent classes as documented by the psABI, with the exception
744 of the SSESF and SSEDF classes, which are basically the SSE class; GCC
745 will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
747 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
748 whenever possible (the upper half does contain padding). */
749 enum x86_64_reg_class
751 X86_64_NO_CLASS,
752 X86_64_INTEGER_CLASS,
753 X86_64_INTEGERSI_CLASS,
754 X86_64_SSE_CLASS,
755 X86_64_SSESF_CLASS,
756 X86_64_SSEDF_CLASS,
757 X86_64_SSEUP_CLASS,
758 X86_64_X87_CLASS,
759 X86_64_X87UP_CLASS,
760 X86_64_COMPLEX_X87_CLASS,
761 X86_64_MEMORY_CLASS
764 #define MAX_CLASSES 8
766 /* Table of constants used by fldpi, fldln2, etc.... */
767 static REAL_VALUE_TYPE ext_80387_constants_table [5];
768 static bool ext_80387_constants_init;
771 static struct machine_function * ix86_init_machine_status (void);
772 static rtx ix86_function_value (const_tree, const_tree, bool);
773 static bool ix86_function_value_regno_p (const unsigned int);
774 static unsigned int ix86_function_arg_boundary (machine_mode,
775 const_tree);
776 static rtx ix86_static_chain (const_tree, bool);
777 static int ix86_function_regparm (const_tree, const_tree);
778 static void ix86_compute_frame_layout (void);
779 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
780 rtx, rtx, int);
781 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
782 static tree ix86_canonical_va_list_type (tree);
783 static void predict_jump (int);
784 static unsigned int split_stack_prologue_scratch_regno (void);
785 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
787 enum ix86_function_specific_strings
789 IX86_FUNCTION_SPECIFIC_ARCH,
790 IX86_FUNCTION_SPECIFIC_TUNE,
791 IX86_FUNCTION_SPECIFIC_MAX
794 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
795 const char *, const char *, enum fpmath_unit,
796 bool);
797 static void ix86_function_specific_save (struct cl_target_option *,
798 struct gcc_options *opts);
799 static void ix86_function_specific_restore (struct gcc_options *opts,
800 struct cl_target_option *);
801 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
802 static void ix86_function_specific_print (FILE *, int,
803 struct cl_target_option *);
804 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
805 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
806 struct gcc_options *,
807 struct gcc_options *,
808 struct gcc_options *);
809 static bool ix86_can_inline_p (tree, tree);
810 static void ix86_set_current_function (tree);
811 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
813 static enum calling_abi ix86_function_abi (const_tree);
816 #ifndef SUBTARGET32_DEFAULT_CPU
817 #define SUBTARGET32_DEFAULT_CPU "i386"
818 #endif
820 /* Whether -mtune= or -march= were specified */
821 static int ix86_tune_defaulted;
822 static int ix86_arch_specified;
824 /* Vectorization library interface and handlers. */
825 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
827 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
828 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
830 /* Processor target table, indexed by processor number */
831 struct ptt
833 const char *const name; /* processor name */
834 const struct processor_costs *cost; /* Processor costs */
835 const int align_loop; /* Default alignments. */
836 const int align_loop_max_skip;
837 const int align_jump;
838 const int align_jump_max_skip;
839 const int align_func;
842 /* This table must be in sync with enum processor_type in i386.h. */
843 static const struct ptt processor_target_table[PROCESSOR_max] =
845 {"generic", &generic_cost, 16, 10, 16, 10, 16},
846 {"i386", &i386_cost, 4, 3, 4, 3, 4},
847 {"i486", &i486_cost, 16, 15, 16, 15, 16},
848 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
849 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
850 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
851 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
852 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
853 {"core2", &core_cost, 16, 10, 16, 10, 16},
854 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
855 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
856 {"haswell", &core_cost, 16, 10, 16, 10, 16},
857 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
858 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
859 {"knl", &slm_cost, 16, 15, 16, 7, 16},
860 {"knm", &slm_cost, 16, 15, 16, 7, 16},
861 {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
862 {"cannonlake", &skylake_cost, 16, 10, 16, 10, 16},
863 {"icelake", &skylake_cost, 16, 10, 16, 10, 16},
864 {"intel", &intel_cost, 16, 15, 16, 7, 16},
865 {"geode", &geode_cost, 0, 0, 0, 0, 0},
866 {"k6", &k6_cost, 32, 7, 32, 7, 32},
867 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
868 {"k8", &k8_cost, 16, 7, 16, 7, 16},
869 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
870 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
871 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
872 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
873 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
874 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
875 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
876 {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
879 static unsigned int
880 rest_of_handle_insert_vzeroupper (void)
882 int i;
884 /* vzeroupper instructions are inserted immediately after reload to
885 account for possible spills from 256-bit or 512-bit registers. The pass
886 reuses the mode switching infrastructure by re-running the mode insertion
887 pass, so disable entities that have already been processed. */
888 for (i = 0; i < MAX_386_ENTITIES; i++)
889 ix86_optimize_mode_switching[i] = 0;
891 ix86_optimize_mode_switching[AVX_U128] = 1;
893 /* Call optimize_mode_switching. */
894 g->get_passes ()->execute_pass_mode_switching ();
895 return 0;
898 /* Return 1 if INSN uses or defines a hard register.
899 Hard register uses in a memory address are ignored.
900 Clobbers and flags definitions are ignored. */
902 static bool
903 has_non_address_hard_reg (rtx_insn *insn)
905 df_ref ref;
906 FOR_EACH_INSN_DEF (ref, insn)
907 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
908 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
909 && DF_REF_REGNO (ref) != FLAGS_REG)
910 return true;
912 FOR_EACH_INSN_USE (ref, insn)
913 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
914 return true;
916 return false;
919 /* Check if comparison INSN may be transformed
920 into a vector comparison. Currently we transform
921 only zero checks which look like:
923 (set (reg:CCZ 17 flags)
924 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
925 (subreg:SI (reg:DI x) 0))
926 (const_int 0 [0]))) */
928 static bool
929 convertible_comparison_p (rtx_insn *insn)
931 if (!TARGET_SSE4_1)
932 return false;
934 rtx def_set = single_set (insn);
936 gcc_assert (def_set);
938 rtx src = SET_SRC (def_set);
939 rtx dst = SET_DEST (def_set);
941 gcc_assert (GET_CODE (src) == COMPARE);
943 if (GET_CODE (dst) != REG
944 || REGNO (dst) != FLAGS_REG
945 || GET_MODE (dst) != CCZmode)
946 return false;
948 rtx op1 = XEXP (src, 0);
949 rtx op2 = XEXP (src, 1);
951 if (op2 != CONST0_RTX (GET_MODE (op2)))
952 return false;
954 if (GET_CODE (op1) != IOR)
955 return false;
957 op2 = XEXP (op1, 1);
958 op1 = XEXP (op1, 0);
960 if (!SUBREG_P (op1)
961 || !SUBREG_P (op2)
962 || GET_MODE (op1) != SImode
963 || GET_MODE (op2) != SImode
964 || ((SUBREG_BYTE (op1) != 0
965 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
966 && (SUBREG_BYTE (op2) != 0
967 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
968 return false;
970 op1 = SUBREG_REG (op1);
971 op2 = SUBREG_REG (op2);
973 if (op1 != op2
974 || !REG_P (op1)
975 || GET_MODE (op1) != DImode)
976 return false;
978 return true;
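/* Illustrative example, not part of the original source: on ia32 a 64-bit
   zero test such as the hypothetical

     int
     is_zero (unsigned long long x)
     {
       return x == 0;
     }

   is typically expanded as an IOR of the two SImode halves of X compared
   against zero, which is exactly the CCZmode pattern matched above; the STV
   pass can then implement the test with an SSE4.1 ptest on the whole 64-bit
   value.  */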
981 /* The DImode version of scalar_to_vector_candidate_p. */
983 static bool
984 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
986 rtx def_set = single_set (insn);
988 if (!def_set)
989 return false;
991 if (has_non_address_hard_reg (insn))
992 return false;
994 rtx src = SET_SRC (def_set);
995 rtx dst = SET_DEST (def_set);
997 if (GET_CODE (src) == COMPARE)
998 return convertible_comparison_p (insn);
1000 /* We are interested in DImode promotion only. */
1001 if ((GET_MODE (src) != DImode
1002 && !CONST_INT_P (src))
1003 || GET_MODE (dst) != DImode)
1004 return false;
1006 if (!REG_P (dst) && !MEM_P (dst))
1007 return false;
1009 switch (GET_CODE (src))
1011 case ASHIFTRT:
1012 if (!TARGET_AVX512VL)
1013 return false;
1014 /* FALLTHRU */
1016 case ASHIFT:
1017 case LSHIFTRT:
1018 if (!REG_P (XEXP (src, 1))
1019 && (!SUBREG_P (XEXP (src, 1))
1020 || SUBREG_BYTE (XEXP (src, 1)) != 0
1021 || !REG_P (SUBREG_REG (XEXP (src, 1))))
1022 && (!CONST_INT_P (XEXP (src, 1))
1023 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1024 return false;
1026 if (GET_MODE (XEXP (src, 1)) != QImode
1027 && !CONST_INT_P (XEXP (src, 1)))
1028 return false;
1029 break;
1031 case PLUS:
1032 case MINUS:
1033 case IOR:
1034 case XOR:
1035 case AND:
1036 if (!REG_P (XEXP (src, 1))
1037 && !MEM_P (XEXP (src, 1))
1038 && !CONST_INT_P (XEXP (src, 1)))
1039 return false;
1041 if (GET_MODE (XEXP (src, 1)) != DImode
1042 && !CONST_INT_P (XEXP (src, 1)))
1043 return false;
1044 break;
1046 case NEG:
1047 case NOT:
1048 break;
1050 case REG:
1051 return true;
1053 case MEM:
1054 case CONST_INT:
1055 return REG_P (dst);
1057 default:
1058 return false;
1061 if (!REG_P (XEXP (src, 0))
1062 && !MEM_P (XEXP (src, 0))
1063 && !CONST_INT_P (XEXP (src, 0))
1064 /* Check for andnot case. */
1065 && (GET_CODE (src) != AND
1066 || GET_CODE (XEXP (src, 0)) != NOT
1067 || !REG_P (XEXP (XEXP (src, 0), 0))))
1068 return false;
1070 if (GET_MODE (XEXP (src, 0)) != DImode
1071 && !CONST_INT_P (XEXP (src, 0)))
1072 return false;
1074 return true;
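/* Illustrative example, not part of the original source: on ia32 a plain
   64-bit bitwise operation such as the hypothetical

     unsigned long long
     and64 (unsigned long long a, unsigned long long b)
     {
       return a & b;
     }

   yields a DImode AND of the shape accepted above (register, memory or
   CONST_INT operands), which the pass may later carry out as a single pand
   on an XMM register instead of two SImode ANDs.  */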
1077 /* The TImode version of scalar_to_vector_candidate_p. */
1079 static bool
1080 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1082 rtx def_set = single_set (insn);
1084 if (!def_set)
1085 return false;
1087 if (has_non_address_hard_reg (insn))
1088 return false;
1090 rtx src = SET_SRC (def_set);
1091 rtx dst = SET_DEST (def_set);
1093 /* Only TImode load and store are allowed. */
1094 if (GET_MODE (dst) != TImode)
1095 return false;
1097 if (MEM_P (dst))
1099 /* Check for a store. The memory operand must be aligned, or unaligned
1100 stores must be optimal on this target. Only support stores from a register,
1101 a standard SSE constant or a CONST_WIDE_INT generated from a piecewise store.
1103 ??? Verify performance impact before enabling CONST_INT for
1104 __int128 stores. */
1105 if (misaligned_operand (dst, TImode)
1106 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1107 return false;
1109 switch (GET_CODE (src))
1111 default:
1112 return false;
1114 case REG:
1115 case CONST_WIDE_INT:
1116 return true;
1118 case CONST_INT:
1119 return standard_sse_constant_p (src, TImode);
1122 else if (MEM_P (src))
1124 /* Check for a load. The memory operand must be aligned, or unaligned
1125 loads must be optimal on this target. */
1126 return (REG_P (dst)
1127 && (!misaligned_operand (src, TImode)
1128 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1131 return false;
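/* Illustrative example, not part of the original source: in 64-bit mode only
   plain TImode loads and stores qualify, e.g. the hypothetical

     void
     copy128 (__int128 *dst, const __int128 *src)
     {
       *dst = *src;
     }

   where the load and the store can each be rewritten as a V1TImode SSE move
   (movdqa, or movdqu when the unaligned checks above allow it).  */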
1134 /* Return true if INSN may be converted into a vector
1135 instruction. */
1137 static bool
1138 scalar_to_vector_candidate_p (rtx_insn *insn)
1140 if (TARGET_64BIT)
1141 return timode_scalar_to_vector_candidate_p (insn);
1142 else
1143 return dimode_scalar_to_vector_candidate_p (insn);
1146 /* The DImode version of remove_non_convertible_regs. */
1148 static void
1149 dimode_remove_non_convertible_regs (bitmap candidates)
1151 bitmap_iterator bi;
1152 unsigned id;
1153 bitmap regs = BITMAP_ALLOC (NULL);
1155 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1157 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1158 rtx reg = SET_DEST (def_set);
1160 if (!REG_P (reg)
1161 || bitmap_bit_p (regs, REGNO (reg))
1162 || HARD_REGISTER_P (reg))
1163 continue;
1165 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1166 def;
1167 def = DF_REF_NEXT_REG (def))
1169 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1171 if (dump_file)
1172 fprintf (dump_file,
1173 "r%d has non convertible definition in insn %d\n",
1174 REGNO (reg), DF_REF_INSN_UID (def));
1176 bitmap_set_bit (regs, REGNO (reg));
1177 break;
1182 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1184 for (df_ref def = DF_REG_DEF_CHAIN (id);
1185 def;
1186 def = DF_REF_NEXT_REG (def))
1187 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1189 if (dump_file)
1190 fprintf (dump_file, "Removing insn %d from candidates list\n",
1191 DF_REF_INSN_UID (def));
1193 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1197 BITMAP_FREE (regs);
1200 /* For a register REGNO, scan instructions for its defs and uses.
1201 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1203 static void
1204 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1205 unsigned int regno)
1207 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1208 def;
1209 def = DF_REF_NEXT_REG (def))
1211 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1213 if (dump_file)
1214 fprintf (dump_file,
1215 "r%d has non convertible def in insn %d\n",
1216 regno, DF_REF_INSN_UID (def));
1218 bitmap_set_bit (regs, regno);
1219 break;
1223 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1224 ref;
1225 ref = DF_REF_NEXT_REG (ref))
1227 /* Debug instructions are skipped. */
1228 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1229 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1231 if (dump_file)
1232 fprintf (dump_file,
1233 "r%d has non convertible use in insn %d\n",
1234 regno, DF_REF_INSN_UID (ref));
1236 bitmap_set_bit (regs, regno);
1237 break;
1242 /* The TImode version of remove_non_convertible_regs. */
1244 static void
1245 timode_remove_non_convertible_regs (bitmap candidates)
1247 bitmap_iterator bi;
1248 unsigned id;
1249 bitmap regs = BITMAP_ALLOC (NULL);
1251 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1253 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1254 rtx dest = SET_DEST (def_set);
1255 rtx src = SET_SRC (def_set);
1257 if ((!REG_P (dest)
1258 || bitmap_bit_p (regs, REGNO (dest))
1259 || HARD_REGISTER_P (dest))
1260 && (!REG_P (src)
1261 || bitmap_bit_p (regs, REGNO (src))
1262 || HARD_REGISTER_P (src)))
1263 continue;
1265 if (REG_P (dest))
1266 timode_check_non_convertible_regs (candidates, regs,
1267 REGNO (dest));
1269 if (REG_P (src))
1270 timode_check_non_convertible_regs (candidates, regs,
1271 REGNO (src));
1274 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1276 for (df_ref def = DF_REG_DEF_CHAIN (id);
1277 def;
1278 def = DF_REF_NEXT_REG (def))
1279 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1281 if (dump_file)
1282 fprintf (dump_file, "Removing insn %d from candidates list\n",
1283 DF_REF_INSN_UID (def));
1285 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1288 for (df_ref ref = DF_REG_USE_CHAIN (id);
1289 ref;
1290 ref = DF_REF_NEXT_REG (ref))
1291 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1293 if (dump_file)
1294 fprintf (dump_file, "Removing insn %d from candidates list\n",
1295 DF_REF_INSN_UID (ref));
1297 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1301 BITMAP_FREE (regs);
1304 /* For a given bitmap of insn UIDs, scan all instructions and
1305 remove an insn from CANDIDATES if it has both convertible
1306 and non-convertible definitions.
1308 All insns in the bitmap are conversion candidates according to
1309 scalar_to_vector_candidate_p. Currently this implies all insns
1310 are single_set. */
1312 static void
1313 remove_non_convertible_regs (bitmap candidates)
1315 if (TARGET_64BIT)
1316 timode_remove_non_convertible_regs (candidates);
1317 else
1318 dimode_remove_non_convertible_regs (candidates);
1321 class scalar_chain
1323 public:
1324 scalar_chain ();
1325 virtual ~scalar_chain ();
1327 static unsigned max_id;
1329 /* ID of a chain. */
1330 unsigned int chain_id;
1331 /* A queue of instructions to be included into a chain. */
1332 bitmap queue;
1333 /* Instructions included into a chain. */
1334 bitmap insns;
1335 /* All registers defined by a chain. */
1336 bitmap defs;
1337 /* Registers used in both vector and scalar modes. */
1338 bitmap defs_conv;
1340 void build (bitmap candidates, unsigned insn_uid);
1341 virtual int compute_convert_gain () = 0;
1342 int convert ();
1344 protected:
1345 void add_to_queue (unsigned insn_uid);
1346 void emit_conversion_insns (rtx insns, rtx_insn *pos);
1348 private:
1349 void add_insn (bitmap candidates, unsigned insn_uid);
1350 void analyze_register_chain (bitmap candidates, df_ref ref);
1351 virtual void mark_dual_mode_def (df_ref def) = 0;
1352 virtual void convert_insn (rtx_insn *insn) = 0;
1353 virtual void convert_registers () = 0;
1356 class dimode_scalar_chain : public scalar_chain
1358 public:
1359 int compute_convert_gain ();
1360 private:
1361 void mark_dual_mode_def (df_ref def);
1362 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1363 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1364 void convert_insn (rtx_insn *insn);
1365 void convert_op (rtx *op, rtx_insn *insn);
1366 void convert_reg (unsigned regno);
1367 void make_vector_copies (unsigned regno);
1368 void convert_registers ();
1369 int vector_const_cost (rtx exp);
1372 class timode_scalar_chain : public scalar_chain
1374 public:
1375 /* Converting from TImode to V1TImode is always faster. */
1376 int compute_convert_gain () { return 1; }
1378 private:
1379 void mark_dual_mode_def (df_ref def);
1380 void fix_debug_reg_uses (rtx reg);
1381 void convert_insn (rtx_insn *insn);
1382 /* We don't convert registers to a different size. */
1383 void convert_registers () {}
1386 unsigned scalar_chain::max_id = 0;
1388 /* Initialize new chain. */
1390 scalar_chain::scalar_chain ()
1392 chain_id = ++max_id;
1394 if (dump_file)
1395 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1397 bitmap_obstack_initialize (NULL);
1398 insns = BITMAP_ALLOC (NULL);
1399 defs = BITMAP_ALLOC (NULL);
1400 defs_conv = BITMAP_ALLOC (NULL);
1401 queue = NULL;
1404 /* Free chain's data. */
1406 scalar_chain::~scalar_chain ()
1408 BITMAP_FREE (insns);
1409 BITMAP_FREE (defs);
1410 BITMAP_FREE (defs_conv);
1411 bitmap_obstack_release (NULL);
1414 /* Add instruction into chains' queue. */
1416 void
1417 scalar_chain::add_to_queue (unsigned insn_uid)
1419 if (bitmap_bit_p (insns, insn_uid)
1420 || bitmap_bit_p (queue, insn_uid))
1421 return;
1423 if (dump_file)
1424 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
1425 insn_uid, chain_id);
1426 bitmap_set_bit (queue, insn_uid);
1429 /* For DImode conversion, mark register defined by DEF as requiring
1430 conversion. */
1432 void
1433 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1435 gcc_assert (DF_REF_REG_DEF_P (def));
1437 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1438 return;
1440 if (dump_file)
1441 fprintf (dump_file,
1442 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1443 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1445 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1448 /* For TImode conversion, it is unused. */
1450 void
1451 timode_scalar_chain::mark_dual_mode_def (df_ref)
1453 gcc_unreachable ();
1456 /* Check REF's chain to add new insns into a queue
1457 and find registers requiring conversion. */
1459 void
1460 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1462 df_link *chain;
1464 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1465 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1466 add_to_queue (DF_REF_INSN_UID (ref));
1468 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1470 unsigned uid = DF_REF_INSN_UID (chain->ref);
1472 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1473 continue;
1475 if (!DF_REF_REG_MEM_P (chain->ref))
1477 if (bitmap_bit_p (insns, uid))
1478 continue;
1480 if (bitmap_bit_p (candidates, uid))
1482 add_to_queue (uid);
1483 continue;
1487 if (DF_REF_REG_DEF_P (chain->ref))
1489 if (dump_file)
1490 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
1491 DF_REF_REGNO (chain->ref), uid);
1492 mark_dual_mode_def (chain->ref);
1494 else
1496 if (dump_file)
1497 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
1498 DF_REF_REGNO (chain->ref), uid);
1499 mark_dual_mode_def (ref);
1504 /* Add instruction into a chain. */
1506 void
1507 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1509 if (bitmap_bit_p (insns, insn_uid))
1510 return;
1512 if (dump_file)
1513 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
1515 bitmap_set_bit (insns, insn_uid);
1517 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1518 rtx def_set = single_set (insn);
1519 if (def_set && REG_P (SET_DEST (def_set))
1520 && !HARD_REGISTER_P (SET_DEST (def_set)))
1521 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1523 df_ref ref;
1524 df_ref def;
1525 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1526 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1527 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1528 def;
1529 def = DF_REF_NEXT_REG (def))
1530 analyze_register_chain (candidates, def);
1531 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1532 if (!DF_REF_REG_MEM_P (ref))
1533 analyze_register_chain (candidates, ref);
1536 /* Build a new chain starting from insn INSN_UID, recursively
1537 adding all dependent uses and definitions. */
1539 void
1540 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1542 queue = BITMAP_ALLOC (NULL);
1543 bitmap_set_bit (queue, insn_uid);
1545 if (dump_file)
1546 fprintf (dump_file, "Building chain #%d...\n", chain_id);
1548 while (!bitmap_empty_p (queue))
1550 insn_uid = bitmap_first_set_bit (queue);
1551 bitmap_clear_bit (queue, insn_uid);
1552 bitmap_clear_bit (candidates, insn_uid);
1553 add_insn (candidates, insn_uid);
1556 if (dump_file)
1558 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1559 fprintf (dump_file, " insns: ");
1560 dump_bitmap (dump_file, insns);
1561 if (!bitmap_empty_p (defs_conv))
1563 bitmap_iterator bi;
1564 unsigned id;
1565 const char *comma = "";
1566 fprintf (dump_file, " defs to convert: ");
1567 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1569 fprintf (dump_file, "%sr%d", comma, id);
1570 comma = ", ";
1572 fprintf (dump_file, "\n");
1576 BITMAP_FREE (queue);
1579 /* Return the cost of building a vector constant
1580 instead of using a scalar one. */
1582 int
1583 dimode_scalar_chain::vector_const_cost (rtx exp)
1585 gcc_assert (CONST_INT_P (exp));
1587 if (standard_sse_constant_p (exp, V2DImode))
1588 return COSTS_N_INSNS (1);
1589 return ix86_cost->sse_load[1];
1592 /* Compute a gain for chain conversion. */
1594 int
1595 dimode_scalar_chain::compute_convert_gain ()
1597 bitmap_iterator bi;
1598 unsigned insn_uid;
1599 int gain = 0;
1600 int cost = 0;
1602 if (dump_file)
1603 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1605 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1607 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1608 rtx def_set = single_set (insn);
1609 rtx src = SET_SRC (def_set);
1610 rtx dst = SET_DEST (def_set);
1612 if (REG_P (src) && REG_P (dst))
1613 gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
1614 else if (REG_P (src) && MEM_P (dst))
1615 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1616 else if (MEM_P (src) && REG_P (dst))
1617 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1618 else if (GET_CODE (src) == ASHIFT
1619 || GET_CODE (src) == ASHIFTRT
1620 || GET_CODE (src) == LSHIFTRT)
1622 if (CONST_INT_P (XEXP (src, 0)))
1623 gain -= vector_const_cost (XEXP (src, 0));
1624 if (CONST_INT_P (XEXP (src, 1)))
1626 gain += ix86_cost->shift_const;
1627 if (INTVAL (XEXP (src, 1)) >= 32)
1628 gain -= COSTS_N_INSNS (1);
1630 else
1631 /* Additional gain for omitting two CMOVs. */
1632 gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1634 else if (GET_CODE (src) == PLUS
1635 || GET_CODE (src) == MINUS
1636 || GET_CODE (src) == IOR
1637 || GET_CODE (src) == XOR
1638 || GET_CODE (src) == AND)
1640 gain += ix86_cost->add;
1641 /* Additional gain for andnot for targets without BMI. */
1642 if (GET_CODE (XEXP (src, 0)) == NOT
1643 && !TARGET_BMI)
1644 gain += 2 * ix86_cost->add;
1646 if (CONST_INT_P (XEXP (src, 0)))
1647 gain -= vector_const_cost (XEXP (src, 0));
1648 if (CONST_INT_P (XEXP (src, 1)))
1649 gain -= vector_const_cost (XEXP (src, 1));
1651 else if (GET_CODE (src) == NEG
1652 || GET_CODE (src) == NOT)
1653 gain += ix86_cost->add - COSTS_N_INSNS (1);
1654 else if (GET_CODE (src) == COMPARE)
1656 /* Assume comparison cost is the same. */
1658 else if (CONST_INT_P (src))
1660 if (REG_P (dst))
1661 gain += COSTS_N_INSNS (2);
1662 else if (MEM_P (dst))
1663 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1664 gain -= vector_const_cost (src);
1666 else
1667 gcc_unreachable ();
1670 if (dump_file)
1671 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
1673 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1674 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1676 if (dump_file)
1677 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
1679 gain -= cost;
1681 if (dump_file)
1682 fprintf (dump_file, " Total gain: %d\n", gain);
1684 return gain;
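/* Worked example, not part of the original source: for a chain consisting of
   a single DImode register-to-register move, the loop above credits
   COSTS_N_INSNS (2) - ix86_cost->xmm_move (two SImode moves replaced by one
   SSE move), and the second loop then subtracts ix86_cost->mmxsse_to_integer
   once per definition of every register that must also stay live in scalar
   mode; the caller converts the chain only when the resulting total gain is
   positive.  */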
1687 /* Replace REG in X with a V2DI subreg of NEW_REG. */
1689 rtx
1690 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1692 if (x == reg)
1693 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1695 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1696 int i, j;
1697 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1699 if (fmt[i] == 'e')
1700 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1701 else if (fmt[i] == 'E')
1702 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1703 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1704 reg, new_reg);
1707 return x;
1710 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
1712 void
1713 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1714 rtx reg, rtx new_reg)
1716 replace_with_subreg (single_set (insn), reg, new_reg);
1719 /* Insert the generated conversion instruction sequence INSNS
1720 after instruction AFTER. A new BB may be required if the
1721 instruction has an EH region attached. */
1723 void
1724 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1726 if (!control_flow_insn_p (after))
1728 emit_insn_after (insns, after);
1729 return;
1732 basic_block bb = BLOCK_FOR_INSN (after);
1733 edge e = find_fallthru_edge (bb->succs);
1734 gcc_assert (e);
1736 basic_block new_bb = split_edge (e);
1737 emit_insn_after (insns, BB_HEAD (new_bb));
1740 /* Make vector copies for all register REGNO definitions
1741 and replace its uses in a chain. */
1743 void
1744 dimode_scalar_chain::make_vector_copies (unsigned regno)
1746 rtx reg = regno_reg_rtx[regno];
1747 rtx vreg = gen_reg_rtx (DImode);
1748 bool count_reg = false;
1749 df_ref ref;
1751 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1752 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1754 df_ref use;
1756 /* Detect the count register of a shift instruction. */
1757 for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1758 if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1760 rtx_insn *insn = DF_REF_INSN (use);
1761 rtx def_set = single_set (insn);
1763 gcc_assert (def_set);
1765 rtx src = SET_SRC (def_set);
1767 if ((GET_CODE (src) == ASHIFT
1768 || GET_CODE (src) == ASHIFTRT
1769 || GET_CODE (src) == LSHIFTRT)
1770 && !CONST_INT_P (XEXP (src, 1))
1771 && reg_or_subregno (XEXP (src, 1)) == regno)
1772 count_reg = true;
1775 start_sequence ();
1776 if (count_reg)
1778 rtx qreg = gen_lowpart (QImode, reg);
1779 rtx tmp = gen_reg_rtx (SImode);
1781 if (TARGET_ZERO_EXTEND_WITH_AND
1782 && optimize_function_for_speed_p (cfun))
1784 emit_move_insn (tmp, const0_rtx);
1785 emit_insn (gen_movstrictqi
1786 (gen_lowpart (QImode, tmp), qreg));
1788 else
1789 emit_insn (gen_rtx_SET
1790 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1792 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1794 rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1795 emit_move_insn (slot, tmp);
1796 tmp = copy_rtx (slot);
1799 emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1801 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1803 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1804 emit_move_insn (adjust_address (tmp, SImode, 0),
1805 gen_rtx_SUBREG (SImode, reg, 0));
1806 emit_move_insn (adjust_address (tmp, SImode, 4),
1807 gen_rtx_SUBREG (SImode, reg, 4));
1808 emit_move_insn (vreg, tmp);
1810 else if (TARGET_SSE4_1)
1812 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1813 CONST0_RTX (V4SImode),
1814 gen_rtx_SUBREG (SImode, reg, 0)));
1815 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1816 gen_rtx_SUBREG (V4SImode, vreg, 0),
1817 gen_rtx_SUBREG (SImode, reg, 4),
1818 GEN_INT (2)));
1820 else
1822 rtx tmp = gen_reg_rtx (DImode);
1823 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1824 CONST0_RTX (V4SImode),
1825 gen_rtx_SUBREG (SImode, reg, 0)));
1826 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1827 CONST0_RTX (V4SImode),
1828 gen_rtx_SUBREG (SImode, reg, 4)));
1829 emit_insn (gen_vec_interleave_lowv4si
1830 (gen_rtx_SUBREG (V4SImode, vreg, 0),
1831 gen_rtx_SUBREG (V4SImode, vreg, 0),
1832 gen_rtx_SUBREG (V4SImode, tmp, 0)));
1834 rtx_insn *seq = get_insns ();
1835 end_sequence ();
1836 rtx_insn *insn = DF_REF_INSN (ref);
1837 emit_conversion_insns (seq, insn);
1839 if (dump_file)
1840 fprintf (dump_file,
1841 " Copied r%d to a vector register r%d for insn %d\n",
1842 regno, REGNO (vreg), INSN_UID (insn));
1845 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1846 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1848 rtx_insn *insn = DF_REF_INSN (ref);
1849 if (count_reg)
1851 rtx def_set = single_set (insn);
1852 gcc_assert (def_set);
1854 rtx src = SET_SRC (def_set);
1856 if ((GET_CODE (src) == ASHIFT
1857 || GET_CODE (src) == ASHIFTRT
1858 || GET_CODE (src) == LSHIFTRT)
1859 && !CONST_INT_P (XEXP (src, 1))
1860 && reg_or_subregno (XEXP (src, 1)) == regno)
1861 XEXP (src, 1) = vreg;
1863 else
1864 replace_with_subreg_in_insn (insn, reg, vreg);
1866 if (dump_file)
1867 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
1868 regno, REGNO (vreg), INSN_UID (insn));
1872 /* Convert all definitions of register REGNO
1873 and fix its uses. Scalar copies may be created
1874 if the register is used in a non-convertible insn. */
1876 void
1877 dimode_scalar_chain::convert_reg (unsigned regno)
1879 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1880 rtx reg = regno_reg_rtx[regno];
1881 rtx scopy = NULL_RTX;
1882 df_ref ref;
1883 bitmap conv;
1885 conv = BITMAP_ALLOC (NULL);
1886 bitmap_copy (conv, insns);
1888 if (scalar_copy)
1889 scopy = gen_reg_rtx (DImode);
1891 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1893 rtx_insn *insn = DF_REF_INSN (ref);
1894 rtx def_set = single_set (insn);
1895 rtx src = SET_SRC (def_set);
1896 rtx reg = DF_REF_REG (ref);
1898 if (!MEM_P (src))
1900 replace_with_subreg_in_insn (insn, reg, reg);
1901 bitmap_clear_bit (conv, INSN_UID (insn));
1904 if (scalar_copy)
1906 start_sequence ();
1907 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1909 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1910 emit_move_insn (tmp, reg);
1911 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1912 adjust_address (tmp, SImode, 0));
1913 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1914 adjust_address (tmp, SImode, 4));
1916 else if (TARGET_SSE4_1)
1918 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1919 emit_insn
1920 (gen_rtx_SET
1921 (gen_rtx_SUBREG (SImode, scopy, 0),
1922 gen_rtx_VEC_SELECT (SImode,
1923 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1925 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1926 emit_insn
1927 (gen_rtx_SET
1928 (gen_rtx_SUBREG (SImode, scopy, 4),
1929 gen_rtx_VEC_SELECT (SImode,
1930 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1932 else
1934 rtx vcopy = gen_reg_rtx (V2DImode);
1935 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1936 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1937 gen_rtx_SUBREG (SImode, vcopy, 0));
1938 emit_move_insn (vcopy,
1939 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1940 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1941 gen_rtx_SUBREG (SImode, vcopy, 0));
1943 rtx_insn *seq = get_insns ();
1944 end_sequence ();
1945 emit_conversion_insns (seq, insn);
1947 if (dump_file)
1948 fprintf (dump_file,
1949 " Copied r%d to a scalar register r%d for insn %d\n",
1950 regno, REGNO (scopy), INSN_UID (insn));
1954 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1955 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1957 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1959 rtx_insn *insn = DF_REF_INSN (ref);
1961 rtx def_set = single_set (insn);
1962 gcc_assert (def_set);
1964 rtx src = SET_SRC (def_set);
1965 rtx dst = SET_DEST (def_set);
1967 if ((GET_CODE (src) == ASHIFT
1968 || GET_CODE (src) == ASHIFTRT
1969 || GET_CODE (src) == LSHIFTRT)
1970 && !CONST_INT_P (XEXP (src, 1))
1971 && reg_or_subregno (XEXP (src, 1)) == regno)
1973 rtx tmp2 = gen_reg_rtx (V2DImode);
1975 start_sequence ();
1977 if (TARGET_SSE4_1)
1978 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1979 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1980 else
1982 rtx vec_cst
1983 = gen_rtx_CONST_VECTOR (V2DImode,
1984 gen_rtvec (2, GEN_INT (0xff),
1985 const0_rtx));
1986 vec_cst
1987 = validize_mem (force_const_mem (V2DImode, vec_cst));
1989 emit_insn (gen_rtx_SET
1990 (tmp2,
1991 gen_rtx_AND (V2DImode,
1992 gen_rtx_SUBREG (V2DImode, reg, 0),
1993 vec_cst)));
1995 rtx_insn *seq = get_insns ();
1996 end_sequence ();
1998 emit_insn_before (seq, insn);
2000 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
2002 else if (!MEM_P (dst) || !REG_P (src))
2003 replace_with_subreg_in_insn (insn, reg, reg);
2005 bitmap_clear_bit (conv, INSN_UID (insn));
2008 /* Skip debug insns and uninitialized uses. */
2009 else if (DF_REF_CHAIN (ref)
2010 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2012 gcc_assert (scopy);
2013 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2014 df_insn_rescan (DF_REF_INSN (ref));
2017 BITMAP_FREE (conv);
2020 /* Convert operand OP in INSN. We should handle
2021 memory operands and uninitialized registers.
2022 All other register uses are converted during
2023 register conversion. */
2025 void
2026 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2028 *op = copy_rtx_if_shared (*op);
2030 if (GET_CODE (*op) == NOT)
2032 convert_op (&XEXP (*op, 0), insn);
2033 PUT_MODE (*op, V2DImode);
2035 else if (MEM_P (*op))
2037 rtx tmp = gen_reg_rtx (DImode);
2039 emit_insn_before (gen_move_insn (tmp, *op), insn);
2040 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2042 if (dump_file)
2043 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2044 INSN_UID (insn), REGNO (tmp));
2046 else if (REG_P (*op))
2048 /* We may not have converted the register use if
2049 this register has no definition. Otherwise it
2050 should have been converted in convert_reg. */
2051 df_ref ref;
2052 FOR_EACH_INSN_USE (ref, insn)
2053 if (DF_REF_REGNO (ref) == REGNO (*op))
2055 gcc_assert (!DF_REF_CHAIN (ref));
2056 break;
2058 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2060 else if (CONST_INT_P (*op))
2062 rtx vec_cst;
2063 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2065 /* Prefer all ones vector in case of -1. */
2066 if (constm1_operand (*op, GET_MODE (*op)))
2067 vec_cst = CONSTM1_RTX (V2DImode);
2068 else
2069 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2070 gen_rtvec (2, *op, const0_rtx));
2072 if (!standard_sse_constant_p (vec_cst, V2DImode))
2074 start_sequence ();
2075 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2076 rtx_insn *seq = get_insns ();
2077 end_sequence ();
2078 emit_insn_before (seq, insn);
2081 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2082 *op = tmp;
2084 else
2086 gcc_assert (SUBREG_P (*op));
2087 gcc_assert (GET_MODE (*op) == V2DImode);
2091 /* Convert INSN to vector mode. */
2093 void
2094 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2096 rtx def_set = single_set (insn);
2097 rtx src = SET_SRC (def_set);
2098 rtx dst = SET_DEST (def_set);
2099 rtx subreg;
2101 if (MEM_P (dst) && !REG_P (src))
2103 /* There are no scalar integer instructions and therefore
2104 temporary register usage is required. */
2105 rtx tmp = gen_reg_rtx (DImode);
2106 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2107 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2110 switch (GET_CODE (src))
2112 case ASHIFT:
2113 case ASHIFTRT:
2114 case LSHIFTRT:
2115 convert_op (&XEXP (src, 0), insn);
2116 PUT_MODE (src, V2DImode);
2117 break;
2119 case PLUS:
2120 case MINUS:
2121 case IOR:
2122 case XOR:
2123 case AND:
2124 convert_op (&XEXP (src, 0), insn);
2125 convert_op (&XEXP (src, 1), insn);
2126 PUT_MODE (src, V2DImode);
2127 break;
2129 case NEG:
2130 src = XEXP (src, 0);
2131 convert_op (&src, insn);
2132 subreg = gen_reg_rtx (V2DImode);
2133 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2134 src = gen_rtx_MINUS (V2DImode, subreg, src);
2135 break;
2137 case NOT:
2138 src = XEXP (src, 0);
2139 convert_op (&src, insn);
2140 subreg = gen_reg_rtx (V2DImode);
2141 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2142 src = gen_rtx_XOR (V2DImode, src, subreg);
2143 break;
2145 case MEM:
2146 if (!REG_P (dst))
2147 convert_op (&src, insn);
2148 break;
2150 case REG:
2151 if (!MEM_P (dst))
2152 convert_op (&src, insn);
2153 break;
2155 case SUBREG:
2156 gcc_assert (GET_MODE (src) == V2DImode);
2157 break;
2159 case COMPARE:
2160 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2162 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2163 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2165 if (REG_P (src))
2166 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2167 else
2168 subreg = copy_rtx_if_shared (src);
2169 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2170 copy_rtx_if_shared (subreg),
2171 copy_rtx_if_shared (subreg)),
2172 insn);
2173 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2174 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2175 copy_rtx_if_shared (src)),
2176 UNSPEC_PTEST);
2177 break;
2179 case CONST_INT:
2180 convert_op (&src, insn);
2181 break;
2183 default:
2184 gcc_unreachable ();
2187 SET_SRC (def_set) = src;
2188 SET_DEST (def_set) = dst;
2190 /* Drop possible dead definitions. */
2191 PATTERN (insn) = def_set;
2193 INSN_CODE (insn) = -1;
2194 recog_memoized (insn);
2195 df_insn_rescan (insn);
2198 /* Fix uses of converted REG in debug insns. */
2200 void
2201 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2203 if (!flag_var_tracking)
2204 return;
2206 df_ref ref, next;
2207 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2209 rtx_insn *insn = DF_REF_INSN (ref);
2210 /* Make sure the next ref is for a different instruction,
2211 so that we're not affected by the rescan. */
2212 next = DF_REF_NEXT_REG (ref);
2213 while (next && DF_REF_INSN (next) == insn)
2214 next = DF_REF_NEXT_REG (next);
2216 if (DEBUG_INSN_P (insn))
2218 /* It may be a debug insn with a TImode variable in
2219 a register. */
2220 bool changed = false;
2221 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2223 rtx *loc = DF_REF_LOC (ref);
2224 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2226 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2227 changed = true;
2230 if (changed)
2231 df_insn_rescan (insn);
2236 /* Convert INSN from TImode to V1TImode. */
2238 void
2239 timode_scalar_chain::convert_insn (rtx_insn *insn)
2241 rtx def_set = single_set (insn);
2242 rtx src = SET_SRC (def_set);
2243 rtx dst = SET_DEST (def_set);
2245 switch (GET_CODE (dst))
2247 case REG:
2249 rtx tmp = find_reg_equal_equiv_note (insn);
2250 if (tmp)
2251 PUT_MODE (XEXP (tmp, 0), V1TImode);
2252 PUT_MODE (dst, V1TImode);
2253 fix_debug_reg_uses (dst);
2255 break;
2256 case MEM:
2257 PUT_MODE (dst, V1TImode);
2258 break;
2260 default:
2261 gcc_unreachable ();
2264 switch (GET_CODE (src))
2266 case REG:
2267 PUT_MODE (src, V1TImode);
2268 /* Call fix_debug_reg_uses only if SRC is never defined. */
2269 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2270 fix_debug_reg_uses (src);
2271 break;
2273 case MEM:
2274 PUT_MODE (src, V1TImode);
2275 break;
2277 case CONST_WIDE_INT:
2278 if (NONDEBUG_INSN_P (insn))
2280 /* Since there are no instructions to store a 128-bit constant,
2281 a temporary register is required. */
2282 rtx tmp = gen_reg_rtx (V1TImode);
2283 start_sequence ();
2284 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2285 src = validize_mem (force_const_mem (V1TImode, src));
2286 rtx_insn *seq = get_insns ();
2287 end_sequence ();
2288 if (seq)
2289 emit_insn_before (seq, insn);
2290 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2291 dst = tmp;
2293 break;
2295 case CONST_INT:
2296 switch (standard_sse_constant_p (src, TImode))
2298 case 1:
2299 src = CONST0_RTX (GET_MODE (dst));
2300 break;
2301 case 2:
2302 src = CONSTM1_RTX (GET_MODE (dst));
2303 break;
2304 default:
2305 gcc_unreachable ();
2307 if (NONDEBUG_INSN_P (insn))
2309 rtx tmp = gen_reg_rtx (V1TImode);
2310 /* Since there are no instructions to store a standard SSE
2311 constant, a temporary register is required. */
2312 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2313 dst = tmp;
2315 break;
2317 default:
2318 gcc_unreachable ();
2321 SET_SRC (def_set) = src;
2322 SET_DEST (def_set) = dst;
2324 /* Drop possible dead definitions. */
2325 PATTERN (insn) = def_set;
2327 INSN_CODE (insn) = -1;
2328 recog_memoized (insn);
2329 df_insn_rescan (insn);
2332 void
2333 dimode_scalar_chain::convert_registers ()
2335 bitmap_iterator bi;
2336 unsigned id;
2338 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2339 convert_reg (id);
2341 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2342 make_vector_copies (id);
2345 /* Convert the whole chain, creating the required register
2346 conversions and copies. */
2349 scalar_chain::convert ()
2351 bitmap_iterator bi;
2352 unsigned id;
2353 int converted_insns = 0;
2355 if (!dbg_cnt (stv_conversion))
2356 return 0;
2358 if (dump_file)
2359 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2361 convert_registers ();
2363 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2365 convert_insn (DF_INSN_UID_GET (id)->insn);
2366 converted_insns++;
2369 return converted_insns;
2372 /* Main STV pass function. Find and convert scalar
2373 instructions into vector mode when profitable. */
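/* A rough outline of the pass as implemented below: collect candidate insns
   per basic block, drop registers that cannot be converted, then repeatedly
   build a chain starting from the first remaining candidate and convert it
   when compute_convert_gain () reports a positive gain. If anything was
   converted, force 128-bit stack alignment for the resulting spills/fills.  */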
2375 static unsigned int
2376 convert_scalars_to_vector ()
2378 basic_block bb;
2379 bitmap candidates;
2380 int converted_insns = 0;
2382 bitmap_obstack_initialize (NULL);
2383 candidates = BITMAP_ALLOC (NULL);
2385 calculate_dominance_info (CDI_DOMINATORS);
2386 df_set_flags (DF_DEFER_INSN_RESCAN);
2387 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2388 df_md_add_problem ();
2389 df_analyze ();
2391 /* Find all instructions we want to convert into vector mode. */
2392 if (dump_file)
2393 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2395 FOR_EACH_BB_FN (bb, cfun)
2397 rtx_insn *insn;
2398 FOR_BB_INSNS (bb, insn)
2399 if (scalar_to_vector_candidate_p (insn))
2401 if (dump_file)
2402 fprintf (dump_file, " insn %d is marked as a candidate\n",
2403 INSN_UID (insn));
2405 bitmap_set_bit (candidates, INSN_UID (insn));
2409 remove_non_convertible_regs (candidates);
2411 if (bitmap_empty_p (candidates))
2412 if (dump_file)
2413 fprintf (dump_file, "There are no candidates for optimization.\n");
2415 while (!bitmap_empty_p (candidates))
2417 unsigned uid = bitmap_first_set_bit (candidates);
2418 scalar_chain *chain;
2420 if (TARGET_64BIT)
2421 chain = new timode_scalar_chain;
2422 else
2423 chain = new dimode_scalar_chain;
2425 /* Find the chain of instructions we want to convert to vector mode.
2426 Check all uses and definitions to estimate all required
2427 conversions. */
2428 chain->build (candidates, uid);
2430 if (chain->compute_convert_gain () > 0)
2431 converted_insns += chain->convert ();
2432 else
2433 if (dump_file)
2434 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2435 chain->chain_id);
2437 delete chain;
2440 if (dump_file)
2441 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2443 BITMAP_FREE (candidates);
2444 bitmap_obstack_release (NULL);
2445 df_process_deferred_rescans ();
2447 /* Conversion means we may have 128-bit register spills/fills,
2448 which require an aligned stack. */
2449 if (converted_insns)
2451 if (crtl->stack_alignment_needed < 128)
2452 crtl->stack_alignment_needed = 128;
2453 if (crtl->stack_alignment_estimated < 128)
2454 crtl->stack_alignment_estimated = 128;
2455 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2456 if (TARGET_64BIT)
2457 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2458 parm; parm = DECL_CHAIN (parm))
2460 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2461 continue;
2462 if (DECL_RTL_SET_P (parm)
2463 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2465 rtx r = DECL_RTL (parm);
2466 if (REG_P (r))
2467 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2469 if (DECL_INCOMING_RTL (parm)
2470 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2472 rtx r = DECL_INCOMING_RTL (parm);
2473 if (REG_P (r))
2474 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2479 return 0;
2482 namespace {
2484 const pass_data pass_data_insert_vzeroupper =
2486 RTL_PASS, /* type */
2487 "vzeroupper", /* name */
2488 OPTGROUP_NONE, /* optinfo_flags */
2489 TV_MACH_DEP, /* tv_id */
2490 0, /* properties_required */
2491 0, /* properties_provided */
2492 0, /* properties_destroyed */
2493 0, /* todo_flags_start */
2494 TODO_df_finish, /* todo_flags_finish */
2497 class pass_insert_vzeroupper : public rtl_opt_pass
2499 public:
2500 pass_insert_vzeroupper(gcc::context *ctxt)
2501 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2504 /* opt_pass methods: */
2505 virtual bool gate (function *)
2507 return TARGET_AVX
2508 && TARGET_VZEROUPPER && flag_expensive_optimizations
2509 && !optimize_size;
2512 virtual unsigned int execute (function *)
2514 return rest_of_handle_insert_vzeroupper ();
2517 }; // class pass_insert_vzeroupper
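/* Note: the STV pass below supports being instantiated more than once;
   clone () duplicates it and set_pass_param () selects the TImode (64-bit)
   or DImode (32-bit) variant via the timode_p flag, which the gate checks
   against TARGET_64BIT.  */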
2519 const pass_data pass_data_stv =
2521 RTL_PASS, /* type */
2522 "stv", /* name */
2523 OPTGROUP_NONE, /* optinfo_flags */
2524 TV_MACH_DEP, /* tv_id */
2525 0, /* properties_required */
2526 0, /* properties_provided */
2527 0, /* properties_destroyed */
2528 0, /* todo_flags_start */
2529 TODO_df_finish, /* todo_flags_finish */
2532 class pass_stv : public rtl_opt_pass
2534 public:
2535 pass_stv (gcc::context *ctxt)
2536 : rtl_opt_pass (pass_data_stv, ctxt),
2537 timode_p (false)
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2543 return (timode_p == !!TARGET_64BIT
2544 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2547 virtual unsigned int execute (function *)
2549 return convert_scalars_to_vector ();
2552 opt_pass *clone ()
2554 return new pass_stv (m_ctxt);
2557 void set_pass_param (unsigned int n, bool param)
2559 gcc_assert (n == 0);
2560 timode_p = param;
2563 private:
2564 bool timode_p;
2565 }; // class pass_stv
2567 } // anon namespace
2569 rtl_opt_pass *
2570 make_pass_insert_vzeroupper (gcc::context *ctxt)
2572 return new pass_insert_vzeroupper (ctxt);
2575 rtl_opt_pass *
2576 make_pass_stv (gcc::context *ctxt)
2578 return new pass_stv (ctxt);
2581 /* Inserting ENDBRANCH instructions. */
2583 static unsigned int
2584 rest_of_insert_endbranch (void)
2586 timevar_push (TV_MACH_DEP);
2588 rtx cet_eb;
2589 rtx_insn *insn;
2590 basic_block bb;
2592 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
2593 absent among the function attributes. Later an optimization will be
2594 introduced to analyze whether the address of a static function is
2595 taken. A static function whose address is not taken will get a
2596 nocf_check attribute. This will allow reducing the number of EBs. */
2598 if (!lookup_attribute ("nocf_check",
2599 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2600 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2602 cet_eb = gen_nop_endbr ();
2604 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2605 insn = BB_HEAD (bb);
2606 emit_insn_before (cet_eb, insn);
2609 bb = 0;
2610 FOR_EACH_BB_FN (bb, cfun)
2612 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2613 insn = NEXT_INSN (insn))
2615 if (CALL_P (insn))
2617 if (find_reg_note (insn, REG_SETJMP, NULL) == NULL)
2618 continue;
2619 /* Generate ENDBRANCH after a CALL which can return more than
2620 once, i.e. setjmp-like functions. */
2622 cet_eb = gen_nop_endbr ();
2623 emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
2624 continue;
2627 if (JUMP_P (insn) && flag_cet_switch)
2629 rtx target = JUMP_LABEL (insn);
2630 if (target == NULL_RTX || ANY_RETURN_P (target))
2631 continue;
2633 /* Check whether the jump targets a switch table. */
2634 rtx_insn *label = as_a<rtx_insn *> (target);
2635 rtx_insn *table = next_insn (label);
2636 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2637 continue;
2639 /* For the indirect jump, find all the places it can jump to and insert
2640 ENDBRANCH there. This is done under a special flag to control
2641 ENDBRANCH generation for switch statements. */
2642 edge_iterator ei;
2643 edge e;
2644 basic_block dest_blk;
2646 FOR_EACH_EDGE (e, ei, bb->succs)
2648 rtx_insn *insn;
2650 dest_blk = e->dest;
2651 insn = BB_HEAD (dest_blk);
2652 gcc_assert (LABEL_P (insn));
2653 cet_eb = gen_nop_endbr ();
2654 emit_insn_after (cet_eb, insn);
2656 continue;
2659 if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2660 || (NOTE_P (insn)
2661 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2662 /* TODO. Check /s bit also. */
2664 cet_eb = gen_nop_endbr ();
2665 emit_insn_after (cet_eb, insn);
2666 continue;
2671 timevar_pop (TV_MACH_DEP);
2672 return 0;
2675 namespace {
2677 const pass_data pass_data_insert_endbranch =
2679 RTL_PASS, /* type. */
2680 "cet", /* name. */
2681 OPTGROUP_NONE, /* optinfo_flags. */
2682 TV_MACH_DEP, /* tv_id. */
2683 0, /* properties_required. */
2684 0, /* properties_provided. */
2685 0, /* properties_destroyed. */
2686 0, /* todo_flags_start. */
2687 0, /* todo_flags_finish. */
2690 class pass_insert_endbranch : public rtl_opt_pass
2692 public:
2693 pass_insert_endbranch (gcc::context *ctxt)
2694 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2697 /* opt_pass methods: */
2698 virtual bool gate (function *)
2700 return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
2703 virtual unsigned int execute (function *)
2705 return rest_of_insert_endbranch ();
2708 }; // class pass_insert_endbranch
2710 } // anon namespace
2712 rtl_opt_pass *
2713 make_pass_insert_endbranch (gcc::context *ctxt)
2715 return new pass_insert_endbranch (ctxt);
2718 /* Return true if a red zone is in use. We can't use a red zone when
2719 there are local indirect jumps, like "indirect_jump" or "tablejump",
2720 which jump to another place in the function, since "call" in the
2721 indirect thunk pushes the return address onto the stack, destroying
2722 the red zone.
2724 TODO: If we can reserve the first 2 WORDs in the red zone, one for PUSH
2725 and another for CALL, we can allow local indirect jumps with the
2726 indirect thunk. */
2728 bool
2729 ix86_using_red_zone (void)
2731 return (TARGET_RED_ZONE
2732 && !TARGET_64BIT_MS_ABI
2733 && (!cfun->machine->has_local_indirect_jump
2734 || cfun->machine->indirect_branch_type == indirect_branch_keep));
2737 /* Return a string that documents the current -m options. The caller is
2738 responsible for freeing the string. */
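/* For illustration only (the exact output depends on the active options):
   the returned string looks something like
     "-march=haswell -mtune=generic -m64 -mavx2 -msse4.2 ... -mfpmath=sse",
   with lines wrapped at roughly 70 columns when ADD_NL_P is true.  */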
2740 static char *
2741 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2742 int flags, int flags2,
2743 const char *arch, const char *tune,
2744 enum fpmath_unit fpmath, bool add_nl_p)
2746 struct ix86_target_opts
2748 const char *option; /* option string */
2749 HOST_WIDE_INT mask; /* isa mask options */
2752 /* This table is ordered so that options like -msse4.2 that imply other
2753 ISAs come first. The target string will be displayed in the same order. */
2754 static struct ix86_target_opts isa2_opts[] =
2756 { "-mcx16", OPTION_MASK_ISA_CX16 },
2757 { "-mmpx", OPTION_MASK_ISA_MPX },
2758 { "-mvaes", OPTION_MASK_ISA_VAES },
2759 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2760 { "-msgx", OPTION_MASK_ISA_SGX },
2761 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2762 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2763 { "-mibt", OPTION_MASK_ISA_IBT },
2764 { "-mhle", OPTION_MASK_ISA_HLE },
2765 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2766 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2767 { "-mmwaitx", OPTION_MASK_ISA_MWAITX }
2769 static struct ix86_target_opts isa_opts[] =
2771 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2772 { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG },
2773 { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ },
2774 { "-mgfni", OPTION_MASK_ISA_GFNI },
2775 { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI },
2776 { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 },
2777 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2778 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2779 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2780 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2781 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2782 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2783 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2784 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2785 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2786 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2787 { "-mfma", OPTION_MASK_ISA_FMA },
2788 { "-mxop", OPTION_MASK_ISA_XOP },
2789 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2790 { "-mf16c", OPTION_MASK_ISA_F16C },
2791 { "-mavx", OPTION_MASK_ISA_AVX },
2792 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2793 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2794 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2795 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2796 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2797 { "-msse3", OPTION_MASK_ISA_SSE3 },
2798 { "-maes", OPTION_MASK_ISA_AES },
2799 { "-msha", OPTION_MASK_ISA_SHA },
2800 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2801 { "-msse2", OPTION_MASK_ISA_SSE2 },
2802 { "-msse", OPTION_MASK_ISA_SSE },
2803 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2804 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2805 { "-mmmx", OPTION_MASK_ISA_MMX },
2806 { "-mrtm", OPTION_MASK_ISA_RTM },
2807 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2808 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2809 { "-madx", OPTION_MASK_ISA_ADX },
2810 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2811 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2812 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2813 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2814 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2815 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2816 { "-mabm", OPTION_MASK_ISA_ABM },
2817 { "-mbmi", OPTION_MASK_ISA_BMI },
2818 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2819 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2820 { "-mtbm", OPTION_MASK_ISA_TBM },
2821 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2822 { "-msahf", OPTION_MASK_ISA_SAHF },
2823 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2824 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2825 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2826 { "-mpku", OPTION_MASK_ISA_PKU },
2827 { "-mlwp", OPTION_MASK_ISA_LWP },
2828 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2829 { "-mclwb", OPTION_MASK_ISA_CLWB },
2830 { "-mshstk", OPTION_MASK_ISA_SHSTK }
2833 /* Flag options. */
2834 static struct ix86_target_opts flag_opts[] =
2836 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2837 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2838 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2839 { "-m80387", MASK_80387 },
2840 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2841 { "-malign-double", MASK_ALIGN_DOUBLE },
2842 { "-mcld", MASK_CLD },
2843 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2844 { "-mieee-fp", MASK_IEEE_FP },
2845 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2846 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2847 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2848 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2849 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2850 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2851 { "-mno-red-zone", MASK_NO_RED_ZONE },
2852 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2853 { "-mrecip", MASK_RECIP },
2854 { "-mrtd", MASK_RTD },
2855 { "-msseregparm", MASK_SSEREGPARM },
2856 { "-mstack-arg-probe", MASK_STACK_PROBE },
2857 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2858 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2859 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2860 { "-mvzeroupper", MASK_VZEROUPPER },
2861 { "-mstv", MASK_STV },
2862 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2863 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2864 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2867 /* Additional flag options. */
2868 static struct ix86_target_opts flag2_opts[] =
2870 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }
2873 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2874 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2876 char isa_other[40];
2877 char isa2_other[40];
2878 char flags_other[40];
2879 char flags2_other[40];
2880 unsigned num = 0;
2881 unsigned i, j;
2882 char *ret;
2883 char *ptr;
2884 size_t len;
2885 size_t line_len;
2886 size_t sep_len;
2887 const char *abi;
2889 memset (opts, '\0', sizeof (opts));
2891 /* Add -march= option. */
2892 if (arch)
2894 opts[num][0] = "-march=";
2895 opts[num++][1] = arch;
2898 /* Add -mtune= option. */
2899 if (tune)
2901 opts[num][0] = "-mtune=";
2902 opts[num++][1] = tune;
2905 /* Add -m32/-m64/-mx32. */
2906 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2908 if ((isa & OPTION_MASK_ABI_64) != 0)
2909 abi = "-m64";
2910 else
2911 abi = "-mx32";
2912 isa &= ~ (OPTION_MASK_ISA_64BIT
2913 | OPTION_MASK_ABI_64
2914 | OPTION_MASK_ABI_X32);
2916 else
2917 abi = "-m32";
2918 opts[num++][0] = abi;
2920 /* Pick out the options set in isa2. */
2921 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2923 if ((isa2 & isa2_opts[i].mask) != 0)
2925 opts[num++][0] = isa2_opts[i].option;
2926 isa2 &= ~ isa2_opts[i].mask;
2930 if (isa2 && add_nl_p)
2932 opts[num++][0] = isa2_other;
2933 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2936 /* Pick out the options set in isa. */
2937 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2939 if ((isa & isa_opts[i].mask) != 0)
2941 opts[num++][0] = isa_opts[i].option;
2942 isa &= ~ isa_opts[i].mask;
2946 if (isa && add_nl_p)
2948 opts[num++][0] = isa_other;
2949 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2952 /* Add flag options. */
2953 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2955 if ((flags & flag_opts[i].mask) != 0)
2957 opts[num++][0] = flag_opts[i].option;
2958 flags &= ~ flag_opts[i].mask;
2962 if (flags && add_nl_p)
2964 opts[num++][0] = flags_other;
2965 sprintf (flags_other, "(other flags: %#x)", flags);
2968 /* Add additional flag options. */
2969 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2971 if ((flags2 & flag2_opts[i].mask) != 0)
2973 opts[num++][0] = flag2_opts[i].option;
2974 flags2 &= ~ flag2_opts[i].mask;
2978 if (flags2 && add_nl_p)
2980 opts[num++][0] = flags2_other;
2981 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2984 /* Add -fpmath= option. */
2985 if (fpmath)
2987 opts[num][0] = "-mfpmath=";
2988 switch ((int) fpmath)
2990 case FPMATH_387:
2991 opts[num++][1] = "387";
2992 break;
2994 case FPMATH_SSE:
2995 opts[num++][1] = "sse";
2996 break;
2998 case FPMATH_387 | FPMATH_SSE:
2999 opts[num++][1] = "sse+387";
3000 break;
3002 default:
3003 gcc_unreachable ();
3007 /* Any options? */
3008 if (num == 0)
3009 return NULL;
3011 gcc_assert (num < ARRAY_SIZE (opts));
3013 /* Size the string. */
3014 len = 0;
3015 sep_len = (add_nl_p) ? 3 : 1;
3016 for (i = 0; i < num; i++)
3018 len += sep_len;
3019 for (j = 0; j < 2; j++)
3020 if (opts[i][j])
3021 len += strlen (opts[i][j]);
3024 /* Build the string. */
3025 ret = ptr = (char *) xmalloc (len);
3026 line_len = 0;
3028 for (i = 0; i < num; i++)
3030 size_t len2[2];
3032 for (j = 0; j < 2; j++)
3033 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3035 if (i != 0)
3037 *ptr++ = ' ';
3038 line_len++;
3040 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3042 *ptr++ = '\\';
3043 *ptr++ = '\n';
3044 line_len = 0;
3048 for (j = 0; j < 2; j++)
3049 if (opts[i][j])
3051 memcpy (ptr, opts[i][j], len2[j]);
3052 ptr += len2[j];
3053 line_len += len2[j];
3057 *ptr = '\0';
3058 gcc_assert (ret + len >= ptr);
3060 return ret;
3063 /* Return true if profiling code should be emitted before
3064 the prologue, otherwise return false.
3065 Note: for x86 with "hotfix" this is not supported. */
3066 static bool
3067 ix86_profile_before_prologue (void)
3069 return flag_fentry != 0;
3072 /* Function that is callable from the debugger to print the current
3073 options. */
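/* E.g. from a gdb session, "call ix86_debug_options ()" dumps the current
   option string to stderr.  */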
3074 void ATTRIBUTE_UNUSED
3075 ix86_debug_options (void)
3077 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3078 target_flags, ix86_target_flags,
3079 ix86_arch_string, ix86_tune_string,
3080 ix86_fpmath, true);
3082 if (opts)
3084 fprintf (stderr, "%s\n\n", opts);
3085 free (opts);
3087 else
3088 fputs ("<no options>\n\n", stderr);
3090 return;
3093 /* Return true if T is one of the bytes we should avoid with
3094 -mmitigate-rop. */
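/* These are the near and far RET opcodes (0xc3, 0xcb) and their imm16 forms
   (0xc2, 0xca), i.e. the bytes that make convenient ROP gadget endings.  */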
3096 static bool
3097 ix86_rop_should_change_byte_p (int t)
3099 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3102 static const char *stringop_alg_names[] = {
3103 #define DEF_ENUM
3104 #define DEF_ALG(alg, name) #name,
3105 #include "stringop.def"
3106 #undef DEF_ENUM
3107 #undef DEF_ALG
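/* The table above expands every DEF_ALG (alg, name) entry of stringop.def
   into the string "name", so its order matches the stringop_alg enumeration
   used to index it below.  */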
3110 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3111 The string is of the following form (or a comma-separated list of such entries):
3113 strategy_alg:max_size:[align|noalign]
3115 where the full size range for the strategy is either [0, max_size] or
3116 [min_size, max_size], in which min_size is the max_size + 1 of the
3117 preceding range. The last size range must have max_size == -1.
3119 Examples:
3122 -mmemcpy-strategy=libcall:-1:noalign
3124 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3128 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3130 This tells the compiler to use the following strategy for memset:
3131 1) when the expected size is between [1, 16], use rep_8byte strategy;
3132 2) when the size is between [17, 2048], use vector_loop;
3133 3) when the size is > 2048, use libcall. */
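/* For illustration only: the memset example above would be parsed into three
   stringop_size_range entries, roughly { 16, rep_8byte, true },
   { 2048, vector_loop, false } and { -1, libcall, true }, i.e. the range's
   max size, the chosen algorithm and the noalign flag.  */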
3135 struct stringop_size_range
3137 int max;
3138 stringop_alg alg;
3139 bool noalign;
3142 static void
3143 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3145 const struct stringop_algs *default_algs;
3146 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3147 char *curr_range_str, *next_range_str;
3148 const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
3149 int i = 0, n = 0;
3151 if (is_memset)
3152 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3153 else
3154 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3156 curr_range_str = strategy_str;
3160 int maxs;
3161 char alg_name[128];
3162 char align[16];
3163 next_range_str = strchr (curr_range_str, ',');
3164 if (next_range_str)
3165 *next_range_str++ = '\0';
3167 if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs,
3168 align) != 3)
3170 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3171 return;
3174 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3176 error ("size ranges of option %qs should be increasing", opt);
3177 return;
3180 for (i = 0; i < last_alg; i++)
3181 if (!strcmp (alg_name, stringop_alg_names[i]))
3182 break;
3184 if (i == last_alg)
3186 error ("wrong strategy name %qs specified for option %qs",
3187 alg_name, opt);
3189 auto_vec <const char *> candidates;
3190 for (i = 0; i < last_alg; i++)
3191 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3192 candidates.safe_push (stringop_alg_names[i]);
3194 char *s;
3195 const char *hint
3196 = candidates_list_and_hint (alg_name, s, candidates);
3197 if (hint)
3198 inform (input_location,
3199 "valid arguments to %qs are: %s; did you mean %qs?",
3200 opt, s, hint);
3201 else
3202 inform (input_location, "valid arguments to %qs are: %s",
3203 opt, s);
3204 XDELETEVEC (s);
3205 return;
3208 if ((stringop_alg) i == rep_prefix_8_byte
3209 && !TARGET_64BIT)
3211 /* rep; movq isn't available in 32-bit code. */
3212 error ("strategy name %qs specified for option %qs "
3213 "not supported for 32-bit code", alg_name, opt);
3214 return;
3217 input_ranges[n].max = maxs;
3218 input_ranges[n].alg = (stringop_alg) i;
3219 if (!strcmp (align, "align"))
3220 input_ranges[n].noalign = false;
3221 else if (!strcmp (align, "noalign"))
3222 input_ranges[n].noalign = true;
3223 else
3225 error ("unknown alignment %qs specified for option %qs", align, opt);
3226 return;
3228 n++;
3229 curr_range_str = next_range_str;
3231 while (curr_range_str);
3233 if (input_ranges[n - 1].max != -1)
3235 error ("the max value for the last size range should be -1"
3236 " for option %qs", opt);
3237 return;
3240 if (n > MAX_STRINGOP_ALGS)
3242 error ("too many size ranges specified in option %qs", opt);
3243 return;
3246 /* Now override the default algs array. */
3247 for (i = 0; i < n; i++)
3249 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3250 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3251 = input_ranges[i].alg;
3252 *const_cast<int *>(&default_algs->size[i].noalign)
3253 = input_ranges[i].noalign;
3258 /* Parse the -mtune-ctrl= option. When DUMP is true,
3259 print the features that are explicitly set. */
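/* The accepted form is a comma-separated list of tuning feature names from
   ix86_tune_feature_names, each optionally prefixed with '^' to clear the
   feature instead of setting it, e.g. (with hypothetical feature names)
   -mtune-ctrl=feature_a,^feature_b.  */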
3261 static void
3262 parse_mtune_ctrl_str (bool dump)
3264 if (!ix86_tune_ctrl_string)
3265 return;
3267 char *next_feature_string = NULL;
3268 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3269 char *orig = curr_feature_string;
3270 int i;
3273 bool clear = false;
3275 next_feature_string = strchr (curr_feature_string, ',');
3276 if (next_feature_string)
3277 *next_feature_string++ = '\0';
3278 if (*curr_feature_string == '^')
3280 curr_feature_string++;
3281 clear = true;
3283 for (i = 0; i < X86_TUNE_LAST; i++)
3285 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3287 ix86_tune_features[i] = !clear;
3288 if (dump)
3289 fprintf (stderr, "Explicitly %s feature %s\n",
3290 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3291 break;
3294 if (i == X86_TUNE_LAST)
3295 error ("unknown parameter to option -mtune-ctrl: %s",
3296 clear ? curr_feature_string - 1 : curr_feature_string);
3297 curr_feature_string = next_feature_string;
3299 while (curr_feature_string);
3300 free (orig);
3303 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3304 processor type. */
3306 static void
3307 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3309 unsigned int ix86_tune_mask = 1u << ix86_tune;
3310 int i;
3312 for (i = 0; i < X86_TUNE_LAST; ++i)
3314 if (ix86_tune_no_default)
3315 ix86_tune_features[i] = 0;
3316 else
3317 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3320 if (dump)
3322 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3323 for (i = 0; i < X86_TUNE_LAST; i++)
3324 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3325 ix86_tune_features[i] ? "on" : "off");
3328 parse_mtune_ctrl_str (dump);
3332 /* Default align_* from the processor table. */
3334 static void
3335 ix86_default_align (struct gcc_options *opts)
3337 if (opts->x_align_loops == 0)
3339 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3340 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3342 if (opts->x_align_jumps == 0)
3344 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3345 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3347 if (opts->x_align_functions == 0)
3349 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3353 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3355 static void
3356 ix86_override_options_after_change (void)
3358 ix86_default_align (&global_options);
3361 /* Override various settings based on options. If MAIN_ARGS_P, the
3362 options are from the command line, otherwise they are from
3363 attributes. Return true if there's an error related to the
3364 -march option. */
3366 static bool
3367 ix86_option_override_internal (bool main_args_p,
3368 struct gcc_options *opts,
3369 struct gcc_options *opts_set)
3371 int i;
3372 unsigned int ix86_arch_mask;
3373 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3375 const wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0);
3376 const wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1);
3377 const wide_int_bitmask PTA_64BIT (HOST_WIDE_INT_1U << 2);
3378 const wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3);
3379 const wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4);
3380 const wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5);
3381 const wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6);
3382 const wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7);
3383 const wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8);
3384 const wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9);
3385 const wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10);
3386 const wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11);
3387 const wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12);
3388 const wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13);
3389 const wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14);
3390 const wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15);
3391 const wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16);
3392 const wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17);
3393 const wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18);
3394 const wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19);
3395 const wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20);
3396 const wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21);
3397 const wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22);
3398 const wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23);
3399 const wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24);
3400 const wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25);
3401 const wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26);
3402 const wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27);
3403 const wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28);
3404 const wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29);
3405 const wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30);
3406 const wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 31);
3407 const wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32);
3408 const wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33);
3409 const wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34);
3410 const wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35);
3411 const wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36);
3412 const wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37);
3413 const wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38);
3414 const wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39);
3415 const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40);
3416 const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41);
3417 const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42);
3418 const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43);
3419 const wide_int_bitmask PTA_MPX (HOST_WIDE_INT_1U << 44);
3420 const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45);
3421 const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46);
3422 const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47);
3423 const wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48);
3424 const wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49);
3425 const wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50);
3426 const wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51);
3427 const wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52);
3428 const wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53);
3429 const wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54);
3430 const wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55);
3431 const wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56);
3432 const wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57);
3433 const wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58);
3434 const wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59);
3435 const wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60);
3436 const wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61);
3437 const wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62);
3438 const wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63);
3439 const wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U);
3440 const wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1);
3441 const wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2);
3442 const wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3);
3443 const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
3444 const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
3445 const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
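/* PTA bits 0-63 above use the single-argument wide_int_bitmask constructor;
   the entries from PTA_AVX512VNNI onward use the two-argument form, which
   (as used here) places the second argument in the upper 64 bits of the
   mask.  */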
3447 const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
3448 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
3449 const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2
3450 | PTA_POPCNT;
3451 const wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_AES | PTA_PCLMUL;
3452 const wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE
3453 | PTA_XSAVEOPT;
3454 const wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE
3455 | PTA_RDRND | PTA_F16C;
3456 const wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI
3457 | PTA_BMI2 | PTA_LZCNT | PTA_FMA | PTA_MOVBE | PTA_HLE;
3458 const wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_PRFCHW
3459 | PTA_RDSEED;
3460 const wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_CLFLUSHOPT
3461 | PTA_XSAVEC | PTA_XSAVES;
3462 const wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
3463 | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
3464 | PTA_CLWB;
3465 const wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
3466 | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
3467 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
3468 const wide_int_bitmask PTA_ICELAKE = PTA_CANNONLAKE | PTA_AVX512VNNI
3469 | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
3470 | PTA_RDPID;
3471 const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
3472 | PTA_AVX512F | PTA_AVX512CD;
3473 const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
3474 const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND;
3475 const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
3476 | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
3478 static struct pta
3480 const char *const name; /* processor name or nickname. */
3481 const enum processor_type processor;
3482 const enum attr_cpu schedule;
3483 const wide_int_bitmask flags;
3485 const processor_alias_table[] =
3487 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3488 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3489 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3490 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3491 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3492 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3493 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3494 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3495 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3496 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3497 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3498 PTA_MMX | PTA_SSE | PTA_FXSR},
3499 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3500 PTA_MMX | PTA_SSE | PTA_FXSR},
3501 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3502 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3503 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3504 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3505 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3506 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3507 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3508 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3509 PTA_MMX | PTA_SSE | PTA_FXSR},
3510 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3511 PTA_MMX | PTA_SSE | PTA_FXSR},
3512 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3513 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3514 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3515 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3516 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3517 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3518 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3519 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3520 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3521 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3522 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3523 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3524 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3525 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3526 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3527 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3528 PTA_SANDYBRIDGE},
3529 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3530 PTA_SANDYBRIDGE},
3531 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3532 PTA_IVYBRIDGE},
3533 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3534 PTA_IVYBRIDGE},
3535 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3536 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3537 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3538 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3539 {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
3540 PTA_SKYLAKE_AVX512},
3541 {"cannonlake", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL, PTA_CANNONLAKE},
3542 {"icelake", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL, PTA_ICELAKE},
3543 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3544 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3545 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3546 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3547 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3548 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3549 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3550 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3551 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3552 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3553 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3554 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3555 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3556 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3557 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3558 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3559 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3560 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3561 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3562 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3563 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3564 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3565 {"x86-64", PROCESSOR_K8, CPU_K8,
3566 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3567 {"eden-x2", PROCESSOR_K8, CPU_K8,
3568 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3569 {"nano", PROCESSOR_K8, CPU_K8,
3570 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3571 | PTA_SSSE3 | PTA_FXSR},
3572 {"nano-1000", PROCESSOR_K8, CPU_K8,
3573 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3574 | PTA_SSSE3 | PTA_FXSR},
3575 {"nano-2000", PROCESSOR_K8, CPU_K8,
3576 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3577 | PTA_SSSE3 | PTA_FXSR},
3578 {"nano-3000", PROCESSOR_K8, CPU_K8,
3579 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3580 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3581 {"nano-x2", PROCESSOR_K8, CPU_K8,
3582 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3583 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3584 {"eden-x4", PROCESSOR_K8, CPU_K8,
3585 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3586 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3587 {"nano-x4", PROCESSOR_K8, CPU_K8,
3588 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3589 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3590 {"k8", PROCESSOR_K8, CPU_K8,
3591 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3592 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3593 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3594 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3595 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3596 {"opteron", PROCESSOR_K8, CPU_K8,
3597 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3598 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3599 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3600 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3601 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3602 {"athlon64", PROCESSOR_K8, CPU_K8,
3603 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3604 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3605 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3606 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3607 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3608 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3609 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3610 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3611 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3612 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3613 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3614 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3615 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3616 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3617 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3618 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3619 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3620 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3621 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3622 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3623 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3624 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3625 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3626 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3627 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3628 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3629 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3630 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3631 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3632 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3633 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3634 | PTA_XSAVEOPT | PTA_FSGSBASE},
3635 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3636 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3637 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3638 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3639 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3640 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3641 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3642 | PTA_MOVBE | PTA_MWAITX},
3643 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3644 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3645 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3646 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3647 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3648 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3649 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3650 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3651 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3652 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3653 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3654 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3655 | PTA_FXSR | PTA_XSAVE},
3656 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3658 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3659 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3660 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3661 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3663 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3664 PTA_64BIT
3665 | PTA_HLE /* flags are only used for -march switch. */ },
3668 /* -mrecip options. */
3669 static struct
3671 const char *string; /* option name */
3672 unsigned int mask; /* mask bits to set */
3674 const recip_options[] =
3676 { "all", RECIP_MASK_ALL },
3677 { "none", RECIP_MASK_NONE },
3678 { "div", RECIP_MASK_DIV },
3679 { "sqrt", RECIP_MASK_SQRT },
3680 { "vec-div", RECIP_MASK_VEC_DIV },
3681 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3684 int const pta_size = ARRAY_SIZE (processor_alias_table);
3686 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3687 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3688 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3689 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3690 #ifdef TARGET_BI_ARCH
3691 else
3693 #if TARGET_BI_ARCH == 1
3694 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3695 is on and OPTION_MASK_ABI_X32 is off. We turn off
3696 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3697 -mx32. */
3698 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3699 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3700 #else
3701 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3702 on and OPTION_MASK_ABI_64 is off. We turn off
3703 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3704 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3705 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3706 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3707 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3708 #endif
3709 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3710 && TARGET_IAMCU_P (opts->x_target_flags))
3711 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3712 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3714 #endif
3716 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3718 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3719 OPTION_MASK_ABI_64 for TARGET_X32. */
3720 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3721 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3723 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3724 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3725 | OPTION_MASK_ABI_X32
3726 | OPTION_MASK_ABI_64);
3727 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3729 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3730 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3732 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3735 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3736 SUBTARGET_OVERRIDE_OPTIONS;
3737 #endif
3739 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3740 SUBSUBTARGET_OVERRIDE_OPTIONS;
3741 #endif
3743 /* -fPIC is the default for x86_64. */
3744 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3745 opts->x_flag_pic = 2;
3747 /* Need to check -mtune=generic first. */
3748 if (opts->x_ix86_tune_string)
3750 /* As special support for cross compilers we read -mtune=native
3751 as -mtune=generic. With native compilers we won't see the
3752 -mtune=native, as it was changed by the driver. */
3753 if (!strcmp (opts->x_ix86_tune_string, "native"))
3755 opts->x_ix86_tune_string = "generic";
3757 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3758 warning (OPT_Wdeprecated,
3759 main_args_p
3760 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3761 "or %<-mtune=generic%> instead as appropriate")
3762 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3763 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3764 " instead as appropriate"));
3766 else
3768 if (opts->x_ix86_arch_string)
3769 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3770 if (!opts->x_ix86_tune_string)
3772 opts->x_ix86_tune_string
3773 = processor_target_table[TARGET_CPU_DEFAULT].name;
3774 ix86_tune_defaulted = 1;
3777 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3778 or defaulted. We need to use a sensible tune option. */
3779 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3781 opts->x_ix86_tune_string = "generic";
3785 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3786 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3788 /* rep; movq isn't available in 32-bit code. */
3789 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3790 opts->x_ix86_stringop_alg = no_stringop;
3793 if (!opts->x_ix86_arch_string)
3794 opts->x_ix86_arch_string
3795 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3796 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3797 else
3798 ix86_arch_specified = 1;
3800 if (opts_set->x_ix86_pmode)
3802 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3803 && opts->x_ix86_pmode == PMODE_SI)
3804 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3805 && opts->x_ix86_pmode == PMODE_DI))
3806 error ("address mode %qs not supported in the %s bit mode",
3807 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3808 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3810 else
3811 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3812 ? PMODE_DI : PMODE_SI;
3814 if (!opts_set->x_ix86_abi)
3815 opts->x_ix86_abi = DEFAULT_ABI;
3817 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3818 error ("-mabi=ms not supported with X32 ABI");
3819 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3821 /* For targets using the MS ABI, enable ms-extensions if not
3822 explicitly turned off. For non-MS ABIs we turn off this
3823 option. */
3824 if (!opts_set->x_flag_ms_extensions)
3825 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3827 if (opts_set->x_ix86_cmodel)
3829 switch (opts->x_ix86_cmodel)
3831 case CM_SMALL:
3832 case CM_SMALL_PIC:
3833 if (opts->x_flag_pic)
3834 opts->x_ix86_cmodel = CM_SMALL_PIC;
3835 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3836 error ("code model %qs not supported in the %s bit mode",
3837 "small", "32");
3838 break;
3840 case CM_MEDIUM:
3841 case CM_MEDIUM_PIC:
3842 if (opts->x_flag_pic)
3843 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3844 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3845 error ("code model %qs not supported in the %s bit mode",
3846 "medium", "32");
3847 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3848 error ("code model %qs not supported in x32 mode",
3849 "medium");
3850 break;
3852 case CM_LARGE:
3853 case CM_LARGE_PIC:
3854 if (opts->x_flag_pic)
3855 opts->x_ix86_cmodel = CM_LARGE_PIC;
3856 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3857 error ("code model %qs not supported in the %s bit mode",
3858 "large", "32");
3859 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3860 error ("code model %qs not supported in x32 mode",
3861 "large");
3862 break;
3864 case CM_32:
3865 if (opts->x_flag_pic)
3866 error ("code model %s does not support PIC mode", "32");
3867 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3868 error ("code model %qs not supported in the %s bit mode",
3869 "32", "64");
3870 break;
3872 case CM_KERNEL:
3873 if (opts->x_flag_pic)
3875 error ("code model %s does not support PIC mode", "kernel");
3876 opts->x_ix86_cmodel = CM_32;
3878 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3879 error ("code model %qs not supported in the %s bit mode",
3880 "kernel", "32");
3881 break;
3883 default:
3884 gcc_unreachable ();
3887 else
3889 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3890 use of rip-relative addressing. This eliminates fixups that
3891 would otherwise be needed if this object is to be placed in a
3892 DLL, and is essentially just as efficient as direct addressing. */
3893 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3894 && (TARGET_RDOS || TARGET_PECOFF))
3895 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3896 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3897 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3898 else
3899 opts->x_ix86_cmodel = CM_32;
3901 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3903 error ("-masm=intel not supported in this configuration");
3904 opts->x_ix86_asm_dialect = ASM_ATT;
3906 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3907 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3908 sorry ("%i-bit mode not compiled in",
3909 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3911 for (i = 0; i < pta_size; i++)
3912 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3914 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3916 error (main_args_p
3917 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3918 "switch")
3919 : G_("%<generic%> CPU can be used only for "
3920 "%<target(\"tune=\")%> attribute"));
3921 return false;
3923 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3925 error (main_args_p
3926 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3927 "switch")
3928 : G_("%<intel%> CPU can be used only for "
3929 "%<target(\"tune=\")%> attribute"));
3930 return false;
3933 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3934 && !((processor_alias_table[i].flags & PTA_64BIT) != 0))
3936 error ("CPU you selected does not support x86-64 "
3937 "instruction set");
3938 return false;
3941 ix86_schedule = processor_alias_table[i].schedule;
3942 ix86_arch = processor_alias_table[i].processor;
3943 /* Default cpu tuning to the architecture. */
3944 ix86_tune = ix86_arch;
3946 if (((processor_alias_table[i].flags & PTA_MMX) != 0)
3947 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3948 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3949 if (((processor_alias_table[i].flags & PTA_3DNOW) != 0)
3950 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3951 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3952 if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0)
3953 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3954 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3955 if (((processor_alias_table[i].flags & PTA_SSE) != 0)
3956 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3957 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3958 if (((processor_alias_table[i].flags & PTA_SSE2) != 0)
3959 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3960 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3961 if (((processor_alias_table[i].flags & PTA_SSE3) != 0)
3962 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3963 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3964 if (((processor_alias_table[i].flags & PTA_SSSE3) != 0)
3965 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3966 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3967 if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0)
3968 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3969 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3970 if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0)
3971 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3972 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3973 if (((processor_alias_table[i].flags & PTA_AVX) != 0)
3974 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3975 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3976 if (((processor_alias_table[i].flags & PTA_AVX2) != 0)
3977 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3978 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3979 if (((processor_alias_table[i].flags & PTA_FMA) != 0)
3980 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3981 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3982 if (((processor_alias_table[i].flags & PTA_SSE4A) != 0)
3983 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3984 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3985 if (((processor_alias_table[i].flags & PTA_FMA4) != 0)
3986 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3987 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3988 if (((processor_alias_table[i].flags & PTA_XOP) != 0)
3989 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3990 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3991 if (((processor_alias_table[i].flags & PTA_LWP) != 0)
3992 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3993 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3994 if (((processor_alias_table[i].flags & PTA_ABM) != 0)
3995 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3996 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3997 if (((processor_alias_table[i].flags & PTA_BMI) != 0)
3998 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3999 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4000 if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0)
4001 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4002 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4003 if (((processor_alias_table[i].flags & PTA_TBM) != 0)
4004 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4005 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4006 if (((processor_alias_table[i].flags & PTA_BMI2) != 0)
4007 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4008 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4009 if (((processor_alias_table[i].flags & PTA_CX16) != 0)
4010 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16))
4011 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16;
4012 if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0)
4013 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4014 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4015 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4016 && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0))
4017 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4018 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4019 if (((processor_alias_table[i].flags & PTA_MOVBE) != 0)
4020 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE))
4021 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE;
4022 if (((processor_alias_table[i].flags & PTA_AES) != 0)
4023 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4024 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
4025 if (((processor_alias_table[i].flags & PTA_SHA) != 0)
4026 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4027 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4028 if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0)
4029 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4030 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4031 if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0)
4032 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4033 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4034 if (((processor_alias_table[i].flags & PTA_RDRND) != 0)
4035 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4036 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4037 if (((processor_alias_table[i].flags & PTA_F16C) != 0)
4038 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4039 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4040 if (((processor_alias_table[i].flags & PTA_RTM) != 0)
4041 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4042 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4043 if (((processor_alias_table[i].flags & PTA_HLE) != 0)
4044 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE))
4045 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE;
4046 if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0)
4047 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4048 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4049 if (((processor_alias_table[i].flags & PTA_RDSEED) != 0)
4050 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4051 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4052 if (((processor_alias_table[i].flags & PTA_ADX) != 0)
4053 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4054 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4055 if (((processor_alias_table[i].flags & PTA_FXSR) != 0)
4056 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4057 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4058 if (((processor_alias_table[i].flags & PTA_XSAVE) != 0)
4059 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4060 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4061 if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0)
4062 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4063 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4064 if (((processor_alias_table[i].flags & PTA_AVX512F) != 0)
4065 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4066 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4067 if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0)
4068 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4069 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4070 if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0)
4071 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4072 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4073 if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0)
4074 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4075 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4076 if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0)
4077 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4078 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4079 if (((processor_alias_table[i].flags & PTA_CLWB) != 0)
4080 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4081 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4082 if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0)
4083 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4084 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4085 if (((processor_alias_table[i].flags & PTA_CLZERO) != 0)
4086 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO))
4087 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO;
4088 if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0)
4089 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4090 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4091 if (((processor_alias_table[i].flags & PTA_XSAVES) != 0)
4092 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4093 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4094 if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0)
4095 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4096 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4097 if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0)
4098 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4099 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4100 if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0)
4101 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4102 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4103 if (((processor_alias_table[i].flags & PTA_MPX) != 0)
4104 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
4105 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
4106 if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0)
4107 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4108 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4109 if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0)
4110 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4111 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4112 if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0)
4113 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI))
4114 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI;
4115 if (((processor_alias_table[i].flags & PTA_GFNI) != 0)
4116 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI))
4117 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI;
4118 if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0)
4119 && !(opts->x_ix86_isa_flags_explicit
4120 & OPTION_MASK_ISA_AVX512VBMI2))
4121 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2;
4122 if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0)
4123 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ))
4124 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ;
4125 if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0)
4126 && !(opts->x_ix86_isa_flags_explicit
4127 & OPTION_MASK_ISA_AVX512BITALG))
4128 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG;
4130 if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0)
4131 && !(opts->x_ix86_isa_flags2_explicit
4132 & OPTION_MASK_ISA_AVX5124VNNIW))
4133 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
4134 if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0)
4135 && !(opts->x_ix86_isa_flags2_explicit
4136 & OPTION_MASK_ISA_AVX5124FMAPS))
4137 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
4138 if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0)
4139 && !(opts->x_ix86_isa_flags_explicit
4140 & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
4141 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
4142 if (((processor_alias_table[i].flags & PTA_SGX) != 0)
4143 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
4144 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
4145 if (((processor_alias_table[i].flags & PTA_VAES) != 0)
4146 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES))
4147 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES;
4148 if (((processor_alias_table[i].flags & PTA_RDPID) != 0)
4149 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID))
4150 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID;
4152 if ((processor_alias_table[i].flags
4153 & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
4154 x86_prefetch_sse = true;
4155 if (((processor_alias_table[i].flags & PTA_MWAITX) != 0)
4156 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX))
4157 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX;
4158 if (((processor_alias_table[i].flags & PTA_PKU) != 0)
4159 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4160 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4162 /* Don't enable x87 instructions if only
4163 general registers are allowed. */
4164 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
4165 && !(opts_set->x_target_flags & MASK_80387))
4167 if (((processor_alias_table[i].flags & PTA_NO_80387) != 0))
4168 opts->x_target_flags &= ~MASK_80387;
4169 else
4170 opts->x_target_flags |= MASK_80387;
4172 break;
4175 if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4176 error ("Intel MPX does not support x32");
4181 if (i == pta_size)
4183 error (main_args_p
4184 ? G_("bad value (%qs) for %<-march=%> switch")
4185 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4186 opts->x_ix86_arch_string);
4188 auto_vec <const char *> candidates;
4189 for (i = 0; i < pta_size; i++)
4190 if (strcmp (processor_alias_table[i].name, "generic")
4191 && strcmp (processor_alias_table[i].name, "intel")
4192 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4193 || ((processor_alias_table[i].flags & PTA_64BIT) != 0)))
4194 candidates.safe_push (processor_alias_table[i].name);
4196 #ifdef HAVE_LOCAL_CPU_DETECT
4197 /* Also add "native" as a possible value. */
4198 candidates.safe_push ("native");
4199 #endif
4201 char *s;
4202 const char *hint
4203 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4204 if (hint)
4205 inform (input_location,
4206 main_args_p
4207 ? G_("valid arguments to %<-march=%> switch are: "
4208 "%s; did you mean %qs?")
4209 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4210 "%s; did you mean %qs?"), s, hint);
4211 else
4212 inform (input_location,
4213 main_args_p
4214 ? G_("valid arguments to %<-march=%> switch are: %s")
4215 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4216 "are: %s"), s);
4217 XDELETEVEC (s);
4220 ix86_arch_mask = 1u << ix86_arch;
4221 for (i = 0; i < X86_ARCH_LAST; ++i)
4222 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4224 for (i = 0; i < pta_size; i++)
4225 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4227 ix86_schedule = processor_alias_table[i].schedule;
4228 ix86_tune = processor_alias_table[i].processor;
4229 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4231 if (!((processor_alias_table[i].flags & PTA_64BIT) != 0))
4233 if (ix86_tune_defaulted)
4235 opts->x_ix86_tune_string = "x86-64";
4236 for (i = 0; i < pta_size; i++)
4237 if (! strcmp (opts->x_ix86_tune_string,
4238 processor_alias_table[i].name))
4239 break;
4240 ix86_schedule = processor_alias_table[i].schedule;
4241 ix86_tune = processor_alias_table[i].processor;
4243 else
4244 error ("CPU you selected does not support x86-64 "
4245 "instruction set");
4248 /* Intel CPUs have always interpreted SSE prefetch instructions as
4249 NOPs; so, we can enable SSE prefetch instructions even when
4250 -mtune (rather than -march) points us to a processor that has them.
4251 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4252 higher processors. */
4253 if (TARGET_CMOV
4254 && ((processor_alias_table[i].flags
4255 & (PTA_PREFETCH_SSE | PTA_SSE)) != 0))
4256 x86_prefetch_sse = true;
4257 break;
4260 if (ix86_tune_specified && i == pta_size)
4262 error (main_args_p
4263 ? G_("bad value (%qs) for %<-mtune=%> switch")
4264 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4265 opts->x_ix86_tune_string);
4267 auto_vec <const char *> candidates;
4268 for (i = 0; i < pta_size; i++)
4269 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4270 || ((processor_alias_table[i].flags & PTA_64BIT) != 0))
4271 candidates.safe_push (processor_alias_table[i].name);
4273 #ifdef HAVE_LOCAL_CPU_DETECT
4274 /* Also add "native" as a possible value. */
4275 candidates.safe_push ("native");
4276 #endif
4278 char *s;
4279 const char *hint
4280 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4281 if (hint)
4282 inform (input_location,
4283 main_args_p
4284 ? G_("valid arguments to %<-mtune=%> switch are: "
4285 "%s; did you mean %qs?")
4286 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4287 "%s; did you mean %qs?"), s, hint);
4288 else
4289 inform (input_location,
4290 main_args_p
4291 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4292 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4293 "are: %s"), s);
4294 XDELETEVEC (s);
4297 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4299 #ifndef USE_IX86_FRAME_POINTER
4300 #define USE_IX86_FRAME_POINTER 0
4301 #endif
4303 #ifndef USE_X86_64_FRAME_POINTER
4304 #define USE_X86_64_FRAME_POINTER 0
4305 #endif
4307 /* Set the default values for switches whose default depends on TARGET_64BIT
4308 in case they weren't overwritten by command line options. */
4309 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4311 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4312 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4313 if (opts->x_flag_asynchronous_unwind_tables
4314 && !opts_set->x_flag_unwind_tables
4315 && TARGET_64BIT_MS_ABI)
4316 opts->x_flag_unwind_tables = 1;
4317 if (opts->x_flag_asynchronous_unwind_tables == 2)
4318 opts->x_flag_unwind_tables
4319 = opts->x_flag_asynchronous_unwind_tables = 1;
4320 if (opts->x_flag_pcc_struct_return == 2)
4321 opts->x_flag_pcc_struct_return = 0;
4323 else
4325 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4326 opts->x_flag_omit_frame_pointer
4327 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4328 if (opts->x_flag_asynchronous_unwind_tables == 2)
4329 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4330 if (opts->x_flag_pcc_struct_return == 2)
4332 /* Intel MCU psABI specifies that -freg-struct-return should
4333 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4334 we check -miamcu so that -freg-struct-return is always
4335 turned on if -miamcu is used. */
4336 if (TARGET_IAMCU_P (opts->x_target_flags))
4337 opts->x_flag_pcc_struct_return = 0;
4338 else
4339 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4343 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4344 /* TODO: ix86_cost should be chosen at instruction or function granularity
4345 so that for cold code we use size_cost even in !optimize_size compilation. */
4346 if (opts->x_optimize_size)
4347 ix86_cost = &ix86_size_cost;
4348 else
4349 ix86_cost = ix86_tune_cost;
4351 /* Arrange to set up i386_stack_locals for all functions. */
4352 init_machine_status = ix86_init_machine_status;
4354 /* Validate -mregparm= value. */
4355 if (opts_set->x_ix86_regparm)
4357 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4358 warning (0, "-mregparm is ignored in 64-bit mode");
4359 else if (TARGET_IAMCU_P (opts->x_target_flags))
4360 warning (0, "-mregparm is ignored for Intel MCU psABI");
4361 if (opts->x_ix86_regparm > REGPARM_MAX)
4363 error ("-mregparm=%d is not between 0 and %d",
4364 opts->x_ix86_regparm, REGPARM_MAX);
4365 opts->x_ix86_regparm = 0;
4368 if (TARGET_IAMCU_P (opts->x_target_flags)
4369 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4370 opts->x_ix86_regparm = REGPARM_MAX;
4372 /* Default align_* from the processor table. */
4373 ix86_default_align (opts);
4375 /* Provide default for -mbranch-cost= value. */
4376 if (!opts_set->x_ix86_branch_cost)
4377 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4379 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4381 opts->x_target_flags
4382 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4384 /* Enable by default the SSE and MMX builtins. Do allow the user to
4385 explicitly disable any of these. In particular, disabling SSE and
4386 MMX for kernel code is extremely useful. */
4387 if (!ix86_arch_specified)
4388 opts->x_ix86_isa_flags
4389 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4390 | TARGET_SUBTARGET64_ISA_DEFAULT)
4391 & ~opts->x_ix86_isa_flags_explicit);
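  /* For example, a plain -m64 without -march thus gets MMX, SSE and SSE2
     enabled by default, while an explicit -mno-sse2 is still honored
     because explicitly set ISA bits are masked out above.  */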
4393 if (TARGET_RTD_P (opts->x_target_flags))
4394 warning (0,
4395 main_args_p
4396 ? G_("%<-mrtd%> is ignored in 64bit mode")
4397 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4399 else
4401 opts->x_target_flags
4402 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4404 if (!ix86_arch_specified)
4405 opts->x_ix86_isa_flags
4406 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4408 /* The i386 ABI does not specify a red zone. It still makes sense to use it
4409 when the programmer takes care to keep the stack from being destroyed. */
4410 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4411 opts->x_target_flags |= MASK_NO_RED_ZONE;
4414 /* Keep nonleaf frame pointers. */
4415 if (opts->x_flag_omit_frame_pointer)
4416 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4417 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4418 opts->x_flag_omit_frame_pointer = 1;
4420 /* If we're doing fast math, we don't care about comparison order
4421 wrt NaNs. This lets us use a shorter comparison sequence. */
4422 if (opts->x_flag_finite_math_only)
4423 opts->x_target_flags &= ~MASK_IEEE_FP;
4425 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4426 since the insns won't need emulation. */
4427 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4428 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4430 /* Likewise, if the target doesn't have a 387, or we've specified
4431 software floating point, don't use 387 inline intrinsics. */
4432 if (!TARGET_80387_P (opts->x_target_flags))
4433 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4435 /* Turn on MMX builtins for -msse. */
4436 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4437 opts->x_ix86_isa_flags
4438 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4440 /* Enable SSE prefetch. */
4441 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4442 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4443 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4444 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4445 x86_prefetch_sse = true;
4447 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4448 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4449 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4450 opts->x_ix86_isa_flags
4451 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4453 /* Enable lzcnt instruction for -mabm. */
4454 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4455 opts->x_ix86_isa_flags
4456 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4458 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4459 if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4460 opts->x_ix86_isa_flags
4461 &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4462 & ~opts->x_ix86_isa_flags_explicit);
4464 /* Validate -mpreferred-stack-boundary= value or default it to
4465 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4466 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4467 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4469 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4470 int max = TARGET_SEH ? 4 : 12;
4472 if (opts->x_ix86_preferred_stack_boundary_arg < min
4473 || opts->x_ix86_preferred_stack_boundary_arg > max)
4475 if (min == max)
4476 error ("-mpreferred-stack-boundary is not supported "
4477 "for this target");
4478 else
4479 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4480 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4482 else
4483 ix86_preferred_stack_boundary
4484 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
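  /* Worked example: -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte
     preferred stack alignment.  */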
4487 /* Set the default value for -mstackrealign. */
4488 if (!opts_set->x_ix86_force_align_arg_pointer)
4489 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4491 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4493 /* Validate -mincoming-stack-boundary= value or default it to
4494 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4495 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4496 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4498 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4500 if (opts->x_ix86_incoming_stack_boundary_arg < min
4501 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4502 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4503 opts->x_ix86_incoming_stack_boundary_arg, min);
4504 else
4506 ix86_user_incoming_stack_boundary
4507 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4508 ix86_incoming_stack_boundary
4509 = ix86_user_incoming_stack_boundary;
4513 #ifndef NO_PROFILE_COUNTERS
4514 if (flag_nop_mcount)
4515 error ("-mnop-mcount is not compatible with this target");
4516 #endif
4517 if (flag_nop_mcount && flag_pic)
4518 error ("-mnop-mcount is not implemented for -fPIC");
4520 /* Accept -msseregparm only if at least SSE support is enabled. */
4521 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4522 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4523 error (main_args_p
4524 ? G_("%<-msseregparm%> used without SSE enabled")
4525 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4527 if (opts_set->x_ix86_fpmath)
4529 if (opts->x_ix86_fpmath & FPMATH_SSE)
4531 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4533 if (TARGET_80387_P (opts->x_target_flags))
4535 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4536 opts->x_ix86_fpmath = FPMATH_387;
4539 else if ((opts->x_ix86_fpmath & FPMATH_387)
4540 && !TARGET_80387_P (opts->x_target_flags))
4542 warning (0, "387 instruction set disabled, using SSE arithmetics");
4543 opts->x_ix86_fpmath = FPMATH_SSE;
4547 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4548 -mfpmath=387. The latter is however the default on many targets, since the
4549 extra 80-bit precision of temporaries is considered to be part of the ABI.
4550 Overwrite the default at least for -ffast-math.
4551 TODO: -mfpmath=both seems to produce code of the same performance with
4552 slightly smaller binaries. It is however not clear if register allocation
4553 is ready for this setting.
4554 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4555 codegen. We may switch to 387 with -ffast-math for size optimized
4556 functions. */
4557 else if (fast_math_flags_set_p (&global_options)
4558 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4559 opts->x_ix86_fpmath = FPMATH_SSE;
4560 else
4561 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
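  /* Illustration: with -ffast-math and an SSE2-capable -march the default
     therefore becomes FPMATH_SSE; otherwise the target default applies,
     which is typically SSE in 64-bit mode and 387 for plain 32-bit
     configurations.  */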
4563 /* Use external vectorized library in vectorizing intrinsics. */
4564 if (opts_set->x_ix86_veclibabi_type)
4565 switch (opts->x_ix86_veclibabi_type)
4567 case ix86_veclibabi_type_svml:
4568 ix86_veclib_handler = ix86_veclibabi_svml;
4569 break;
4571 case ix86_veclibabi_type_acml:
4572 ix86_veclib_handler = ix86_veclibabi_acml;
4573 break;
4575 default:
4576 gcc_unreachable ();
4579 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4580 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4581 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4583 /* If stack probes are required, the space used for large function
4584 arguments on the stack must also be probed, so enable
4585 -maccumulate-outgoing-args so this happens in the prologue. */
4586 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4587 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4589 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4590 warning (0,
4591 main_args_p
4592 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4593 "for correctness")
4594 : G_("stack probing requires "
4595 "%<target(\"accumulate-outgoing-args\")%> for "
4596 "correctness"));
4597 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4600 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4601 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4602 if (fixed_regs[BP_REG]
4603 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4605 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4606 warning (0,
4607 main_args_p
4608 ? G_("fixed ebp register requires "
4609 "%<-maccumulate-outgoing-args%>")
4610 : G_("fixed ebp register requires "
4611 "%<target(\"accumulate-outgoing-args\")%>"));
4612 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4615 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4617 char *p;
4618 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4619 p = strchr (internal_label_prefix, 'X');
4620 internal_label_prefix_len = p - internal_label_prefix;
4621 *p = '\0';
4624 /* When a scheduling description is not available, disable the scheduler
4625 passes so they won't slow down the compilation and make x87 code slower. */
4626 if (!TARGET_SCHEDULE)
4627 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4629 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4630 ix86_tune_cost->simultaneous_prefetches,
4631 opts->x_param_values,
4632 opts_set->x_param_values);
4633 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4634 ix86_tune_cost->prefetch_block,
4635 opts->x_param_values,
4636 opts_set->x_param_values);
4637 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4638 ix86_tune_cost->l1_cache_size,
4639 opts->x_param_values,
4640 opts_set->x_param_values);
4641 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4642 ix86_tune_cost->l2_cache_size,
4643 opts->x_param_values,
4644 opts_set->x_param_values);
4646 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4647 if (opts->x_flag_prefetch_loop_arrays < 0
4648 && HAVE_prefetch
4649 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4650 && !opts->x_optimize_size
4651 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4652 opts->x_flag_prefetch_loop_arrays = 1;
4654 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4655 can be optimized to ap = __builtin_next_arg (0). */
4656 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4657 targetm.expand_builtin_va_start = NULL;
4659 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4661 ix86_gen_leave = gen_leave_rex64;
4662 if (Pmode == DImode)
4664 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4665 ix86_gen_tls_local_dynamic_base_64
4666 = gen_tls_local_dynamic_base_64_di;
4668 else
4670 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4671 ix86_gen_tls_local_dynamic_base_64
4672 = gen_tls_local_dynamic_base_64_si;
4675 else
4676 ix86_gen_leave = gen_leave;
4678 if (Pmode == DImode)
4680 ix86_gen_add3 = gen_adddi3;
4681 ix86_gen_sub3 = gen_subdi3;
4682 ix86_gen_sub3_carry = gen_subdi3_carry;
4683 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4684 ix86_gen_andsp = gen_anddi3;
4685 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4686 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4687 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4688 ix86_gen_monitor = gen_sse3_monitor_di;
4689 ix86_gen_monitorx = gen_monitorx_di;
4690 ix86_gen_clzero = gen_clzero_di;
4692 else
4694 ix86_gen_add3 = gen_addsi3;
4695 ix86_gen_sub3 = gen_subsi3;
4696 ix86_gen_sub3_carry = gen_subsi3_carry;
4697 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4698 ix86_gen_andsp = gen_andsi3;
4699 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4700 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4701 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4702 ix86_gen_monitor = gen_sse3_monitor_si;
4703 ix86_gen_monitorx = gen_monitorx_si;
4704 ix86_gen_clzero = gen_clzero_si;
4707 #ifdef USE_IX86_CLD
4708 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4709 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4710 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4711 #endif
4713 /* Set the default value for -mfentry. */
4714 if (!opts_set->x_flag_fentry)
4715 opts->x_flag_fentry = TARGET_SEH;
4716 else
4718 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4719 && opts->x_flag_fentry)
4720 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4721 "with -fpic");
4722 else if (TARGET_SEH && !opts->x_flag_fentry)
4723 sorry ("-mno-fentry isn%'t compatible with SEH");
4726 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4727 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4729 if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
4730 && TARGET_EMIT_VZEROUPPER)
4731 opts->x_target_flags |= MASK_VZEROUPPER;
4732 if (!(opts_set->x_target_flags & MASK_STV))
4733 opts->x_target_flags |= MASK_STV;
4734 /* Disable STV if -mpreferred-stack-boundary={2,3} or
4735 -mincoming-stack-boundary={2,3} or -mstackrealign is used - the needed
4736 stack realignment is an extra cost the pass doesn't take into
4737 account, and the pass can't realign the stack. */
4738 if (ix86_preferred_stack_boundary < 128
4739 || ix86_incoming_stack_boundary < 128
4740 || opts->x_ix86_force_align_arg_pointer)
4741 opts->x_target_flags &= ~MASK_STV;
4742 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4743 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4744 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4745 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4746 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4747 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4749 /* Enable 128-bit AVX instruction generation
4750 for the auto-vectorizer. */
4751 if (TARGET_AVX128_OPTIMAL
4752 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4753 opts->x_prefer_vector_width_type = PVW_AVX128;
4755 /* Use 256-bit AVX instruction generation
4756 in the auto-vectorizer. */
4757 if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL]
4758 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4759 opts->x_prefer_vector_width_type = PVW_AVX256;
4761 if (opts->x_ix86_recip_name)
4763 char *p = ASTRDUP (opts->x_ix86_recip_name);
4764 char *q;
4765 unsigned int mask, i;
4766 bool invert;
4768 while ((q = strtok (p, ",")) != NULL)
4770 p = NULL;
4771 if (*q == '!')
4773 invert = true;
4774 q++;
4776 else
4777 invert = false;
4779 if (!strcmp (q, "default"))
4780 mask = RECIP_MASK_ALL;
4781 else
4783 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4784 if (!strcmp (q, recip_options[i].string))
4786 mask = recip_options[i].mask;
4787 break;
4790 if (i == ARRAY_SIZE (recip_options))
4792 error ("unknown option for -mrecip=%s", q);
4793 invert = false;
4794 mask = RECIP_MASK_NONE;
4798 opts->x_recip_mask_explicit |= mask;
4799 if (invert)
4800 opts->x_recip_mask &= ~mask;
4801 else
4802 opts->x_recip_mask |= mask;
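  /* Worked example: -mrecip=all,!sqrt,!vec-sqrt first sets every bit of
     RECIP_MASK_ALL, then the '!' entries clear RECIP_MASK_SQRT and
     RECIP_MASK_VEC_SQRT again, leaving the reciprocal approximation
     enabled only for the div and vec-div forms.  */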
4806 if (TARGET_RECIP_P (opts->x_target_flags))
4807 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4808 else if (opts_set->x_target_flags & MASK_RECIP)
4809 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4811 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4812 for 64-bit Bionic. Also default long double to 64-bit for Intel
4813 MCU psABI. */
4814 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4815 && !(opts_set->x_target_flags
4816 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4817 opts->x_target_flags |= (TARGET_64BIT
4818 ? MASK_LONG_DOUBLE_128
4819 : MASK_LONG_DOUBLE_64);
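  /* For example, a 64-bit Bionic target without an explicit
     -mlong-double-* option therefore gets MASK_LONG_DOUBLE_128, while
     32-bit Bionic and Intel MCU targets get MASK_LONG_DOUBLE_64.  */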
4821 /* Only one of them can be active. */
4822 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4823 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4825 /* Handle stack protector */
4826 if (!opts_set->x_ix86_stack_protector_guard)
4827 opts->x_ix86_stack_protector_guard
4828 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4830 #ifdef TARGET_THREAD_SSP_OFFSET
4831 ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4832 #endif
4834 if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4836 char *endp;
4837 const char *str = ix86_stack_protector_guard_offset_str;
4839 errno = 0;
4840 int64_t offset;
4842 #if defined(INT64_T_IS_LONG)
4843 offset = strtol (str, &endp, 0);
4844 #else
4845 offset = strtoll (str, &endp, 0);
4846 #endif
4848 if (!*str || *endp || errno)
4849 error ("%qs is not a valid number "
4850 "in -mstack-protector-guard-offset=", str);
4852 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4853 HOST_WIDE_INT_C (0x7fffffff)))
4854 error ("%qs is not a valid offset "
4855 "in -mstack-protector-guard-offset=", str);
4857 ix86_stack_protector_guard_offset = offset;
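  /* For example, -mstack-protector-guard-offset=0x28 is parsed with
     strtol/strtoll (radix 0 auto-detects hex, octal or decimal) and
     accepted; a value with trailing junk or outside
     [-0x80000000, 0x7fffffff] is rejected above.  */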
4860 ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4862 /* The kernel uses a different segment register for performance
4863 reasons; a system call would not have to trash the userspace
4864 segment register, which would be expensive. */
4865 if (ix86_cmodel == CM_KERNEL)
4866 ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4868 if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4870 const char *str = ix86_stack_protector_guard_reg_str;
4871 addr_space_t seg = ADDR_SPACE_GENERIC;
4873 /* Discard optional register prefix. */
4874 if (str[0] == '%')
4875 str++;
4877 if (strlen (str) == 2 && str[1] == 's')
4879 if (str[0] == 'f')
4880 seg = ADDR_SPACE_SEG_FS;
4881 else if (str[0] == 'g')
4882 seg = ADDR_SPACE_SEG_GS;
4885 if (seg == ADDR_SPACE_GENERIC)
4886 error ("%qs is not a valid base register "
4887 "in -mstack-protector-guard-reg=",
4888 ix86_stack_protector_guard_reg_str);
4890 ix86_stack_protector_guard_reg = seg;
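  /* For example, both -mstack-protector-guard-reg=fs and
     -mstack-protector-guard-reg=%fs select ADDR_SPACE_SEG_FS, while any
     string other than "fs"/"gs" (optionally prefixed by '%') is rejected
     above.  */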
4893 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4894 if (opts->x_ix86_tune_memcpy_strategy)
4896 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4897 ix86_parse_stringop_strategy_string (str, false);
4898 free (str);
4901 if (opts->x_ix86_tune_memset_strategy)
4903 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4904 ix86_parse_stringop_strategy_string (str, true);
4905 free (str);
4908 /* Save the initial options in case the user later uses function-specific
4909 options. */
4910 if (main_args_p)
4911 target_option_default_node = target_option_current_node
4912 = build_target_option_node (opts);
4914 /* Do not support control flow instrumentation if CET is not enabled. */
4915 cf_protection_level cf_protection
4916 = (cf_protection_level) (opts->x_flag_cf_protection & ~CF_SET);
4917 if (cf_protection != CF_NONE)
4919 switch (cf_protection)
4921 case CF_BRANCH:
4922 if (! TARGET_IBT_P (opts->x_ix86_isa_flags2))
4924 error ("%<-fcf-protection=branch%> requires Intel CET "
4925 "support. Use -mcet or -mibt option to enable CET");
4926 flag_cf_protection = CF_NONE;
4927 return false;
4929 break;
4930 case CF_RETURN:
4931 if (! TARGET_SHSTK_P (opts->x_ix86_isa_flags))
4933 error ("%<-fcf-protection=return%> requires Intel CET "
4934 "support. Use -mcet or -mshstk option to enable CET");
4935 flag_cf_protection = CF_NONE;
4936 return false;
4938 break;
4939 case CF_FULL:
4940 if ( ! TARGET_IBT_P (opts->x_ix86_isa_flags2)
4941 || ! TARGET_SHSTK_P (opts->x_ix86_isa_flags))
4943 error ("%<-fcf-protection=full%> requires Intel CET "
4944 "support. Use -mcet or both of -mibt and "
4945 "-mshstk options to enable CET");
4946 flag_cf_protection = CF_NONE;
4947 return false;
4949 break;
4950 default:
4951 gcc_unreachable ();
4954 opts->x_flag_cf_protection =
4955 (cf_protection_level) (cf_protection | CF_SET);
4958 if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS])
4959 maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128,
4960 opts->x_param_values,
4961 opts_set->x_param_values);
4963 return true;
4966 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4968 static void
4969 ix86_option_override (void)
4971 ix86_option_override_internal (true, &global_options, &global_options_set);
4974 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4975 static char *
4976 ix86_offload_options (void)
4978 if (TARGET_LP64)
4979 return xstrdup ("-foffload-abi=lp64");
4980 return xstrdup ("-foffload-abi=ilp32");
4983 /* Update register usage after having seen the compiler flags. */
4985 static void
4986 ix86_conditional_register_usage (void)
4988 int i, c_mask;
4990 /* If there are no caller-saved registers, preserve all registers
4991 except fixed_regs and registers used for the function return value,
4992 since aggregate_value_p checks call_used_regs[regno] on the return
4993 value. */
4994 if (cfun && cfun->machine->no_caller_saved_registers)
4995 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4996 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4997 call_used_regs[i] = 0;
4999 /* For 32-bit targets, squash the REX registers. */
5000 if (! TARGET_64BIT)
5002 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5003 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5004 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5005 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5006 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5007 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5010 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5011 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5013 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5015 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5017 /* Set/reset conditionally defined registers from
5018 CALL_USED_REGISTERS initializer. */
5019 if (call_used_regs[i] > 1)
5020 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5022 /* Calculate registers of CLOBBERED_REGS register set
5023 as call used registers from GENERAL_REGS register set. */
5024 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5025 && call_used_regs[i])
5026 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5029 /* If MMX is disabled, squash the registers. */
5030 if (! TARGET_MMX)
5031 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5032 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5033 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5035 /* If SSE is disabled, squash the registers. */
5036 if (! TARGET_SSE)
5037 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5038 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5039 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5041 /* If the FPU is disabled, squash the registers. */
5042 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5043 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5044 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5045 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5047 /* If AVX512F is disabled, squash the registers. */
5048 if (! TARGET_AVX512F)
5050 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5051 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5053 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5054 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5057 /* If MPX is disabled, squash the registers. */
5058 if (! TARGET_MPX)
5059 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5060 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5063 /* Canonicalize a comparison from one we don't have to one we do have. */
5065 static void
5066 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5067 bool op0_preserve_value)
5069 /* The order of operands in an x87 ficom compare is forced by combine in
5070 the simplify_comparison () function. The float operator is treated as RTX_OBJ
5071 with precedence over other operators and is always put in the first
5072 place. Swap the condition and operands to match the ficom instruction. */
5073 if (!op0_preserve_value
5074 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
5076 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
5078 /* We are called only for compares that are split to SAHF instruction.
5079 Ensure that we have setcc/jcc insn for the swapped condition. */
5080 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
5082 std::swap (*op0, *op1);
5083 *code = (int) scode;
5088 /* Save the current options */
5090 static void
5091 ix86_function_specific_save (struct cl_target_option *ptr,
5092 struct gcc_options *opts)
5094 ptr->arch = ix86_arch;
5095 ptr->schedule = ix86_schedule;
5096 ptr->prefetch_sse = x86_prefetch_sse;
5097 ptr->tune = ix86_tune;
5098 ptr->branch_cost = ix86_branch_cost;
5099 ptr->tune_defaulted = ix86_tune_defaulted;
5100 ptr->arch_specified = ix86_arch_specified;
5101 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5102 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
5103 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5104 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5105 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5106 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5107 ptr->x_ix86_abi = opts->x_ix86_abi;
5108 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5109 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5110 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5111 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5112 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5113 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5114 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5115 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5116 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5117 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5118 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5119 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5120 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5121 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5122 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5123 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5124 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5125 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5126 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5127 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5129 /* The fields are char but the variables are not; make sure the
5130 values fit in the fields. */
5131 gcc_assert (ptr->arch == ix86_arch);
5132 gcc_assert (ptr->schedule == ix86_schedule);
5133 gcc_assert (ptr->tune == ix86_tune);
5134 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5137 /* Restore the current options */
5139 static void
5140 ix86_function_specific_restore (struct gcc_options *opts,
5141 struct cl_target_option *ptr)
5143 enum processor_type old_tune = ix86_tune;
5144 enum processor_type old_arch = ix86_arch;
5145 unsigned int ix86_arch_mask;
5146 int i;
5148 /* We don't change -fPIC. */
5149 opts->x_flag_pic = flag_pic;
5151 ix86_arch = (enum processor_type) ptr->arch;
5152 ix86_schedule = (enum attr_cpu) ptr->schedule;
5153 ix86_tune = (enum processor_type) ptr->tune;
5154 x86_prefetch_sse = ptr->prefetch_sse;
5155 opts->x_ix86_branch_cost = ptr->branch_cost;
5156 ix86_tune_defaulted = ptr->tune_defaulted;
5157 ix86_arch_specified = ptr->arch_specified;
5158 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5159 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
5160 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5161 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5162 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5163 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5164 opts->x_ix86_abi = ptr->x_ix86_abi;
5165 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5166 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5167 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5168 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5169 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5170 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5171 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5172 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5173 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5174 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5175 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5176 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5177 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5178 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5179 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5180 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5181 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5182 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5183 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5184 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5185 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5186 /* TODO: ix86_cost should be chosen at instruction or function granularity
5187 so that for cold code we use size_cost even in !optimize_size compilation. */
5188 if (opts->x_optimize_size)
5189 ix86_cost = &ix86_size_cost;
5190 else
5191 ix86_cost = ix86_tune_cost;
5193 /* Recreate the arch feature tests if the arch changed */
5194 if (old_arch != ix86_arch)
5196 ix86_arch_mask = 1u << ix86_arch;
5197 for (i = 0; i < X86_ARCH_LAST; ++i)
5198 ix86_arch_features[i]
5199 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5202 /* Recreate the tune optimization tests */
5203 if (old_tune != ix86_tune)
5204 set_ix86_tune_features (ix86_tune, false);
5207 /* Adjust target options after streaming them in. This is mainly about
5208 reconciling them with global options. */
5210 static void
5211 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5213 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5214 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5215 for PIC, or error out. */
5216 if (flag_pic)
5217 switch (ptr->x_ix86_cmodel)
5219 case CM_SMALL:
5220 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5221 break;
5223 case CM_MEDIUM:
5224 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5225 break;
5227 case CM_LARGE:
5228 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5229 break;
5231 case CM_KERNEL:
5232 error ("code model %s does not support PIC mode", "kernel");
5233 break;
5235 default:
5236 break;
5238 else
5239 switch (ptr->x_ix86_cmodel)
5241 case CM_SMALL_PIC:
5242 ptr->x_ix86_cmodel = CM_SMALL;
5243 break;
5245 case CM_MEDIUM_PIC:
5246 ptr->x_ix86_cmodel = CM_MEDIUM;
5247 break;
5249 case CM_LARGE_PIC:
5250 ptr->x_ix86_cmodel = CM_LARGE;
5251 break;
5253 default:
5254 break;
5258 /* Print the current options. */
5260 static void
5261 ix86_function_specific_print (FILE *file, int indent,
5262 struct cl_target_option *ptr)
5264 char *target_string
5265 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5266 ptr->x_target_flags, ptr->x_ix86_target_flags,
5267 NULL, NULL, ptr->x_ix86_fpmath, false);
5269 gcc_assert (ptr->arch < PROCESSOR_max);
5270 fprintf (file, "%*sarch = %d (%s)\n",
5271 indent, "",
5272 ptr->arch, processor_target_table[ptr->arch].name);
5274 gcc_assert (ptr->tune < PROCESSOR_max);
5275 fprintf (file, "%*stune = %d (%s)\n",
5276 indent, "",
5277 ptr->tune, processor_target_table[ptr->tune].name);
5279 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5281 if (target_string)
5283 fprintf (file, "%*s%s\n", indent, "", target_string);
5284 free (target_string);
5289 /* Inner function to process the attribute((target(...))), take an argument and
5290 set the current options from the argument. If we have a list, recursively go
5291 over the list. */
5293 static bool
5294 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5295 struct gcc_options *opts,
5296 struct gcc_options *opts_set,
5297 struct gcc_options *enum_opts_set)
5299 char *next_optstr;
5300 bool ret = true;
5302 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5303 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5304 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5305 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5306 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5308 enum ix86_opt_type
5310 ix86_opt_unknown,
5311 ix86_opt_yes,
5312 ix86_opt_no,
5313 ix86_opt_str,
5314 ix86_opt_enum,
5315 ix86_opt_isa
5318 static const struct
5320 const char *string;
5321 size_t len;
5322 enum ix86_opt_type type;
5323 int opt;
5324 int mask;
5325 } attrs[] = {
5326 /* isa options */
5327 IX86_ATTR_ISA ("sgx", OPT_msgx),
5328 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5329 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5330 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5331 IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
5332 IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
5333 IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
5335 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5336 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5337 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5338 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5339 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5340 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5341 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5342 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5343 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5344 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5345 IX86_ATTR_ISA ("fma", OPT_mfma),
5346 IX86_ATTR_ISA ("xop", OPT_mxop),
5347 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5348 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5349 IX86_ATTR_ISA ("avx", OPT_mavx),
5350 IX86_ATTR_ISA ("sse4", OPT_msse4),
5351 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5352 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5353 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5354 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5355 IX86_ATTR_ISA ("sse3", OPT_msse3),
5356 IX86_ATTR_ISA ("aes", OPT_maes),
5357 IX86_ATTR_ISA ("sha", OPT_msha),
5358 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5359 IX86_ATTR_ISA ("sse2", OPT_msse2),
5360 IX86_ATTR_ISA ("sse", OPT_msse),
5361 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5362 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5363 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5364 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5365 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5366 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5367 IX86_ATTR_ISA ("adx", OPT_madx),
5368 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5369 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5370 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5371 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5372 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5373 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5374 IX86_ATTR_ISA ("abm", OPT_mabm),
5375 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5376 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5377 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5378 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5379 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5380 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5381 IX86_ATTR_ISA ("sahf", OPT_msahf),
5382 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5383 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5384 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5385 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5386 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5387 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5388 IX86_ATTR_ISA ("pku", OPT_mpku),
5389 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5390 IX86_ATTR_ISA ("hle", OPT_mhle),
5391 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5392 IX86_ATTR_ISA ("mpx", OPT_mmpx),
5393 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5394 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5395 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5396 IX86_ATTR_ISA ("ibt", OPT_mibt),
5397 IX86_ATTR_ISA ("shstk", OPT_mshstk),
5398 IX86_ATTR_ISA ("vaes", OPT_mvaes),
5399 IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
5401 /* enum options */
5402 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5404 /* string options */
5405 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5406 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5408 /* flag options */
5409 IX86_ATTR_YES ("cld",
5410 OPT_mcld,
5411 MASK_CLD),
5413 IX86_ATTR_NO ("fancy-math-387",
5414 OPT_mfancy_math_387,
5415 MASK_NO_FANCY_MATH_387),
5417 IX86_ATTR_YES ("ieee-fp",
5418 OPT_mieee_fp,
5419 MASK_IEEE_FP),
5421 IX86_ATTR_YES ("inline-all-stringops",
5422 OPT_minline_all_stringops,
5423 MASK_INLINE_ALL_STRINGOPS),
5425 IX86_ATTR_YES ("inline-stringops-dynamically",
5426 OPT_minline_stringops_dynamically,
5427 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5429 IX86_ATTR_NO ("align-stringops",
5430 OPT_mno_align_stringops,
5431 MASK_NO_ALIGN_STRINGOPS),
5433 IX86_ATTR_YES ("recip",
5434 OPT_mrecip,
5435 MASK_RECIP),
5439 /* If this is a list, recurse to get the options. */
5440 if (TREE_CODE (args) == TREE_LIST)
5442 bool ret = true;
5444 for (; args; args = TREE_CHAIN (args))
5445 if (TREE_VALUE (args)
5446 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5447 p_strings, opts, opts_set,
5448 enum_opts_set))
5449 ret = false;
5451 return ret;
5454 else if (TREE_CODE (args) != STRING_CST)
5456 error ("attribute %<target%> argument not a string");
5457 return false;
5460 /* Handle multiple arguments separated by commas. */
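/* Illustrative example (hypothetical, not from the original sources):
   an attribute string such as

     __attribute__ ((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")))

   is split at the commas below; a leading "no-" clears an ISA option,
   "arch=" and "tune=" are handled as string options, and "fpmath="
   as an enum option, all driven by the attrs table above. */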
5461 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5463 while (next_optstr && *next_optstr != '\0')
5465 char *p = next_optstr;
5466 char *orig_p = p;
5467 char *comma = strchr (next_optstr, ',');
5468 const char *opt_string;
5469 size_t len, opt_len;
5470 int opt;
5471 bool opt_set_p;
5472 char ch;
5473 unsigned i;
5474 enum ix86_opt_type type = ix86_opt_unknown;
5475 int mask = 0;
5477 if (comma)
5479 *comma = '\0';
5480 len = comma - next_optstr;
5481 next_optstr = comma + 1;
5483 else
5485 len = strlen (p);
5486 next_optstr = NULL;
5489 /* Recognize no-xxx. */
5490 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5492 opt_set_p = false;
5493 p += 3;
5494 len -= 3;
5496 else
5497 opt_set_p = true;
5499 /* Find the option. */
5500 ch = *p;
5501 opt = N_OPTS;
5502 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5504 type = attrs[i].type;
5505 opt_len = attrs[i].len;
5506 if (ch == attrs[i].string[0]
5507 && ((type != ix86_opt_str && type != ix86_opt_enum)
5508 ? len == opt_len
5509 : len > opt_len)
5510 && memcmp (p, attrs[i].string, opt_len) == 0)
5512 opt = attrs[i].opt;
5513 mask = attrs[i].mask;
5514 opt_string = attrs[i].string;
5515 break;
5519 /* Process the option. */
5520 if (opt == N_OPTS)
5522 error ("attribute(target(\"%s\")) is unknown", orig_p);
5523 ret = false;
5526 else if (type == ix86_opt_isa)
5528 struct cl_decoded_option decoded;
5530 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5531 ix86_handle_option (opts, opts_set,
5532 &decoded, input_location);
5535 else if (type == ix86_opt_yes || type == ix86_opt_no)
5537 if (type == ix86_opt_no)
5538 opt_set_p = !opt_set_p;
5540 if (opt_set_p)
5541 opts->x_target_flags |= mask;
5542 else
5543 opts->x_target_flags &= ~mask;
5546 else if (type == ix86_opt_str)
5548 if (p_strings[opt])
5550 error ("option(\"%s\") was already specified", opt_string);
5551 ret = false;
5553 else
5554 p_strings[opt] = xstrdup (p + opt_len);
5557 else if (type == ix86_opt_enum)
5559 bool arg_ok;
5560 int value;
5562 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5563 if (arg_ok)
5564 set_option (opts, enum_opts_set, opt, value,
5565 p + opt_len, DK_UNSPECIFIED, input_location,
5566 global_dc);
5567 else
5569 error ("attribute(target(\"%s\")) is unknown", orig_p);
5570 ret = false;
5574 else
5575 gcc_unreachable ();
5578 return ret;
5581 /* Release allocated strings. */
5582 static void
5583 release_options_strings (char **option_strings)
5585 /* Free up the memory allocated to hold the strings. */
5586 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5587 free (option_strings[i]);
5590 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5592 tree
5593 ix86_valid_target_attribute_tree (tree args,
5594 struct gcc_options *opts,
5595 struct gcc_options *opts_set)
5597 const char *orig_arch_string = opts->x_ix86_arch_string;
5598 const char *orig_tune_string = opts->x_ix86_tune_string;
5599 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5600 int orig_tune_defaulted = ix86_tune_defaulted;
5601 int orig_arch_specified = ix86_arch_specified;
5602 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5603 tree t = NULL_TREE;
5604 struct cl_target_option *def
5605 = TREE_TARGET_OPTION (target_option_default_node);
5606 struct gcc_options enum_opts_set;
5608 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5610 /* Process each of the options on the chain. */
5611 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5612 opts_set, &enum_opts_set))
5613 return error_mark_node;
5615 /* If the changed options are different from the default, rerun
5616 ix86_option_override_internal, and then save the options away.
5617 The string options are attribute options, and will be undone
5618 when we copy the save structure. */
5619 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5620 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5621 || opts->x_target_flags != def->x_target_flags
5622 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5623 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5624 || enum_opts_set.x_ix86_fpmath)
5626 /* If we are using the default tune= or arch=, undo the string assigned,
5627 and use the default. */
5628 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5630 opts->x_ix86_arch_string
5631 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5633 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5634 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5635 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5636 | OPTION_MASK_ABI_64
5637 | OPTION_MASK_ABI_X32
5638 | OPTION_MASK_CODE16);
5639 opts->x_ix86_isa_flags2 = 0;
5641 else if (!orig_arch_specified)
5642 opts->x_ix86_arch_string = NULL;
5644 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5645 opts->x_ix86_tune_string
5646 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5647 else if (orig_tune_defaulted)
5648 opts->x_ix86_tune_string = NULL;
5650 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5651 if (enum_opts_set.x_ix86_fpmath)
5652 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5654 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5655 bool r = ix86_option_override_internal (false, opts, opts_set);
5656 if (!r)
5658 release_options_strings (option_strings);
5659 return error_mark_node;
5662 /* Add any builtin functions with the new isa if any. */
5663 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5665 /* Save the current options unless we are validating options for
5666 #pragma. */
5667 t = build_target_option_node (opts);
5669 opts->x_ix86_arch_string = orig_arch_string;
5670 opts->x_ix86_tune_string = orig_tune_string;
5671 opts_set->x_ix86_fpmath = orig_fpmath_set;
5673 release_options_strings (option_strings);
5676 return t;
5679 /* Hook to validate attribute((target("string"))). */
5681 static bool
5682 ix86_valid_target_attribute_p (tree fndecl,
5683 tree ARG_UNUSED (name),
5684 tree args,
5685 int ARG_UNUSED (flags))
5687 struct gcc_options func_options;
5688 tree new_target, new_optimize;
5689 bool ret = true;
5691 /* attribute((target("default"))) does nothing, beyond
5692 affecting multi-versioning. */
5693 if (TREE_VALUE (args)
5694 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5695 && TREE_CHAIN (args) == NULL_TREE
5696 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5697 return true;
5699 tree old_optimize = build_optimization_node (&global_options);
5701 /* Get the optimization options of the current function. */
5702 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5704 if (!func_optimize)
5705 func_optimize = old_optimize;
5707 /* Init func_options. */
5708 memset (&func_options, 0, sizeof (func_options));
5709 init_options_struct (&func_options, NULL);
5710 lang_hooks.init_options_struct (&func_options);
5712 cl_optimization_restore (&func_options,
5713 TREE_OPTIMIZATION (func_optimize));
5715 /* Initialize func_options to the default before its target options can
5716 be set. */
5717 cl_target_option_restore (&func_options,
5718 TREE_TARGET_OPTION (target_option_default_node));
5720 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5721 &global_options_set);
5723 new_optimize = build_optimization_node (&func_options);
5725 if (new_target == error_mark_node)
5726 ret = false;
5728 else if (fndecl && new_target)
5730 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5732 if (old_optimize != new_optimize)
5733 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5736 finalize_options_struct (&func_options);
5738 return ret;
5742 /* Hook to determine if one function can safely inline another. */
5744 static bool
5745 ix86_can_inline_p (tree caller, tree callee)
5747 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5748 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5749 if (!callee_tree)
5750 callee_tree = target_option_default_node;
5751 if (!caller_tree)
5752 caller_tree = target_option_default_node;
5753 if (callee_tree == caller_tree)
5754 return true;
5756 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5757 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5758 bool ret = false;
5760 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5761 function can inline an SSE2 function but an SSE2 function can't inline
5762 an SSE4 function. */
5763 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5764 != callee_opts->x_ix86_isa_flags)
5765 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5766 != callee_opts->x_ix86_isa_flags2))
5767 ret = false;
5769 /* See if we have the same non-isa options. */
5770 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5771 ret = false;
5773 /* See if arch, tune, etc. are the same. */
5774 else if (caller_opts->arch != callee_opts->arch)
5775 ret = false;
5777 else if (caller_opts->tune != callee_opts->tune)
5778 ret = false;
5780 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5781 /* If the callee doesn't use FP expressions, differences in
5782 ix86_fpmath can be ignored. We are called from FEs
5783 for multi-versioning call optimization, so beware of
5784 ipa_fn_summaries not being available. */
5785 && (! ipa_fn_summaries
5786 || ipa_fn_summaries->get
5787 (cgraph_node::get (callee))->fp_expressions))
5788 ret = false;
5790 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5791 ret = false;
5793 else
5794 ret = true;
5796 return ret;
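/* Illustrative example (hypothetical, not from the original sources):
   a caller compiled with __attribute__ ((target ("avx2"))) may inline
   a callee compiled with __attribute__ ((target ("sse4.2"))), since the
   callee's ISA flags form a subset of the caller's; the reverse
   combination is rejected by the subset test above. */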
5800 /* Remember the last target of ix86_set_current_function. */
5801 static GTY(()) tree ix86_previous_fndecl;
5803 /* Set target globals to the default (or current #pragma GCC target
5804 if active). Invalidate the ix86_previous_fndecl cache. */
5806 void
5807 ix86_reset_previous_fndecl (void)
5809 tree new_tree = target_option_current_node;
5810 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5811 if (TREE_TARGET_GLOBALS (new_tree))
5812 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5813 else if (new_tree == target_option_default_node)
5814 restore_target_globals (&default_target_globals);
5815 else
5816 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5817 ix86_previous_fndecl = NULL_TREE;
5820 /* Set the func_type field from the function FNDECL. */
5822 static void
5823 ix86_set_func_type (tree fndecl)
5825 if (cfun->machine->func_type == TYPE_UNKNOWN)
5827 if (lookup_attribute ("interrupt",
5828 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5830 if (ix86_function_naked (fndecl))
5831 error_at (DECL_SOURCE_LOCATION (fndecl),
5832 "interrupt and naked attributes are not compatible");
5834 int nargs = 0;
5835 for (tree arg = DECL_ARGUMENTS (fndecl);
5836 arg;
5837 arg = TREE_CHAIN (arg))
5838 nargs++;
5839 cfun->machine->no_caller_saved_registers = true;
5840 cfun->machine->func_type
5841 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5843 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5845 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
5846 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5847 sorry ("Only DWARF debug format is supported for interrupt "
5848 "service routine.");
5850 else
5852 cfun->machine->func_type = TYPE_NORMAL;
5853 if (lookup_attribute ("no_caller_saved_registers",
5854 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5855 cfun->machine->no_caller_saved_registers = true;
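/* Illustrative example (hypothetical, not from the original sources):

     struct interrupt_frame;
     __attribute__ ((interrupt))
     void isr (struct interrupt_frame *frame);            -> TYPE_INTERRUPT
     __attribute__ ((interrupt))
     void fault (struct interrupt_frame *frame,
                 unsigned long error_code);                -> TYPE_EXCEPTION

   One argument (the frame pointer) selects TYPE_INTERRUPT; two arguments
   (the frame plus an error code) select TYPE_EXCEPTION, matching the
   nargs count above. */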
5860 /* Set the indirect_branch_type field from the function FNDECL. */
5862 static void
5863 ix86_set_indirect_branch_type (tree fndecl)
5865 if (cfun->machine->indirect_branch_type == indirect_branch_unset)
5867 tree attr = lookup_attribute ("indirect_branch",
5868 DECL_ATTRIBUTES (fndecl));
5869 if (attr != NULL)
5871 tree args = TREE_VALUE (attr);
5872 if (args == NULL)
5873 gcc_unreachable ();
5874 tree cst = TREE_VALUE (args);
5875 if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5876 cfun->machine->indirect_branch_type = indirect_branch_keep;
5877 else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5878 cfun->machine->indirect_branch_type = indirect_branch_thunk;
5879 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5880 cfun->machine->indirect_branch_type = indirect_branch_thunk_inline;
5881 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5882 cfun->machine->indirect_branch_type = indirect_branch_thunk_extern;
5883 else
5884 gcc_unreachable ();
5886 else
5887 cfun->machine->indirect_branch_type = ix86_indirect_branch;
5889 /* -mcmodel=large is not compatible with -mindirect-branch=thunk
5890 nor -mindirect-branch=thunk-extern. */
5891 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5892 && ((cfun->machine->indirect_branch_type
5893 == indirect_branch_thunk_extern)
5894 || (cfun->machine->indirect_branch_type
5895 == indirect_branch_thunk)))
5896 error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not "
5897 "compatible",
5898 ((cfun->machine->indirect_branch_type
5899 == indirect_branch_thunk_extern)
5900 ? "thunk-extern" : "thunk"));
5902 /* -mindirect-branch=thunk-extern, -fcf-protection=branch and
5903 -fcheck-pointer-bounds are not compatible. */
5904 if ((cfun->machine->indirect_branch_type
5905 == indirect_branch_thunk_extern)
5906 && flag_check_pointer_bounds
5907 && (flag_cf_protection & CF_BRANCH) != 0)
5908 error ("%<-mindirect-branch=thunk-extern%>, "
5909 "%<-fcf-protection=branch%> and "
5910 "%<-fcheck-pointer-bounds%> are not compatible");
5913 if (cfun->machine->function_return_type == indirect_branch_unset)
5915 tree attr = lookup_attribute ("function_return",
5916 DECL_ATTRIBUTES (fndecl));
5917 if (attr != NULL)
5919 tree args = TREE_VALUE (attr);
5920 if (args == NULL)
5921 gcc_unreachable ();
5922 tree cst = TREE_VALUE (args);
5923 if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5924 cfun->machine->function_return_type = indirect_branch_keep;
5925 else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5926 cfun->machine->function_return_type = indirect_branch_thunk;
5927 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5928 cfun->machine->function_return_type = indirect_branch_thunk_inline;
5929 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5930 cfun->machine->function_return_type = indirect_branch_thunk_extern;
5931 else
5932 gcc_unreachable ();
5934 else
5935 cfun->machine->function_return_type = ix86_function_return;
5937 /* -mcmodel=large is not compatible with -mfunction-return=thunk
5938 nor -mfunction-return=thunk-extern. */
5939 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5940 && ((cfun->machine->function_return_type
5941 == indirect_branch_thunk_extern)
5942 || (cfun->machine->function_return_type
5943 == indirect_branch_thunk)))
5944 error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not "
5945 "compatible",
5946 ((cfun->machine->function_return_type
5947 == indirect_branch_thunk_extern)
5948 ? "thunk-extern" : "thunk"));
5952 /* Establish appropriate back-end context for processing the function
5953 FNDECL. The argument might be NULL to indicate processing at top
5954 level, outside of any function scope. */
5955 static void
5956 ix86_set_current_function (tree fndecl)
5958 /* Only change the context if the function changes. This hook is called
5959 several times in the course of compiling a function, and we don't want to
5960 slow things down too much or call target_reinit when it isn't safe. */
5961 if (fndecl == ix86_previous_fndecl)
5963 /* There may be 2 function bodies for the same function FNDECL,
5964 one is extern inline and one isn't. Call ix86_set_func_type
5965 to set the func_type field. */
5966 if (fndecl != NULL_TREE)
5968 ix86_set_func_type (fndecl);
5969 ix86_set_indirect_branch_type (fndecl);
5971 return;
5974 tree old_tree;
5975 if (ix86_previous_fndecl == NULL_TREE)
5976 old_tree = target_option_current_node;
5977 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5978 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5979 else
5980 old_tree = target_option_default_node;
5982 if (fndecl == NULL_TREE)
5984 if (old_tree != target_option_current_node)
5985 ix86_reset_previous_fndecl ();
5986 return;
5989 ix86_set_func_type (fndecl);
5990 ix86_set_indirect_branch_type (fndecl);
5992 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5993 if (new_tree == NULL_TREE)
5994 new_tree = target_option_default_node;
5996 if (old_tree != new_tree)
5998 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5999 if (TREE_TARGET_GLOBALS (new_tree))
6000 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6001 else if (new_tree == target_option_default_node)
6002 restore_target_globals (&default_target_globals);
6003 else
6004 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6006 ix86_previous_fndecl = fndecl;
6008 static bool prev_no_caller_saved_registers;
6010 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6011 Avoid expensive re-initialization of init_regs each time we switch
6012 function context. */
6013 if (TARGET_64BIT
6014 && (call_used_regs[SI_REG]
6015 == (cfun->machine->call_abi == MS_ABI)))
6016 reinit_regs ();
6017 /* Need to re-initialize init_regs if caller-saved registers are
6018 changed. */
6019 else if (prev_no_caller_saved_registers
6020 != cfun->machine->no_caller_saved_registers)
6021 reinit_regs ();
6023 if (cfun->machine->func_type != TYPE_NORMAL
6024 || cfun->machine->no_caller_saved_registers)
6026 /* Don't allow MPX, SSE, MMX nor x87 instructions since they
6027 may change processor state. */
6028 const char *isa;
6029 if (TARGET_MPX)
6030 isa = "MPX";
6031 else if (TARGET_SSE)
6032 isa = "SSE";
6033 else if (TARGET_MMX)
6034 isa = "MMX/3Dnow";
6035 else if (TARGET_80387)
6036 isa = "80387";
6037 else
6038 isa = NULL;
6039 if (isa != NULL)
6041 if (cfun->machine->func_type != TYPE_NORMAL)
6042 sorry ("%s instructions aren't allowed in %s service routine",
6043 isa, (cfun->machine->func_type == TYPE_EXCEPTION
6044 ? "exception" : "interrupt"));
6045 else
6046 sorry ("%s instructions aren't allowed in function with "
6047 "no_caller_saved_registers attribute", isa);
6048 /* Don't issue the same error twice. */
6049 cfun->machine->func_type = TYPE_NORMAL;
6050 cfun->machine->no_caller_saved_registers = false;
6054 prev_no_caller_saved_registers
6055 = cfun->machine->no_caller_saved_registers;
6059 /* Return true if this goes in large data/bss. */
6061 static bool
6062 ix86_in_large_data_p (tree exp)
6064 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6065 return false;
6067 if (exp == NULL_TREE)
6068 return false;
6070 /* Functions are never large data. */
6071 if (TREE_CODE (exp) == FUNCTION_DECL)
6072 return false;
6074 /* Automatic variables are never large data. */
6075 if (VAR_P (exp) && !is_global_var (exp))
6076 return false;
6078 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
6080 const char *section = DECL_SECTION_NAME (exp);
6081 if (strcmp (section, ".ldata") == 0
6082 || strcmp (section, ".lbss") == 0)
6083 return true;
6084 return false;
6086 else
6088 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6090 /* If this is an incomplete type with size 0, then we can't put it
6091 in data because it might be too big when completed. Also,
6092 int_size_in_bytes returns -1 if the size can vary or is larger than
6093 an integer, in which case it is also safer to assume that it goes in
6094 large data. */
6095 if (size <= 0 || size > ix86_section_threshold)
6096 return true;
6099 return false;
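/* Illustrative example (hypothetical, not from the original sources):
   with -mcmodel=medium, a global object larger than ix86_section_threshold
   (-mlarge-data-threshold), e.g.

     static char big_buf[1 << 20];

   is classified as large data by the size check above and is placed in
   .ldata/.lbss, while smaller objects keep using the regular sections. */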
6102 /* i386-specific section flag to mark large sections. */
6103 #define SECTION_LARGE SECTION_MACH_DEP
6105 /* Switch to the appropriate section for output of DECL.
6106 DECL is either a `VAR_DECL' node or a constant of some sort.
6107 RELOC indicates whether forming the initial value of DECL requires
6108 link-time relocations. */
6110 ATTRIBUTE_UNUSED static section *
6111 x86_64_elf_select_section (tree decl, int reloc,
6112 unsigned HOST_WIDE_INT align)
6114 if (ix86_in_large_data_p (decl))
6116 const char *sname = NULL;
6117 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
6118 switch (categorize_decl_for_section (decl, reloc))
6120 case SECCAT_DATA:
6121 sname = ".ldata";
6122 break;
6123 case SECCAT_DATA_REL:
6124 sname = ".ldata.rel";
6125 break;
6126 case SECCAT_DATA_REL_LOCAL:
6127 sname = ".ldata.rel.local";
6128 break;
6129 case SECCAT_DATA_REL_RO:
6130 sname = ".ldata.rel.ro";
6131 break;
6132 case SECCAT_DATA_REL_RO_LOCAL:
6133 sname = ".ldata.rel.ro.local";
6134 break;
6135 case SECCAT_BSS:
6136 sname = ".lbss";
6137 flags |= SECTION_BSS;
6138 break;
6139 case SECCAT_RODATA:
6140 case SECCAT_RODATA_MERGE_STR:
6141 case SECCAT_RODATA_MERGE_STR_INIT:
6142 case SECCAT_RODATA_MERGE_CONST:
6143 sname = ".lrodata";
6144 flags &= ~SECTION_WRITE;
6145 break;
6146 case SECCAT_SRODATA:
6147 case SECCAT_SDATA:
6148 case SECCAT_SBSS:
6149 gcc_unreachable ();
6150 case SECCAT_TEXT:
6151 case SECCAT_TDATA:
6152 case SECCAT_TBSS:
6153 /* We don't split these for the medium model. Place them into
6154 default sections and hope for the best. */
6155 break;
6157 if (sname)
6159 /* We might get called with string constants, but get_named_section
6160 doesn't like them as they are not DECLs. Also, we need to set
6161 flags in that case. */
6162 if (!DECL_P (decl))
6163 return get_section (sname, flags, NULL);
6164 return get_named_section (decl, sname, reloc);
6167 return default_elf_select_section (decl, reloc, align);
6170 /* Select a set of attributes for section NAME based on the properties
6171 of DECL and whether or not RELOC indicates that DECL's initializer
6172 might contain runtime relocations. */
6174 static unsigned int ATTRIBUTE_UNUSED
6175 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6177 unsigned int flags = default_section_type_flags (decl, name, reloc);
6179 if (ix86_in_large_data_p (decl))
6180 flags |= SECTION_LARGE;
6182 if (decl == NULL_TREE
6183 && (strcmp (name, ".ldata.rel.ro") == 0
6184 || strcmp (name, ".ldata.rel.ro.local") == 0))
6185 flags |= SECTION_RELRO;
6187 if (strcmp (name, ".lbss") == 0
6188 || strncmp (name, ".lbss.", 5) == 0
6189 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6190 flags |= SECTION_BSS;
6192 return flags;
6195 /* Build up a unique section name, expressed as a
6196 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6197 RELOC indicates whether the initial value of EXP requires
6198 link-time relocations. */
6200 static void ATTRIBUTE_UNUSED
6201 x86_64_elf_unique_section (tree decl, int reloc)
6203 if (ix86_in_large_data_p (decl))
6205 const char *prefix = NULL;
6206 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6207 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6209 switch (categorize_decl_for_section (decl, reloc))
6211 case SECCAT_DATA:
6212 case SECCAT_DATA_REL:
6213 case SECCAT_DATA_REL_LOCAL:
6214 case SECCAT_DATA_REL_RO:
6215 case SECCAT_DATA_REL_RO_LOCAL:
6216 prefix = one_only ? ".ld" : ".ldata";
6217 break;
6218 case SECCAT_BSS:
6219 prefix = one_only ? ".lb" : ".lbss";
6220 break;
6221 case SECCAT_RODATA:
6222 case SECCAT_RODATA_MERGE_STR:
6223 case SECCAT_RODATA_MERGE_STR_INIT:
6224 case SECCAT_RODATA_MERGE_CONST:
6225 prefix = one_only ? ".lr" : ".lrodata";
6226 break;
6227 case SECCAT_SRODATA:
6228 case SECCAT_SDATA:
6229 case SECCAT_SBSS:
6230 gcc_unreachable ();
6231 case SECCAT_TEXT:
6232 case SECCAT_TDATA:
6233 case SECCAT_TBSS:
6234 /* We don't split these for the medium model. Place them into
6235 default sections and hope for the best. */
6236 break;
6238 if (prefix)
6240 const char *name, *linkonce;
6241 char *string;
6243 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6244 name = targetm.strip_name_encoding (name);
6246 /* If we're using one_only, then there needs to be a .gnu.linkonce
6247 prefix to the section name. */
6248 linkonce = one_only ? ".gnu.linkonce" : "";
6250 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6252 set_decl_section_name (decl, string);
6253 return;
6256 default_unique_section (decl, reloc);
6259 #ifdef COMMON_ASM_OP
6261 #ifndef LARGECOMM_SECTION_ASM_OP
6262 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
6263 #endif
6265 /* This says how to output assembler code to declare an
6266 uninitialized external linkage data object.
6268 For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP opcode
6269 for large objects. */
6270 void
6271 x86_elf_aligned_decl_common (FILE *file, tree decl,
6272 const char *name, unsigned HOST_WIDE_INT size,
6273 int align)
6275 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6276 && size > (unsigned int)ix86_section_threshold)
6278 switch_to_section (get_named_section (decl, ".lbss", 0));
6279 fputs (LARGECOMM_SECTION_ASM_OP, file);
6281 else
6282 fputs (COMMON_ASM_OP, file);
6283 assemble_name (file, name);
6284 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6285 size, align / BITS_PER_UNIT);
6287 #endif
6289 /* Utility function for targets to use in implementing
6290 ASM_OUTPUT_ALIGNED_BSS. */
6292 void
6293 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6294 unsigned HOST_WIDE_INT size, int align)
6296 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6297 && size > (unsigned int)ix86_section_threshold)
6298 switch_to_section (get_named_section (decl, ".lbss", 0));
6299 else
6300 switch_to_section (bss_section);
6301 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6302 #ifdef ASM_DECLARE_OBJECT_NAME
6303 last_assemble_variable_decl = decl;
6304 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6305 #else
6306 /* Standard thing is just output label for the object. */
6307 ASM_OUTPUT_LABEL (file, name);
6308 #endif /* ASM_DECLARE_OBJECT_NAME */
6309 ASM_OUTPUT_SKIP (file, size ? size : 1);
6312 /* Decide whether we must probe the stack before any space allocation
6313 on this target. It's essentially TARGET_STACK_PROBE except when
6314 -fstack-check causes the stack to be already probed differently. */
6316 bool
6317 ix86_target_stack_probe (void)
6319 /* Do not probe the stack twice if static stack checking is enabled. */
6320 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6321 return false;
6323 return TARGET_STACK_PROBE;
6326 /* Decide whether we can make a sibling call to a function. DECL is the
6327 declaration of the function being targeted by the call and EXP is the
6328 CALL_EXPR representing the call. */
6330 static bool
6331 ix86_function_ok_for_sibcall (tree decl, tree exp)
6333 tree type, decl_or_type;
6334 rtx a, b;
6335 bool bind_global = decl && !targetm.binds_local_p (decl);
6337 if (ix86_function_naked (current_function_decl))
6338 return false;
6340 /* Sibling call isn't OK if there are no caller-saved registers
6341 since all registers must be preserved before return. */
6342 if (cfun->machine->no_caller_saved_registers)
6343 return false;
6345 /* If we are generating position-independent code, we cannot sibcall
6346 optimize direct calls to global functions, as the PLT requires
6347 %ebx be live. (Darwin does not have a PLT.) */
6348 if (!TARGET_MACHO
6349 && !TARGET_64BIT
6350 && flag_pic
6351 && flag_plt
6352 && bind_global)
6353 return false;
6355 /* If we need to align the outgoing stack, then sibcalling would
6356 unalign the stack, which may break the called function. */
6357 if (ix86_minimum_incoming_stack_boundary (true)
6358 < PREFERRED_STACK_BOUNDARY)
6359 return false;
6361 if (decl)
6363 decl_or_type = decl;
6364 type = TREE_TYPE (decl);
6366 else
6368 /* We're looking at the CALL_EXPR, we need the type of the function. */
6369 type = CALL_EXPR_FN (exp); /* pointer expression */
6370 type = TREE_TYPE (type); /* pointer type */
6371 type = TREE_TYPE (type); /* function type */
6372 decl_or_type = type;
6375 /* Check that the return value locations are the same. Like
6376 if we are returning floats on the 80387 register stack, we cannot
6377 make a sibcall from a function that doesn't return a float to a
6378 function that does or, conversely, from a function that does return
6379 a float to a function that doesn't; the necessary stack adjustment
6380 would not be executed. This is also the place we notice
6381 differences in the return value ABI. Note that it is ok for one
6382 of the functions to have void return type as long as the return
6383 value of the other is passed in a register. */
6384 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6385 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6386 cfun->decl, false);
6387 if (STACK_REG_P (a) || STACK_REG_P (b))
6389 if (!rtx_equal_p (a, b))
6390 return false;
6392 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6394 else if (!rtx_equal_p (a, b))
6395 return false;
6397 if (TARGET_64BIT)
6399 /* The SYSV ABI has more call-clobbered registers;
6400 disallow sibcalls from MS to SYSV. */
6401 if (cfun->machine->call_abi == MS_ABI
6402 && ix86_function_type_abi (type) == SYSV_ABI)
6403 return false;
6405 else
6407 /* If this call is indirect, we'll need to be able to use a
6408 call-clobbered register for the address of the target function.
6409 Make sure that all such registers are not used for passing
6410 parameters. Note that DLLIMPORT functions and calls to global
6411 functions via a GOT slot are indirect. */
6412 if (!decl
6413 || (bind_global && flag_pic && !flag_plt)
6414 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6416 /* Check if regparm >= 3 since arg_reg_available is set to
6417 false if regparm == 0. If regparm is 1 or 2, there is
6418 always a call-clobbered register available.
6420 ??? The symbol indirect call doesn't need a call-clobbered
6421 register. But we don't know if this is a symbol indirect
6422 call or not here. */
6423 if (ix86_function_regparm (type, NULL) >= 3
6424 && !cfun->machine->arg_reg_available)
6425 return false;
6429 /* Otherwise okay. That also includes certain types of indirect calls. */
6430 return true;
6433 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6434 and "sseregparm" calling convention attributes;
6435 arguments as in struct attribute_spec.handler. */
6437 static tree
6438 ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
6439 bool *no_add_attrs)
6441 if (TREE_CODE (*node) != FUNCTION_TYPE
6442 && TREE_CODE (*node) != METHOD_TYPE
6443 && TREE_CODE (*node) != FIELD_DECL
6444 && TREE_CODE (*node) != TYPE_DECL)
6446 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6447 name);
6448 *no_add_attrs = true;
6449 return NULL_TREE;
6452 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6453 if (is_attribute_p ("regparm", name))
6455 tree cst;
6457 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6459 error ("fastcall and regparm attributes are not compatible");
6462 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6464 error ("regparam and thiscall attributes are not compatible");
6467 cst = TREE_VALUE (args);
6468 if (TREE_CODE (cst) != INTEGER_CST)
6470 warning (OPT_Wattributes,
6471 "%qE attribute requires an integer constant argument",
6472 name);
6473 *no_add_attrs = true;
6475 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6477 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6478 name, REGPARM_MAX);
6479 *no_add_attrs = true;
6482 return NULL_TREE;
6485 if (TARGET_64BIT)
6487 /* Do not warn when emulating the MS ABI. */
6488 if ((TREE_CODE (*node) != FUNCTION_TYPE
6489 && TREE_CODE (*node) != METHOD_TYPE)
6490 || ix86_function_type_abi (*node) != MS_ABI)
6491 warning (OPT_Wattributes, "%qE attribute ignored",
6492 name);
6493 *no_add_attrs = true;
6494 return NULL_TREE;
6497 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6498 if (is_attribute_p ("fastcall", name))
6500 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6502 error ("fastcall and cdecl attributes are not compatible");
6504 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6506 error ("fastcall and stdcall attributes are not compatible");
6508 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6510 error ("fastcall and regparm attributes are not compatible");
6512 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6514 error ("fastcall and thiscall attributes are not compatible");
6518 /* Can combine stdcall with fastcall (redundant), regparm and
6519 sseregparm. */
6520 else if (is_attribute_p ("stdcall", name))
6522 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6524 error ("stdcall and cdecl attributes are not compatible");
6526 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6528 error ("stdcall and fastcall attributes are not compatible");
6530 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6532 error ("stdcall and thiscall attributes are not compatible");
6536 /* Can combine cdecl with regparm and sseregparm. */
6537 else if (is_attribute_p ("cdecl", name))
6539 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6541 error ("stdcall and cdecl attributes are not compatible");
6543 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6545 error ("fastcall and cdecl attributes are not compatible");
6547 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6549 error ("cdecl and thiscall attributes are not compatible");
6552 else if (is_attribute_p ("thiscall", name))
6554 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6555 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6556 name);
6557 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6559 error ("stdcall and thiscall attributes are not compatible");
6561 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6563 error ("fastcall and thiscall attributes are not compatible");
6565 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6567 error ("cdecl and thiscall attributes are not compatible");
6571 /* Can combine sseregparm with all attributes. */
6573 return NULL_TREE;
6576 /* The transactional memory builtins are implicitly regparm or fastcall
6577 depending on the ABI. Override the generic do-nothing attribute that
6578 these builtins were declared with, and replace it with one of the two
6579 attributes that we expect elsewhere. */
6581 static tree
6582 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6583 int flags, bool *no_add_attrs)
6585 tree alt;
6587 /* In no case do we want to add the placeholder attribute. */
6588 *no_add_attrs = true;
6590 /* The 64-bit ABI is unchanged for transactional memory. */
6591 if (TARGET_64BIT)
6592 return NULL_TREE;
6594 /* ??? Is there a better way to validate 32-bit windows? We have
6595 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6596 if (CHECK_STACK_LIMIT > 0)
6597 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6598 else
6600 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6601 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6603 decl_attributes (node, alt, flags);
6605 return NULL_TREE;
6608 /* This function determines from TYPE the calling-convention. */
6610 unsigned int
6611 ix86_get_callcvt (const_tree type)
6613 unsigned int ret = 0;
6614 bool is_stdarg;
6615 tree attrs;
6617 if (TARGET_64BIT)
6618 return IX86_CALLCVT_CDECL;
6620 attrs = TYPE_ATTRIBUTES (type);
6621 if (attrs != NULL_TREE)
6623 if (lookup_attribute ("cdecl", attrs))
6624 ret |= IX86_CALLCVT_CDECL;
6625 else if (lookup_attribute ("stdcall", attrs))
6626 ret |= IX86_CALLCVT_STDCALL;
6627 else if (lookup_attribute ("fastcall", attrs))
6628 ret |= IX86_CALLCVT_FASTCALL;
6629 else if (lookup_attribute ("thiscall", attrs))
6630 ret |= IX86_CALLCVT_THISCALL;
6632 /* Regparm isn't allowed for thiscall and fastcall. */
6633 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6635 if (lookup_attribute ("regparm", attrs))
6636 ret |= IX86_CALLCVT_REGPARM;
6637 if (lookup_attribute ("sseregparm", attrs))
6638 ret |= IX86_CALLCVT_SSEREGPARM;
6641 if (IX86_BASE_CALLCVT(ret) != 0)
6642 return ret;
6645 is_stdarg = stdarg_p (type);
6646 if (TARGET_RTD && !is_stdarg)
6647 return IX86_CALLCVT_STDCALL | ret;
6649 if (ret != 0
6650 || is_stdarg
6651 || TREE_CODE (type) != METHOD_TYPE
6652 || ix86_function_type_abi (type) != MS_ABI)
6653 return IX86_CALLCVT_CDECL | ret;
6655 return IX86_CALLCVT_THISCALL;
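/* Illustrative examples (hypothetical, not from the original sources):

     int __attribute__ ((fastcall)) f (int a, int b);   -> IX86_CALLCVT_FASTCALL
     int g (int a, ...);   compiled with -mrtd          -> IX86_CALLCVT_CDECL

   stdarg functions never receive the implicit -mrtd stdcall convention,
   as checked above. */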
6658 /* Return 0 if the attributes for two types are incompatible, 1 if they
6659 are compatible, and 2 if they are nearly compatible (which causes a
6660 warning to be generated). */
6662 static int
6663 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6665 unsigned int ccvt1, ccvt2;
6667 if (TREE_CODE (type1) != FUNCTION_TYPE
6668 && TREE_CODE (type1) != METHOD_TYPE)
6669 return 1;
6671 ccvt1 = ix86_get_callcvt (type1);
6672 ccvt2 = ix86_get_callcvt (type2);
6673 if (ccvt1 != ccvt2)
6674 return 0;
6675 if (ix86_function_regparm (type1, NULL)
6676 != ix86_function_regparm (type2, NULL))
6677 return 0;
6679 return 1;
6682 /* Return the regparm value for a function with the indicated TYPE and DECL.
6683 DECL may be NULL when calling function indirectly
6684 or considering a libcall. */
6686 static int
6687 ix86_function_regparm (const_tree type, const_tree decl)
6689 tree attr;
6690 int regparm;
6691 unsigned int ccvt;
6693 if (TARGET_64BIT)
6694 return (ix86_function_type_abi (type) == SYSV_ABI
6695 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6696 ccvt = ix86_get_callcvt (type);
6697 regparm = ix86_regparm;
6699 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6701 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6702 if (attr)
6704 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6705 return regparm;
6708 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6709 return 2;
6710 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6711 return 1;
6713 /* Use register calling convention for local functions when possible. */
6714 if (decl
6715 && TREE_CODE (decl) == FUNCTION_DECL)
6717 cgraph_node *target = cgraph_node::get (decl);
6718 if (target)
6719 target = target->function_symbol ();
6721 /* Caller and callee must agree on the calling convention, so
6722 checking just the optimize level here would mean that with
6723 __attribute__((optimize (...))) the caller could use the regparm
6724 convention and the callee not, or vice versa. Instead look at whether
6725 the callee is optimized or not. */
6726 if (target && opt_for_fn (target->decl, optimize)
6727 && !(profile_flag && !flag_fentry))
6729 cgraph_local_info *i = &target->local;
6730 if (i && i->local && i->can_change_signature)
6732 int local_regparm, globals = 0, regno;
6734 /* Make sure no regparm register is taken by a
6735 fixed register variable. */
6736 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6737 local_regparm++)
6738 if (fixed_regs[local_regparm])
6739 break;
6741 /* We don't want to use regparm(3) for nested functions as
6742 these use a static chain pointer in the third argument. */
6743 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6744 local_regparm = 2;
6746 /* Save a register for the split stack. */
6747 if (flag_split_stack)
6749 if (local_regparm == 3)
6750 local_regparm = 2;
6751 else if (local_regparm == 2
6752 && DECL_STATIC_CHAIN (target->decl))
6753 local_regparm = 1;
6756 /* Each fixed register usage increases register pressure,
6757 so fewer registers should be used for argument passing.
6758 This functionality can be overridden by an explicit
6759 regparm value. */
6760 for (regno = AX_REG; regno <= DI_REG; regno++)
6761 if (fixed_regs[regno])
6762 globals++;
6764 local_regparm
6765 = globals < local_regparm ? local_regparm - globals : 0;
6767 if (local_regparm > regparm)
6768 regparm = local_regparm;
6773 return regparm;
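/* Illustrative example (hypothetical, not from the original sources):

     __attribute__ ((regparm (3))) int add3 (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx on ia32; for local, optimized
   functions the logic above may also pick a non-zero regparm value
   automatically. */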
6776 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6777 DFmode (2) arguments in SSE registers for a function with the
6778 indicated TYPE and DECL. DECL may be NULL when calling a function
6779 indirectly or considering a libcall. Return -1 if any FP parameter
6780 should be rejected by error. This is used in situations where we imply
6781 the SSE calling convention but the function is called from another
6782 function with SSE disabled. Otherwise return 0. */
6784 static int
6785 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6787 gcc_assert (!TARGET_64BIT);
6789 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6790 by the sseregparm attribute. */
6791 if (TARGET_SSEREGPARM
6792 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6794 if (!TARGET_SSE)
6796 if (warn)
6798 if (decl)
6799 error ("calling %qD with attribute sseregparm without "
6800 "SSE/SSE2 enabled", decl);
6801 else
6802 error ("calling %qT with attribute sseregparm without "
6803 "SSE/SSE2 enabled", type);
6805 return 0;
6808 return 2;
6811 if (!decl)
6812 return 0;
6814 cgraph_node *target = cgraph_node::get (decl);
6815 if (target)
6816 target = target->function_symbol ();
6818 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6819 (and DFmode for SSE2) arguments in SSE registers. */
6820 if (target
6821 /* TARGET_SSE_MATH */
6822 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6823 && opt_for_fn (target->decl, optimize)
6824 && !(profile_flag && !flag_fentry))
6826 cgraph_local_info *i = &target->local;
6827 if (i && i->local && i->can_change_signature)
6829 /* Refuse to produce wrong code when local function with SSE enabled
6830 is called from SSE disabled function.
6831 FIXME: We need a way to detect these cases cross-ltrans partition
6832 and avoid using SSE calling conventions on local functions called
6833 from function with SSE disabled. For now at least delay the
6834 warning until we know we are going to produce wrong code.
6835 See PR66047 */
6836 if (!TARGET_SSE && warn)
6837 return -1;
6838 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6839 ->x_ix86_isa_flags) ? 2 : 1;
6843 return 0;
6846 /* Return true if EAX is live at the start of the function. Used by
6847 ix86_expand_prologue to determine if we need special help before
6848 calling allocate_stack_worker. */
6850 static bool
6851 ix86_eax_live_at_start_p (void)
6853 /* Cheat. Don't bother working forward from ix86_function_regparm
6854 to the function type to whether an actual argument is located in
6855 eax. Instead just look at cfg info, which is still close enough
6856 to correct at this point. This gives false positives for broken
6857 functions that might use uninitialized data that happens to be
6858 allocated in eax, but who cares? */
6859 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6862 static bool
6863 ix86_keep_aggregate_return_pointer (tree fntype)
6865 tree attr;
6867 if (!TARGET_64BIT)
6869 attr = lookup_attribute ("callee_pop_aggregate_return",
6870 TYPE_ATTRIBUTES (fntype));
6871 if (attr)
6872 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6874 /* For the 32-bit MS ABI the default is to keep the aggregate
6875 return pointer. */
6876 if (ix86_function_type_abi (fntype) == MS_ABI)
6877 return true;
6879 return KEEP_AGGREGATE_RETURN_POINTER != 0;
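/* Illustrative example (hypothetical, not from the original sources):

     struct big { int v[4]; };
     __attribute__ ((callee_pop_aggregate_return (0)))
     struct big get_big (void);

   an argument of 0 keeps the hidden return-slot pointer for the caller
   to pop, overriding the per-ABI default computed above. */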
6882 /* Value is the number of bytes of arguments automatically
6883 popped when returning from a subroutine call.
6884 FUNDECL is the declaration node of the function (as a tree),
6885 FUNTYPE is the data type of the function (as a tree),
6886 or for a library call it is an identifier node for the subroutine name.
6887 SIZE is the number of bytes of arguments passed on the stack.
6889 On the 80386, the RTD insn may be used to pop them if the number
6890 of args is fixed, but if the number is variable then the caller
6891 must pop them all. RTD can't be used for library calls now
6892 because the library is compiled with the Unix compiler.
6893 Use of RTD is a selectable option, since it is incompatible with
6894 standard Unix calling sequences. If the option is not selected,
6895 the caller must always pop the args.
6897 The attribute stdcall is equivalent to RTD on a per module basis. */
6899 static poly_int64
6900 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
6902 unsigned int ccvt;
6904 /* None of the 64-bit ABIs pop arguments. */
6905 if (TARGET_64BIT)
6906 return 0;
6908 ccvt = ix86_get_callcvt (funtype);
6910 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6911 | IX86_CALLCVT_THISCALL)) != 0
6912 && ! stdarg_p (funtype))
6913 return size;
6915 /* Lose any fake structure return argument if it is passed on the stack. */
6916 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6917 && !ix86_keep_aggregate_return_pointer (funtype))
6919 int nregs = ix86_function_regparm (funtype, fundecl);
6920 if (nregs == 0)
6921 return GET_MODE_SIZE (Pmode);
6924 return 0;
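/* Illustrative example (hypothetical, not from the original sources):

     void __attribute__ ((stdcall)) f (int a, int b);

   here the callee pops its 8 bytes of stack arguments itself (ret $8),
   so this hook returns SIZE; a plain cdecl function returns 0 and the
   caller pops the arguments. */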
6927 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6929 static bool
6930 ix86_legitimate_combined_insn (rtx_insn *insn)
6932 int i;
6934 /* Check operand constraints in case hard registers were propagated
6935 into insn pattern. This check prevents combine pass from
6936 generating insn patterns with invalid hard register operands.
6937 These invalid insns can eventually confuse reload to error out
6938 with a spill failure. See also PRs 46829 and 46843. */
6940 gcc_assert (INSN_CODE (insn) >= 0);
6942 extract_insn (insn);
6943 preprocess_constraints (insn);
6945 int n_operands = recog_data.n_operands;
6946 int n_alternatives = recog_data.n_alternatives;
6947 for (i = 0; i < n_operands; i++)
6949 rtx op = recog_data.operand[i];
6950 machine_mode mode = GET_MODE (op);
6951 const operand_alternative *op_alt;
6952 int offset = 0;
6953 bool win;
6954 int j;
6956 /* A unary operator may be accepted by the predicate, but it
6957 is irrelevant for matching constraints. */
6958 if (UNARY_P (op))
6959 op = XEXP (op, 0);
6961 if (SUBREG_P (op))
6963 if (REG_P (SUBREG_REG (op))
6964 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6965 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6966 GET_MODE (SUBREG_REG (op)),
6967 SUBREG_BYTE (op),
6968 GET_MODE (op));
6969 op = SUBREG_REG (op);
6972 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6973 continue;
6975 op_alt = recog_op_alt;
6977 /* Operand has no constraints, anything is OK. */
6978 win = !n_alternatives;
6980 alternative_mask preferred = get_preferred_alternatives (insn);
6981 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6983 if (!TEST_BIT (preferred, j))
6984 continue;
6985 if (op_alt[i].anything_ok
6986 || (op_alt[i].matches != -1
6987 && operands_match_p
6988 (recog_data.operand[i],
6989 recog_data.operand[op_alt[i].matches]))
6990 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6992 win = true;
6993 break;
6997 if (!win)
6998 return false;
7001 return true;
7004 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7006 static unsigned HOST_WIDE_INT
7007 ix86_asan_shadow_offset (void)
7009 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7010 : HOST_WIDE_INT_C (0x7fff8000))
7011 : (HOST_WIDE_INT_1 << 29);
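/* Illustrative note (an assumption based on the generic AddressSanitizer
   mapping, not taken from the original sources): libasan translates an
   address A to its shadow byte at (A >> 3) + offset, so the values above
   correspond to 0x7fff8000 for LP64 Linux, 1 << 44 for LP64 Mach-O and
   1 << 29 for 32-bit targets. */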
7014 /* Argument support functions. */
7016 /* Return true when register may be used to pass function parameters. */
7017 bool
7018 ix86_function_arg_regno_p (int regno)
7020 int i;
7021 enum calling_abi call_abi;
7022 const int *parm_regs;
7024 if (TARGET_MPX && BND_REGNO_P (regno))
7025 return true;
7027 if (!TARGET_64BIT)
7029 if (TARGET_MACHO)
7030 return (regno < REGPARM_MAX
7031 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7032 else
7033 return (regno < REGPARM_MAX
7034 || (TARGET_MMX && MMX_REGNO_P (regno)
7035 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7036 || (TARGET_SSE && SSE_REGNO_P (regno)
7037 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7040 if (TARGET_SSE && SSE_REGNO_P (regno)
7041 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7042 return true;
7044 /* TODO: The function should depend on current function ABI but
7045 builtins.c would need updating then. Therefore we use the
7046 default ABI. */
7047 call_abi = ix86_cfun_abi ();
7049 /* RAX is used as hidden argument to va_arg functions. */
7050 if (call_abi == SYSV_ABI && regno == AX_REG)
7051 return true;
7053 if (call_abi == MS_ABI)
7054 parm_regs = x86_64_ms_abi_int_parameter_registers;
7055 else
7056 parm_regs = x86_64_int_parameter_registers;
7058 for (i = 0; i < (call_abi == MS_ABI
7059 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7060 if (regno == parm_regs[i])
7061 return true;
7062 return false;
7065 /* Return if we do not know how to pass TYPE solely in registers. */
7067 static bool
7068 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7070 if (must_pass_in_stack_var_size_or_pad (mode, type))
7071 return true;
7073 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7074 The layout_type routine is crafty and tries to trick us into passing
7075 currently unsupported vector types on the stack by using TImode. */
7076 return (!TARGET_64BIT && mode == TImode
7077 && type && TREE_CODE (type) != VECTOR_TYPE);
7080 /* Return the size, in bytes, of the area reserved for arguments passed
7081 in registers for the function represented by FNDECL, depending on the
7082 ABI format used. */
7083 int
7084 ix86_reg_parm_stack_space (const_tree fndecl)
7086 enum calling_abi call_abi = SYSV_ABI;
7087 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7088 call_abi = ix86_function_abi (fndecl);
7089 else
7090 call_abi = ix86_function_type_abi (fndecl);
7091 if (TARGET_64BIT && call_abi == MS_ABI)
7092 return 32;
7093 return 0;
7096 /* We add this as a workaround in order to use libc_has_function
7097 hook in i386.md. */
7098 bool
7099 ix86_libc_has_function (enum function_class fn_class)
7101 return targetm.libc_has_function (fn_class);
7104 /* Return SYSV_ABI or MS_ABI depending on FNTYPE,
7105 specifying the call ABI used. */
7106 enum calling_abi
7107 ix86_function_type_abi (const_tree fntype)
7109 enum calling_abi abi = ix86_abi;
7111 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
7112 return abi;
7114 if (abi == SYSV_ABI
7115 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7117 static int warned;
7118 if (TARGET_X32 && !warned)
7120 error ("X32 does not support ms_abi attribute");
7121 warned = 1;
7124 abi = MS_ABI;
7126 else if (abi == MS_ABI
7127 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7128 abi = SYSV_ABI;
7130 return abi;
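/* Illustrative sketch: the attributes handled above let one translation unit
   mix calling conventions, e.g.

     __attribute__ ((ms_abi))   void f_ms   (int, int, int, int, int);
     __attribute__ ((sysv_abi)) void f_sysv (int, int, int, int, int);

   f_ms takes its first four arguments in RCX/RDX/R8/R9 with the fifth on the
   stack, while f_sysv uses RDI/RSI/RDX/RCX/R8; on X32 the ms_abi attribute is
   rejected by the error issued above.  */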
7133 static enum calling_abi
7134 ix86_function_abi (const_tree fndecl)
7136 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7139 /* Return SYSV_ABI or MS_ABI depending on cfun,
7140 specifying the call ABI used. */
7141 enum calling_abi
7142 ix86_cfun_abi (void)
7144 return cfun ? cfun->machine->call_abi : ix86_abi;
7147 static bool
7148 ix86_function_ms_hook_prologue (const_tree fn)
7150 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7152 if (decl_function_context (fn) != NULL_TREE)
7153 error_at (DECL_SOURCE_LOCATION (fn),
7154 "ms_hook_prologue is not compatible with nested function");
7155 else
7156 return true;
7158 return false;
7161 static bool
7162 ix86_function_naked (const_tree fn)
7164 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
7165 return true;
7167 return false;
7170 /* Write the extra assembler code needed to declare a function properly. */
7172 void
7173 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7174 tree decl)
7176 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
7178 if (is_ms_hook)
7180 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7181 unsigned int filler_cc = 0xcccccccc;
7183 for (i = 0; i < filler_count; i += 4)
7184 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7187 #ifdef SUBTARGET_ASM_UNWIND_INIT
7188 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7189 #endif
7191 ASM_OUTPUT_LABEL (asm_out_file, fname);
7193 /* Output magic byte marker, if hot-patch attribute is set. */
7194 if (is_ms_hook)
7196 if (TARGET_64BIT)
7198 /* leaq [%rsp + 0], %rsp */
7199 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7200 asm_out_file);
7202 else
7204 /* movl.s %edi, %edi
7205 push %ebp
7206 movl.s %esp, %ebp */
7207 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
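/* Illustrative sketch: a function declared as

     __attribute__ ((ms_hook_prologue)) void hookable (void);

   is preceded by the 0xCC filler emitted before its label and begins with a
   patchable no-op: the 8-byte leaq 0(%rsp),%rsp in 64-bit code, or the
   2-byte movl.s %edi,%edi (followed by the usual frame setup) in 32-bit
   code.  A runtime patcher can then atomically overwrite that no-op with a
   short jump into the filler area without tearing a real instruction.  */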
7212 /* Implementation of the call ABI switching target hook. Sets the
7213 call register set specific to FNDECL. See also
7214 ix86_conditional_register_usage for more details. */
7215 void
7216 ix86_call_abi_override (const_tree fndecl)
7218 cfun->machine->call_abi = ix86_function_abi (fndecl);
7221 /* Return true if a pseudo register should be created and used to hold
7222 the GOT address for PIC code. */
7223 bool
7224 ix86_use_pseudo_pic_reg (void)
7226 if ((TARGET_64BIT
7227 && (ix86_cmodel == CM_SMALL_PIC
7228 || TARGET_PECOFF))
7229 || !flag_pic)
7230 return false;
7231 return true;
7234 /* Initialize large model PIC register. */
7236 static void
7237 ix86_init_large_pic_reg (unsigned int tmp_regno)
7239 rtx_code_label *label;
7240 rtx tmp_reg;
7242 gcc_assert (Pmode == DImode);
7243 label = gen_label_rtx ();
7244 emit_label (label);
7245 LABEL_PRESERVE_P (label) = 1;
7246 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7247 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7248 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7249 label));
7250 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7251 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7252 pic_offset_table_rtx, tmp_reg));
7253 const char *name = LABEL_NAME (label);
7254 PUT_CODE (label, NOTE);
7255 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7256 NOTE_DELETED_LABEL_NAME (label) = name;
7259 /* Create and initialize PIC register if required. */
7260 static void
7261 ix86_init_pic_reg (void)
7263 edge entry_edge;
7264 rtx_insn *seq;
7266 if (!ix86_use_pseudo_pic_reg ())
7267 return;
7269 start_sequence ();
7271 if (TARGET_64BIT)
7273 if (ix86_cmodel == CM_LARGE_PIC)
7274 ix86_init_large_pic_reg (R11_REG);
7275 else
7276 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7278 else
7280 /* If there is a future mcount call in the function, it is more profitable
7281 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7282 rtx reg = crtl->profile
7283 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7284 : pic_offset_table_rtx;
7285 rtx_insn *insn = emit_insn (gen_set_got (reg));
7286 RTX_FRAME_RELATED_P (insn) = 1;
7287 if (crtl->profile)
7288 emit_move_insn (pic_offset_table_rtx, reg);
7289 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7292 seq = get_insns ();
7293 end_sequence ();
7295 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7296 insert_insn_on_edge (seq, entry_edge);
7297 commit_one_edge_insertion (entry_edge);
7300 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7301 for a call to a function whose data type is FNTYPE.
7302 For a library call, FNTYPE is 0. */
7304 void
7305 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7306 tree fntype, /* tree ptr for function decl */
7307 rtx libname, /* SYMBOL_REF of library name or 0 */
7308 tree fndecl,
7309 int caller)
7311 struct cgraph_local_info *i = NULL;
7312 struct cgraph_node *target = NULL;
7314 memset (cum, 0, sizeof (*cum));
7316 if (fndecl)
7318 target = cgraph_node::get (fndecl);
7319 if (target)
7321 target = target->function_symbol ();
7322 i = cgraph_node::local_info (target->decl);
7323 cum->call_abi = ix86_function_abi (target->decl);
7325 else
7326 cum->call_abi = ix86_function_abi (fndecl);
7328 else
7329 cum->call_abi = ix86_function_type_abi (fntype);
7331 cum->caller = caller;
7333 /* Set up the number of registers to use for passing arguments. */
7334 cum->nregs = ix86_regparm;
7335 if (TARGET_64BIT)
7337 cum->nregs = (cum->call_abi == SYSV_ABI
7338 ? X86_64_REGPARM_MAX
7339 : X86_64_MS_REGPARM_MAX);
7341 if (TARGET_SSE)
7343 cum->sse_nregs = SSE_REGPARM_MAX;
7344 if (TARGET_64BIT)
7346 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7347 ? X86_64_SSE_REGPARM_MAX
7348 : X86_64_MS_SSE_REGPARM_MAX);
7351 if (TARGET_MMX)
7352 cum->mmx_nregs = MMX_REGPARM_MAX;
7353 cum->warn_avx512f = true;
7354 cum->warn_avx = true;
7355 cum->warn_sse = true;
7356 cum->warn_mmx = true;
7358 /* Because types might mismatch between caller and callee, we need to
7359 use the actual type of the function for local calls.
7360 FIXME: cgraph_analyze can be told to actually record if a function uses
7361 va_start, so for local functions maybe_vaarg can be made aggressive,
7362 helping K&R code.
7363 FIXME: once the type system is fixed, we won't need this code anymore. */
7364 if (i && i->local && i->can_change_signature)
7365 fntype = TREE_TYPE (target->decl);
7366 cum->stdarg = stdarg_p (fntype);
7367 cum->maybe_vaarg = (fntype
7368 ? (!prototype_p (fntype) || stdarg_p (fntype))
7369 : !libname);
7371 cum->bnd_regno = FIRST_BND_REG;
7372 cum->bnds_in_bt = 0;
7373 cum->force_bnd_pass = 0;
7374 cum->decl = fndecl;
7376 cum->warn_empty = !warn_abi || cum->stdarg;
7377 if (!cum->warn_empty && fntype)
7379 function_args_iterator iter;
7380 tree argtype;
7381 bool seen_empty_type = false;
7382 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
7384 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
7385 break;
7386 if (TYPE_EMPTY_P (argtype))
7387 seen_empty_type = true;
7388 else if (seen_empty_type)
7390 cum->warn_empty = true;
7391 break;
7396 if (!TARGET_64BIT)
7398 /* If there are variable arguments, then we won't pass anything
7399 in registers in 32-bit mode. */
7400 if (stdarg_p (fntype))
7402 cum->nregs = 0;
7403 /* Since in 32-bit mode variable arguments are always passed on the
7404 stack, there is a scratch register available for an indirect
7405 sibcall. */
7406 cfun->machine->arg_reg_available = true;
7407 cum->sse_nregs = 0;
7408 cum->mmx_nregs = 0;
7409 cum->warn_avx512f = false;
7410 cum->warn_avx = false;
7411 cum->warn_sse = false;
7412 cum->warn_mmx = false;
7413 return;
7416 /* Use ecx and edx registers if function has fastcall attribute,
7417 else look for regparm information. */
7418 if (fntype)
7420 unsigned int ccvt = ix86_get_callcvt (fntype);
7421 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7423 cum->nregs = 1;
7424 cum->fastcall = 1; /* Same first register as in fastcall. */
7426 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7428 cum->nregs = 2;
7429 cum->fastcall = 1;
7431 else
7432 cum->nregs = ix86_function_regparm (fntype, fndecl);
7435 /* Set up the number of SSE registers used for passing SFmode
7436 and DFmode arguments. Warn for mismatching ABI. */
7437 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7440 cfun->machine->arg_reg_available = (cum->nregs > 0);
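/* Illustrative sketch of the 32-bit conventions configured above (the
   example declarations are assumed, not from this file):

     __attribute__ ((fastcall))    int f1 (int a, int b, int c);
     __attribute__ ((thiscall))    int f2 (void *objp, int a);
     __attribute__ ((regparm (3))) int f3 (int a, int b, int c);

   f1 receives A in %ecx and B in %edx with C on the stack, f2 receives only
   OBJP in %ecx, and f3 receives all three arguments in %eax, %edx and %ecx
   as counted by ix86_function_regparm; a stdarg prototype disables register
   passing entirely, as handled earlier in this function.  */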
7443 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7444 But in the case of vector types, it is some vector mode.
7446 When we have only some of our vector isa extensions enabled, then there
7447 are some modes for which vector_mode_supported_p is false. For these
7448 modes, the generic vector support in gcc will choose some non-vector mode
7449 in order to implement the type. By computing the natural mode, we'll
7450 select the proper ABI location for the operand and not depend on whatever
7451 the middle-end decides to do with these vector types.
7453 The middle-end can't deal with vector types larger than 16 bytes. In this
7454 case, we return the original mode and warn about the ABI change if CUM isn't
7455 NULL.
7457 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
7458 available for the function return value. */
7460 static machine_mode
7461 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7462 bool in_return)
7464 machine_mode mode = TYPE_MODE (type);
7466 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7468 HOST_WIDE_INT size = int_size_in_bytes (type);
7469 if ((size == 8 || size == 16 || size == 32 || size == 64)
7470 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7471 && TYPE_VECTOR_SUBPARTS (type) > 1)
7473 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7475 /* There are no XFmode vector modes. */
7476 if (innermode == XFmode)
7477 return mode;
7479 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7480 mode = MIN_MODE_VECTOR_FLOAT;
7481 else
7482 mode = MIN_MODE_VECTOR_INT;
7484 /* Get the mode which has this inner mode and number of units. */
7485 FOR_EACH_MODE_FROM (mode, mode)
7486 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7487 && GET_MODE_INNER (mode) == innermode)
7489 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7491 static bool warnedavx512f;
7492 static bool warnedavx512f_ret;
7494 if (cum && cum->warn_avx512f && !warnedavx512f)
7496 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7497 "without AVX512F enabled changes the ABI"))
7498 warnedavx512f = true;
7500 else if (in_return && !warnedavx512f_ret)
7502 if (warning (OPT_Wpsabi, "AVX512F vector return "
7503 "without AVX512F enabled changes the ABI"))
7504 warnedavx512f_ret = true;
7507 return TYPE_MODE (type);
7509 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7511 static bool warnedavx;
7512 static bool warnedavx_ret;
7514 if (cum && cum->warn_avx && !warnedavx)
7516 if (warning (OPT_Wpsabi, "AVX vector argument "
7517 "without AVX enabled changes the ABI"))
7518 warnedavx = true;
7520 else if (in_return && !warnedavx_ret)
7522 if (warning (OPT_Wpsabi, "AVX vector return "
7523 "without AVX enabled changes the ABI"))
7524 warnedavx_ret = true;
7527 return TYPE_MODE (type);
7529 else if (((size == 8 && TARGET_64BIT) || size == 16)
7530 && !TARGET_SSE
7531 && !TARGET_IAMCU)
7533 static bool warnedsse;
7534 static bool warnedsse_ret;
7536 if (cum && cum->warn_sse && !warnedsse)
7538 if (warning (OPT_Wpsabi, "SSE vector argument "
7539 "without SSE enabled changes the ABI"))
7540 warnedsse = true;
7542 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7544 if (warning (OPT_Wpsabi, "SSE vector return "
7545 "without SSE enabled changes the ABI"))
7546 warnedsse_ret = true;
7549 else if ((size == 8 && !TARGET_64BIT)
7550 && (!cfun
7551 || cfun->machine->func_type == TYPE_NORMAL)
7552 && !TARGET_MMX
7553 && !TARGET_IAMCU)
7555 static bool warnedmmx;
7556 static bool warnedmmx_ret;
7558 if (cum && cum->warn_mmx && !warnedmmx)
7560 if (warning (OPT_Wpsabi, "MMX vector argument "
7561 "without MMX enabled changes the ABI"))
7562 warnedmmx = true;
7564 else if (in_return && !warnedmmx_ret)
7566 if (warning (OPT_Wpsabi, "MMX vector return "
7567 "without MMX enabled changes the ABI"))
7568 warnedmmx_ret = true;
7571 return mode;
7574 gcc_unreachable ();
7578 return mode;
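/* Illustrative sketch: for a generic vector type such as

     typedef int v8si __attribute__ ((vector_size (32)));

   TYPE_MODE is not a vector mode when AVX is disabled, but the mode walk
   above still recovers V8SImode as the natural mode.  Since the psABI
   assigns such an argument to a %ymm register, using it without -mavx
   triggers the -Wpsabi warning issued above and falls back to TYPE_MODE.  */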
7581 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7582 this may not agree with the mode that the type system has chosen for the
7583 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7584 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7586 static rtx
7587 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7588 unsigned int regno)
7590 rtx tmp;
7592 if (orig_mode != BLKmode)
7593 tmp = gen_rtx_REG (orig_mode, regno);
7594 else
7596 tmp = gen_rtx_REG (mode, regno);
7597 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7598 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7601 return tmp;
7604 /* x86-64 register passing implementation. See the x86-64 psABI for details.
7605 The goal of this code is to classify each eightbyte of an incoming argument
7606 by register class and assign registers accordingly. */
7608 /* Return the union class of CLASS1 and CLASS2.
7609 See the x86-64 PS ABI for details. */
7611 static enum x86_64_reg_class
7612 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7614 /* Rule #1: If both classes are equal, this is the resulting class. */
7615 if (class1 == class2)
7616 return class1;
7618 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7619 the other class. */
7620 if (class1 == X86_64_NO_CLASS)
7621 return class2;
7622 if (class2 == X86_64_NO_CLASS)
7623 return class1;
7625 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7626 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7627 return X86_64_MEMORY_CLASS;
7629 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7630 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7631 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7632 return X86_64_INTEGERSI_CLASS;
7633 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7634 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7635 return X86_64_INTEGER_CLASS;
7637 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7638 MEMORY is used. */
7639 if (class1 == X86_64_X87_CLASS
7640 || class1 == X86_64_X87UP_CLASS
7641 || class1 == X86_64_COMPLEX_X87_CLASS
7642 || class2 == X86_64_X87_CLASS
7643 || class2 == X86_64_X87UP_CLASS
7644 || class2 == X86_64_COMPLEX_X87_CLASS)
7645 return X86_64_MEMORY_CLASS;
7647 /* Rule #6: Otherwise class SSE is used. */
7648 return X86_64_SSE_CLASS;
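/* Illustrative sketch of the merge rules above (example types assumed):
   for

     union u { int i; float f; };

   the single eightbyte merges INTEGERSI (from I) with SSESF (from F), and
   rule #4 resolves the pair to INTEGERSI, so U is passed in a general
   register.  For

     struct s { double d; long l; };

   no conflicting classes meet: the first eightbyte stays SSEDF and the
   second INTEGER, so S is split between an SSE and a general register.  */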
7651 /* Classify the argument of type TYPE and mode MODE.
7652 CLASSES will be filled by the register class used to pass each word
7653 of the operand. The number of words is returned. In case the parameter
7654 should be passed in memory, 0 is returned. As a special case for zero
7655 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7657 BIT_OFFSET is used internally for handling records; it specifies the
7658 offset in bits modulo 512 to avoid overflow cases.
7660 See the x86-64 PS ABI for details.
7663 static int
7664 classify_argument (machine_mode mode, const_tree type,
7665 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7667 HOST_WIDE_INT bytes =
7668 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7669 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7671 /* Variable sized entities are always passed/returned in memory. */
7672 if (bytes < 0)
7673 return 0;
7675 if (mode != VOIDmode
7676 && targetm.calls.must_pass_in_stack (mode, type))
7677 return 0;
7679 if (type && AGGREGATE_TYPE_P (type))
7681 int i;
7682 tree field;
7683 enum x86_64_reg_class subclasses[MAX_CLASSES];
7685 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7686 if (bytes > 64)
7687 return 0;
7689 for (i = 0; i < words; i++)
7690 classes[i] = X86_64_NO_CLASS;
7692 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
7693 signal the memory class, so handle this as a special case. */
7694 if (!words)
7696 classes[0] = X86_64_NO_CLASS;
7697 return 1;
7700 /* Classify each field of record and merge classes. */
7701 switch (TREE_CODE (type))
7703 case RECORD_TYPE:
7704 /* And now merge the fields of structure. */
7705 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7707 if (TREE_CODE (field) == FIELD_DECL)
7709 int num;
7711 if (TREE_TYPE (field) == error_mark_node)
7712 continue;
7714 /* Bitfields are always classified as integer. Handle them
7715 early, since later code would consider them to be
7716 misaligned integers. */
7717 if (DECL_BIT_FIELD (field))
7719 for (i = (int_bit_position (field)
7720 + (bit_offset % 64)) / 8 / 8;
7721 i < ((int_bit_position (field) + (bit_offset % 64))
7722 + tree_to_shwi (DECL_SIZE (field))
7723 + 63) / 8 / 8; i++)
7724 classes[i] =
7725 merge_classes (X86_64_INTEGER_CLASS,
7726 classes[i]);
7728 else
7730 int pos;
7732 type = TREE_TYPE (field);
7734 /* Flexible array member is ignored. */
7735 if (TYPE_MODE (type) == BLKmode
7736 && TREE_CODE (type) == ARRAY_TYPE
7737 && TYPE_SIZE (type) == NULL_TREE
7738 && TYPE_DOMAIN (type) != NULL_TREE
7739 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7740 == NULL_TREE))
7742 static bool warned;
7744 if (!warned && warn_psabi)
7746 warned = true;
7747 inform (input_location,
7748 "the ABI of passing struct with"
7749 " a flexible array member has"
7750 " changed in GCC 4.4");
7752 continue;
7754 num = classify_argument (TYPE_MODE (type), type,
7755 subclasses,
7756 (int_bit_position (field)
7757 + bit_offset) % 512);
7758 if (!num)
7759 return 0;
7760 pos = (int_bit_position (field)
7761 + (bit_offset % 64)) / 8 / 8;
7762 for (i = 0; i < num && (i + pos) < words; i++)
7763 classes[i + pos] =
7764 merge_classes (subclasses[i], classes[i + pos]);
7768 break;
7770 case ARRAY_TYPE:
7771 /* Arrays are handled as small records. */
7773 int num;
7774 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7775 TREE_TYPE (type), subclasses, bit_offset);
7776 if (!num)
7777 return 0;
7779 /* The partial classes are now full classes. */
7780 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7781 subclasses[0] = X86_64_SSE_CLASS;
7782 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7783 && !((bit_offset % 64) == 0 && bytes == 4))
7784 subclasses[0] = X86_64_INTEGER_CLASS;
7786 for (i = 0; i < words; i++)
7787 classes[i] = subclasses[i % num];
7789 break;
7791 case UNION_TYPE:
7792 case QUAL_UNION_TYPE:
7793 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
7795 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7797 if (TREE_CODE (field) == FIELD_DECL)
7799 int num;
7801 if (TREE_TYPE (field) == error_mark_node)
7802 continue;
7804 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7805 TREE_TYPE (field), subclasses,
7806 bit_offset);
7807 if (!num)
7808 return 0;
7809 for (i = 0; i < num && i < words; i++)
7810 classes[i] = merge_classes (subclasses[i], classes[i]);
7813 break;
7815 default:
7816 gcc_unreachable ();
7819 if (words > 2)
7821 /* When size > 16 bytes, if the first one isn't
7822 X86_64_SSE_CLASS or any other ones aren't
7823 X86_64_SSEUP_CLASS, everything should be passed in
7824 memory. */
7825 if (classes[0] != X86_64_SSE_CLASS)
7826 return 0;
7828 for (i = 1; i < words; i++)
7829 if (classes[i] != X86_64_SSEUP_CLASS)
7830 return 0;
7833 /* Final merger cleanup. */
7834 for (i = 0; i < words; i++)
7836 /* If one class is MEMORY, everything should be passed in
7837 memory. */
7838 if (classes[i] == X86_64_MEMORY_CLASS)
7839 return 0;
7841 /* The X86_64_SSEUP_CLASS should be always preceded by
7842 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7843 if (classes[i] == X86_64_SSEUP_CLASS
7844 && classes[i - 1] != X86_64_SSE_CLASS
7845 && classes[i - 1] != X86_64_SSEUP_CLASS)
7847 /* The first one should never be X86_64_SSEUP_CLASS. */
7848 gcc_assert (i != 0);
7849 classes[i] = X86_64_SSE_CLASS;
7852 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7853 everything should be passed in memory. */
7854 if (classes[i] == X86_64_X87UP_CLASS
7855 && (classes[i - 1] != X86_64_X87_CLASS))
7857 static bool warned;
7859 /* The first one should never be X86_64_X87UP_CLASS. */
7860 gcc_assert (i != 0);
7861 if (!warned && warn_psabi)
7863 warned = true;
7864 inform (input_location,
7865 "the ABI of passing union with long double"
7866 " has changed in GCC 4.4");
7868 return 0;
7871 return words;
7874 /* Compute the alignment needed. We align all types to their natural
7875 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
7876 if (mode != VOIDmode && mode != BLKmode)
7878 int mode_alignment = GET_MODE_BITSIZE (mode);
7880 if (mode == XFmode)
7881 mode_alignment = 128;
7882 else if (mode == XCmode)
7883 mode_alignment = 256;
7884 if (COMPLEX_MODE_P (mode))
7885 mode_alignment /= 2;
7886 /* Misaligned fields are always returned in memory. */
7887 if (bit_offset % mode_alignment)
7888 return 0;
7891 /* For V1xx modes, just use the base mode. */
7892 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7893 && GET_MODE_UNIT_SIZE (mode) == bytes)
7894 mode = GET_MODE_INNER (mode);
7896 /* Classification of atomic types. */
7897 switch (mode)
7899 case E_SDmode:
7900 case E_DDmode:
7901 classes[0] = X86_64_SSE_CLASS;
7902 return 1;
7903 case E_TDmode:
7904 classes[0] = X86_64_SSE_CLASS;
7905 classes[1] = X86_64_SSEUP_CLASS;
7906 return 2;
7907 case E_DImode:
7908 case E_SImode:
7909 case E_HImode:
7910 case E_QImode:
7911 case E_CSImode:
7912 case E_CHImode:
7913 case E_CQImode:
7915 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7917 /* Analyze last 128 bits only. */
7918 size = (size - 1) & 0x7f;
7920 if (size < 32)
7922 classes[0] = X86_64_INTEGERSI_CLASS;
7923 return 1;
7925 else if (size < 64)
7927 classes[0] = X86_64_INTEGER_CLASS;
7928 return 1;
7930 else if (size < 64+32)
7932 classes[0] = X86_64_INTEGER_CLASS;
7933 classes[1] = X86_64_INTEGERSI_CLASS;
7934 return 2;
7936 else if (size < 64+64)
7938 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7939 return 2;
7941 else
7942 gcc_unreachable ();
7944 case E_CDImode:
7945 case E_TImode:
7946 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7947 return 2;
7948 case E_COImode:
7949 case E_OImode:
7950 /* OImode shouldn't be used directly. */
7951 gcc_unreachable ();
7952 case E_CTImode:
7953 return 0;
7954 case E_SFmode:
7955 if (!(bit_offset % 64))
7956 classes[0] = X86_64_SSESF_CLASS;
7957 else
7958 classes[0] = X86_64_SSE_CLASS;
7959 return 1;
7960 case E_DFmode:
7961 classes[0] = X86_64_SSEDF_CLASS;
7962 return 1;
7963 case E_XFmode:
7964 classes[0] = X86_64_X87_CLASS;
7965 classes[1] = X86_64_X87UP_CLASS;
7966 return 2;
7967 case E_TFmode:
7968 classes[0] = X86_64_SSE_CLASS;
7969 classes[1] = X86_64_SSEUP_CLASS;
7970 return 2;
7971 case E_SCmode:
7972 classes[0] = X86_64_SSE_CLASS;
7973 if (!(bit_offset % 64))
7974 return 1;
7975 else
7977 static bool warned;
7979 if (!warned && warn_psabi)
7981 warned = true;
7982 inform (input_location,
7983 "the ABI of passing structure with complex float"
7984 " member has changed in GCC 4.4");
7986 classes[1] = X86_64_SSESF_CLASS;
7987 return 2;
7989 case E_DCmode:
7990 classes[0] = X86_64_SSEDF_CLASS;
7991 classes[1] = X86_64_SSEDF_CLASS;
7992 return 2;
7993 case E_XCmode:
7994 classes[0] = X86_64_COMPLEX_X87_CLASS;
7995 return 1;
7996 case E_TCmode:
7997 /* This mode is larger than 16 bytes. */
7998 return 0;
7999 case E_V8SFmode:
8000 case E_V8SImode:
8001 case E_V32QImode:
8002 case E_V16HImode:
8003 case E_V4DFmode:
8004 case E_V4DImode:
8005 classes[0] = X86_64_SSE_CLASS;
8006 classes[1] = X86_64_SSEUP_CLASS;
8007 classes[2] = X86_64_SSEUP_CLASS;
8008 classes[3] = X86_64_SSEUP_CLASS;
8009 return 4;
8010 case E_V8DFmode:
8011 case E_V16SFmode:
8012 case E_V8DImode:
8013 case E_V16SImode:
8014 case E_V32HImode:
8015 case E_V64QImode:
8016 classes[0] = X86_64_SSE_CLASS;
8017 classes[1] = X86_64_SSEUP_CLASS;
8018 classes[2] = X86_64_SSEUP_CLASS;
8019 classes[3] = X86_64_SSEUP_CLASS;
8020 classes[4] = X86_64_SSEUP_CLASS;
8021 classes[5] = X86_64_SSEUP_CLASS;
8022 classes[6] = X86_64_SSEUP_CLASS;
8023 classes[7] = X86_64_SSEUP_CLASS;
8024 return 8;
8025 case E_V4SFmode:
8026 case E_V4SImode:
8027 case E_V16QImode:
8028 case E_V8HImode:
8029 case E_V2DFmode:
8030 case E_V2DImode:
8031 classes[0] = X86_64_SSE_CLASS;
8032 classes[1] = X86_64_SSEUP_CLASS;
8033 return 2;
8034 case E_V1TImode:
8035 case E_V1DImode:
8036 case E_V2SFmode:
8037 case E_V2SImode:
8038 case E_V4HImode:
8039 case E_V8QImode:
8040 classes[0] = X86_64_SSE_CLASS;
8041 return 1;
8042 case E_BLKmode:
8043 case E_VOIDmode:
8044 return 0;
8045 default:
8046 gcc_assert (VECTOR_MODE_P (mode));
8048 if (bytes > 16)
8049 return 0;
8051 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8053 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8054 classes[0] = X86_64_INTEGERSI_CLASS;
8055 else
8056 classes[0] = X86_64_INTEGER_CLASS;
8057 classes[1] = X86_64_INTEGER_CLASS;
8058 return 1 + (bytes > 8);
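/* Illustrative sketch of the aggregate rules above (examples assumed, AVX
   enabled for the first one): an aggregate larger than two eightbytes
   survives classification only as one wide vector, e.g.

     struct ok  { __m256 v; };            classes SSE, SSEUP, SSEUP, SSEUP
     struct bad { double a, b, c, d; };   four SSEDF eightbytes

   OK is passed in a single %ymm register, while BAD fails the words > 2
   check (its first class is SSEDF, not SSE) and is therefore passed in
   memory, as are aggregates containing x87 or misaligned members.  */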
8062 /* Examine the argument and set the number of registers required in each
8063 class. Return true iff the parameter should be passed in memory. */
8065 static bool
8066 examine_argument (machine_mode mode, const_tree type, int in_return,
8067 int *int_nregs, int *sse_nregs)
8069 enum x86_64_reg_class regclass[MAX_CLASSES];
8070 int n = classify_argument (mode, type, regclass, 0);
8072 *int_nregs = 0;
8073 *sse_nregs = 0;
8075 if (!n)
8076 return true;
8077 for (n--; n >= 0; n--)
8078 switch (regclass[n])
8080 case X86_64_INTEGER_CLASS:
8081 case X86_64_INTEGERSI_CLASS:
8082 (*int_nregs)++;
8083 break;
8084 case X86_64_SSE_CLASS:
8085 case X86_64_SSESF_CLASS:
8086 case X86_64_SSEDF_CLASS:
8087 (*sse_nregs)++;
8088 break;
8089 case X86_64_NO_CLASS:
8090 case X86_64_SSEUP_CLASS:
8091 break;
8092 case X86_64_X87_CLASS:
8093 case X86_64_X87UP_CLASS:
8094 case X86_64_COMPLEX_X87_CLASS:
8095 if (!in_return)
8096 return true;
8097 break;
8098 case X86_64_MEMORY_CLASS:
8099 gcc_unreachable ();
8102 return false;
8105 /* Construct container for the argument used by GCC interface. See
8106 FUNCTION_ARG for the detailed description. */
8108 static rtx
8109 construct_container (machine_mode mode, machine_mode orig_mode,
8110 const_tree type, int in_return, int nintregs, int nsseregs,
8111 const int *intreg, int sse_regno)
8113 /* The following variables hold the static issued_error state. */
8114 static bool issued_sse_arg_error;
8115 static bool issued_sse_ret_error;
8116 static bool issued_x87_ret_error;
8118 machine_mode tmpmode;
8119 int bytes =
8120 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8121 enum x86_64_reg_class regclass[MAX_CLASSES];
8122 int n;
8123 int i;
8124 int nexps = 0;
8125 int needed_sseregs, needed_intregs;
8126 rtx exp[MAX_CLASSES];
8127 rtx ret;
8129 n = classify_argument (mode, type, regclass, 0);
8130 if (!n)
8131 return NULL;
8132 if (examine_argument (mode, type, in_return, &needed_intregs,
8133 &needed_sseregs))
8134 return NULL;
8135 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8136 return NULL;
8138 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8139 some less clueful developer tries to use floating-point anyway. */
8140 if (needed_sseregs && !TARGET_SSE)
8142 if (in_return)
8144 if (!issued_sse_ret_error)
8146 error ("SSE register return with SSE disabled");
8147 issued_sse_ret_error = true;
8150 else if (!issued_sse_arg_error)
8152 error ("SSE register argument with SSE disabled");
8153 issued_sse_arg_error = true;
8155 return NULL;
8158 /* Likewise, error if the ABI requires us to return values in the
8159 x87 registers and the user specified -mno-80387. */
8160 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8161 for (i = 0; i < n; i++)
8162 if (regclass[i] == X86_64_X87_CLASS
8163 || regclass[i] == X86_64_X87UP_CLASS
8164 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8166 if (!issued_x87_ret_error)
8168 error ("x87 register return with x87 disabled");
8169 issued_x87_ret_error = true;
8171 return NULL;
8174 /* First construct the simple cases. Avoid SCmode, since we want to use
8175 a single register to pass this type. */
8176 if (n == 1 && mode != SCmode)
8177 switch (regclass[0])
8179 case X86_64_INTEGER_CLASS:
8180 case X86_64_INTEGERSI_CLASS:
8181 return gen_rtx_REG (mode, intreg[0]);
8182 case X86_64_SSE_CLASS:
8183 case X86_64_SSESF_CLASS:
8184 case X86_64_SSEDF_CLASS:
8185 if (mode != BLKmode)
8186 return gen_reg_or_parallel (mode, orig_mode,
8187 SSE_REGNO (sse_regno));
8188 break;
8189 case X86_64_X87_CLASS:
8190 case X86_64_COMPLEX_X87_CLASS:
8191 return gen_rtx_REG (mode, FIRST_STACK_REG);
8192 case X86_64_NO_CLASS:
8193 /* Zero sized array, struct or class. */
8194 return NULL;
8195 default:
8196 gcc_unreachable ();
8198 if (n == 2
8199 && regclass[0] == X86_64_SSE_CLASS
8200 && regclass[1] == X86_64_SSEUP_CLASS
8201 && mode != BLKmode)
8202 return gen_reg_or_parallel (mode, orig_mode,
8203 SSE_REGNO (sse_regno));
8204 if (n == 4
8205 && regclass[0] == X86_64_SSE_CLASS
8206 && regclass[1] == X86_64_SSEUP_CLASS
8207 && regclass[2] == X86_64_SSEUP_CLASS
8208 && regclass[3] == X86_64_SSEUP_CLASS
8209 && mode != BLKmode)
8210 return gen_reg_or_parallel (mode, orig_mode,
8211 SSE_REGNO (sse_regno));
8212 if (n == 8
8213 && regclass[0] == X86_64_SSE_CLASS
8214 && regclass[1] == X86_64_SSEUP_CLASS
8215 && regclass[2] == X86_64_SSEUP_CLASS
8216 && regclass[3] == X86_64_SSEUP_CLASS
8217 && regclass[4] == X86_64_SSEUP_CLASS
8218 && regclass[5] == X86_64_SSEUP_CLASS
8219 && regclass[6] == X86_64_SSEUP_CLASS
8220 && regclass[7] == X86_64_SSEUP_CLASS
8221 && mode != BLKmode)
8222 return gen_reg_or_parallel (mode, orig_mode,
8223 SSE_REGNO (sse_regno));
8224 if (n == 2
8225 && regclass[0] == X86_64_X87_CLASS
8226 && regclass[1] == X86_64_X87UP_CLASS)
8227 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8229 if (n == 2
8230 && regclass[0] == X86_64_INTEGER_CLASS
8231 && regclass[1] == X86_64_INTEGER_CLASS
8232 && (mode == CDImode || mode == TImode)
8233 && intreg[0] + 1 == intreg[1])
8234 return gen_rtx_REG (mode, intreg[0]);
8236 /* Otherwise figure out the entries of the PARALLEL. */
8237 for (i = 0; i < n; i++)
8239 int pos;
8241 switch (regclass[i])
8243 case X86_64_NO_CLASS:
8244 break;
8245 case X86_64_INTEGER_CLASS:
8246 case X86_64_INTEGERSI_CLASS:
8247 /* Merge TImodes on aligned occasions here too. */
8248 if (i * 8 + 8 > bytes)
8250 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8251 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8252 /* We've requested 24 bytes for which we
8253 don't have a mode. Use DImode. */
8254 tmpmode = DImode;
8256 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8257 tmpmode = SImode;
8258 else
8259 tmpmode = DImode;
8260 exp [nexps++]
8261 = gen_rtx_EXPR_LIST (VOIDmode,
8262 gen_rtx_REG (tmpmode, *intreg),
8263 GEN_INT (i*8));
8264 intreg++;
8265 break;
8266 case X86_64_SSESF_CLASS:
8267 exp [nexps++]
8268 = gen_rtx_EXPR_LIST (VOIDmode,
8269 gen_rtx_REG (SFmode,
8270 SSE_REGNO (sse_regno)),
8271 GEN_INT (i*8));
8272 sse_regno++;
8273 break;
8274 case X86_64_SSEDF_CLASS:
8275 exp [nexps++]
8276 = gen_rtx_EXPR_LIST (VOIDmode,
8277 gen_rtx_REG (DFmode,
8278 SSE_REGNO (sse_regno)),
8279 GEN_INT (i*8));
8280 sse_regno++;
8281 break;
8282 case X86_64_SSE_CLASS:
8283 pos = i;
8284 switch (n)
8286 case 1:
8287 tmpmode = DImode;
8288 break;
8289 case 2:
8290 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8292 tmpmode = TImode;
8293 i++;
8295 else
8296 tmpmode = DImode;
8297 break;
8298 case 4:
8299 gcc_assert (i == 0
8300 && regclass[1] == X86_64_SSEUP_CLASS
8301 && regclass[2] == X86_64_SSEUP_CLASS
8302 && regclass[3] == X86_64_SSEUP_CLASS);
8303 tmpmode = OImode;
8304 i += 3;
8305 break;
8306 case 8:
8307 gcc_assert (i == 0
8308 && regclass[1] == X86_64_SSEUP_CLASS
8309 && regclass[2] == X86_64_SSEUP_CLASS
8310 && regclass[3] == X86_64_SSEUP_CLASS
8311 && regclass[4] == X86_64_SSEUP_CLASS
8312 && regclass[5] == X86_64_SSEUP_CLASS
8313 && regclass[6] == X86_64_SSEUP_CLASS
8314 && regclass[7] == X86_64_SSEUP_CLASS);
8315 tmpmode = XImode;
8316 i += 7;
8317 break;
8318 default:
8319 gcc_unreachable ();
8321 exp [nexps++]
8322 = gen_rtx_EXPR_LIST (VOIDmode,
8323 gen_rtx_REG (tmpmode,
8324 SSE_REGNO (sse_regno)),
8325 GEN_INT (pos*8));
8326 sse_regno++;
8327 break;
8328 default:
8329 gcc_unreachable ();
8333 /* Empty aligned struct, union or class. */
8334 if (nexps == 0)
8335 return NULL;
8337 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8338 for (i = 0; i < nexps; i++)
8339 XVECEXP (ret, 0, i) = exp [i];
8340 return ret;
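/* Illustrative sketch: for the SysV example struct { double d; long l; }
   mentioned earlier, the classes are SSEDF and INTEGER, so the loop above
   builds a PARALLEL equivalent to

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   when the struct is the first argument: the first eightbyte lands in an
   SSE register at offset 0 and the second in a general register at
   offset 8 (the return-value path uses %rax instead of %rdi).  */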
8343 /* Update the data in CUM to advance over an argument of mode MODE
8344 and data type TYPE. (TYPE is null for libcalls where that information
8345 may not be available.)
8347 Return the number of integer registers advanced over. */
8349 static int
8350 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8351 const_tree type, HOST_WIDE_INT bytes,
8352 HOST_WIDE_INT words)
8354 int res = 0;
8355 bool error_p = false;
8357 if (TARGET_IAMCU)
8359 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8360 bytes in registers. */
8361 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8362 goto pass_in_reg;
8363 return res;
8366 switch (mode)
8368 default:
8369 break;
8371 case E_BLKmode:
8372 if (bytes < 0)
8373 break;
8374 /* FALLTHRU */
8376 case E_DImode:
8377 case E_SImode:
8378 case E_HImode:
8379 case E_QImode:
8380 pass_in_reg:
8381 cum->words += words;
8382 cum->nregs -= words;
8383 cum->regno += words;
8384 if (cum->nregs >= 0)
8385 res = words;
8386 if (cum->nregs <= 0)
8388 cum->nregs = 0;
8389 cfun->machine->arg_reg_available = false;
8390 cum->regno = 0;
8392 break;
8394 case E_OImode:
8395 /* OImode shouldn't be used directly. */
8396 gcc_unreachable ();
8398 case E_DFmode:
8399 if (cum->float_in_sse == -1)
8400 error_p = true;
8401 if (cum->float_in_sse < 2)
8402 break;
8403 /* FALLTHRU */
8404 case E_SFmode:
8405 if (cum->float_in_sse == -1)
8406 error_p = true;
8407 if (cum->float_in_sse < 1)
8408 break;
8409 /* FALLTHRU */
8411 case E_V8SFmode:
8412 case E_V8SImode:
8413 case E_V64QImode:
8414 case E_V32HImode:
8415 case E_V16SImode:
8416 case E_V8DImode:
8417 case E_V16SFmode:
8418 case E_V8DFmode:
8419 case E_V32QImode:
8420 case E_V16HImode:
8421 case E_V4DFmode:
8422 case E_V4DImode:
8423 case E_TImode:
8424 case E_V16QImode:
8425 case E_V8HImode:
8426 case E_V4SImode:
8427 case E_V2DImode:
8428 case E_V4SFmode:
8429 case E_V2DFmode:
8430 if (!type || !AGGREGATE_TYPE_P (type))
8432 cum->sse_words += words;
8433 cum->sse_nregs -= 1;
8434 cum->sse_regno += 1;
8435 if (cum->sse_nregs <= 0)
8437 cum->sse_nregs = 0;
8438 cum->sse_regno = 0;
8441 break;
8443 case E_V8QImode:
8444 case E_V4HImode:
8445 case E_V2SImode:
8446 case E_V2SFmode:
8447 case E_V1TImode:
8448 case E_V1DImode:
8449 if (!type || !AGGREGATE_TYPE_P (type))
8451 cum->mmx_words += words;
8452 cum->mmx_nregs -= 1;
8453 cum->mmx_regno += 1;
8454 if (cum->mmx_nregs <= 0)
8456 cum->mmx_nregs = 0;
8457 cum->mmx_regno = 0;
8460 break;
8462 if (error_p)
8464 cum->float_in_sse = 0;
8465 error ("calling %qD with SSE calling convention without "
8466 "SSE/SSE2 enabled", cum->decl);
8467 sorry ("this is a GCC bug that can be worked around by adding "
8468 "attribute used to function called");
8471 return res;
8474 static int
8475 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8476 const_tree type, HOST_WIDE_INT words, bool named)
8478 int int_nregs, sse_nregs;
8480 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8481 if (!named && (VALID_AVX512F_REG_MODE (mode)
8482 || VALID_AVX256_REG_MODE (mode)))
8483 return 0;
8485 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8486 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8488 cum->nregs -= int_nregs;
8489 cum->sse_nregs -= sse_nregs;
8490 cum->regno += int_nregs;
8491 cum->sse_regno += sse_nregs;
8492 return int_nregs;
8494 else
8496 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8497 cum->words = ROUND_UP (cum->words, align);
8498 cum->words += words;
8499 return 0;
8503 static int
8504 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8505 HOST_WIDE_INT words)
8507 /* Otherwise, this should be passed indirect. */
8508 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8510 cum->words += words;
8511 if (cum->nregs > 0)
8513 cum->nregs -= 1;
8514 cum->regno += 1;
8515 return 1;
8517 return 0;
8520 /* Update the data in CUM to advance over an argument of mode MODE and
8521 data type TYPE. (TYPE is null for libcalls where that information
8522 may not be available.) */
8524 static void
8525 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8526 const_tree type, bool named)
8528 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8529 HOST_WIDE_INT bytes, words;
8530 int nregs;
8532 /* The argument of interrupt handler is a special case and is
8533 handled in ix86_function_arg. */
8534 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8535 return;
8537 if (mode == BLKmode)
8538 bytes = int_size_in_bytes (type);
8539 else
8540 bytes = GET_MODE_SIZE (mode);
8541 words = CEIL (bytes, UNITS_PER_WORD);
8543 if (type)
8544 mode = type_natural_mode (type, NULL, false);
8546 if ((type && POINTER_BOUNDS_TYPE_P (type))
8547 || POINTER_BOUNDS_MODE_P (mode))
8549 /* If we pass bounds in the BT, then just update the remaining bounds count. */
8550 if (cum->bnds_in_bt)
8552 cum->bnds_in_bt--;
8553 return;
8556 /* Update the remaining number of bounds to force. */
8557 if (cum->force_bnd_pass)
8558 cum->force_bnd_pass--;
8560 cum->bnd_regno++;
8562 return;
8565 /* The first arg not going to Bounds Tables resets this counter. */
8566 cum->bnds_in_bt = 0;
8567 /* For unnamed args we always pass bounds to avoid bounds mess when
8568 passed and received types do not match. If bounds do not follow
8569 unnamed arg, still pretend required number of bounds were passed. */
8570 if (cum->force_bnd_pass)
8572 cum->bnd_regno += cum->force_bnd_pass;
8573 cum->force_bnd_pass = 0;
8576 if (TARGET_64BIT)
8578 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8580 if (call_abi == MS_ABI)
8581 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8582 else
8583 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8585 else
8586 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8588 /* For stdarg we expect bounds to be passed for each value passed
8589 in register. */
8590 if (cum->stdarg)
8591 cum->force_bnd_pass = nregs;
8592 /* For pointers passed in memory we expect bounds passed in Bounds
8593 Table. */
8594 if (!nregs)
8596 /* Track if there are outgoing arguments on stack. */
8597 if (cum->caller)
8598 cfun->machine->outgoing_args_on_stack = true;
8600 cum->bnds_in_bt = chkp_type_bounds_count (type);
8604 /* Define where to put the arguments to a function.
8605 Value is zero to push the argument on the stack,
8606 or a hard register in which to store the argument.
8608 MODE is the argument's machine mode.
8609 TYPE is the data type of the argument (as a tree).
8610 This is null for libcalls where that information may
8611 not be available.
8612 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8613 the preceding args and about the function being called.
8614 NAMED is nonzero if this argument is a named parameter
8615 (otherwise it is an extra parameter matching an ellipsis). */
8617 static rtx
8618 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8619 machine_mode orig_mode, const_tree type,
8620 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8622 bool error_p = false;
8624 /* Avoid the AL settings for the Unix64 ABI. */
8625 if (mode == VOIDmode)
8626 return constm1_rtx;
8628 if (TARGET_IAMCU)
8630 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8631 bytes in registers. */
8632 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8633 goto pass_in_reg;
8634 return NULL_RTX;
8637 switch (mode)
8639 default:
8640 break;
8642 case E_BLKmode:
8643 if (bytes < 0)
8644 break;
8645 /* FALLTHRU */
8646 case E_DImode:
8647 case E_SImode:
8648 case E_HImode:
8649 case E_QImode:
8650 pass_in_reg:
8651 if (words <= cum->nregs)
8653 int regno = cum->regno;
8655 /* Fastcall allocates the first two DWORD (SImode) or
8656 smaller arguments to ECX and EDX if the argument isn't an
8657 aggregate type. */
8658 if (cum->fastcall)
8660 if (mode == BLKmode
8661 || mode == DImode
8662 || (type && AGGREGATE_TYPE_P (type)))
8663 break;
8665 /* ECX, not EAX, is the first allocated register. */
8666 if (regno == AX_REG)
8667 regno = CX_REG;
8669 return gen_rtx_REG (mode, regno);
8671 break;
8673 case E_DFmode:
8674 if (cum->float_in_sse == -1)
8675 error_p = true;
8676 if (cum->float_in_sse < 2)
8677 break;
8678 /* FALLTHRU */
8679 case E_SFmode:
8680 if (cum->float_in_sse == -1)
8681 error_p = true;
8682 if (cum->float_in_sse < 1)
8683 break;
8684 /* FALLTHRU */
8685 case E_TImode:
8686 /* In 32bit, we pass TImode in xmm registers. */
8687 case E_V16QImode:
8688 case E_V8HImode:
8689 case E_V4SImode:
8690 case E_V2DImode:
8691 case E_V4SFmode:
8692 case E_V2DFmode:
8693 if (!type || !AGGREGATE_TYPE_P (type))
8695 if (cum->sse_nregs)
8696 return gen_reg_or_parallel (mode, orig_mode,
8697 cum->sse_regno + FIRST_SSE_REG);
8699 break;
8701 case E_OImode:
8702 case E_XImode:
8703 /* OImode and XImode shouldn't be used directly. */
8704 gcc_unreachable ();
8706 case E_V64QImode:
8707 case E_V32HImode:
8708 case E_V16SImode:
8709 case E_V8DImode:
8710 case E_V16SFmode:
8711 case E_V8DFmode:
8712 case E_V8SFmode:
8713 case E_V8SImode:
8714 case E_V32QImode:
8715 case E_V16HImode:
8716 case E_V4DFmode:
8717 case E_V4DImode:
8718 if (!type || !AGGREGATE_TYPE_P (type))
8720 if (cum->sse_nregs)
8721 return gen_reg_or_parallel (mode, orig_mode,
8722 cum->sse_regno + FIRST_SSE_REG);
8724 break;
8726 case E_V8QImode:
8727 case E_V4HImode:
8728 case E_V2SImode:
8729 case E_V2SFmode:
8730 case E_V1TImode:
8731 case E_V1DImode:
8732 if (!type || !AGGREGATE_TYPE_P (type))
8734 if (cum->mmx_nregs)
8735 return gen_reg_or_parallel (mode, orig_mode,
8736 cum->mmx_regno + FIRST_MMX_REG);
8738 break;
8740 if (error_p)
8742 cum->float_in_sse = 0;
8743 error ("calling %qD with SSE calling convention without "
8744 "SSE/SSE2 enabled", cum->decl);
8745 sorry ("this is a GCC bug that can be worked around by adding "
8746 "attribute used to function called");
8749 return NULL_RTX;
8752 static rtx
8753 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8754 machine_mode orig_mode, const_tree type, bool named)
8756 /* Handle a hidden AL argument containing number of registers
8757 for varargs x86-64 functions. */
8758 if (mode == VOIDmode)
8759 return GEN_INT (cum->maybe_vaarg
8760 ? (cum->sse_nregs < 0
8761 ? X86_64_SSE_REGPARM_MAX
8762 : cum->sse_regno)
8763 : -1);
8765 switch (mode)
8767 default:
8768 break;
8770 case E_V8SFmode:
8771 case E_V8SImode:
8772 case E_V32QImode:
8773 case E_V16HImode:
8774 case E_V4DFmode:
8775 case E_V4DImode:
8776 case E_V16SFmode:
8777 case E_V16SImode:
8778 case E_V64QImode:
8779 case E_V32HImode:
8780 case E_V8DFmode:
8781 case E_V8DImode:
8782 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8783 if (!named)
8784 return NULL;
8785 break;
8788 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8789 cum->sse_nregs,
8790 &x86_64_int_parameter_registers [cum->regno],
8791 cum->sse_regno);
8794 static rtx
8795 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8796 machine_mode orig_mode, bool named,
8797 HOST_WIDE_INT bytes)
8799 unsigned int regno;
8801 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8802 We use a value of -2 to specify that the current function call is MS_ABI. */
8803 if (mode == VOIDmode)
8804 return GEN_INT (-2);
8806 /* If we've run out of registers, it goes on the stack. */
8807 if (cum->nregs == 0)
8808 return NULL_RTX;
8810 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8812 /* Only floating point modes are passed in anything but integer regs. */
8813 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8815 if (named)
8816 regno = cum->regno + FIRST_SSE_REG;
8817 else
8819 rtx t1, t2;
8821 /* Unnamed floating parameters are passed in both the
8822 SSE and integer registers. */
8823 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8824 t2 = gen_rtx_REG (mode, regno);
8825 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8826 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8827 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8830 /* Handle aggregate types passed in a register. */
8831 if (orig_mode == BLKmode)
8833 if (bytes > 0 && bytes <= 8)
8834 mode = (bytes > 4 ? DImode : SImode);
8835 if (mode == BLKmode)
8836 mode = DImode;
8839 return gen_reg_or_parallel (mode, orig_mode, regno);
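/* Illustrative sketch: for a variadic MS-ABI callee such as

     void f (const char *fmt, ...);

   a double passed as the second argument is unnamed, so the PARALLEL built
   above places it in both %xmm1 and %rdx; the caller duplicates the value
   into the integer register because the callee may spill all register
   arguments to the home area through the integer registers.  */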
8842 /* Return where to put the arguments to a function.
8843 Return zero to push the argument on the stack, or a hard register in
8844 which to store the argument.
8845 MODE is the argument's machine mode. TYPE is the data type of the
8846 argument. It is null for libcalls where that information may not be
8847 available. CUM gives information about the preceding args and about
8848 the function being called. NAMED is nonzero if this argument is a
8849 named parameter (otherwise it is an extra parameter matching an
8850 ellipsis). */
8852 static rtx
8853 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8854 const_tree type, bool named)
8856 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8857 machine_mode mode = omode;
8858 HOST_WIDE_INT bytes, words;
8859 rtx arg;
8861 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8863 gcc_assert (type != NULL_TREE);
8864 if (POINTER_TYPE_P (type))
8866 /* This is the pointer argument. */
8867 gcc_assert (TYPE_MODE (type) == Pmode);
8868 /* It is at -WORD(AP) in the current frame in interrupt and
8869 exception handlers. */
8870 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8872 else
8874 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8875 && TREE_CODE (type) == INTEGER_TYPE
8876 && TYPE_MODE (type) == word_mode);
8877 /* The error code is the word-mode integer argument at
8878 -2 * WORD(AP) in the current frame of the exception
8879 handler. */
8880 arg = gen_rtx_MEM (word_mode,
8881 plus_constant (Pmode,
8882 arg_pointer_rtx,
8883 -2 * UNITS_PER_WORD));
8885 return arg;
8888 /* All pointer bounds arguments are handled separately here. */
8889 if ((type && POINTER_BOUNDS_TYPE_P (type))
8890 || POINTER_BOUNDS_MODE_P (mode))
8892 /* Return NULL if bounds are forced to go in Bounds Table. */
8893 if (cum->bnds_in_bt)
8894 arg = NULL;
8895 /* Return the next available bound reg if any. */
8896 else if (cum->bnd_regno <= LAST_BND_REG)
8897 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8898 /* Return the next special slot number otherwise. */
8899 else
8900 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8902 return arg;
8905 if (mode == BLKmode)
8906 bytes = int_size_in_bytes (type);
8907 else
8908 bytes = GET_MODE_SIZE (mode);
8909 words = CEIL (bytes, UNITS_PER_WORD);
8911 /* To simplify the code below, represent vector types with a vector mode
8912 even if MMX/SSE are not active. */
8913 if (type && TREE_CODE (type) == VECTOR_TYPE)
8914 mode = type_natural_mode (type, cum, false);
8916 if (TARGET_64BIT)
8918 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8920 if (call_abi == MS_ABI)
8921 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8922 else
8923 arg = function_arg_64 (cum, mode, omode, type, named);
8925 else
8926 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8928 /* Track if there are outgoing arguments on stack. */
8929 if (arg == NULL_RTX && cum->caller)
8930 cfun->machine->outgoing_args_on_stack = true;
8932 return arg;
8935 /* A C expression that indicates when an argument must be passed by
8936 reference. If nonzero for an argument, a copy of that argument is
8937 made in memory and a pointer to the argument is passed instead of
8938 the argument itself. The pointer is passed in whatever way is
8939 appropriate for passing a pointer to that type. */
8941 static bool
8942 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8943 const_tree type, bool)
8945 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8947 /* Bounds are never passed by reference. */
8948 if ((type && POINTER_BOUNDS_TYPE_P (type))
8949 || POINTER_BOUNDS_MODE_P (mode))
8950 return false;
8952 if (TARGET_64BIT)
8954 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8956 /* See Windows x64 Software Convention. */
8957 if (call_abi == MS_ABI)
8959 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8961 if (type)
8963 /* Arrays are passed by reference. */
8964 if (TREE_CODE (type) == ARRAY_TYPE)
8965 return true;
8967 if (RECORD_OR_UNION_TYPE_P (type))
8969 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8970 are passed by reference. */
8971 msize = int_size_in_bytes (type);
8975 /* __m128 is passed by reference. */
8976 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8978 else if (type && int_size_in_bytes (type) == -1)
8979 return true;
8982 return false;
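/* Illustrative sketch of the MS-ABI rule applied above (examples assumed):

     struct s8  { char c[8]; };                                 passed by value
     struct s12 { char c[12]; };                                by reference
     typedef float v4sf __attribute__ ((vector_size (16)));     by reference

   Only objects of exactly 1, 2, 4 or 8 bytes travel by value; arrays, other
   aggregate sizes and 16-byte vectors such as __m128 are replaced by a
   pointer to a caller-made copy.  */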
8985 /* Return true when TYPE should be 128bit aligned for 32bit argument
8986 passing ABI. XXX: This function is obsolete and is only used for
8987 checking psABI compatibility with previous versions of GCC. */
8989 static bool
8990 ix86_compat_aligned_value_p (const_tree type)
8992 machine_mode mode = TYPE_MODE (type);
8993 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8994 || mode == TDmode
8995 || mode == TFmode
8996 || mode == TCmode)
8997 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8998 return true;
8999 if (TYPE_ALIGN (type) < 128)
9000 return false;
9002 if (AGGREGATE_TYPE_P (type))
9004 /* Walk the aggregates recursively. */
9005 switch (TREE_CODE (type))
9007 case RECORD_TYPE:
9008 case UNION_TYPE:
9009 case QUAL_UNION_TYPE:
9011 tree field;
9013 /* Walk all the structure fields. */
9014 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9016 if (TREE_CODE (field) == FIELD_DECL
9017 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9018 return true;
9020 break;
9023 case ARRAY_TYPE:
9024 /* Just for use if some language passes arrays by value. */
9025 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9026 return true;
9027 break;
9029 default:
9030 gcc_unreachable ();
9033 return false;
9036 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9037 XXX: This function is obsolete and is only used for checking psABI
9038 compatibility with previous versions of GCC. */
9040 static unsigned int
9041 ix86_compat_function_arg_boundary (machine_mode mode,
9042 const_tree type, unsigned int align)
9044 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9045 natural boundaries. */
9046 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9048 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9049 make an exception for SSE modes since these require 128bit
9050 alignment.
9052 The handling here differs from field_alignment. ICC aligns MMX
9053 arguments to 4 byte boundaries, while structure fields are aligned
9054 to 8 byte boundaries. */
9055 if (!type)
9057 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9058 align = PARM_BOUNDARY;
9060 else
9062 if (!ix86_compat_aligned_value_p (type))
9063 align = PARM_BOUNDARY;
9066 if (align > BIGGEST_ALIGNMENT)
9067 align = BIGGEST_ALIGNMENT;
9068 return align;
9071 /* Return true when TYPE should be 128bit aligned for 32bit argument
9072 passing ABI. */
9074 static bool
9075 ix86_contains_aligned_value_p (const_tree type)
9077 machine_mode mode = TYPE_MODE (type);
9079 if (mode == XFmode || mode == XCmode)
9080 return false;
9082 if (TYPE_ALIGN (type) < 128)
9083 return false;
9085 if (AGGREGATE_TYPE_P (type))
9087 /* Walk the aggregates recursively. */
9088 switch (TREE_CODE (type))
9090 case RECORD_TYPE:
9091 case UNION_TYPE:
9092 case QUAL_UNION_TYPE:
9094 tree field;
9096 /* Walk all the structure fields. */
9097 for (field = TYPE_FIELDS (type);
9098 field;
9099 field = DECL_CHAIN (field))
9101 if (TREE_CODE (field) == FIELD_DECL
9102 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9103 return true;
9105 break;
9108 case ARRAY_TYPE:
9109 /* Just for use if some language passes arrays by value. */
9110 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9111 return true;
9112 break;
9114 default:
9115 gcc_unreachable ();
9118 else
9119 return TYPE_ALIGN (type) >= 128;
9121 return false;
9124 /* Gives the alignment boundary, in bits, of an argument with the
9125 specified mode and type. */
9127 static unsigned int
9128 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9130 unsigned int align;
9131 if (type)
9133 /* Since the main variant type is used for the call, convert TYPE
9134 to its main variant. */
9135 type = TYPE_MAIN_VARIANT (type);
9136 align = TYPE_ALIGN (type);
9137 if (TYPE_EMPTY_P (type))
9138 return PARM_BOUNDARY;
9140 else
9141 align = GET_MODE_ALIGNMENT (mode);
9142 if (align < PARM_BOUNDARY)
9143 align = PARM_BOUNDARY;
9144 else
9146 static bool warned;
9147 unsigned int saved_align = align;
9149 if (!TARGET_64BIT)
9151 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9152 if (!type)
9154 if (mode == XFmode || mode == XCmode)
9155 align = PARM_BOUNDARY;
9157 else if (!ix86_contains_aligned_value_p (type))
9158 align = PARM_BOUNDARY;
9160 if (align < 128)
9161 align = PARM_BOUNDARY;
9164 if (warn_psabi
9165 && !warned
9166 && align != ix86_compat_function_arg_boundary (mode, type,
9167 saved_align))
9169 warned = true;
9170 inform (input_location,
9171 "The ABI for passing parameters with %d-byte"
9172 " alignment has changed in GCC 4.6",
9173 align / BITS_PER_UNIT);
9177 return align;
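/* Illustrative sketch: in 32-bit code an argument of a user-aligned type
   such as

     typedef int aligned_int __attribute__ ((aligned (16)));

   is given 128-bit alignment by the logic above, while plain scalars stay
   at PARM_BOUNDARY; whenever the result differs from the pre-4.6 value
   computed by ix86_compat_function_arg_boundary, the -Wpsabi note above is
   emitted once per compilation.  */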
9180 /* Return true if N is a possible register number of function value. */
9182 static bool
9183 ix86_function_value_regno_p (const unsigned int regno)
9185 switch (regno)
9187 case AX_REG:
9188 return true;
9189 case DX_REG:
9190 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9191 case DI_REG:
9192 case SI_REG:
9193 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9195 case BND0_REG:
9196 case BND1_REG:
9197 return chkp_function_instrumented_p (current_function_decl);
9199 /* Complex values are returned in %st(0)/%st(1) pair. */
9200 case ST0_REG:
9201 case ST1_REG:
9202 /* TODO: The function should depend on current function ABI but
9203 builtins.c would need updating then. Therefore we use the
9204 default ABI. */
9205 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9206 return false;
9207 return TARGET_FLOAT_RETURNS_IN_80387;
9209 /* Complex values are returned in %xmm0/%xmm1 pair. */
9210 case XMM0_REG:
9211 case XMM1_REG:
9212 return TARGET_SSE;
9214 case MM0_REG:
9215 if (TARGET_MACHO || TARGET_64BIT)
9216 return false;
9217 return TARGET_MMX;
9220 return false;
9223 /* Define how to find the value returned by a function.
9224 VALTYPE is the data type of the value (as a tree).
9225 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9226 otherwise, FUNC is 0. */
9228 static rtx
9229 function_value_32 (machine_mode orig_mode, machine_mode mode,
9230 const_tree fntype, const_tree fn)
9232 unsigned int regno;
9234 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9235 we normally prevent this case when mmx is not available. However
9236 some ABIs may require the result to be returned like DImode. */
9237 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9238 regno = FIRST_MMX_REG;
9240 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9241 we prevent this case when sse is not available. However some ABIs
9242 may require the result to be returned like integer TImode. */
9243 else if (mode == TImode
9244 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9245 regno = FIRST_SSE_REG;
9247 /* 32-byte vector modes in %ymm0. */
9248 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9249 regno = FIRST_SSE_REG;
9251 /* 64-byte vector modes in %zmm0. */
9252 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9253 regno = FIRST_SSE_REG;
9255 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9256 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9257 regno = FIRST_FLOAT_REG;
9258 else
9259 /* Most things go in %eax. */
9260 regno = AX_REG;
9262 /* Override FP return register with %xmm0 for local functions when
9263 SSE math is enabled or for functions with sseregparm attribute. */
9264 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9266 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9267 if (sse_level == -1)
9269 error ("calling %qD with SSE calling convention without "
9270 "SSE/SSE2 enabled", fn);
9271 sorry ("this is a GCC bug that can be worked around by adding "
9272 "attribute used to the called function");
9274 else if ((sse_level >= 1 && mode == SFmode)
9275 || (sse_level == 2 && mode == DFmode))
9276 regno = FIRST_SSE_REG;
9279 /* OImode shouldn't be used directly. */
9280 gcc_assert (mode != OImode);
9282 return gen_rtx_REG (orig_mode, regno);
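/* A rough summary of the selection above (the conditions are simplified):
   most scalars come back in %eax, x87 floats in %st(0), SFmode/DFmode in
   %xmm0 when the sseregparm convention applies, 8-byte vectors in %mm0,
   and 16/32/64-byte vectors in %xmm0/%ymm0/%zmm0.  */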
9285 static rtx
9286 function_value_64 (machine_mode orig_mode, machine_mode mode,
9287 const_tree valtype)
9289 rtx ret;
9291 /* Handle libcalls, which don't provide a type node. */
9292 if (valtype == NULL)
9294 unsigned int regno;
9296 switch (mode)
9298 case E_SFmode:
9299 case E_SCmode:
9300 case E_DFmode:
9301 case E_DCmode:
9302 case E_TFmode:
9303 case E_SDmode:
9304 case E_DDmode:
9305 case E_TDmode:
9306 regno = FIRST_SSE_REG;
9307 break;
9308 case E_XFmode:
9309 case E_XCmode:
9310 regno = FIRST_FLOAT_REG;
9311 break;
9312 case E_TCmode:
9313 return NULL;
9314 default:
9315 regno = AX_REG;
9318 return gen_rtx_REG (mode, regno);
9320 else if (POINTER_TYPE_P (valtype))
9322 /* Pointers are always returned in word_mode. */
9323 mode = word_mode;
9326 ret = construct_container (mode, orig_mode, valtype, 1,
9327 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9328 x86_64_int_return_registers, 0);
9330 /* For zero-sized structures, construct_container returns NULL, but we
9331 need to keep the rest of the compiler happy by returning a meaningful value. */
9332 if (!ret)
9333 ret = gen_rtx_REG (orig_mode, AX_REG);
9335 return ret;
9338 static rtx
9339 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9340 const_tree valtype)
9342 unsigned int regno = AX_REG;
9344 if (TARGET_SSE)
9346 switch (GET_MODE_SIZE (mode))
9348 case 16:
9349 if (valtype != NULL_TREE
9350 && !VECTOR_INTEGER_TYPE_P (valtype)
9352 && !INTEGRAL_TYPE_P (valtype)
9353 && !VECTOR_FLOAT_TYPE_P (valtype))
9354 break;
9355 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9356 && !COMPLEX_MODE_P (mode))
9357 regno = FIRST_SSE_REG;
9358 break;
9359 case 8:
9360 case 4:
9361 if (mode == SFmode || mode == DFmode)
9362 regno = FIRST_SSE_REG;
9363 break;
9364 default:
9365 break;
9368 return gen_rtx_REG (orig_mode, regno);
9371 static rtx
9372 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9373 machine_mode orig_mode, machine_mode mode)
9375 const_tree fn, fntype;
9377 fn = NULL_TREE;
9378 if (fntype_or_decl && DECL_P (fntype_or_decl))
9379 fn = fntype_or_decl;
9380 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9382 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9383 || POINTER_BOUNDS_MODE_P (mode))
9384 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9385 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9386 return function_value_ms_64 (orig_mode, mode, valtype);
9387 else if (TARGET_64BIT)
9388 return function_value_64 (orig_mode, mode, valtype);
9389 else
9390 return function_value_32 (orig_mode, mode, fntype, fn);
9393 static rtx
9394 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9396 machine_mode mode, orig_mode;
9398 orig_mode = TYPE_MODE (valtype);
9399 mode = type_natural_mode (valtype, NULL, true);
9400 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9403 /* Return an RTX representing a place where a function returns
9404 or receives pointer bounds or NULL if no bounds are returned.
9406 VALTYPE is a data type of a value returned by the function.
9408 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9409 or FUNCTION_TYPE of the function.
9411 If OUTGOING is false, return a place in which the caller will
9412 see the return value. Otherwise, return a place where a
9413 function returns a value. */
9415 static rtx
9416 ix86_function_value_bounds (const_tree valtype,
9417 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9418 bool outgoing ATTRIBUTE_UNUSED)
9420 rtx res = NULL_RTX;
9422 if (BOUNDED_TYPE_P (valtype))
9423 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9424 else if (chkp_type_has_pointer (valtype))
9426 bitmap slots;
9427 rtx bounds[2];
9428 bitmap_iterator bi;
9429 unsigned i, bnd_no = 0;
9431 bitmap_obstack_initialize (NULL);
9432 slots = BITMAP_ALLOC (NULL);
9433 chkp_find_bound_slots (valtype, slots);
9435 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9437 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9438 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9439 gcc_assert (bnd_no < 2);
9440 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9443 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9445 BITMAP_FREE (slots);
9446 bitmap_obstack_release (NULL);
9448 else
9449 res = NULL_RTX;
9451 return res;
9454 /* Pointer function arguments and return values are promoted to
9455 word_mode for normal functions. */
9457 static machine_mode
9458 ix86_promote_function_mode (const_tree type, machine_mode mode,
9459 int *punsignedp, const_tree fntype,
9460 int for_return)
9462 if (cfun->machine->func_type == TYPE_NORMAL
9463 && type != NULL_TREE
9464 && POINTER_TYPE_P (type))
9466 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9467 return word_mode;
9469 return default_promote_function_mode (type, mode, punsignedp, fntype,
9470 for_return);
9473 /* Return true if a structure, union or array with MODE containing FIELD
9474 should be accessed using BLKmode. */
9476 static bool
9477 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9479 /* Union with XFmode must be in BLKmode. */
9480 return (mode == XFmode
9481 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9482 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9486 ix86_libcall_value (machine_mode mode)
9488 return ix86_function_value_1 (NULL, NULL, mode, mode);
9491 /* Return true iff type is returned in memory. */
9493 static bool
9494 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9496 #ifdef SUBTARGET_RETURN_IN_MEMORY
9497 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9498 #else
9499 const machine_mode mode = type_natural_mode (type, NULL, true);
9500 HOST_WIDE_INT size;
9502 if (POINTER_BOUNDS_TYPE_P (type))
9503 return false;
9505 if (TARGET_64BIT)
9507 if (ix86_function_type_abi (fntype) == MS_ABI)
9509 size = int_size_in_bytes (type);
9511 /* __m128 is returned in xmm0. */
9512 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9513 || INTEGRAL_TYPE_P (type)
9514 || VECTOR_FLOAT_TYPE_P (type))
9515 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9516 && !COMPLEX_MODE_P (mode)
9517 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9518 return false;
9520 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
9521 return size != 1 && size != 2 && size != 4 && size != 8;
9523 else
9525 int needed_intregs, needed_sseregs;
9527 return examine_argument (mode, type, 1,
9528 &needed_intregs, &needed_sseregs);
9531 else
9533 size = int_size_in_bytes (type);
9535 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9536 bytes in registers. */
9537 if (TARGET_IAMCU)
9538 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9540 if (mode == BLKmode)
9541 return true;
9543 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9544 return false;
9546 if (VECTOR_MODE_P (mode) || mode == TImode)
9548 /* User-created vectors small enough to fit in EAX. */
9549 if (size < 8)
9550 return false;
9552 /* Unless the ABI prescribes otherwise,
9553 MMX/3dNow values are returned in MM0 if available. */
9555 if (size == 8)
9556 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9558 /* SSE values are returned in XMM0 if available. */
9559 if (size == 16)
9560 return !TARGET_SSE;
9562 /* AVX values are returned in YMM0 if available. */
9563 if (size == 32)
9564 return !TARGET_AVX;
9566 /* AVX512F values are returned in ZMM0 if available. */
9567 if (size == 64)
9568 return !TARGET_AVX512F;
9571 if (mode == XFmode)
9572 return false;
9574 if (size > 12)
9575 return true;
9577 /* OImode shouldn't be used directly. */
9578 gcc_assert (mode != OImode);
9580 return false;
9582 #endif
9586 /* Create the va_list data type. */
9588 static tree
9589 ix86_build_builtin_va_list_64 (void)
9591 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9593 record = lang_hooks.types.make_type (RECORD_TYPE);
9594 type_decl = build_decl (BUILTINS_LOCATION,
9595 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9597 f_gpr = build_decl (BUILTINS_LOCATION,
9598 FIELD_DECL, get_identifier ("gp_offset"),
9599 unsigned_type_node);
9600 f_fpr = build_decl (BUILTINS_LOCATION,
9601 FIELD_DECL, get_identifier ("fp_offset"),
9602 unsigned_type_node);
9603 f_ovf = build_decl (BUILTINS_LOCATION,
9604 FIELD_DECL, get_identifier ("overflow_arg_area"),
9605 ptr_type_node);
9606 f_sav = build_decl (BUILTINS_LOCATION,
9607 FIELD_DECL, get_identifier ("reg_save_area"),
9608 ptr_type_node);
9610 va_list_gpr_counter_field = f_gpr;
9611 va_list_fpr_counter_field = f_fpr;
9613 DECL_FIELD_CONTEXT (f_gpr) = record;
9614 DECL_FIELD_CONTEXT (f_fpr) = record;
9615 DECL_FIELD_CONTEXT (f_ovf) = record;
9616 DECL_FIELD_CONTEXT (f_sav) = record;
9618 TYPE_STUB_DECL (record) = type_decl;
9619 TYPE_NAME (record) = type_decl;
9620 TYPE_FIELDS (record) = f_gpr;
9621 DECL_CHAIN (f_gpr) = f_fpr;
9622 DECL_CHAIN (f_fpr) = f_ovf;
9623 DECL_CHAIN (f_ovf) = f_sav;
9625 layout_type (record);
9627 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9628 NULL_TREE, TYPE_ATTRIBUTES (record));
9630 /* The correct type is an array type of one element. */
9631 return build_array_type (record, build_index_type (size_zero_node));
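/* For reference, the record built above corresponds to the va_list type
   described by the SysV x86-64 psABI; in C it is roughly:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */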
9634 /* Set up the builtin va_list data type and, for 64-bit, the additional
9635 calling-convention-specific va_list data types. */
9637 static tree
9638 ix86_build_builtin_va_list (void)
9640 if (TARGET_64BIT)
9642 /* Initialize ABI specific va_list builtin types.
9644 In lto1, we can encounter two va_list types:
9645 - one as a result of the type-merge across TUs, and
9646 - the one constructed here.
9647 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9648 a type identity check in canonical_va_list_type based on
9649 TYPE_MAIN_VARIANT (which we used to have) will not work.
9650 Instead, we tag each va_list_type_node with its unique attribute, and
9651 look for the attribute in the type identity check in
9652 canonical_va_list_type.
9654 Tagging sysv_va_list_type_node directly with the attribute is
9655 problematic since it's an array of one record, which will degrade into a
9656 pointer to record when used as parameter (see build_va_arg comments for
9657 an example), dropping the attribute in the process. So we tag the
9658 record instead. */
9660 /* For SYSV_ABI we use an array of one record. */
9661 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9663 /* For MS_ABI we use plain pointer to argument area. */
9664 tree char_ptr_type = build_pointer_type (char_type_node);
9665 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9666 TYPE_ATTRIBUTES (char_ptr_type));
9667 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9669 return ((ix86_abi == MS_ABI)
9670 ? ms_va_list_type_node
9671 : sysv_va_list_type_node);
9673 else
9675 /* For i386 we use plain pointer to argument area. */
9676 return build_pointer_type (char_type_node);
9680 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9682 static void
9683 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9685 rtx save_area, mem;
9686 alias_set_type set;
9687 int i, max;
9689 /* GPR size of varargs save area. */
9690 if (cfun->va_list_gpr_size)
9691 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9692 else
9693 ix86_varargs_gpr_size = 0;
9695 /* FPR size of varargs save area. We don't need it if we don't pass
9696 anything in SSE registers. */
9697 if (TARGET_SSE && cfun->va_list_fpr_size)
9698 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9699 else
9700 ix86_varargs_fpr_size = 0;
9702 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9703 return;
9705 save_area = frame_pointer_rtx;
9706 set = get_varargs_alias_set ();
9708 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9709 if (max > X86_64_REGPARM_MAX)
9710 max = X86_64_REGPARM_MAX;
9712 for (i = cum->regno; i < max; i++)
9714 mem = gen_rtx_MEM (word_mode,
9715 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9716 MEM_NOTRAP_P (mem) = 1;
9717 set_mem_alias_set (mem, set);
9718 emit_move_insn (mem,
9719 gen_rtx_REG (word_mode,
9720 x86_64_int_parameter_registers[i]));
9723 if (ix86_varargs_fpr_size)
9725 machine_mode smode;
9726 rtx_code_label *label;
9727 rtx test;
9729 /* Now emit code to save SSE registers. The AX parameter contains the number
9730 of SSE parameter registers used to call this function, though all we
9731 actually check here is the zero/non-zero status. */
9733 label = gen_label_rtx ();
9734 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9735 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9736 label));
9738 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9739 we used movdqa (i.e. TImode) instead? Perhaps even better would
9740 be if we could determine the real mode of the data, via a hook
9741 into pass_stdarg. Ignore all that for now. */
9742 smode = V4SFmode;
9743 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9744 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9746 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9747 if (max > X86_64_SSE_REGPARM_MAX)
9748 max = X86_64_SSE_REGPARM_MAX;
9750 for (i = cum->sse_regno; i < max; ++i)
9752 mem = plus_constant (Pmode, save_area,
9753 i * 16 + ix86_varargs_gpr_size);
9754 mem = gen_rtx_MEM (smode, mem);
9755 MEM_NOTRAP_P (mem) = 1;
9756 set_mem_alias_set (mem, set);
9757 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9759 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9762 emit_label (label);
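/* Sketch of the register save area laid out above, assuming the usual
   X86_64_REGPARM_MAX of 6 and X86_64_SSE_REGPARM_MAX of 8 and that all
   registers are spilled:

     offset   0 ..  47:  %rdi, %rsi, %rdx, %rcx, %r8, %r9   (6 x 8 bytes)
     offset  48 .. 175:  %xmm0 .. %xmm7                      (8 x 16 bytes)  */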
9766 static void
9767 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9769 alias_set_type set = get_varargs_alias_set ();
9770 int i;
9772 /* Reset to zero, as there might have been a sysv va_arg used
9773 before. */
9774 ix86_varargs_gpr_size = 0;
9775 ix86_varargs_fpr_size = 0;
9777 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9779 rtx reg, mem;
9781 mem = gen_rtx_MEM (Pmode,
9782 plus_constant (Pmode, virtual_incoming_args_rtx,
9783 i * UNITS_PER_WORD));
9784 MEM_NOTRAP_P (mem) = 1;
9785 set_mem_alias_set (mem, set);
9787 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9788 emit_move_insn (mem, reg);
9792 static void
9793 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9794 tree type, int *, int no_rtl)
9796 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9797 CUMULATIVE_ARGS next_cum;
9798 tree fntype;
9800 /* This argument doesn't appear to be used anymore. Which is good,
9801 because the old code here didn't suppress rtl generation. */
9802 gcc_assert (!no_rtl);
9804 if (!TARGET_64BIT)
9805 return;
9807 fntype = TREE_TYPE (current_function_decl);
9809 /* For varargs, we do not want to skip the dummy va_dcl argument.
9810 For stdargs, we do want to skip the last named argument. */
9811 next_cum = *cum;
9812 if (stdarg_p (fntype))
9813 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9814 true);
9816 if (cum->call_abi == MS_ABI)
9817 setup_incoming_varargs_ms_64 (&next_cum);
9818 else
9819 setup_incoming_varargs_64 (&next_cum);
9822 static void
9823 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9824 machine_mode mode,
9825 tree type,
9826 int *pretend_size ATTRIBUTE_UNUSED,
9827 int no_rtl)
9829 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9830 CUMULATIVE_ARGS next_cum;
9831 tree fntype;
9832 rtx save_area;
9833 int bnd_reg, i, max;
9835 gcc_assert (!no_rtl);
9837 /* Do nothing if we use plain pointer to argument area. */
9838 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9839 return;
9841 fntype = TREE_TYPE (current_function_decl);
9843 /* For varargs, we do not want to skip the dummy va_dcl argument.
9844 For stdargs, we do want to skip the last named argument. */
9845 next_cum = *cum;
9846 if (stdarg_p (fntype))
9847 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9848 true);
9849 save_area = frame_pointer_rtx;
9851 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9852 if (max > X86_64_REGPARM_MAX)
9853 max = X86_64_REGPARM_MAX;
9855 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9856 if (chkp_function_instrumented_p (current_function_decl))
9857 for (i = cum->regno; i < max; i++)
9859 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9860 rtx ptr = gen_rtx_REG (Pmode,
9861 x86_64_int_parameter_registers[i]);
9862 rtx bounds;
9864 if (bnd_reg <= LAST_BND_REG)
9865 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9866 else
9868 rtx ldx_addr =
9869 plus_constant (Pmode, arg_pointer_rtx,
9870 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9871 bounds = gen_reg_rtx (BNDmode);
9872 emit_insn (BNDmode == BND64mode
9873 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9874 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9877 emit_insn (BNDmode == BND64mode
9878 ? gen_bnd64_stx (addr, ptr, bounds)
9879 : gen_bnd32_stx (addr, ptr, bounds));
9881 bnd_reg++;
9886 /* Check whether TYPE is a va_list of kind char *. */
9888 static bool
9889 is_va_list_char_pointer (tree type)
9891 tree canonic;
9893 /* For 32-bit it is always true. */
9894 if (!TARGET_64BIT)
9895 return true;
9896 canonic = ix86_canonical_va_list_type (type);
9897 return (canonic == ms_va_list_type_node
9898 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9901 /* Implement va_start. */
9903 static void
9904 ix86_va_start (tree valist, rtx nextarg)
9906 HOST_WIDE_INT words, n_gpr, n_fpr;
9907 tree f_gpr, f_fpr, f_ovf, f_sav;
9908 tree gpr, fpr, ovf, sav, t;
9909 tree type;
9910 rtx ovf_rtx;
9912 if (flag_split_stack
9913 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9915 unsigned int scratch_regno;
9917 /* When we are splitting the stack, we can't refer to the stack
9918 arguments using internal_arg_pointer, because they may be on
9919 the old stack. The split stack prologue will arrange to
9920 leave a pointer to the old stack arguments in a scratch
9921 register, which we here copy to a pseudo-register. The split
9922 stack prologue can't set the pseudo-register directly because
9923 it (the prologue) runs before any registers have been saved. */
9925 scratch_regno = split_stack_prologue_scratch_regno ();
9926 if (scratch_regno != INVALID_REGNUM)
9928 rtx reg;
9929 rtx_insn *seq;
9931 reg = gen_reg_rtx (Pmode);
9932 cfun->machine->split_stack_varargs_pointer = reg;
9934 start_sequence ();
9935 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9936 seq = get_insns ();
9937 end_sequence ();
9939 push_topmost_sequence ();
9940 emit_insn_after (seq, entry_of_function ());
9941 pop_topmost_sequence ();
9945 /* Only the 64-bit target needs something special. */
9946 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9948 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9949 std_expand_builtin_va_start (valist, nextarg);
9950 else
9952 rtx va_r, next;
9954 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9955 next = expand_binop (ptr_mode, add_optab,
9956 cfun->machine->split_stack_varargs_pointer,
9957 crtl->args.arg_offset_rtx,
9958 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9959 convert_move (va_r, next, 0);
9961 /* Store zero bounds for va_list. */
9962 if (chkp_function_instrumented_p (current_function_decl))
9963 chkp_expand_bounds_reset_for_mem (valist,
9964 make_tree (TREE_TYPE (valist),
9965 next));
9968 return;
9971 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9972 f_fpr = DECL_CHAIN (f_gpr);
9973 f_ovf = DECL_CHAIN (f_fpr);
9974 f_sav = DECL_CHAIN (f_ovf);
9976 valist = build_simple_mem_ref (valist);
9977 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9978 /* The following should be folded into the MEM_REF offset. */
9979 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9980 f_gpr, NULL_TREE);
9981 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9982 f_fpr, NULL_TREE);
9983 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9984 f_ovf, NULL_TREE);
9985 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9986 f_sav, NULL_TREE);
9988 /* Count number of gp and fp argument registers used. */
9989 words = crtl->args.info.words;
9990 n_gpr = crtl->args.info.regno;
9991 n_fpr = crtl->args.info.sse_regno;
9993 if (cfun->va_list_gpr_size)
9995 type = TREE_TYPE (gpr);
9996 t = build2 (MODIFY_EXPR, type,
9997 gpr, build_int_cst (type, n_gpr * 8));
9998 TREE_SIDE_EFFECTS (t) = 1;
9999 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10002 if (TARGET_SSE && cfun->va_list_fpr_size)
10004 type = TREE_TYPE (fpr);
10005 t = build2 (MODIFY_EXPR, type, fpr,
10006 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10007 TREE_SIDE_EFFECTS (t) = 1;
10008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10011 /* Find the overflow area. */
10012 type = TREE_TYPE (ovf);
10013 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10014 ovf_rtx = crtl->args.internal_arg_pointer;
10015 else
10016 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10017 t = make_tree (type, ovf_rtx);
10018 if (words != 0)
10019 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10021 /* Store zero bounds for overflow area pointer. */
10022 if (chkp_function_instrumented_p (current_function_decl))
10023 chkp_expand_bounds_reset_for_mem (ovf, t);
10025 t = build2 (MODIFY_EXPR, type, ovf, t);
10026 TREE_SIDE_EFFECTS (t) = 1;
10027 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10029 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10031 /* Find the register save area.
10032 The function prologue saves it right above the stack frame. */
10033 type = TREE_TYPE (sav);
10034 t = make_tree (type, frame_pointer_rtx);
10035 if (!ix86_varargs_gpr_size)
10036 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10038 /* Store zero bounds for save area pointer. */
10039 if (chkp_function_instrumented_p (current_function_decl))
10040 chkp_expand_bounds_reset_for_mem (sav, t);
10042 t = build2 (MODIFY_EXPR, type, sav, t);
10043 TREE_SIDE_EFFECTS (t) = 1;
10044 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
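/* To summarize, for the SysV 64-bit case va_start leaves the fields as
   (N_GPR and N_FPR being the named GP/SSE registers already consumed):

     gp_offset         = N_GPR * 8
     fp_offset         = X86_64_REGPARM_MAX * 8 + N_FPR * 16
     overflow_arg_area = incoming argument pointer + WORDS * UNITS_PER_WORD
     reg_save_area     = start of the register save area set up by the
                         prologue (adjusted when no GPRs are saved).  */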
10048 /* Implement va_arg. */
10050 static tree
10051 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10052 gimple_seq *post_p)
10054 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10055 tree f_gpr, f_fpr, f_ovf, f_sav;
10056 tree gpr, fpr, ovf, sav, t;
10057 int size, rsize;
10058 tree lab_false, lab_over = NULL_TREE;
10059 tree addr, t2;
10060 rtx container;
10061 int indirect_p = 0;
10062 tree ptrtype;
10063 machine_mode nat_mode;
10064 unsigned int arg_boundary;
10066 /* Only the 64-bit target needs something special. */
10067 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10068 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10070 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10071 f_fpr = DECL_CHAIN (f_gpr);
10072 f_ovf = DECL_CHAIN (f_fpr);
10073 f_sav = DECL_CHAIN (f_ovf);
10075 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10076 valist, f_gpr, NULL_TREE);
10078 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10079 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10080 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10082 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10083 if (indirect_p)
10084 type = build_pointer_type (type);
10085 size = arg_int_size_in_bytes (type);
10086 rsize = CEIL (size, UNITS_PER_WORD);
10088 nat_mode = type_natural_mode (type, NULL, false);
10089 switch (nat_mode)
10091 case E_V8SFmode:
10092 case E_V8SImode:
10093 case E_V32QImode:
10094 case E_V16HImode:
10095 case E_V4DFmode:
10096 case E_V4DImode:
10097 case E_V16SFmode:
10098 case E_V16SImode:
10099 case E_V64QImode:
10100 case E_V32HImode:
10101 case E_V8DFmode:
10102 case E_V8DImode:
10103 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
10104 if (!TARGET_64BIT_MS_ABI)
10106 container = NULL;
10107 break;
10109 /* FALLTHRU */
10111 default:
10112 container = construct_container (nat_mode, TYPE_MODE (type),
10113 type, 0, X86_64_REGPARM_MAX,
10114 X86_64_SSE_REGPARM_MAX, intreg,
10116 break;
10119 /* Pull the value out of the saved registers. */
10121 addr = create_tmp_var (ptr_type_node, "addr");
10123 if (container)
10125 int needed_intregs, needed_sseregs;
10126 bool need_temp;
10127 tree int_addr, sse_addr;
10129 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10130 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10132 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
10134 need_temp = (!REG_P (container)
10135 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10136 || TYPE_ALIGN (type) > 128));
10138 /* In case we are passing a structure, verify that it is a consecutive block
10139 in the register save area. If not, we need to do moves. */
10140 if (!need_temp && !REG_P (container))
10142 /* Verify that all registers are strictly consecutive */
10143 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10145 int i;
10147 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10149 rtx slot = XVECEXP (container, 0, i);
10150 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10151 || INTVAL (XEXP (slot, 1)) != i * 16)
10152 need_temp = true;
10155 else
10157 int i;
10159 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10161 rtx slot = XVECEXP (container, 0, i);
10162 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10163 || INTVAL (XEXP (slot, 1)) != i * 8)
10164 need_temp = true;
10168 if (!need_temp)
10170 int_addr = addr;
10171 sse_addr = addr;
10173 else
10175 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10176 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10179 /* First ensure that we fit completely in registers. */
10180 if (needed_intregs)
10182 t = build_int_cst (TREE_TYPE (gpr),
10183 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10184 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10185 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10186 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10187 gimplify_and_add (t, pre_p);
10189 if (needed_sseregs)
10191 t = build_int_cst (TREE_TYPE (fpr),
10192 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10193 + X86_64_REGPARM_MAX * 8);
10194 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10195 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10196 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10197 gimplify_and_add (t, pre_p);
10200 /* Compute index to start of area used for integer regs. */
10201 if (needed_intregs)
10203 /* int_addr = gpr + sav; */
10204 t = fold_build_pointer_plus (sav, gpr);
10205 gimplify_assign (int_addr, t, pre_p);
10207 if (needed_sseregs)
10209 /* sse_addr = fpr + sav; */
10210 t = fold_build_pointer_plus (sav, fpr);
10211 gimplify_assign (sse_addr, t, pre_p);
10213 if (need_temp)
10215 int i, prev_size = 0;
10216 tree temp = create_tmp_var (type, "va_arg_tmp");
10218 /* addr = &temp; */
10219 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10220 gimplify_assign (addr, t, pre_p);
10222 for (i = 0; i < XVECLEN (container, 0); i++)
10224 rtx slot = XVECEXP (container, 0, i);
10225 rtx reg = XEXP (slot, 0);
10226 machine_mode mode = GET_MODE (reg);
10227 tree piece_type;
10228 tree addr_type;
10229 tree daddr_type;
10230 tree src_addr, src;
10231 int src_offset;
10232 tree dest_addr, dest;
10233 int cur_size = GET_MODE_SIZE (mode);
10235 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10236 prev_size = INTVAL (XEXP (slot, 1));
10237 if (prev_size + cur_size > size)
10239 cur_size = size - prev_size;
10240 unsigned int nbits = cur_size * BITS_PER_UNIT;
10241 if (!int_mode_for_size (nbits, 1).exists (&mode))
10242 mode = QImode;
10244 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10245 if (mode == GET_MODE (reg))
10246 addr_type = build_pointer_type (piece_type);
10247 else
10248 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10249 true);
10250 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10251 true);
10253 if (SSE_REGNO_P (REGNO (reg)))
10255 src_addr = sse_addr;
10256 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10258 else
10260 src_addr = int_addr;
10261 src_offset = REGNO (reg) * 8;
10263 src_addr = fold_convert (addr_type, src_addr);
10264 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10266 dest_addr = fold_convert (daddr_type, addr);
10267 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10268 if (cur_size == GET_MODE_SIZE (mode))
10270 src = build_va_arg_indirect_ref (src_addr);
10271 dest = build_va_arg_indirect_ref (dest_addr);
10273 gimplify_assign (dest, src, pre_p);
10275 else
10277 tree copy
10278 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10279 3, dest_addr, src_addr,
10280 size_int (cur_size));
10281 gimplify_and_add (copy, pre_p);
10283 prev_size += cur_size;
10287 if (needed_intregs)
10289 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10290 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10291 gimplify_assign (gpr, t, pre_p);
10294 if (needed_sseregs)
10296 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10297 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10298 gimplify_assign (unshare_expr (fpr), t, pre_p);
10301 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10303 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10306 /* ... otherwise out of the overflow area. */
10308 /* When the caller aligns a parameter on the stack, any parameter
10309 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
10310 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
10311 caller. */
10312 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10313 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10314 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10316 /* Care for on-stack alignment if needed. */
10317 if (arg_boundary <= 64 || size == 0)
10318 t = ovf;
10319 else
10321 HOST_WIDE_INT align = arg_boundary / 8;
10322 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10323 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10324 build_int_cst (TREE_TYPE (t), -align));
10327 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10328 gimplify_assign (addr, t, pre_p);
10330 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10331 gimplify_assign (unshare_expr (ovf), t, pre_p);
10333 if (container)
10334 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10336 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10337 addr = fold_convert (ptrtype, addr);
10339 if (indirect_p)
10340 addr = build_va_arg_indirect_ref (addr);
10341 return build_va_arg_indirect_ref (addr);
10344 /* Return true if OPNUM's MEM should be matched
10345 in movabs* patterns. */
10347 bool
10348 ix86_check_movabs (rtx insn, int opnum)
10350 rtx set, mem;
10352 set = PATTERN (insn);
10353 if (GET_CODE (set) == PARALLEL)
10354 set = XVECEXP (set, 0, 0);
10355 gcc_assert (GET_CODE (set) == SET);
10356 mem = XEXP (set, opnum);
10357 while (SUBREG_P (mem))
10358 mem = SUBREG_REG (mem);
10359 gcc_assert (MEM_P (mem));
10360 return volatile_ok || !MEM_VOLATILE_P (mem);
10363 /* Return false if INSN contains a MEM with a non-default address space. */
10364 bool
10365 ix86_check_no_addr_space (rtx insn)
10367 subrtx_var_iterator::array_type array;
10368 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10370 rtx x = *iter;
10371 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10372 return false;
10374 return true;
10377 /* Initialize the table of extra 80387 mathematical constants. */
10379 static void
10380 init_ext_80387_constants (void)
10382 static const char * cst[5] =
10384 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10385 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10386 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10387 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10388 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10390 int i;
10392 for (i = 0; i < 5; i++)
10394 real_from_string (&ext_80387_constants_table[i], cst[i]);
10395 /* Ensure each constant is rounded to XFmode precision. */
10396 real_convert (&ext_80387_constants_table[i],
10397 XFmode, &ext_80387_constants_table[i]);
10400 ext_80387_constants_init = 1;
10403 /* Return non-zero if the constant is something that
10404 can be loaded with a special instruction. */
10407 standard_80387_constant_p (rtx x)
10409 machine_mode mode = GET_MODE (x);
10411 const REAL_VALUE_TYPE *r;
10413 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10414 return -1;
10416 if (x == CONST0_RTX (mode))
10417 return 1;
10418 if (x == CONST1_RTX (mode))
10419 return 2;
10421 r = CONST_DOUBLE_REAL_VALUE (x);
10423 /* For XFmode constants, try to find a special 80387 instruction when
10424 optimizing for size or on those CPUs that benefit from them. */
10425 if (mode == XFmode
10426 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10428 int i;
10430 if (! ext_80387_constants_init)
10431 init_ext_80387_constants ();
10433 for (i = 0; i < 5; i++)
10434 if (real_identical (r, &ext_80387_constants_table[i]))
10435 return i + 3;
10438 /* A load of the constant -0.0 or -1.0 will be split into an
10439 fldz;fchs or fld1;fchs sequence. */
10440 if (real_isnegzero (r))
10441 return 8;
10442 if (real_identical (r, &dconstm1))
10443 return 9;
10445 return 0;
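/* The return codes above map to x87 loads as follows (see
   standard_80387_constant_opcode below): 1 -> fldz, 2 -> fld1,
   3..7 -> fldlg2/fldln2/fldl2e/fldl2t/fldpi, 8/9 -> fldz/fld1 followed
   by fchs, 0 -> not a special constant, -1 -> not an x87 constant.  */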
10448 /* Return the opcode of the special instruction to be used to load
10449 the constant X. */
10451 const char *
10452 standard_80387_constant_opcode (rtx x)
10454 switch (standard_80387_constant_p (x))
10456 case 1:
10457 return "fldz";
10458 case 2:
10459 return "fld1";
10460 case 3:
10461 return "fldlg2";
10462 case 4:
10463 return "fldln2";
10464 case 5:
10465 return "fldl2e";
10466 case 6:
10467 return "fldl2t";
10468 case 7:
10469 return "fldpi";
10470 case 8:
10471 case 9:
10472 return "#";
10473 default:
10474 gcc_unreachable ();
10478 /* Return the CONST_DOUBLE representing the 80387 constant that is
10479 loaded by the specified special instruction. The argument IDX
10480 matches the return value from standard_80387_constant_p. */
10483 standard_80387_constant_rtx (int idx)
10485 int i;
10487 if (! ext_80387_constants_init)
10488 init_ext_80387_constants ();
10490 switch (idx)
10492 case 3:
10493 case 4:
10494 case 5:
10495 case 6:
10496 case 7:
10497 i = idx - 3;
10498 break;
10500 default:
10501 gcc_unreachable ();
10504 return const_double_from_real_value (ext_80387_constants_table[i],
10505 XFmode);
10508 /* Return 1 if X is all-bits-zero and 2 if X is all-bits-one, in a
10509 supported SSE/AVX vector mode. */
10512 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10514 machine_mode mode;
10516 if (!TARGET_SSE)
10517 return 0;
10519 mode = GET_MODE (x);
10521 if (x == const0_rtx || const0_operand (x, mode))
10522 return 1;
10524 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10526 /* VOIDmode integer constant, get mode from the predicate. */
10527 if (mode == VOIDmode)
10528 mode = pred_mode;
10530 switch (GET_MODE_SIZE (mode))
10532 case 64:
10533 if (TARGET_AVX512F)
10534 return 2;
10535 break;
10536 case 32:
10537 if (TARGET_AVX2)
10538 return 2;
10539 break;
10540 case 16:
10541 if (TARGET_SSE2)
10542 return 2;
10543 break;
10544 case 0:
10545 /* VOIDmode */
10546 gcc_unreachable ();
10547 default:
10548 break;
10552 return 0;
10555 /* Return the opcode of the special instruction to be used to load
10556 the constant operands[1] into operands[0]. */
10558 const char *
10559 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
10561 machine_mode mode;
10562 rtx x = operands[1];
10564 gcc_assert (TARGET_SSE);
10566 mode = GET_MODE (x);
10568 if (x == const0_rtx || const0_operand (x, mode))
10570 switch (get_attr_mode (insn))
10572 case MODE_TI:
10573 if (!EXT_REX_SSE_REG_P (operands[0]))
10574 return "%vpxor\t%0, %d0";
10575 /* FALLTHRU */
10576 case MODE_XI:
10577 case MODE_OI:
10578 if (EXT_REX_SSE_REG_P (operands[0]))
10579 return (TARGET_AVX512VL
10580 ? "vpxord\t%x0, %x0, %x0"
10581 : "vpxord\t%g0, %g0, %g0");
10582 return "vpxor\t%x0, %x0, %x0";
10584 case MODE_V2DF:
10585 if (!EXT_REX_SSE_REG_P (operands[0]))
10586 return "%vxorpd\t%0, %d0";
10587 /* FALLTHRU */
10588 case MODE_V8DF:
10589 case MODE_V4DF:
10590 if (!EXT_REX_SSE_REG_P (operands[0]))
10591 return "vxorpd\t%x0, %x0, %x0";
10592 else if (TARGET_AVX512DQ)
10593 return (TARGET_AVX512VL
10594 ? "vxorpd\t%x0, %x0, %x0"
10595 : "vxorpd\t%g0, %g0, %g0");
10596 else
10597 return (TARGET_AVX512VL
10598 ? "vpxorq\t%x0, %x0, %x0"
10599 : "vpxorq\t%g0, %g0, %g0");
10601 case MODE_V4SF:
10602 if (!EXT_REX_SSE_REG_P (operands[0]))
10603 return "%vxorps\t%0, %d0";
10604 /* FALLTHRU */
10605 case MODE_V16SF:
10606 case MODE_V8SF:
10607 if (!EXT_REX_SSE_REG_P (operands[0]))
10608 return "vxorps\t%x0, %x0, %x0";
10609 else if (TARGET_AVX512DQ)
10610 return (TARGET_AVX512VL
10611 ? "vxorps\t%x0, %x0, %x0"
10612 : "vxorps\t%g0, %g0, %g0");
10613 else
10614 return (TARGET_AVX512VL
10615 ? "vpxord\t%x0, %x0, %x0"
10616 : "vpxord\t%g0, %g0, %g0");
10618 default:
10619 gcc_unreachable ();
10622 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10624 enum attr_mode insn_mode = get_attr_mode (insn);
10626 switch (insn_mode)
10628 case MODE_XI:
10629 case MODE_V8DF:
10630 case MODE_V16SF:
10631 gcc_assert (TARGET_AVX512F);
10632 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10634 case MODE_OI:
10635 case MODE_V4DF:
10636 case MODE_V8SF:
10637 gcc_assert (TARGET_AVX2);
10638 /* FALLTHRU */
10639 case MODE_TI:
10640 case MODE_V2DF:
10641 case MODE_V4SF:
10642 gcc_assert (TARGET_SSE2);
10643 if (!EXT_REX_SSE_REG_P (operands[0]))
10644 return (TARGET_AVX
10645 ? "vpcmpeqd\t%0, %0, %0"
10646 : "pcmpeqd\t%0, %0");
10647 else if (TARGET_AVX512VL)
10648 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
10649 else
10650 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10652 default:
10653 gcc_unreachable ();
10657 gcc_unreachable ();
10660 /* Returns true if INSN can be transformed from a memory load
10661 to a supported FP constant load. */
10663 bool
10664 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10666 rtx src = find_constant_src (insn);
10668 gcc_assert (REG_P (dst));
10670 if (src == NULL
10671 || (SSE_REGNO_P (REGNO (dst))
10672 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10673 || (STACK_REGNO_P (REGNO (dst))
10674 && standard_80387_constant_p (src) < 1))
10675 return false;
10677 return true;
10680 /* Returns true if OP contains a symbol reference */
10682 bool
10683 symbolic_reference_mentioned_p (rtx op)
10685 const char *fmt;
10686 int i;
10688 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10689 return true;
10691 fmt = GET_RTX_FORMAT (GET_CODE (op));
10692 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10694 if (fmt[i] == 'E')
10696 int j;
10698 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10699 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10700 return true;
10703 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10704 return true;
10707 return false;
10710 /* Return true if it is appropriate to emit `ret' instructions in the
10711 body of a function. Do this only if the epilogue is simple, needing a
10712 couple of insns. Prior to reloading, we can't tell how many registers
10713 must be saved, so return false then. Return false if there is no frame
10714 marker to de-allocate. */
10716 bool
10717 ix86_can_use_return_insn_p (void)
10719 if (ix86_function_naked (current_function_decl))
10720 return false;
10722 /* Don't use `ret' instruction in interrupt handler. */
10723 if (! reload_completed
10724 || frame_pointer_needed
10725 || cfun->machine->func_type != TYPE_NORMAL)
10726 return 0;
10728 /* Don't allow more than 32k pop, since that's all we can do
10729 with one instruction. */
10730 if (crtl->args.pops_args && crtl->args.size >= 32768)
10731 return 0;
10733 struct ix86_frame &frame = cfun->machine->frame;
10734 return (frame.stack_pointer_offset == UNITS_PER_WORD
10735 && (frame.nregs + frame.nsseregs) == 0);
10738 /* Value should be nonzero if functions must have frame pointers.
10739 Zero means the frame pointer need not be set up (and parms may
10740 be accessed via the stack pointer) in functions that seem suitable. */
10742 static bool
10743 ix86_frame_pointer_required (void)
10745 /* If we accessed previous frames, then the generated code expects
10746 to be able to access the saved ebp value in our frame. */
10747 if (cfun->machine->accesses_prev_frame)
10748 return true;
10750 /* Several x86 OSes need a frame pointer for other reasons,
10751 usually pertaining to setjmp. */
10752 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10753 return true;
10755 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10756 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10757 return true;
10759 /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
10760 allocation is 4GB. */
10761 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10762 return true;
10764 /* SSE saves require frame-pointer when stack is misaligned. */
10765 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10766 return true;
10768 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10769 turns off the frame pointer by default. Turn it back on now if
10770 we've not got a leaf function. */
10771 if (TARGET_OMIT_LEAF_FRAME_POINTER
10772 && (!crtl->is_leaf
10773 || ix86_current_function_calls_tls_descriptor))
10774 return true;
10776 if (crtl->profile && !flag_fentry)
10777 return true;
10779 return false;
10782 /* Record that the current function accesses previous call frames. */
10784 void
10785 ix86_setup_frame_addresses (void)
10787 cfun->machine->accesses_prev_frame = 1;
10790 #ifndef USE_HIDDEN_LINKONCE
10791 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10792 # define USE_HIDDEN_LINKONCE 1
10793 # else
10794 # define USE_HIDDEN_LINKONCE 0
10795 # endif
10796 #endif
10798 /* Label count for call and return thunks. It is used to make unique
10799 labels in call and return thunks. */
10800 static int indirectlabelno;
10802 /* True if call and return thunk functions are needed. */
10803 static bool indirect_thunk_needed = false;
10804 /* True if call and return thunk functions with the BND prefix are
10805 needed. */
10806 static bool indirect_thunk_bnd_needed = false;
10808 /* Bit mask of integer registers which contain the branch target, used
10809 by call and return thunk functions. */
10810 static int indirect_thunks_used;
10811 /* Bit mask of integer registers which contain the branch target, used
10812 by call and return thunk functions with the BND prefix. */
10813 static int indirect_thunks_bnd_used;
10815 /* True if return thunk function via CX is needed. */
10816 static bool indirect_return_via_cx;
10817 /* True if return thunk function via CX with the BND prefix is
10818 needed. */
10819 static bool indirect_return_via_cx_bnd;
10821 #ifndef INDIRECT_LABEL
10822 # define INDIRECT_LABEL "LIND"
10823 #endif
10825 /* Indicate what prefix is needed for an indirect branch. */
10826 enum indirect_thunk_prefix
10828 indirect_thunk_prefix_none,
10829 indirect_thunk_prefix_bnd,
10830 indirect_thunk_prefix_nt
10833 /* Return the prefix needed for an indirect branch INSN. */
10835 enum indirect_thunk_prefix
10836 indirect_thunk_need_prefix (rtx_insn *insn)
10838 enum indirect_thunk_prefix need_prefix;
10839 if (ix86_bnd_prefixed_insn_p (insn))
10840 need_prefix = indirect_thunk_prefix_bnd;
10841 else if ((cfun->machine->indirect_branch_type
10842 == indirect_branch_thunk_extern)
10843 && ix86_notrack_prefixed_insn_p (insn))
10845 /* NOTRACK prefix is only used with external thunk so that it
10846 can be properly updated to support CET at run-time. */
10847 need_prefix = indirect_thunk_prefix_nt;
10849 else
10850 need_prefix = indirect_thunk_prefix_none;
10851 return need_prefix;
10854 /* Fills in the label name that should be used for the indirect thunk. */
10856 static void
10857 indirect_thunk_name (char name[32], unsigned int regno,
10858 enum indirect_thunk_prefix need_prefix,
10859 bool ret_p)
10861 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
10862 gcc_unreachable ();
10864 if (USE_HIDDEN_LINKONCE)
10866 const char *prefix;
10868 if (need_prefix == indirect_thunk_prefix_bnd)
10869 prefix = "_bnd";
10870 else if (need_prefix == indirect_thunk_prefix_nt
10871 && regno != INVALID_REGNUM)
10873 /* NOTRACK prefix is only used with external thunk via
10874 register so that NOTRACK prefix can be added to indirect
10875 branch via register to support CET at run-time. */
10876 prefix = "_nt";
10878 else
10879 prefix = "";
10881 const char *ret = ret_p ? "return" : "indirect";
10883 if (regno != INVALID_REGNUM)
10885 const char *reg_prefix;
10886 if (LEGACY_INT_REGNO_P (regno))
10887 reg_prefix = TARGET_64BIT ? "r" : "e";
10888 else
10889 reg_prefix = "";
10890 sprintf (name, "__x86_%s_thunk%s_%s%s",
10891 ret, prefix, reg_prefix, reg_names[regno]);
10893 else
10894 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
10896 else
10898 if (regno != INVALID_REGNUM)
10900 if (need_prefix == indirect_thunk_prefix_bnd)
10901 ASM_GENERATE_INTERNAL_LABEL (name, "LITBR", regno);
10902 else
10903 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
10905 else
10907 if (ret_p)
10909 if (need_prefix == indirect_thunk_prefix_bnd)
10910 ASM_GENERATE_INTERNAL_LABEL (name, "LRTB", 0);
10911 else
10912 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
10914 else
10916 if (need_prefix == indirect_thunk_prefix_bnd)
10917 ASM_GENERATE_INTERNAL_LABEL (name, "LITB", 0);
10918 else
10919 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
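/* For illustration, the names produced by the format strings above look
   like __x86_indirect_thunk, __x86_indirect_thunk_rax (or _eax on ia32),
   __x86_indirect_thunk_nt_rax, __x86_indirect_thunk_bnd and
   __x86_return_thunk, with the _bnd and register suffixes combined the
   same way for the remaining variants.  */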
10925 /* Output a call and return thunk for indirect branch. If NEED_PREFIX is
10926 indirect_thunk_prefix_bnd, the BND prefix is needed. If REGNO !=
10927 INVALID_REGNUM, the function address is in REGNO and the call and return thunk looks like:
10929 call L2
10931 pause
10932 lfence
10933 jmp L1
10935 mov %REG, (%sp)
10938 Otherwise, the function address is on the top of stack and the
10939 call and return thunk looks like:
10941 call L2
10943 pause
10944 lfence
10945 jmp L1
10947 lea WORD_SIZE(%sp), %sp
10951 static void
10952 output_indirect_thunk (enum indirect_thunk_prefix need_prefix,
10953 unsigned int regno)
10955 char indirectlabel1[32];
10956 char indirectlabel2[32];
10958 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
10959 indirectlabelno++);
10960 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
10961 indirectlabelno++);
10963 /* Call */
10964 if (need_prefix == indirect_thunk_prefix_bnd)
10965 fputs ("\tbnd call\t", asm_out_file);
10966 else
10967 fputs ("\tcall\t", asm_out_file);
10968 assemble_name_raw (asm_out_file, indirectlabel2);
10969 fputc ('\n', asm_out_file);
10971 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
10973 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
10974 Using both pause + lfence is a compromise. */
10975 fprintf (asm_out_file, "\tpause\n\tlfence\n");
10977 /* Jump. */
10978 fputs ("\tjmp\t", asm_out_file);
10979 assemble_name_raw (asm_out_file, indirectlabel1);
10980 fputc ('\n', asm_out_file);
10982 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
10984 if (regno != INVALID_REGNUM)
10986 /* MOV. */
10987 rtx xops[2];
10988 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
10989 xops[1] = gen_rtx_REG (word_mode, regno);
10990 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
10992 else
10994 /* LEA. */
10995 rtx xops[2];
10996 xops[0] = stack_pointer_rtx;
10997 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10998 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
11001 if (need_prefix == indirect_thunk_prefix_bnd)
11002 fputs ("\tbnd ret\n", asm_out_file);
11003 else
11004 fputs ("\tret\n", asm_out_file);
11007 /* Output a function with a call and return thunk for indirect branch.
11008 If NEED_PREFIX is indirect_thunk_prefix_bnd, the BND prefix is needed.
11009 If REGNO != INVALID_REGNUM, the function address is in REGNO. Otherwise, the function address is
11010 on the top of stack. */
11012 static void
11013 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
11014 unsigned int regno)
11016 char name[32];
11017 tree decl;
11019 /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd. */
11020 indirect_thunk_name (name, regno, need_prefix, false);
11021 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11022 get_identifier (name),
11023 build_function_type_list (void_type_node, NULL_TREE));
11024 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11025 NULL_TREE, void_type_node);
11026 TREE_PUBLIC (decl) = 1;
11027 TREE_STATIC (decl) = 1;
11028 DECL_IGNORED_P (decl) = 1;
11030 #if TARGET_MACHO
11031 if (TARGET_MACHO)
11033 switch_to_section (darwin_sections[picbase_thunk_section]);
11034 fputs ("\t.weak_definition\t", asm_out_file);
11035 assemble_name (asm_out_file, name);
11036 fputs ("\n\t.private_extern\t", asm_out_file);
11037 assemble_name (asm_out_file, name);
11038 putc ('\n', asm_out_file);
11039 ASM_OUTPUT_LABEL (asm_out_file, name);
11040 DECL_WEAK (decl) = 1;
11042 else
11043 #endif
11044 if (USE_HIDDEN_LINKONCE)
11046 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11048 targetm.asm_out.unique_section (decl, 0);
11049 switch_to_section (get_named_section (decl, NULL, 0));
11051 targetm.asm_out.globalize_label (asm_out_file, name);
11052 fputs ("\t.hidden\t", asm_out_file);
11053 assemble_name (asm_out_file, name);
11054 putc ('\n', asm_out_file);
11055 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11057 else
11059 switch_to_section (text_section);
11060 ASM_OUTPUT_LABEL (asm_out_file, name);
11063 /* Create alias for __x86_return_thunk/__x86_return_thunk_bnd or
11064 __x86_return_thunk_ecx/__x86_return_thunk_ecx_bnd. */
11065 bool need_alias;
11066 if (regno == INVALID_REGNUM)
11067 need_alias = true;
11068 else if (regno == CX_REG)
11070 if (need_prefix == indirect_thunk_prefix_bnd)
11071 need_alias = indirect_return_via_cx_bnd;
11072 else
11073 need_alias = indirect_return_via_cx;
11075 else
11076 need_alias = false;
11078 if (need_alias)
11080 char alias[32];
11082 indirect_thunk_name (alias, regno, need_prefix, true);
11083 #if TARGET_MACHO
11084 if (TARGET_MACHO)
11086 fputs ("\t.weak_definition\t", asm_out_file);
11087 assemble_name (asm_out_file, alias);
11088 fputs ("\n\t.private_extern\t", asm_out_file);
11089 assemble_name (asm_out_file, alias);
11090 putc ('\n', asm_out_file);
11091 ASM_OUTPUT_LABEL (asm_out_file, alias);
11093 #else
11094 ASM_OUTPUT_DEF (asm_out_file, alias, name);
11095 if (USE_HIDDEN_LINKONCE)
11097 fputs ("\t.globl\t", asm_out_file);
11098 assemble_name (asm_out_file, alias);
11099 putc ('\n', asm_out_file);
11100 fputs ("\t.hidden\t", asm_out_file);
11101 assemble_name (asm_out_file, alias);
11102 putc ('\n', asm_out_file);
11104 #endif
11107 DECL_INITIAL (decl) = make_node (BLOCK);
11108 current_function_decl = decl;
11109 allocate_struct_function (decl, false);
11110 init_function_start (decl);
11111 /* We're about to hide the function body from callees of final_* by
11112 emitting it directly; tell them we're a thunk, if they care. */
11113 cfun->is_thunk = true;
11114 first_function_block_is_cold = false;
11115 /* Make sure unwind info is emitted for the thunk if needed. */
11116 final_start_function (emit_barrier (), asm_out_file, 1);
11118 output_indirect_thunk (need_prefix, regno);
11120 final_end_function ();
11121 init_insn_lengths ();
11122 free_after_compilation (cfun);
11123 set_cfun (NULL);
11124 current_function_decl = NULL;
11127 static int pic_labels_used;
11129 /* Fills in the label name that should be used for a pc thunk for
11130 the given register. */
11132 static void
11133 get_pc_thunk_name (char name[32], unsigned int regno)
11135 gcc_assert (!TARGET_64BIT);
11137 if (USE_HIDDEN_LINKONCE)
11138 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
11139 else
11140 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
11144 /* This function generates code for -fpic that loads the PIC register with
11145 the return address of the caller and then returns. */
11147 static void
11148 ix86_code_end (void)
11150 rtx xops[2];
11151 unsigned int regno;
11153 if (indirect_thunk_needed)
11154 output_indirect_thunk_function (indirect_thunk_prefix_none,
11155 INVALID_REGNUM);
11156 if (indirect_thunk_bnd_needed)
11157 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11158 INVALID_REGNUM);
11160 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
11162 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
11163 if ((indirect_thunks_used & (1 << i)))
11164 output_indirect_thunk_function (indirect_thunk_prefix_none,
11165 regno);
11167 if ((indirect_thunks_bnd_used & (1 << i)))
11168 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11169 regno);
11172 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
11174 char name[32];
11175 tree decl;
11177 if ((indirect_thunks_used & (1 << regno)))
11178 output_indirect_thunk_function (indirect_thunk_prefix_none,
11179 regno);
11181 if ((indirect_thunks_bnd_used & (1 << regno)))
11182 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11183 regno);
11185 if (!(pic_labels_used & (1 << regno)))
11186 continue;
11188 get_pc_thunk_name (name, regno);
11190 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11191 get_identifier (name),
11192 build_function_type_list (void_type_node, NULL_TREE));
11193 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11194 NULL_TREE, void_type_node);
11195 TREE_PUBLIC (decl) = 1;
11196 TREE_STATIC (decl) = 1;
11197 DECL_IGNORED_P (decl) = 1;
11199 #if TARGET_MACHO
11200 if (TARGET_MACHO)
11202 switch_to_section (darwin_sections[picbase_thunk_section]);
11203 fputs ("\t.weak_definition\t", asm_out_file);
11204 assemble_name (asm_out_file, name);
11205 fputs ("\n\t.private_extern\t", asm_out_file);
11206 assemble_name (asm_out_file, name);
11207 putc ('\n', asm_out_file);
11208 ASM_OUTPUT_LABEL (asm_out_file, name);
11209 DECL_WEAK (decl) = 1;
11211 else
11212 #endif
11213 if (USE_HIDDEN_LINKONCE)
11215 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11217 targetm.asm_out.unique_section (decl, 0);
11218 switch_to_section (get_named_section (decl, NULL, 0));
11220 targetm.asm_out.globalize_label (asm_out_file, name);
11221 fputs ("\t.hidden\t", asm_out_file);
11222 assemble_name (asm_out_file, name);
11223 putc ('\n', asm_out_file);
11224 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11226 else
11228 switch_to_section (text_section);
11229 ASM_OUTPUT_LABEL (asm_out_file, name);
11232 DECL_INITIAL (decl) = make_node (BLOCK);
11233 current_function_decl = decl;
11234 allocate_struct_function (decl, false);
11235 init_function_start (decl);
11236 /* We're about to hide the function body from callees of final_* by
11237 emitting it directly; tell them we're a thunk, if they care. */
11238 cfun->is_thunk = true;
11239 first_function_block_is_cold = false;
11240 /* Make sure unwind info is emitted for the thunk if needed. */
11241 final_start_function (emit_barrier (), asm_out_file, 1);
11243 /* Pad stack IP move with 4 instructions (two NOPs count
11244 as one instruction). */
11245 if (TARGET_PAD_SHORT_FUNCTION)
11247 int i = 8;
11249 while (i--)
11250 fputs ("\tnop\n", asm_out_file);
11253 xops[0] = gen_rtx_REG (Pmode, regno);
11254 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11255 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11256 output_asm_insn ("%!ret", NULL);
11257 final_end_function ();
11258 init_insn_lengths ();
11259 free_after_compilation (cfun);
11260 set_cfun (NULL);
11261 current_function_decl = NULL;
11264 if (flag_split_stack)
11265 file_end_indicate_split_stack ();
11268 /* Emit code for the SET_GOT patterns. */
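/* In the common ia32 -fpic case this expands to a call to the pc thunk
   for DEST followed by

       addl  $_GLOBAL_OFFSET_TABLE_, %DEST

   which leaves the address of the GOT in DEST; the VxWorks RTP and
   Mach-O paths below differ in detail.  */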
11270 const char *
11271 output_set_got (rtx dest, rtx label)
11273 rtx xops[3];
11275 xops[0] = dest;
11277 if (TARGET_VXWORKS_RTP && flag_pic)
11279 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11280 xops[2] = gen_rtx_MEM (Pmode,
11281 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11282 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11284 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11285 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11286 an unadorned address. */
11287 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11288 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11289 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11290 return "";
11293 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11295 if (flag_pic)
11297 char name[32];
11298 get_pc_thunk_name (name, REGNO (dest));
11299 pic_labels_used |= 1 << REGNO (dest);
11301 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11302 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11303 output_asm_insn ("%!call\t%X2", xops);
11305 #if TARGET_MACHO
11306 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11307 This is what will be referenced by the Mach-O PIC subsystem. */
11308 if (machopic_should_output_picbase_label () || !label)
11309 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11311 /* When we are restoring the pic base at the site of a nonlocal label,
11312 and we decided to emit the pic base above, we will still output a
11313 local label used for calculating the correction offset (even though
11314 the offset will be 0 in that case). */
11315 if (label)
11316 targetm.asm_out.internal_label (asm_out_file, "L",
11317 CODE_LABEL_NUMBER (label));
11318 #endif
11320 else
11322 if (TARGET_MACHO)
11323 /* We don't need a pic base, we're not producing pic. */
11324 gcc_unreachable ();
11326 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11327 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11328 targetm.asm_out.internal_label (asm_out_file, "L",
11329 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11332 if (!TARGET_MACHO)
11333 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11335 return "";
11338 /* Generate a "push" pattern for input ARG. */
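/* For example, with ARG = %rbx on a 64-bit target the returned pattern
   is roughly

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))

   which assembles to a single "push %rbx"; the frame-state updates in
   the function account for the extra word on the stack.  */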
11340 static rtx
11341 gen_push (rtx arg)
11343 struct machine_function *m = cfun->machine;
11345 if (m->fs.cfa_reg == stack_pointer_rtx)
11346 m->fs.cfa_offset += UNITS_PER_WORD;
11347 m->fs.sp_offset += UNITS_PER_WORD;
11349 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11350 arg = gen_rtx_REG (word_mode, REGNO (arg));
11352 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11353 gen_rtx_PRE_DEC (Pmode,
11354 stack_pointer_rtx)),
11355 arg);
11358 /* Generate a "pop" pattern for input ARG. */
11360 static rtx
11361 gen_pop (rtx arg)
11363 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11364 arg = gen_rtx_REG (word_mode, REGNO (arg));
11366 return gen_rtx_SET (arg,
11367 gen_rtx_MEM (word_mode,
11368 gen_rtx_POST_INC (Pmode,
11369 stack_pointer_rtx)));
11372 /* Return >= 0 if there is an unused call-clobbered register available
11373 for the entire function. */
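/* On ia32 the candidates tried below are hard registers 2, 1 and 0,
   i.e. %ecx, %edx and %eax, skipping the DRAP register and any register
   that is ever live.  */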
11375 static unsigned int
11376 ix86_select_alt_pic_regnum (void)
11378 if (ix86_use_pseudo_pic_reg ())
11379 return INVALID_REGNUM;
11381 if (crtl->is_leaf
11382 && !crtl->profile
11383 && !ix86_current_function_calls_tls_descriptor)
11385 int i, drap;
11386 /* Can't use the same register for both PIC and DRAP. */
11387 if (crtl->drap_reg)
11388 drap = REGNO (crtl->drap_reg);
11389 else
11390 drap = -1;
11391 for (i = 2; i >= 0; --i)
11392 if (i != drap && !df_regs_ever_live_p (i))
11393 return i;
11396 return INVALID_REGNUM;
11399 /* Return true if REGNO is used by the epilogue. */
11401 bool
11402 ix86_epilogue_uses (int regno)
11404 /* If there are no caller-saved registers, we preserve all registers,
11405 except for MMX and x87 registers which aren't supported when saving
11406 and restoring registers. Don't explicitly save SP register since
11407 it is always preserved. */
11408 return (epilogue_completed
11409 && cfun->machine->no_caller_saved_registers
11410 && !fixed_regs[regno]
11411 && !STACK_REGNO_P (regno)
11412 && !MMX_REGNO_P (regno));
11415 /* Return nonzero if register REGNO can be used as a scratch register
11416 in peephole2. */
11418 static bool
11419 ix86_hard_regno_scratch_ok (unsigned int regno)
11421 /* If there are no caller-saved registers, we can't use any register
11422 as a scratch register after the epilogue, and we use REGNO as a
11423 scratch register only if it has been used before, to avoid saving
11424 and restoring it. */
11425 return (!cfun->machine->no_caller_saved_registers
11426 || (!epilogue_completed
11427 && df_regs_ever_live_p (regno)));
11430 /* Return true if register class CL should be an additional allocno
11431 class. */
11433 static bool
11434 ix86_additional_allocno_class_p (reg_class_t cl)
11436 return cl == MOD4_SSE_REGS;
11439 /* Return TRUE if we need to save REGNO. */
11441 static bool
11442 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
11444 /* If there are no caller-saved registers, we preserve all registers,
11445 except for MMX and x87 registers which aren't supported when saving
11446 and restoring registers. Don't explicitly save SP register since
11447 it is always preserved. */
11448 if (cfun->machine->no_caller_saved_registers)
11450 /* Don't preserve registers used for function return value. */
11451 rtx reg = crtl->return_rtx;
11452 if (reg)
11454 unsigned int i = REGNO (reg);
11455 unsigned int nregs = REG_NREGS (reg);
11456 while (nregs-- > 0)
11457 if ((i + nregs) == regno)
11458 return false;
11460 reg = crtl->return_bnd;
11461 if (reg)
11463 i = REGNO (reg);
11464 nregs = REG_NREGS (reg);
11465 while (nregs-- > 0)
11466 if ((i + nregs) == regno)
11467 return false;
11471 return (df_regs_ever_live_p (regno)
11472 && !fixed_regs[regno]
11473 && !STACK_REGNO_P (regno)
11474 && !MMX_REGNO_P (regno)
11475 && (regno != HARD_FRAME_POINTER_REGNUM
11476 || !frame_pointer_needed));
11479 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11480 && pic_offset_table_rtx)
11482 if (ix86_use_pseudo_pic_reg ())
11484 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11485 _mcount in prologue. */
11486 if (!TARGET_64BIT && flag_pic && crtl->profile)
11487 return true;
11489 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11490 || crtl->profile
11491 || crtl->calls_eh_return
11492 || crtl->uses_const_pool
11493 || cfun->has_nonlocal_label)
11494 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11497 if (crtl->calls_eh_return && maybe_eh_return)
11499 unsigned i;
11500 for (i = 0; ; i++)
11502 unsigned test = EH_RETURN_DATA_REGNO (i);
11503 if (test == INVALID_REGNUM)
11504 break;
11505 if (test == regno)
11506 return true;
11510 if (ignore_outlined && cfun->machine->call_ms2sysv)
11512 unsigned count = cfun->machine->call_ms2sysv_extra_regs
11513 + xlogue_layout::MIN_REGS;
11514 if (xlogue_layout::is_stub_managed_reg (regno, count))
11515 return false;
11518 if (crtl->drap_reg
11519 && regno == REGNO (crtl->drap_reg)
11520 && !cfun->machine->no_drap_save_restore)
11521 return true;
11523 return (df_regs_ever_live_p (regno)
11524 && !call_used_regs[regno]
11525 && !fixed_regs[regno]
11526 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11529 /* Return number of saved general purpose registers. */
11531 static int
11532 ix86_nsaved_regs (void)
11534 int nregs = 0;
11535 int regno;
11537 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11538 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11539 nregs ++;
11540 return nregs;
11543 /* Return number of saved SSE registers. */
11545 static int
11546 ix86_nsaved_sseregs (void)
11548 int nregs = 0;
11549 int regno;
11551 if (!TARGET_64BIT_MS_ABI)
11552 return 0;
11553 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11554 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11555 nregs ++;
11556 return nregs;
11559 /* Given FROM and TO register numbers, say whether this elimination is
11560 allowed. If stack alignment is needed, we can only replace argument
11561 pointer with hard frame pointer, or replace frame pointer with stack
11562 pointer. Otherwise, frame pointer elimination is automatically
11563 handled and all other eliminations are valid. */
11565 static bool
11566 ix86_can_eliminate (const int from, const int to)
11568 if (stack_realign_fp)
11569 return ((from == ARG_POINTER_REGNUM
11570 && to == HARD_FRAME_POINTER_REGNUM)
11571 || (from == FRAME_POINTER_REGNUM
11572 && to == STACK_POINTER_REGNUM));
11573 else
11574 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11577 /* Return the offset between two registers, one to be eliminated, and the other
11578 its replacement, at the start of a routine. */
11580 HOST_WIDE_INT
11581 ix86_initial_elimination_offset (int from, int to)
11583 struct ix86_frame &frame = cfun->machine->frame;
11585 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11586 return frame.hard_frame_pointer_offset;
11587 else if (from == FRAME_POINTER_REGNUM
11588 && to == HARD_FRAME_POINTER_REGNUM)
11589 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11590 else
11592 gcc_assert (to == STACK_POINTER_REGNUM);
11594 if (from == ARG_POINTER_REGNUM)
11595 return frame.stack_pointer_offset;
11597 gcc_assert (from == FRAME_POINTER_REGNUM);
11598 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11602 /* In a dynamically-aligned function, we can't know the offset from
11603 stack pointer to frame pointer, so we must ensure that setjmp
11604 eliminates fp against the hard fp (%ebp) rather than trying to
11605 index from %esp up to the top of the frame across a gap that is
11606 of unknown (at compile-time) size. */
11607 static rtx
11608 ix86_builtin_setjmp_frame_value (void)
11610 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11613 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
11614 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11616 static bool warned_once = false;
11617 if (!warned_once)
11619 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11620 feature);
11621 warned_once = true;
11625 /* Return the probing interval for -fstack-clash-protection. */
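/* Both the --param value and STACK_CHECK_PROBE_INTERVAL_EXP are base-2
   exponents; a value of 12, for instance, yields a 4096-byte interval.  */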
11627 static HOST_WIDE_INT
11628 get_probe_interval (void)
11630 if (flag_stack_clash_protection)
11631 return (HOST_WIDE_INT_1U
11632 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
11633 else
11634 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
11637 /* When using -fsplit-stack, the allocation routines set a field in
11638 the TCB to the bottom of the stack plus this much space, measured
11639 in bytes. */
11641 #define SPLIT_STACK_AVAILABLE 256
11643 /* Fill structure ix86_frame about frame of currently computed function. */
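/* A rough sketch of the layout computed below, going from the CFA
   towards lower addresses (SEH and stack realignment adjust some of
   the details):

       return address (and error code for exception handlers)
       pushed static chain, if any
       saved frame pointer, if needed
       general register save area
       SSE register save area / realignment padding
       va_arg register save area
       local variables (frame_pointer_offset points at their top)
       outgoing argument area
       end of frame (stack_pointer_offset)  */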
11645 static void
11646 ix86_compute_frame_layout (void)
11648 struct ix86_frame *frame = &cfun->machine->frame;
11649 struct machine_function *m = cfun->machine;
11650 unsigned HOST_WIDE_INT stack_alignment_needed;
11651 HOST_WIDE_INT offset;
11652 unsigned HOST_WIDE_INT preferred_alignment;
11653 HOST_WIDE_INT size = get_frame_size ();
11654 HOST_WIDE_INT to_allocate;
11656 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11657 * ms_abi functions that call a sysv function. We now need to prune away
11658 * cases where it should be disabled. */
11659 if (TARGET_64BIT && m->call_ms2sysv)
11661 gcc_assert (TARGET_64BIT_MS_ABI);
11662 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11663 gcc_assert (!TARGET_SEH);
11664 gcc_assert (TARGET_SSE);
11665 gcc_assert (!ix86_using_red_zone ());
11667 if (crtl->calls_eh_return)
11669 gcc_assert (!reload_completed);
11670 m->call_ms2sysv = false;
11671 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11674 else if (ix86_static_chain_on_stack)
11676 gcc_assert (!reload_completed);
11677 m->call_ms2sysv = false;
11678 warn_once_call_ms2sysv_xlogues ("static call chains");
11681 /* Finally, compute which registers the stub will manage. */
11682 else
11684 unsigned count = xlogue_layout::count_stub_managed_regs ();
11685 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11686 m->call_ms2sysv_pad_in = 0;
11690 frame->nregs = ix86_nsaved_regs ();
11691 frame->nsseregs = ix86_nsaved_sseregs ();
11693 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11694 except for function prologues, leaf functions and when the default
11695 incoming stack boundary is overridden at the command line or via
11696 force_align_arg_pointer attribute. */
11697 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11698 && (!crtl->is_leaf || cfun->calls_alloca != 0
11699 || ix86_current_function_calls_tls_descriptor
11700 || ix86_incoming_stack_boundary < 128))
11702 crtl->preferred_stack_boundary = 128;
11703 crtl->stack_alignment_needed = 128;
11706 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11707 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11709 gcc_assert (!size || stack_alignment_needed);
11710 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11711 gcc_assert (preferred_alignment <= stack_alignment_needed);
11713 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
11714 gcc_assert (TARGET_64BIT || !frame->nsseregs);
11715 if (TARGET_64BIT && m->call_ms2sysv)
11717 gcc_assert (stack_alignment_needed >= 16);
11718 gcc_assert (!frame->nsseregs);
11721 /* For SEH we have to limit the amount of code movement into the prologue.
11722 At present we do this via a BLOCKAGE, at which point there's very little
11723 scheduling that can be done, which means that there's very little point
11724 in doing anything except PUSHs. */
11725 if (TARGET_SEH)
11726 m->use_fast_prologue_epilogue = false;
11727 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11729 int count = frame->nregs;
11730 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11732 /* The fast prologue uses move instead of push to save registers. This
11733 is significantly longer, but also executes faster as modern hardware
11734 can execute the moves in parallel, but can't do that for push/pop.
11736 Be careful about choosing what prologue to emit: When function takes
11737 many instructions to execute we may use slow version as well as in
11738 case function is known to be outside hot spot (this is known with
11739 feedback only). Weight the size of function by number of registers
11740 to save as it is cheap to use one or two push instructions but very
11741 slow to use many of them. */
11742 if (count)
11743 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11744 if (node->frequency < NODE_FREQUENCY_NORMAL
11745 || (flag_branch_probabilities
11746 && node->frequency < NODE_FREQUENCY_HOT))
11747 m->use_fast_prologue_epilogue = false;
11748 else
11749 m->use_fast_prologue_epilogue
11750 = !expensive_function_p (count);
11753 frame->save_regs_using_mov
11754 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11755 /* If static stack checking is enabled and done with probes,
11756 the registers need to be saved before allocating the frame. */
11757 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11759 /* Skip return address and error code in exception handler. */
11760 offset = INCOMING_FRAME_SP_OFFSET;
11762 /* Skip pushed static chain. */
11763 if (ix86_static_chain_on_stack)
11764 offset += UNITS_PER_WORD;
11766 /* Skip saved base pointer. */
11767 if (frame_pointer_needed)
11768 offset += UNITS_PER_WORD;
11769 frame->hfp_save_offset = offset;
11771 /* The traditional frame pointer location is at the top of the frame. */
11772 frame->hard_frame_pointer_offset = offset;
11774 /* Register save area */
11775 offset += frame->nregs * UNITS_PER_WORD;
11776 frame->reg_save_offset = offset;
11778 /* On SEH target, registers are pushed just before the frame pointer
11779 location. */
11780 if (TARGET_SEH)
11781 frame->hard_frame_pointer_offset = offset;
11783 /* Calculate the size of the va-arg area (not including padding, if any). */
11784 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11786 /* Also adjust stack_realign_offset for the largest alignment of
11787 stack slot actually used. */
11788 if (stack_realign_fp
11789 || (cfun->machine->max_used_stack_alignment != 0
11790 && (offset % cfun->machine->max_used_stack_alignment) != 0))
11792 /* We may need a 16-byte aligned stack for the remainder of the
11793 register save area, but the stack frame for the local function
11794 may require a greater alignment if using AVX/2/512. In order
11795 to avoid wasting space, we first calculate the space needed for
11796 the rest of the register saves, add that to the stack pointer,
11797 and then realign the stack to the boundary of the start of the
11798 frame for the local function. */
11799 HOST_WIDE_INT space_needed = 0;
11800 HOST_WIDE_INT sse_reg_space_needed = 0;
11802 if (TARGET_64BIT)
11804 if (m->call_ms2sysv)
11806 m->call_ms2sysv_pad_in = 0;
11807 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11810 else if (frame->nsseregs)
11811 /* The only ABI that has saved SSE registers (Win64) also has a
11812 16-byte aligned default stack. However, many programs violate
11813 the ABI, and Wine64 forces stack realignment to compensate. */
11814 space_needed = frame->nsseregs * 16;
11816 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11818 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11819 we round anyway to be pedantic. */
11820 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11822 else
11823 space_needed = frame->va_arg_size;
11825 /* Record the allocation size required prior to the realignment AND. */
11826 frame->stack_realign_allocate = space_needed;
11828 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11829 before this point are not directly comparable with values below
11830 this point. Use sp_valid_at to determine if the stack pointer is
11831 valid for a given offset, fp_valid_at for the frame pointer, or
11832 choose_baseaddr to have a base register chosen for you.
11834 Note that the result of (frame->stack_realign_offset
11835 & (stack_alignment_needed - 1)) may not equal zero. */
11836 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11837 frame->stack_realign_offset = offset - space_needed;
11838 frame->sse_reg_save_offset = frame->stack_realign_offset
11839 + sse_reg_space_needed;
11841 else
11843 frame->stack_realign_offset = offset;
11845 if (TARGET_64BIT && m->call_ms2sysv)
11847 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11848 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11851 /* Align and set SSE register save area. */
11852 else if (frame->nsseregs)
11854 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11855 required and the DRAP re-alignment boundary is at least 16 bytes,
11856 then we want the SSE register save area properly aligned. */
11857 if (ix86_incoming_stack_boundary >= 128
11858 || (stack_realign_drap && stack_alignment_needed >= 16))
11859 offset = ROUND_UP (offset, 16);
11860 offset += frame->nsseregs * 16;
11862 frame->sse_reg_save_offset = offset;
11863 offset += frame->va_arg_size;
11866 /* Align start of frame for local function. When a function call
11867 is removed, it may become a leaf function. But if arguments may
11868 be passed on the stack, we need to align the stack when there is no
11869 tail call. */
11870 if (m->call_ms2sysv
11871 || frame->va_arg_size != 0
11872 || size != 0
11873 || !crtl->is_leaf
11874 || (!crtl->tail_call_emit
11875 && cfun->machine->outgoing_args_on_stack)
11876 || cfun->calls_alloca
11877 || ix86_current_function_calls_tls_descriptor)
11878 offset = ROUND_UP (offset, stack_alignment_needed);
11880 /* Frame pointer points here. */
11881 frame->frame_pointer_offset = offset;
11883 offset += size;
11885 /* Add outgoing arguments area. Can be skipped if we eliminated
11886 all the function calls as dead code.
11887 Skipping is however impossible when the function calls alloca. The
11888 alloca expander assumes that the last crtl->outgoing_args_size bytes
11889 of the stack frame are unused. */
11890 if (ACCUMULATE_OUTGOING_ARGS
11891 && (!crtl->is_leaf || cfun->calls_alloca
11892 || ix86_current_function_calls_tls_descriptor))
11894 offset += crtl->outgoing_args_size;
11895 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11897 else
11898 frame->outgoing_arguments_size = 0;
11900 /* Align stack boundary. Only needed if we're calling another function
11901 or using alloca. */
11902 if (!crtl->is_leaf || cfun->calls_alloca
11903 || ix86_current_function_calls_tls_descriptor)
11904 offset = ROUND_UP (offset, preferred_alignment);
11906 /* We've reached end of stack frame. */
11907 frame->stack_pointer_offset = offset;
11909 /* Size prologue needs to allocate. */
11910 to_allocate = offset - frame->sse_reg_save_offset;
11912 if ((!to_allocate && frame->nregs <= 1)
11913 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
11914 /* If stack clash probing needs a loop, then it needs a
11915 scratch register. But the returned register is only guaranteed
11916 to be safe to use after register saves are complete. So if
11917 stack clash protections are enabled and the allocated frame is
11918 larger than the probe interval, then use pushes to save
11919 callee saved registers. */
11920 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
11921 frame->save_regs_using_mov = false;
11923 if (ix86_using_red_zone ()
11924 && crtl->sp_is_unchanging
11925 && crtl->is_leaf
11926 && !ix86_pc_thunk_call_expanded
11927 && !ix86_current_function_calls_tls_descriptor)
11929 frame->red_zone_size = to_allocate;
11930 if (frame->save_regs_using_mov)
11931 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11932 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11933 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11935 else
11936 frame->red_zone_size = 0;
11937 frame->stack_pointer_offset -= frame->red_zone_size;
11939 /* The SEH frame pointer location is near the bottom of the frame.
11940 This is enforced by the fact that the difference between the
11941 stack pointer and the frame pointer is limited to 240 bytes in
11942 the unwind data structure. */
11943 if (TARGET_SEH)
11945 HOST_WIDE_INT diff;
11947 /* If we can leave the frame pointer where it is, do so. Also, returns
11948 the establisher frame for __builtin_frame_address (0). */
11949 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11950 if (diff <= SEH_MAX_FRAME_SIZE
11951 && (diff > 240 || (diff & 15) != 0)
11952 && !crtl->accesses_prior_frames)
11954 /* Ideally we'd determine what portion of the local stack frame
11955 (within the constraint of the lowest 240) is most heavily used.
11956 But without that complication, simply bias the frame pointer
11957 by 128 bytes so as to maximize the amount of the local stack
11958 frame that is addressable with 8-bit offsets. */
11959 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11964 /* This is semi-inlined memory_address_length, but simplified
11965 since we know that we're always dealing with reg+offset, and
11966 to avoid having to create and discard all that rtl. */
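/* For example, 8(%rbp) needs only a disp8, so the result is 1, while
   8(%rsp) also needs a SIB byte and yields 2; a zero offset from %rbp
   or %r13 still requires a disp8.  */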
11968 static inline int
11969 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11971 int len = 4;
11973 if (offset == 0)
11975 /* EBP and R13 cannot be encoded without an offset. */
11976 len = (regno == BP_REG || regno == R13_REG);
11978 else if (IN_RANGE (offset, -128, 127))
11979 len = 1;
11981 /* ESP and R12 must be encoded with a SIB byte. */
11982 if (regno == SP_REG || regno == R12_REG)
11983 len++;
11985 return len;
11988 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11989 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11991 static bool
11992 sp_valid_at (HOST_WIDE_INT cfa_offset)
11994 const struct machine_frame_state &fs = cfun->machine->fs;
11995 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11997 /* Validate that the cfa_offset isn't in a "no-man's land". */
11998 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11999 return false;
12001 return fs.sp_valid;
12004 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
12005 the frame save area. The register is saved at CFA - CFA_OFFSET. */
12007 static inline bool
12008 fp_valid_at (HOST_WIDE_INT cfa_offset)
12010 const struct machine_frame_state &fs = cfun->machine->fs;
12011 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
12013 /* Validate that the cfa_offset isn't in a "no-man's land". */
12014 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
12015 return false;
12017 return fs.fp_valid;
12020 /* Choose a base register based upon alignment requested, speed and/or
12021 size. */
12023 static void
12024 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
12025 HOST_WIDE_INT &base_offset,
12026 unsigned int align_reqested, unsigned int *align)
12028 const struct machine_function *m = cfun->machine;
12029 unsigned int hfp_align;
12030 unsigned int drap_align;
12031 unsigned int sp_align;
12032 bool hfp_ok = fp_valid_at (cfa_offset);
12033 bool drap_ok = m->fs.drap_valid;
12034 bool sp_ok = sp_valid_at (cfa_offset);
12036 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
12038 /* Filter out any registers that don't meet the requested alignment
12039 criteria. */
12040 if (align_reqested)
12042 if (m->fs.realigned)
12043 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
12044 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
12045 notes (which we would need to use a realigned stack pointer),
12046 so disable on SEH targets. */
12047 else if (m->fs.sp_realigned)
12048 sp_align = crtl->stack_alignment_needed;
12050 hfp_ok = hfp_ok && hfp_align >= align_reqested;
12051 drap_ok = drap_ok && drap_align >= align_reqested;
12052 sp_ok = sp_ok && sp_align >= align_reqested;
12055 if (m->use_fast_prologue_epilogue)
12057 /* Choose the base register most likely to allow the most scheduling
12058 opportunities. Generally FP is valid throughout the function,
12059 while DRAP must be reloaded within the epilogue. But choose either
12060 over the SP due to increased encoding size. */
12062 if (hfp_ok)
12064 base_reg = hard_frame_pointer_rtx;
12065 base_offset = m->fs.fp_offset - cfa_offset;
12067 else if (drap_ok)
12069 base_reg = crtl->drap_reg;
12070 base_offset = 0 - cfa_offset;
12072 else if (sp_ok)
12074 base_reg = stack_pointer_rtx;
12075 base_offset = m->fs.sp_offset - cfa_offset;
12078 else
12080 HOST_WIDE_INT toffset;
12081 int len = 16, tlen;
12083 /* Choose the base register with the smallest address encoding.
12084 With a tie, choose FP > DRAP > SP. */
12085 if (sp_ok)
12087 base_reg = stack_pointer_rtx;
12088 base_offset = m->fs.sp_offset - cfa_offset;
12089 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
12091 if (drap_ok)
12093 toffset = 0 - cfa_offset;
12094 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
12095 if (tlen <= len)
12097 base_reg = crtl->drap_reg;
12098 base_offset = toffset;
12099 len = tlen;
12102 if (hfp_ok)
12104 toffset = m->fs.fp_offset - cfa_offset;
12105 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
12106 if (tlen <= len)
12108 base_reg = hard_frame_pointer_rtx;
12109 base_offset = toffset;
12110 len = tlen;
12115 /* Set the align return value. */
12116 if (align)
12118 if (base_reg == stack_pointer_rtx)
12119 *align = sp_align;
12120 else if (base_reg == crtl->drap_reg)
12121 *align = drap_align;
12122 else if (base_reg == hard_frame_pointer_rtx)
12123 *align = hfp_align;
12127 /* Return an RTX that points to CFA_OFFSET within the stack frame and
12128 the alignment of address. If ALIGN is non-null, it should point to
12129 an alignment value (in bits) that is preferred or zero and will
12130 receive the alignment of the base register that was selected,
12131 irrespective of whether or not CFA_OFFSET is a multiple of that
12132 alignment value. If it is possible for the base register offset to be
12133 non-immediate then SCRATCH_REGNO should specify a scratch register to
12134 use.
12136 The valid base registers are taken from CFUN->MACHINE->FS. */
12138 static rtx
12139 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
12140 unsigned int scratch_regno = INVALID_REGNUM)
12142 rtx base_reg = NULL;
12143 HOST_WIDE_INT base_offset = 0;
12145 /* If a specific alignment is requested, try to get a base register
12146 with that alignment first. */
12147 if (align && *align)
12148 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
12150 if (!base_reg)
12151 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
12153 gcc_assert (base_reg != NULL);
12155 rtx base_offset_rtx = GEN_INT (base_offset);
12157 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
12159 gcc_assert (scratch_regno != INVALID_REGNUM);
12161 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12162 emit_move_insn (scratch_reg, base_offset_rtx);
12164 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
12167 return plus_constant (Pmode, base_reg, base_offset);
12170 /* Emit code to save registers in the prologue. */
12172 static void
12173 ix86_emit_save_regs (void)
12175 unsigned int regno;
12176 rtx_insn *insn;
12178 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
12179 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12181 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
12182 RTX_FRAME_RELATED_P (insn) = 1;
12186 /* Emit a single register save at CFA - CFA_OFFSET. */
12188 static void
12189 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
12190 HOST_WIDE_INT cfa_offset)
12192 struct machine_function *m = cfun->machine;
12193 rtx reg = gen_rtx_REG (mode, regno);
12194 rtx mem, addr, base, insn;
12195 unsigned int align = GET_MODE_ALIGNMENT (mode);
12197 addr = choose_baseaddr (cfa_offset, &align);
12198 mem = gen_frame_mem (mode, addr);
12200 /* The location alignment depends upon the base register. */
12201 align = MIN (GET_MODE_ALIGNMENT (mode), align);
12202 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
12203 set_mem_align (mem, align);
12205 insn = emit_insn (gen_rtx_SET (mem, reg));
12206 RTX_FRAME_RELATED_P (insn) = 1;
12208 base = addr;
12209 if (GET_CODE (base) == PLUS)
12210 base = XEXP (base, 0);
12211 gcc_checking_assert (REG_P (base));
12213 /* When saving registers into a re-aligned local stack frame, avoid
12214 any tricky guessing by dwarf2out. */
12215 if (m->fs.realigned)
12217 gcc_checking_assert (stack_realign_drap);
12219 if (regno == REGNO (crtl->drap_reg))
12221 /* A bit of a hack. We force the DRAP register to be saved in
12222 the re-aligned stack frame, which provides us with a copy
12223 of the CFA that will last past the prologue. Install it. */
12224 gcc_checking_assert (cfun->machine->fs.fp_valid);
12225 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12226 cfun->machine->fs.fp_offset - cfa_offset);
12227 mem = gen_rtx_MEM (mode, addr);
12228 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
12230 else
12232 /* The frame pointer is a stable reference within the
12233 aligned frame. Use it. */
12234 gcc_checking_assert (cfun->machine->fs.fp_valid);
12235 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12236 cfun->machine->fs.fp_offset - cfa_offset);
12237 mem = gen_rtx_MEM (mode, addr);
12238 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12242 else if (base == stack_pointer_rtx && m->fs.sp_realigned
12243 && cfa_offset >= m->fs.sp_realigned_offset)
12245 gcc_checking_assert (stack_realign_fp);
12246 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12249 /* The memory may not be relative to the current CFA register,
12250 which means that we may need to generate a new pattern for
12251 use by the unwind info. */
12252 else if (base != m->fs.cfa_reg)
12254 addr = plus_constant (Pmode, m->fs.cfa_reg,
12255 m->fs.cfa_offset - cfa_offset);
12256 mem = gen_rtx_MEM (mode, addr);
12257 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
12261 /* Emit code to save registers using MOV insns.
12262 First register is stored at CFA - CFA_OFFSET. */
12263 static void
12264 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
12266 unsigned int regno;
12268 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12269 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12271 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
12272 cfa_offset -= UNITS_PER_WORD;
12276 /* Emit code to save SSE registers using MOV insns.
12277 First register is stored at CFA - CFA_OFFSET. */
12278 static void
12279 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
12281 unsigned int regno;
12283 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12284 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12286 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
12287 cfa_offset -= GET_MODE_SIZE (V4SFmode);
12291 static GTY(()) rtx queued_cfa_restores;
12293 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
12294 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
12295 Don't add the note if the previously saved value will be left untouched
12296 within stack red-zone till return, as unwinders can find the same value
12297 in the register and on the stack. */
12299 static void
12300 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
12302 if (!crtl->shrink_wrapped
12303 && cfa_offset <= cfun->machine->fs.red_zone_offset)
12304 return;
12306 if (insn)
12308 add_reg_note (insn, REG_CFA_RESTORE, reg);
12309 RTX_FRAME_RELATED_P (insn) = 1;
12311 else
12312 queued_cfa_restores
12313 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
12316 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
12318 static void
12319 ix86_add_queued_cfa_restore_notes (rtx insn)
12321 rtx last;
12322 if (!queued_cfa_restores)
12323 return;
12324 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
12326 XEXP (last, 1) = REG_NOTES (insn);
12327 REG_NOTES (insn) = queued_cfa_restores;
12328 queued_cfa_restores = NULL_RTX;
12329 RTX_FRAME_RELATED_P (insn) = 1;
12332 /* Expand prologue or epilogue stack adjustment.
12333 The pattern exists to put a dependency on all ebp-based memory accesses.
12334 STYLE should be negative if instructions should be marked as frame related,
12335 zero if %r11 register is live and cannot be freely used and positive
12336 otherwise. */
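/* The stack-probing helpers later in this file, for instance, pass
   STYLE as -1 so that the adjustments they emit are marked as frame
   related.  */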
12338 static rtx
12339 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
12340 int style, bool set_cfa)
12342 struct machine_function *m = cfun->machine;
12343 rtx insn;
12344 bool add_frame_related_expr = false;
12346 if (Pmode == SImode)
12347 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
12348 else if (x86_64_immediate_operand (offset, DImode))
12349 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
12350 else
12352 rtx tmp;
12353 /* r11 is used by indirect sibcall return as well, set before the
12354 epilogue and used after the epilogue. */
12355 if (style)
12356 tmp = gen_rtx_REG (DImode, R11_REG);
12357 else
12359 gcc_assert (src != hard_frame_pointer_rtx
12360 && dest != hard_frame_pointer_rtx);
12361 tmp = hard_frame_pointer_rtx;
12363 insn = emit_insn (gen_rtx_SET (tmp, offset));
12364 if (style < 0)
12365 add_frame_related_expr = true;
12367 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
12370 insn = emit_insn (insn);
12371 if (style >= 0)
12372 ix86_add_queued_cfa_restore_notes (insn);
12374 if (set_cfa)
12376 rtx r;
12378 gcc_assert (m->fs.cfa_reg == src);
12379 m->fs.cfa_offset += INTVAL (offset);
12380 m->fs.cfa_reg = dest;
12382 r = gen_rtx_PLUS (Pmode, src, offset);
12383 r = gen_rtx_SET (dest, r);
12384 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
12385 RTX_FRAME_RELATED_P (insn) = 1;
12387 else if (style < 0)
12389 RTX_FRAME_RELATED_P (insn) = 1;
12390 if (add_frame_related_expr)
12392 rtx r = gen_rtx_PLUS (Pmode, src, offset);
12393 r = gen_rtx_SET (dest, r);
12394 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
12398 if (dest == stack_pointer_rtx)
12400 HOST_WIDE_INT ooffset = m->fs.sp_offset;
12401 bool valid = m->fs.sp_valid;
12402 bool realigned = m->fs.sp_realigned;
12404 if (src == hard_frame_pointer_rtx)
12406 valid = m->fs.fp_valid;
12407 realigned = false;
12408 ooffset = m->fs.fp_offset;
12410 else if (src == crtl->drap_reg)
12412 valid = m->fs.drap_valid;
12413 realigned = false;
12414 ooffset = 0;
12416 else
12418 /* Else there are two possibilities: SP itself, which we set
12419 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
12420 taken care of by hand along the eh_return path. */
12421 gcc_checking_assert (src == stack_pointer_rtx
12422 || offset == const0_rtx);
12425 m->fs.sp_offset = ooffset - INTVAL (offset);
12426 m->fs.sp_valid = valid;
12427 m->fs.sp_realigned = realigned;
12429 return insn;
12432 /* Find an available register to be used as the dynamic realign argument
12433 pointer register. Such a register will be written in the prologue and
12434 used at the beginning of the body, so it must not be
12435 1. parameter passing register.
12436 2. GOT pointer.
12437 We reuse static-chain register if it is available. Otherwise, we
12438 use DI for i386 and R13 for x86-64. We chose R13 since it has
12439 shorter encoding.
12441 Return: the regno of chosen register. */
12443 static unsigned int
12444 find_drap_reg (void)
12446 tree decl = cfun->decl;
12448 /* Always use callee-saved register if there are no caller-saved
12449 registers. */
12450 if (TARGET_64BIT)
12452 /* Use R13 for a nested function or a function that needs a static chain.
12453 Since a function with a tail call may use any caller-saved
12454 register in the epilogue, DRAP must not use a caller-saved
12455 register in that case. */
12456 if (DECL_STATIC_CHAIN (decl)
12457 || cfun->machine->no_caller_saved_registers
12458 || crtl->tail_call_emit)
12459 return R13_REG;
12461 return R10_REG;
12463 else
12465 /* Use DI for a nested function or a function that needs a static chain.
12466 Since a function with a tail call may use any caller-saved
12467 register in the epilogue, DRAP must not use a caller-saved
12468 register in that case. */
12469 if (DECL_STATIC_CHAIN (decl)
12470 || cfun->machine->no_caller_saved_registers
12471 || crtl->tail_call_emit)
12472 return DI_REG;
12474 /* Reuse static chain register if it isn't used for parameter
12475 passing. */
12476 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
12478 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
12479 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
12480 return CX_REG;
12482 return DI_REG;
12486 /* Handle a "force_align_arg_pointer" attribute. */
12488 static tree
12489 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
12490 tree, int, bool *no_add_attrs)
12492 if (TREE_CODE (*node) != FUNCTION_TYPE
12493 && TREE_CODE (*node) != METHOD_TYPE
12494 && TREE_CODE (*node) != FIELD_DECL
12495 && TREE_CODE (*node) != TYPE_DECL)
12497 warning (OPT_Wattributes, "%qE attribute only applies to functions",
12498 name);
12499 *no_add_attrs = true;
12502 return NULL_TREE;
12505 /* Return minimum incoming stack alignment. */
12507 static unsigned int
12508 ix86_minimum_incoming_stack_boundary (bool sibcall)
12510 unsigned int incoming_stack_boundary;
12512 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
12513 if (cfun->machine->func_type != TYPE_NORMAL)
12514 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
12515 /* Prefer the one specified at command line. */
12516 else if (ix86_user_incoming_stack_boundary)
12517 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12518 /* In 32bit, use MIN_STACK_BOUNDARY for the incoming stack boundary
12519 if -mstackrealign is used, this isn't for a sibcall check, and the
12520 estimated stack alignment is 128bit. */
12521 else if (!sibcall
12522 && ix86_force_align_arg_pointer
12523 && crtl->stack_alignment_estimated == 128)
12524 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12525 else
12526 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12528 /* Incoming stack alignment can be changed on individual functions
12529 via force_align_arg_pointer attribute. We use the smallest
12530 incoming stack boundary. */
12531 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12532 && lookup_attribute (ix86_force_align_arg_pointer_string,
12533 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12534 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12536 /* The incoming stack frame has to be aligned at least at
12537 parm_stack_boundary. */
12538 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12539 incoming_stack_boundary = crtl->parm_stack_boundary;
12541 /* Stack at entrance of main is aligned by runtime. We use the
12542 smallest incoming stack boundary. */
12543 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12544 && DECL_NAME (current_function_decl)
12545 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12546 && DECL_FILE_SCOPE_P (current_function_decl))
12547 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12549 return incoming_stack_boundary;
12552 /* Update incoming stack boundary and estimated stack alignment. */
12554 static void
12555 ix86_update_stack_boundary (void)
12557 ix86_incoming_stack_boundary
12558 = ix86_minimum_incoming_stack_boundary (false);
12560 /* x86_64 vararg needs 16byte stack alignment for register save
12561 area. */
12562 if (TARGET_64BIT
12563 && cfun->stdarg
12564 && crtl->stack_alignment_estimated < 128)
12565 crtl->stack_alignment_estimated = 128;
12567 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12568 if (ix86_tls_descriptor_calls_expanded_in_cfun
12569 && crtl->preferred_stack_boundary < 128)
12570 crtl->preferred_stack_boundary = 128;
12573 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12574 needed or an rtx for DRAP otherwise. */
12576 static rtx
12577 ix86_get_drap_rtx (void)
12579 /* We must use DRAP if there are outgoing arguments on stack and
12580 ACCUMULATE_OUTGOING_ARGS is false. */
12581 if (ix86_force_drap
12582 || (cfun->machine->outgoing_args_on_stack
12583 && !ACCUMULATE_OUTGOING_ARGS))
12584 crtl->need_drap = true;
12586 if (stack_realign_drap)
12588 /* Assign DRAP to vDRAP and return vDRAP. */
12589 unsigned int regno = find_drap_reg ();
12590 rtx drap_vreg;
12591 rtx arg_ptr;
12592 rtx_insn *seq, *insn;
12594 arg_ptr = gen_rtx_REG (Pmode, regno);
12595 crtl->drap_reg = arg_ptr;
12597 start_sequence ();
12598 drap_vreg = copy_to_reg (arg_ptr);
12599 seq = get_insns ();
12600 end_sequence ();
12602 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12603 if (!optimize)
12605 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12606 RTX_FRAME_RELATED_P (insn) = 1;
12608 return drap_vreg;
12610 else
12611 return NULL;
12614 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12616 static rtx
12617 ix86_internal_arg_pointer (void)
12619 return virtual_incoming_args_rtx;
12622 struct scratch_reg {
12623 rtx reg;
12624 bool saved;
12627 /* Return a short-lived scratch register for use on function entry.
12628 In 32-bit mode, it is valid only after the registers are saved
12629 in the prologue. This register must be released by means of
12630 release_scratch_register_on_entry once it is dead. */
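/* In 64-bit mode the register is always %r11; in 32-bit mode a free
   argument register or a callee-saved register that the prologue saves
   anyway is preferred, and as a last resort one is pushed and SR->SAVED
   is set.  */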
12632 static void
12633 get_scratch_register_on_entry (struct scratch_reg *sr)
12635 int regno;
12637 sr->saved = false;
12639 if (TARGET_64BIT)
12641 /* We always use R11 in 64-bit mode. */
12642 regno = R11_REG;
12644 else
12646 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12647 bool fastcall_p
12648 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12649 bool thiscall_p
12650 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12651 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12652 int regparm = ix86_function_regparm (fntype, decl);
12653 int drap_regno
12654 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12656 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12657 for the static chain register. */
12658 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12659 && drap_regno != AX_REG)
12660 regno = AX_REG;
12661 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12662 for the static chain register. */
12663 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12664 regno = AX_REG;
12665 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12666 regno = DX_REG;
12667 /* ecx is the static chain register. */
12668 else if (regparm < 3 && !fastcall_p && !thiscall_p
12669 && !static_chain_p
12670 && drap_regno != CX_REG)
12671 regno = CX_REG;
12672 else if (ix86_save_reg (BX_REG, true, false))
12673 regno = BX_REG;
12674 /* esi is the static chain register. */
12675 else if (!(regparm == 3 && static_chain_p)
12676 && ix86_save_reg (SI_REG, true, false))
12677 regno = SI_REG;
12678 else if (ix86_save_reg (DI_REG, true, false))
12679 regno = DI_REG;
12680 else
12682 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12683 sr->saved = true;
12687 sr->reg = gen_rtx_REG (Pmode, regno);
12688 if (sr->saved)
12690 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12691 RTX_FRAME_RELATED_P (insn) = 1;
12695 /* Release a scratch register obtained from the preceding function.
12697 If RELEASE_VIA_POP is true, we just pop the register off the stack
12698 to release it. This is what non-Linux systems use with -fstack-check.
12700 Otherwise we use OFFSET to locate the saved register and the
12701 allocated stack space becomes part of the local frame and is
12702 deallocated by the epilogue. */
12704 static void
12705 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
12706 bool release_via_pop)
12708 if (sr->saved)
12710 if (release_via_pop)
12712 struct machine_function *m = cfun->machine;
12713 rtx x, insn = emit_insn (gen_pop (sr->reg));
12715 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12716 RTX_FRAME_RELATED_P (insn) = 1;
12717 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12718 x = gen_rtx_SET (stack_pointer_rtx, x);
12719 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12720 m->fs.sp_offset -= UNITS_PER_WORD;
12722 else
12724 rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
12725 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
12726 emit_insn (x);
12731 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12733 This differs from the next routine in that it tries hard to prevent
12734 attacks that jump the stack guard. Thus it is never allowed to allocate
12735 more than PROBE_INTERVAL bytes of stack space without a suitable
12736 probe.
12738 INT_REGISTERS_SAVED is true if integer registers have already been
12739 pushed on the stack. */
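/* In outline: frames smaller than the guard size are allocated without
   probing, frames of up to four probe intervals are probed inline, and
   larger frames use a probing loop driven by a scratch register.  */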
12741 static void
12742 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
12743 const bool int_registers_saved)
12745 struct machine_function *m = cfun->machine;
12747 /* If this function does not statically allocate stack space, then
12748 no probes are needed. */
12749 if (!size)
12751 /* However, the allocation of space via pushes for register
12752 saves could be viewed as allocating space, but without the
12753 need to probe. */
12754 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12755 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12756 else
12757 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12758 return;
12761 /* If we are a noreturn function, then we have to consider the
12762 possibility that we're called via a jump rather than a call.
12764 Thus we don't have the implicit probe generated by saving the
12765 return address into the stack at the call. Thus, the stack
12766 pointer could be anywhere in the guard page. The safe thing
12767 to do is emit a probe now.
12769 The probe can be avoided if we have already emitted any callee
12770 register saves into the stack or have a frame pointer (which will
12771 have been saved as well). Those saves will function as implicit
12772 probes.
12774 ?!? This should be revamped to work like aarch64 and s390 where
12775 we track the offset from the most recent probe. Normally that
12776 offset would be zero. For a noreturn function we would reset
12777 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
12778 we just probe when we cross PROBE_INTERVAL. */
12779 if (TREE_THIS_VOLATILE (cfun->decl)
12780 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
12782 /* We can safely use any register here since we're just going to push
12783 its value and immediately pop it back. But we do try and avoid
12784 argument passing registers so as not to introduce dependencies in
12785 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
12786 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
12787 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
12788 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
12789 m->fs.sp_offset -= UNITS_PER_WORD;
12790 if (m->fs.cfa_reg == stack_pointer_rtx)
12792 m->fs.cfa_offset -= UNITS_PER_WORD;
12793 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12794 x = gen_rtx_SET (stack_pointer_rtx, x);
12795 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
12796 RTX_FRAME_RELATED_P (insn_push) = 1;
12797 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12798 x = gen_rtx_SET (stack_pointer_rtx, x);
12799 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
12800 RTX_FRAME_RELATED_P (insn_pop) = 1;
12802 emit_insn (gen_blockage ());
12805 /* If we allocate less than the size of the guard statically,
12806 then no probing is necessary, but we do need to allocate
12807 the stack. */
12808 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12810 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12811 GEN_INT (-size), -1,
12812 m->fs.cfa_reg == stack_pointer_rtx);
12813 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12814 return;
12817 /* We're allocating a large enough stack frame that we need to
12818 emit probes. Either emit them inline or in a loop depending
12819 on the size. */
12820 HOST_WIDE_INT probe_interval = get_probe_interval ();
12821 if (size <= 4 * probe_interval)
12823 HOST_WIDE_INT i;
12824 for (i = probe_interval; i <= size; i += probe_interval)
12826 /* Allocate PROBE_INTERVAL bytes. */
12827 rtx insn
12828 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12829 GEN_INT (-probe_interval), -1,
12830 m->fs.cfa_reg == stack_pointer_rtx);
12831 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12833 /* And probe at *sp. */
12834 emit_stack_probe (stack_pointer_rtx);
12835 emit_insn (gen_blockage ());
12838 /* We need to allocate space for the residual, but we do not need
12839 to probe the residual. */
12840 HOST_WIDE_INT residual = (i - probe_interval - size);
12841 if (residual)
12842 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12843 GEN_INT (residual), -1,
12844 m->fs.cfa_reg == stack_pointer_rtx);
12845 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12847 else
12849 /* We expect the GP registers to be saved when probes are used
12850 as the probing sequences might need a scratch register and
12851 the routine to allocate one assumes the integer registers
12852 have already been saved. */
12853 gcc_assert (int_registers_saved);
12855 struct scratch_reg sr;
12856 get_scratch_register_on_entry (&sr);
12858 /* If we needed to save a register, then account for any space
12859 that was pushed (we are not going to pop the register when
12860 we do the restore). */
12861 if (sr.saved)
12862 size -= UNITS_PER_WORD;
12864 /* Step 1: round SIZE down to a multiple of the interval. */
12865 HOST_WIDE_INT rounded_size = size & -probe_interval;
12867 /* Step 2: compute final value of the loop counter. Use lea if
12868 possible. */
12869 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12870 rtx insn;
12871 if (address_no_seg_operand (addr, Pmode))
12872 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12873 else
12875 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12876 insn = emit_insn (gen_rtx_SET (sr.reg,
12877 gen_rtx_PLUS (Pmode, sr.reg,
12878 stack_pointer_rtx)));
12880 if (m->fs.cfa_reg == stack_pointer_rtx)
12882 add_reg_note (insn, REG_CFA_DEF_CFA,
12883 plus_constant (Pmode, sr.reg,
12884 m->fs.cfa_offset + rounded_size));
12885 RTX_FRAME_RELATED_P (insn) = 1;
12888 /* Step 3: the loop. */
12889 rtx size_rtx = GEN_INT (rounded_size);
12890 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12891 size_rtx));
12892 if (m->fs.cfa_reg == stack_pointer_rtx)
12894 m->fs.cfa_offset += rounded_size;
12895 add_reg_note (insn, REG_CFA_DEF_CFA,
12896 plus_constant (Pmode, stack_pointer_rtx,
12897 m->fs.cfa_offset));
12898 RTX_FRAME_RELATED_P (insn) = 1;
12900 m->fs.sp_offset += rounded_size;
12901 emit_insn (gen_blockage ());
12903 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12904 is equal to ROUNDED_SIZE. */
12906 if (size != rounded_size)
12907 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12908 GEN_INT (rounded_size - size), -1,
12909 m->fs.cfa_reg == stack_pointer_rtx);
12910 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12912 /* This does not deallocate the space reserved for the scratch
12913 register. That will be deallocated in the epilogue. */
12914 release_scratch_register_on_entry (&sr, size, false);
12917 /* Make sure nothing is scheduled before we are done. */
12918 emit_insn (gen_blockage ());
12921 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12923 INT_REGISTERS_SAVED is true if integer registers have already been
12924 pushed on the stack. */
12926 static void
12927 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
12928 const bool int_registers_saved)
12930 /* We skip the probe for the first interval + a small dope of 4 words and
12931 probe that many bytes past the specified size to maintain a protection
12932 area at the bottom of the stack. */
12933 const int dope = 4 * UNITS_PER_WORD;
12934 rtx size_rtx = GEN_INT (size), last;
12936 /* See if we have a constant small number of probes to generate. If so,
12937 that's the easy case. The run-time loop is made up of 9 insns in the
12938 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12939 for n # of intervals. */
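/* For n = 4 intervals that is 3 + 2*3 = 9 insns, the same as the
   run-time loop, which is why the inline sequence below is used for at
   most four intervals.  */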
12940 if (size <= 4 * get_probe_interval ())
12942 HOST_WIDE_INT i, adjust;
12943 bool first_probe = true;
12945 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12946 values of N from 1 until it exceeds SIZE. If only one probe is
12947 needed, this will not generate any code. Then adjust and probe
12948 to PROBE_INTERVAL + SIZE. */
12949 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12951 if (first_probe)
12953 adjust = 2 * get_probe_interval () + dope;
12954 first_probe = false;
12956 else
12957 adjust = get_probe_interval ();
12959 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12960 plus_constant (Pmode, stack_pointer_rtx,
12961 -adjust)));
12962 emit_stack_probe (stack_pointer_rtx);
12965 if (first_probe)
12966 adjust = size + get_probe_interval () + dope;
12967 else
12968 adjust = size + get_probe_interval () - i;
12970 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12971 plus_constant (Pmode, stack_pointer_rtx,
12972 -adjust)));
12973 emit_stack_probe (stack_pointer_rtx);
12975 /* Adjust back to account for the additional first interval. */
12976 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12977 plus_constant (Pmode, stack_pointer_rtx,
12978 (get_probe_interval ()
12979 + dope))));
12982 /* Otherwise, do the same as above, but in a loop. Note that we must be
12983 extra careful with variables wrapping around because we might be at
12984 the very top (or the very bottom) of the address space and we have
12985 to be able to handle this case properly; in particular, we use an
12986 equality test for the loop condition. */
12987 else
12989 /* We expect the GP registers to be saved when probes are used
12990 as the probing sequences might need a scratch register and
12991 the routine to allocate one assumes the integer registers
12992 have already been saved. */
12993 gcc_assert (int_registers_saved);
12995 HOST_WIDE_INT rounded_size;
12996 struct scratch_reg sr;
12998 get_scratch_register_on_entry (&sr);
13000 /* If we needed to save a register, then account for any space
13001 that was pushed (we are not going to pop the register when
13002 we do the restore). */
13003 if (sr.saved)
13004 size -= UNITS_PER_WORD;
13006 /* Step 1: round SIZE to the previous multiple of the interval. */
13008 rounded_size = ROUND_DOWN (size, get_probe_interval ());
13011 /* Step 2: compute initial and final value of the loop counter. */
13013 /* SP = SP_0 + PROBE_INTERVAL. */
13014 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13015 plus_constant (Pmode, stack_pointer_rtx,
13016 - (get_probe_interval () + dope))));
13018 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
13019 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
13020 emit_insn (gen_rtx_SET (sr.reg,
13021 plus_constant (Pmode, stack_pointer_rtx,
13022 -rounded_size)));
13023 else
13025 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
13026 emit_insn (gen_rtx_SET (sr.reg,
13027 gen_rtx_PLUS (Pmode, sr.reg,
13028 stack_pointer_rtx)));
13032 /* Step 3: the loop
13036 SP = SP + PROBE_INTERVAL
13037 probe at SP
13039 while (SP != LAST_ADDR)
13041 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
13042 values of N from 1 until it is equal to ROUNDED_SIZE. */
13044 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
13047 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
13048 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
13050 if (size != rounded_size)
13052 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13053 plus_constant (Pmode, stack_pointer_rtx,
13054 rounded_size - size)));
13055 emit_stack_probe (stack_pointer_rtx);
13058 /* Adjust back to account for the additional first interval. */
13059 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
13060 plus_constant (Pmode, stack_pointer_rtx,
13061 (get_probe_interval ()
13062 + dope))));
13064 /* This does not deallocate the space reserved for the scratch
13065 register. That will be deallocated in the epilogue. */
13066 release_scratch_register_on_entry (&sr, size, false);
13069 /* Even if the stack pointer isn't the CFA register, we need to correctly
13070 describe the adjustments made to it, in particular differentiate the
13071 frame-related ones from the frame-unrelated ones. */
13072 if (size > 0)
13074 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
13075 XVECEXP (expr, 0, 0)
13076 = gen_rtx_SET (stack_pointer_rtx,
13077 plus_constant (Pmode, stack_pointer_rtx, -size));
13078 XVECEXP (expr, 0, 1)
13079 = gen_rtx_SET (stack_pointer_rtx,
13080 plus_constant (Pmode, stack_pointer_rtx,
13081 get_probe_interval () + dope + size));
13082 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
13083 RTX_FRAME_RELATED_P (last) = 1;
13085 cfun->machine->fs.sp_offset += size;
13088 /* Make sure nothing is scheduled before we are done. */
13089 emit_insn (gen_blockage ());
13092 /* Adjust the stack pointer up to REG while probing it. */
13094 const char *
13095 output_adjust_stack_and_probe (rtx reg)
13097 static int labelno = 0;
13098 char loop_lab[32];
13099 rtx xops[2];
13101 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13103 /* Loop. */
13104 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13106 /* SP = SP + PROBE_INTERVAL. */
13107 xops[0] = stack_pointer_rtx;
13108 xops[1] = GEN_INT (get_probe_interval ());
13109 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13111 /* Probe at SP. */
13112 xops[1] = const0_rtx;
13113 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
13115 /* Test if SP == LAST_ADDR. */
13116 xops[0] = stack_pointer_rtx;
13117 xops[1] = reg;
13118 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13120 /* Branch. */
13121 fputs ("\tjne\t", asm_out_file);
13122 assemble_name_raw (asm_out_file, loop_lab);
13123 fputc ('\n', asm_out_file);
13125 return "";
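/* A sketch of the loop emitted by the routine above for a 64-bit ELF
   target in AT&T syntax, assuming a 4 KiB probe interval and, purely
   for illustration, %r11 as the register holding LAST_ADDR:

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0
*/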
13128 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
13129 inclusive. These are offsets from the current stack pointer.
13131 INT_REGISTERS_SAVED is true if integer registers have already been
13132 pushed on the stack. */
13134 static void
13135 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
13136 const bool int_registers_saved)
13138 /* See if we have a constant small number of probes to generate. If so,
13139 that's the easy case. The run-time loop is made up of 6 insns in the
13140 generic case while the compile-time loop is made up of n insns for n #
13141 of intervals. */
13142 if (size <= 6 * get_probe_interval ())
13144 HOST_WIDE_INT i;
13146 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
13147 it exceeds SIZE. If only one probe is needed, this will not
13148 generate any code. Then probe at FIRST + SIZE. */
13149 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
13150 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13151 -(first + i)));
13153 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13154 -(first + size)));
13157 /* Otherwise, do the same as above, but in a loop. Note that we must be
13158 extra careful with variables wrapping around because we might be at
13159 the very top (or the very bottom) of the address space and we have
13160 to be able to handle this case properly; in particular, we use an
13161 equality test for the loop condition. */
13162 else
13164 /* We expect the GP registers to be saved when probes are used
13165 as the probing sequences might need a scratch register and
13166 the routine to allocate one assumes the integer registers
13167 have already been saved. */
13168 gcc_assert (int_registers_saved);
13170 HOST_WIDE_INT rounded_size, last;
13171 struct scratch_reg sr;
13173 get_scratch_register_on_entry (&sr);
13176 /* Step 1: round SIZE to the previous multiple of the interval. */
13178 rounded_size = ROUND_DOWN (size, get_probe_interval ());
13181 /* Step 2: compute initial and final value of the loop counter. */
13183 /* TEST_OFFSET = FIRST. */
13184 emit_move_insn (sr.reg, GEN_INT (-first));
13186 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
13187 last = first + rounded_size;
13190 /* Step 3: the loop
13194 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
13195 probe at TEST_ADDR
13197 while (TEST_ADDR != LAST_ADDR)
13199 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
13200 until it is equal to ROUNDED_SIZE. */
13202 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
13205 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
13206 that SIZE is equal to ROUNDED_SIZE. */
13208 if (size != rounded_size)
13209 emit_stack_probe (plus_constant (Pmode,
13210 gen_rtx_PLUS (Pmode,
13211 stack_pointer_rtx,
13212 sr.reg),
13213 rounded_size - size));
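/* A worked sketch of the loop case (illustrative numbers, assuming a
   4 KiB probe interval), for FIRST = 4096 and SIZE = 10240:
   ROUNDED_SIZE = 8192, so the loop probes at SP - 8192 and SP - 12288
   (FIRST + 4096 and FIRST + 8192 below the entry SP), and the final
   probe just above lands at SP - 14336, i.e. FIRST + SIZE.  */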
13215 release_scratch_register_on_entry (&sr, size, true);
13218 /* Make sure nothing is scheduled before we are done. */
13219 emit_insn (gen_blockage ());
13222 /* Probe a range of stack addresses from REG to END, inclusive. These are
13223 offsets from the current stack pointer. */
13225 const char *
13226 output_probe_stack_range (rtx reg, rtx end)
13228 static int labelno = 0;
13229 char loop_lab[32];
13230 rtx xops[3];
13232 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13234 /* Loop. */
13235 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13237 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
13238 xops[0] = reg;
13239 xops[1] = GEN_INT (get_probe_interval ());
13240 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13242 /* Probe at TEST_ADDR. */
13243 xops[0] = stack_pointer_rtx;
13244 xops[1] = reg;
13245 xops[2] = const0_rtx;
13246 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
13248 /* Test if TEST_ADDR == LAST_ADDR. */
13249 xops[0] = reg;
13250 xops[1] = end;
13251 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13253 /* Branch. */
13254 fputs ("\tjne\t", asm_out_file);
13255 assemble_name_raw (asm_out_file, loop_lab);
13256 fputc ('\n', asm_out_file);
13258 return "";
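/* A sketch of the loop emitted by the routine above for a 64-bit ELF
   target in AT&T syntax, assuming a 4 KiB probe interval and, purely
   for illustration, %r11 as REG and %rax holding END:

	.LPSRL1:
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		cmpq	%rax, %r11
		jne	.LPSRL1
*/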
13261 /* Return true if stack frame is required. Update STACK_ALIGNMENT
13262 to the largest alignment, in bits, of stack slot used if stack
13263 frame is required and CHECK_STACK_SLOT is true. */
13265 static bool
13266 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
13267 bool check_stack_slot)
13269 HARD_REG_SET set_up_by_prologue, prologue_used;
13270 basic_block bb;
13272 CLEAR_HARD_REG_SET (prologue_used);
13273 CLEAR_HARD_REG_SET (set_up_by_prologue);
13274 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
13275 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
13276 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
13277 HARD_FRAME_POINTER_REGNUM);
13279 /* The preferred stack alignment is the minimum stack alignment. */
13280 if (stack_alignment > crtl->preferred_stack_boundary)
13281 stack_alignment = crtl->preferred_stack_boundary;
13283 bool require_stack_frame = false;
13285 FOR_EACH_BB_FN (bb, cfun)
13287 rtx_insn *insn;
13288 FOR_BB_INSNS (bb, insn)
13289 if (NONDEBUG_INSN_P (insn)
13290 && requires_stack_frame_p (insn, prologue_used,
13291 set_up_by_prologue))
13293 require_stack_frame = true;
13295 if (check_stack_slot)
13297 /* Find the maximum stack alignment. */
13298 subrtx_iterator::array_type array;
13299 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13300 if (MEM_P (*iter)
13301 && (reg_mentioned_p (stack_pointer_rtx,
13302 *iter)
13303 || reg_mentioned_p (frame_pointer_rtx,
13304 *iter)))
13306 unsigned int alignment = MEM_ALIGN (*iter);
13307 if (alignment > stack_alignment)
13308 stack_alignment = alignment;
13314 return require_stack_frame;
13317 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
13318 will guide prologue/epilogue to be generated in correct form. */
13320 static void
13321 ix86_finalize_stack_frame_flags (void)
13323 /* Check if stack realign is really needed after reload, and
13324 store the result in cfun. */
13325 unsigned int incoming_stack_boundary
13326 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
13327 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
13328 unsigned int stack_alignment
13329 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
13330 ? crtl->max_used_stack_slot_alignment
13331 : crtl->stack_alignment_needed);
13332 unsigned int stack_realign
13333 = (incoming_stack_boundary < stack_alignment);
13334 bool recompute_frame_layout_p = false;
13336 if (crtl->stack_realign_finalized)
13338 /* After stack_realign_needed is finalized, we can no longer
13339 change it. */
13340 gcc_assert (crtl->stack_realign_needed == stack_realign);
13341 return;
13344 /* If the only reason for frame_pointer_needed is that we conservatively
13345 assumed stack realignment might be needed or -fno-omit-frame-pointer
13346 is used, but in the end nothing that needed the stack alignment was
13347 spilled and nothing accessed the stack, clear frame_pointer_needed and say we
13348 don't need stack realignment. */
13349 if ((stack_realign || !flag_omit_frame_pointer)
13350 && frame_pointer_needed
13351 && crtl->is_leaf
13352 && crtl->sp_is_unchanging
13353 && !ix86_current_function_calls_tls_descriptor
13354 && !crtl->accesses_prior_frames
13355 && !cfun->calls_alloca
13356 && !crtl->calls_eh_return
13357 /* See ira_setup_eliminable_regset for the rationale. */
13358 && !(STACK_CHECK_MOVING_SP
13359 && flag_stack_check
13360 && flag_exceptions
13361 && cfun->can_throw_non_call_exceptions)
13362 && !ix86_frame_pointer_required ()
13363 && get_frame_size () == 0
13364 && ix86_nsaved_sseregs () == 0
13365 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
13367 if (ix86_find_max_used_stack_alignment (stack_alignment,
13368 stack_realign))
13370 /* Stack frame is required. If stack alignment needed is less
13371 than incoming stack boundary, don't realign stack. */
13372 stack_realign = incoming_stack_boundary < stack_alignment;
13373 if (!stack_realign)
13375 crtl->max_used_stack_slot_alignment
13376 = incoming_stack_boundary;
13377 crtl->stack_alignment_needed
13378 = incoming_stack_boundary;
13379 /* Also update preferred_stack_boundary for leaf
13380 functions. */
13381 crtl->preferred_stack_boundary
13382 = incoming_stack_boundary;
13385 else
13387 /* If drap has been set, but it actually isn't live at the
13388 start of the function, there is no reason to set it up. */
13389 if (crtl->drap_reg)
13391 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13392 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
13393 REGNO (crtl->drap_reg)))
13395 crtl->drap_reg = NULL_RTX;
13396 crtl->need_drap = false;
13399 else
13400 cfun->machine->no_drap_save_restore = true;
13402 frame_pointer_needed = false;
13403 stack_realign = false;
13404 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
13405 crtl->stack_alignment_needed = incoming_stack_boundary;
13406 crtl->stack_alignment_estimated = incoming_stack_boundary;
13407 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
13408 crtl->preferred_stack_boundary = incoming_stack_boundary;
13409 df_finish_pass (true);
13410 df_scan_alloc (NULL);
13411 df_scan_blocks ();
13412 df_compute_regs_ever_live (true);
13413 df_analyze ();
13415 if (flag_var_tracking)
13417 /* Since frame pointer is no longer available, replace it with
13418 stack pointer - UNITS_PER_WORD in debug insns. */
13419 df_ref ref, next;
13420 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
13421 ref; ref = next)
13423 next = DF_REF_NEXT_REG (ref);
13424 if (!DF_REF_INSN_INFO (ref))
13425 continue;
13427 /* Make sure the next ref is for a different instruction,
13428 so that we're not affected by the rescan. */
13429 rtx_insn *insn = DF_REF_INSN (ref);
13430 while (next && DF_REF_INSN (next) == insn)
13431 next = DF_REF_NEXT_REG (next);
13433 if (DEBUG_INSN_P (insn))
13435 bool changed = false;
13436 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
13438 rtx *loc = DF_REF_LOC (ref);
13439 if (*loc == hard_frame_pointer_rtx)
13441 *loc = plus_constant (Pmode,
13442 stack_pointer_rtx,
13443 -UNITS_PER_WORD);
13444 changed = true;
13447 if (changed)
13448 df_insn_rescan (insn);
13453 recompute_frame_layout_p = true;
13456 else if (crtl->max_used_stack_slot_alignment
13457 > crtl->preferred_stack_boundary)
13459 /* We don't need to realign the stack. But we still need to keep
13460 stack frame properly aligned to satisfy the largest alignment
13461 of stack slots. */
13462 if (ix86_find_max_used_stack_alignment (stack_alignment, true))
13463 cfun->machine->max_used_stack_alignment
13464 = stack_alignment / BITS_PER_UNIT;
13467 if (crtl->stack_realign_needed != stack_realign)
13468 recompute_frame_layout_p = true;
13469 crtl->stack_realign_needed = stack_realign;
13470 crtl->stack_realign_finalized = true;
13471 if (recompute_frame_layout_p)
13472 ix86_compute_frame_layout ();
13475 /* Delete SET_GOT right after entry block if it is allocated to reg. */
13477 static void
13478 ix86_elim_entry_set_got (rtx reg)
13480 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13481 rtx_insn *c_insn = BB_HEAD (bb);
13482 if (!NONDEBUG_INSN_P (c_insn))
13483 c_insn = next_nonnote_nondebug_insn (c_insn);
13484 if (c_insn && NONJUMP_INSN_P (c_insn))
13486 rtx pat = PATTERN (c_insn);
13487 if (GET_CODE (pat) == PARALLEL)
13489 rtx vec = XVECEXP (pat, 0, 0);
13490 if (GET_CODE (vec) == SET
13491 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
13492 && REGNO (XEXP (vec, 0)) == REGNO (reg))
13493 delete_insn (c_insn);
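/* Generate a frame load or store: the returned SET moves REG into the
   frame slot at FRAME_REG + OFFSET when STORE is true, and loads the
   slot into REG otherwise.  As an illustration (register numbers and
   offset hypothetical), gen_frame_store (xmm6, rax, -16) yields roughly
   (set (mem:V4SF (plus:DI (reg:DI ax) (const_int -16))) (reg:V4SF xmm6)).  */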
13498 static rtx
13499 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
13501 rtx addr, mem;
13503 if (offset)
13504 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
13505 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
13506 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
13509 static inline rtx
13510 gen_frame_load (rtx reg, rtx frame_reg, int offset)
13512 return gen_frame_set (reg, frame_reg, offset, false);
13515 static inline rtx
13516 gen_frame_store (rtx reg, rtx frame_reg, int offset)
13518 return gen_frame_set (reg, frame_reg, offset, true);
13521 static void
13522 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
13524 struct machine_function *m = cfun->machine;
13525 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13526 + m->call_ms2sysv_extra_regs;
13527 rtvec v = rtvec_alloc (ncregs + 1);
13528 unsigned int align, i, vi = 0;
13529 rtx_insn *insn;
13530 rtx sym, addr;
13531 rtx rax = gen_rtx_REG (word_mode, AX_REG);
13532 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13534 /* AL should only be live with sysv_abi. */
13535 gcc_assert (!ix86_eax_live_at_start_p ());
13536 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
13538 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
13539 regardless of whether we've actually realigned the stack or not. */
13540 align = GET_MODE_ALIGNMENT (V4SFmode);
13541 addr = choose_baseaddr (frame.stack_realign_offset
13542 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
13543 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13545 emit_insn (gen_rtx_SET (rax, addr));
13547 /* Get the stub symbol. */
13548 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
13549 : XLOGUE_STUB_SAVE);
13550 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13552 for (i = 0; i < ncregs; ++i)
13554 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13555 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
13556 r.regno);
13557 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
13560 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
13562 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
13563 RTX_FRAME_RELATED_P (insn) = true;
13566 /* Expand the prologue into a bunch of separate insns. */
13568 void
13569 ix86_expand_prologue (void)
13571 struct machine_function *m = cfun->machine;
13572 rtx insn, t;
13573 HOST_WIDE_INT allocate;
13574 bool int_registers_saved;
13575 bool sse_registers_saved;
13576 bool save_stub_call_needed;
13577 rtx static_chain = NULL_RTX;
13579 if (ix86_function_naked (current_function_decl))
13580 return;
13582 ix86_finalize_stack_frame_flags ();
13584 /* DRAP should not coexist with stack_realign_fp */
13585 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
13587 memset (&m->fs, 0, sizeof (m->fs));
13589 /* Initialize CFA state for before the prologue. */
13590 m->fs.cfa_reg = stack_pointer_rtx;
13591 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
13593 /* Track SP offset to the CFA. We continue tracking this after we've
13594 swapped the CFA register away from SP. In the case of re-alignment
13595 this is fudged; we're interested in offsets within the local frame. */
13596 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13597 m->fs.sp_valid = true;
13598 m->fs.sp_realigned = false;
13600 const struct ix86_frame &frame = cfun->machine->frame;
13602 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
13604 /* We should have already generated an error for any use of
13605 ms_hook on a nested function. */
13606 gcc_checking_assert (!ix86_static_chain_on_stack);
13608 /* Check if profiling is active and we shall use profiling before
13609 prologue variant. If so sorry. */
13610 if (crtl->profile && flag_fentry != 0)
13611 sorry ("ms_hook_prologue attribute isn%'t compatible "
13612 "with -mfentry for 32-bit");
13614 /* In ix86_asm_output_function_label we emitted:
13615 8b ff movl.s %edi,%edi
13616 55 push %ebp
13617 8b ec movl.s %esp,%ebp
13619 This matches the hookable function prologue in Win32 API
13620 functions in Microsoft Windows XP Service Pack 2 and newer.
13621 Wine uses this to enable Windows apps to hook the Win32 API
13622 functions provided by Wine.
13624 What that means is that we've already set up the frame pointer. */
13626 if (frame_pointer_needed
13627 && !(crtl->drap_reg && crtl->stack_realign_needed))
13629 rtx push, mov;
13631 /* We've decided to use the frame pointer already set up.
13632 Describe this to the unwinder by pretending that both
13633 push and mov insns happen right here.
13635 Putting the unwind info here at the end of the ms_hook
13636 is done so that we can make absolutely certain we get
13637 the required byte sequence at the start of the function,
13638 rather than relying on an assembler that can produce
13639 the exact encoding required.
13641 However it does mean (in the unpatched case) that we have
13642 a 1 insn window where the asynchronous unwind info is
13643 incorrect. However, if we placed the unwind info at
13644 its correct location we would have incorrect unwind info
13645 in the patched case. Which is probably all moot since
13646 I don't expect Wine generates dwarf2 unwind info for the
13647 system libraries that use this feature. */
13649 insn = emit_insn (gen_blockage ());
13651 push = gen_push (hard_frame_pointer_rtx);
13652 mov = gen_rtx_SET (hard_frame_pointer_rtx,
13653 stack_pointer_rtx);
13654 RTX_FRAME_RELATED_P (push) = 1;
13655 RTX_FRAME_RELATED_P (mov) = 1;
13657 RTX_FRAME_RELATED_P (insn) = 1;
13658 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13659 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
13661 /* Note that gen_push incremented m->fs.cfa_offset, even
13662 though we didn't emit the push insn here. */
13663 m->fs.cfa_reg = hard_frame_pointer_rtx;
13664 m->fs.fp_offset = m->fs.cfa_offset;
13665 m->fs.fp_valid = true;
13667 else
13669 /* The frame pointer is not needed so pop %ebp again.
13670 This leaves us with a pristine state. */
13671 emit_insn (gen_pop (hard_frame_pointer_rtx));
13675 /* The first insn of a function that accepts its static chain on the
13676 stack is to push the register that would be filled in by a direct
13677 call. This insn will be skipped by the trampoline. */
13678 else if (ix86_static_chain_on_stack)
13680 static_chain = ix86_static_chain (cfun->decl, false);
13681 insn = emit_insn (gen_push (static_chain));
13682 emit_insn (gen_blockage ());
13684 /* We don't want to interpret this push insn as a register save,
13685 only as a stack adjustment. The real copy of the register as
13686 a save will be done later, if needed. */
13687 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
13688 t = gen_rtx_SET (stack_pointer_rtx, t);
13689 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
13690 RTX_FRAME_RELATED_P (insn) = 1;
13693 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
13694 of DRAP is needed and stack realignment is really needed after reload */
13695 if (stack_realign_drap)
13697 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13699 /* Can't use DRAP in interrupt function. */
13700 if (cfun->machine->func_type != TYPE_NORMAL)
13701 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
13702 "in interrupt service routine. This may be worked "
13703 "around by avoiding functions with aggregate return.");
13705 /* Only need to push parameter pointer reg if it is caller saved. */
13706 if (!call_used_regs[REGNO (crtl->drap_reg)])
13708 /* Push arg pointer reg */
13709 insn = emit_insn (gen_push (crtl->drap_reg));
13710 RTX_FRAME_RELATED_P (insn) = 1;
13713 /* Grab the argument pointer. */
13714 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13715 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13716 RTX_FRAME_RELATED_P (insn) = 1;
13717 m->fs.cfa_reg = crtl->drap_reg;
13718 m->fs.cfa_offset = 0;
13720 /* Align the stack. */
13721 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13722 stack_pointer_rtx,
13723 GEN_INT (-align_bytes)));
13724 RTX_FRAME_RELATED_P (insn) = 1;
13726 /* Replicate the return address on the stack so that return
13727 address can be reached via (argp - 1) slot. This is needed
13728 to implement macro RETURN_ADDR_RTX and intrinsic function
13729 expand_builtin_return_addr etc. */
13730 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13731 t = gen_frame_mem (word_mode, t);
13732 insn = emit_insn (gen_push (t));
13733 RTX_FRAME_RELATED_P (insn) = 1;
13735 /* For the purposes of frame and register save area addressing,
13736 we've started over with a new frame. */
13737 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13738 m->fs.realigned = true;
13740 if (static_chain)
13742 /* Replicate static chain on the stack so that static chain
13743 can be reached via (argp - 2) slot. This is needed for
13744 nested functions with stack realignment. */
13745 insn = emit_insn (gen_push (static_chain));
13746 RTX_FRAME_RELATED_P (insn) = 1;
13750 int_registers_saved = (frame.nregs == 0);
13751 sse_registers_saved = (frame.nsseregs == 0);
13752 save_stub_call_needed = (m->call_ms2sysv);
13753 gcc_assert (sse_registers_saved || !save_stub_call_needed);
13755 if (frame_pointer_needed && !m->fs.fp_valid)
13757 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13758 slower on all targets. Also sdb didn't like it. */
13759 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13760 RTX_FRAME_RELATED_P (insn) = 1;
13762 /* Push registers now, before setting the frame pointer
13763 on SEH target. */
13764 if (!int_registers_saved
13765 && TARGET_SEH
13766 && !frame.save_regs_using_mov)
13768 ix86_emit_save_regs ();
13769 int_registers_saved = true;
13770 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13773 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13775 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13776 RTX_FRAME_RELATED_P (insn) = 1;
13778 if (m->fs.cfa_reg == stack_pointer_rtx)
13779 m->fs.cfa_reg = hard_frame_pointer_rtx;
13780 m->fs.fp_offset = m->fs.sp_offset;
13781 m->fs.fp_valid = true;
13785 if (!int_registers_saved)
13787 /* If saving registers via PUSH, do so now. */
13788 if (!frame.save_regs_using_mov)
13790 ix86_emit_save_regs ();
13791 int_registers_saved = true;
13792 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13795 /* When using red zone we may start register saving before allocating
13796 the stack frame saving one cycle of the prologue. However, avoid
13797 doing this if we have to probe the stack; at least on x86_64 the
13798 stack probe can turn into a call that clobbers a red zone location. */
13799 else if (ix86_using_red_zone ()
13800 && (! TARGET_STACK_PROBE
13801 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13803 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13804 int_registers_saved = true;
13808 if (stack_realign_fp)
13810 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13811 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13813 /* Record last valid frame pointer offset. */
13814 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13816 /* The computation of the size of the re-aligned stack frame means
13817 that we must allocate the size of the register save area before
13818 performing the actual alignment. Otherwise we cannot guarantee
13819 that there's enough storage above the realignment point. */
13820 allocate = frame.reg_save_offset - m->fs.sp_offset
13821 + frame.stack_realign_allocate;
13822 if (allocate)
13823 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13824 GEN_INT (-allocate), -1, false);
13826 /* Align the stack. */
13827 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13828 stack_pointer_rtx,
13829 GEN_INT (-align_bytes)));
13830 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13831 m->fs.sp_realigned_offset = m->fs.sp_offset
13832 - frame.stack_realign_allocate;
13833 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13834 Beyond this point, stack access should be done via choose_baseaddr or
13835 by using sp_valid_at and fp_valid_at to determine the correct base
13836 register. Henceforth, any CFA offset should be thought of as logical
13837 and not physical. */
13838 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13839 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13840 m->fs.sp_realigned = true;
13842 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13843 is needed to describe where a register is saved using a realigned
13844 stack pointer, so we need to invalidate the stack pointer for that
13845 target. */
13846 if (TARGET_SEH)
13847 m->fs.sp_valid = false;
13849 /* If SP offset is non-immediate after allocation of the stack frame,
13850 then emit SSE saves or stub call prior to allocating the rest of the
13851 stack frame. This is less efficient for the out-of-line stub because
13852 we can't combine allocations across the call barrier, but it's better
13853 than using a scratch register. */
13854 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
13855 - m->fs.sp_realigned_offset),
13856 Pmode))
13858 if (!sse_registers_saved)
13860 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13861 sse_registers_saved = true;
13863 else if (save_stub_call_needed)
13865 ix86_emit_outlined_ms2sysv_save (frame);
13866 save_stub_call_needed = false;
13871 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13873 if (flag_stack_usage_info)
13875 /* We start to count from ARG_POINTER. */
13876 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13878 /* If it was realigned, take into account the fake frame. */
13879 if (stack_realign_drap)
13881 if (ix86_static_chain_on_stack)
13882 stack_size += UNITS_PER_WORD;
13884 if (!call_used_regs[REGNO (crtl->drap_reg)])
13885 stack_size += UNITS_PER_WORD;
13887 /* This over-estimates by 1 minimal-stack-alignment-unit but
13888 mitigates that by counting in the new return address slot. */
13889 current_function_dynamic_stack_size
13890 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13893 current_function_static_stack_size = stack_size;
13896 /* On SEH target with very large frame size, allocate an area to save
13897 SSE registers (as the very large allocation won't be described). */
13898 if (TARGET_SEH
13899 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13900 && !sse_registers_saved)
13902 HOST_WIDE_INT sse_size =
13903 frame.sse_reg_save_offset - frame.reg_save_offset;
13905 gcc_assert (int_registers_saved);
13907 /* No need to do stack checking as the area will be immediately
13908 written. */
13909 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13910 GEN_INT (-sse_size), -1,
13911 m->fs.cfa_reg == stack_pointer_rtx);
13912 allocate -= sse_size;
13913 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13914 sse_registers_saved = true;
13917 /* The stack has already been decremented by the instruction calling us
13918 so probe if the size is non-negative to preserve the protection area. */
13919 if (allocate >= 0
13920 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13921 || flag_stack_clash_protection))
13923 if (flag_stack_clash_protection)
13925 ix86_adjust_stack_and_probe_stack_clash (allocate,
13926 int_registers_saved);
13927 allocate = 0;
13929 else if (STACK_CHECK_MOVING_SP)
13931 if (!(crtl->is_leaf && !cfun->calls_alloca
13932 && allocate <= get_probe_interval ()))
13934 ix86_adjust_stack_and_probe (allocate, int_registers_saved);
13935 allocate = 0;
13938 else
13940 HOST_WIDE_INT size = allocate;
13942 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13943 size = 0x80000000 - get_stack_check_protect () - 1;
13945 if (TARGET_STACK_PROBE)
13947 if (crtl->is_leaf && !cfun->calls_alloca)
13949 if (size > get_probe_interval ())
13950 ix86_emit_probe_stack_range (0, size, int_registers_saved);
13952 else
13953 ix86_emit_probe_stack_range (0,
13954 size + get_stack_check_protect (),
13955 int_registers_saved);
13957 else
13959 if (crtl->is_leaf && !cfun->calls_alloca)
13961 if (size > get_probe_interval ()
13962 && size > get_stack_check_protect ())
13963 ix86_emit_probe_stack_range (get_stack_check_protect (),
13964 (size
13965 - get_stack_check_protect ()),
13966 int_registers_saved);
13968 else
13969 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
13970 int_registers_saved);
13975 if (allocate == 0)
13977 else if (!ix86_target_stack_probe ()
13978 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13980 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13981 GEN_INT (-allocate), -1,
13982 m->fs.cfa_reg == stack_pointer_rtx);
13984 else
13986 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13987 rtx r10 = NULL;
13988 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13989 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13990 bool eax_live = ix86_eax_live_at_start_p ();
13991 bool r10_live = false;
13993 if (TARGET_64BIT)
13994 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13996 if (eax_live)
13998 insn = emit_insn (gen_push (eax));
13999 allocate -= UNITS_PER_WORD;
14000 /* Note that SEH directives need to continue tracking the stack
14001 pointer even after the frame pointer has been set up. */
14002 if (sp_is_cfa_reg || TARGET_SEH)
14004 if (sp_is_cfa_reg)
14005 m->fs.cfa_offset += UNITS_PER_WORD;
14006 RTX_FRAME_RELATED_P (insn) = 1;
14007 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14008 gen_rtx_SET (stack_pointer_rtx,
14009 plus_constant (Pmode, stack_pointer_rtx,
14010 -UNITS_PER_WORD)));
14014 if (r10_live)
14016 r10 = gen_rtx_REG (Pmode, R10_REG);
14017 insn = emit_insn (gen_push (r10));
14018 allocate -= UNITS_PER_WORD;
14019 if (sp_is_cfa_reg || TARGET_SEH)
14021 if (sp_is_cfa_reg)
14022 m->fs.cfa_offset += UNITS_PER_WORD;
14023 RTX_FRAME_RELATED_P (insn) = 1;
14024 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14025 gen_rtx_SET (stack_pointer_rtx,
14026 plus_constant (Pmode, stack_pointer_rtx,
14027 -UNITS_PER_WORD)));
14031 emit_move_insn (eax, GEN_INT (allocate));
14032 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
14034 /* Use the fact that AX still contains ALLOCATE. */
14035 adjust_stack_insn = (Pmode == DImode
14036 ? gen_pro_epilogue_adjust_stack_di_sub
14037 : gen_pro_epilogue_adjust_stack_si_sub);
14039 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
14040 stack_pointer_rtx, eax));
14042 if (sp_is_cfa_reg || TARGET_SEH)
14044 if (sp_is_cfa_reg)
14045 m->fs.cfa_offset += allocate;
14046 RTX_FRAME_RELATED_P (insn) = 1;
14047 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14048 gen_rtx_SET (stack_pointer_rtx,
14049 plus_constant (Pmode, stack_pointer_rtx,
14050 -allocate)));
14052 m->fs.sp_offset += allocate;
14054 /* Use stack_pointer_rtx for relative addressing so that code
14055 works for realigned stack, too. */
14056 if (r10_live && eax_live)
14058 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
14059 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14060 gen_frame_mem (word_mode, t));
14061 t = plus_constant (Pmode, t, UNITS_PER_WORD);
14062 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
14063 gen_frame_mem (word_mode, t));
14065 else if (eax_live || r10_live)
14067 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
14068 emit_move_insn (gen_rtx_REG (word_mode,
14069 (eax_live ? AX_REG : R10_REG)),
14070 gen_frame_mem (word_mode, t));
14073 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
14075 /* If we haven't already set up the frame pointer, do so now. */
14076 if (frame_pointer_needed && !m->fs.fp_valid)
14078 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
14079 GEN_INT (frame.stack_pointer_offset
14080 - frame.hard_frame_pointer_offset));
14081 insn = emit_insn (insn);
14082 RTX_FRAME_RELATED_P (insn) = 1;
14083 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
14085 if (m->fs.cfa_reg == stack_pointer_rtx)
14086 m->fs.cfa_reg = hard_frame_pointer_rtx;
14087 m->fs.fp_offset = frame.hard_frame_pointer_offset;
14088 m->fs.fp_valid = true;
14091 if (!int_registers_saved)
14092 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
14093 if (!sse_registers_saved)
14094 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
14095 else if (save_stub_call_needed)
14096 ix86_emit_outlined_ms2sysv_save (frame);
14098 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
14099 in the prologue. */
14100 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
14102 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
14103 insn = emit_insn (gen_set_got (pic));
14104 RTX_FRAME_RELATED_P (insn) = 1;
14105 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
14106 emit_insn (gen_prologue_use (pic));
14107 /* Delete an already emitted SET_GOT if it exists and is allocated to
14108 REAL_PIC_OFFSET_TABLE_REGNUM. */
14109 ix86_elim_entry_set_got (pic);
14112 if (crtl->drap_reg && !crtl->stack_realign_needed)
14114 /* vDRAP is set up, but after reload it turns out stack realignment
14115 isn't necessary; here we emit the prologue to set up DRAP
14116 without the stack realignment adjustment. */
14117 t = choose_baseaddr (0, NULL);
14118 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
14121 /* Prevent instructions from being scheduled into register save push
14122 sequence when access to the redzone area is done through frame pointer.
14123 The offset between the frame pointer and the stack pointer is calculated
14124 relative to the value of the stack pointer at the end of the function
14125 prologue, and moving instructions that access redzone area via frame
14126 pointer inside push sequence violates this assumption. */
14127 if (frame_pointer_needed && frame.red_zone_size)
14128 emit_insn (gen_memory_blockage ());
14130 /* SEH requires that the prologue end within 256 bytes of the start of
14131 the function. Prevent instruction schedules that would extend that.
14132 Further, prevent alloca modifications to the stack pointer from being
14133 combined with prologue modifications. */
14134 if (TARGET_SEH)
14135 emit_insn (gen_prologue_use (stack_pointer_rtx));
14138 /* Emit code to restore REG using a POP insn. */
14140 static void
14141 ix86_emit_restore_reg_using_pop (rtx reg)
14143 struct machine_function *m = cfun->machine;
14144 rtx_insn *insn = emit_insn (gen_pop (reg));
14146 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
14147 m->fs.sp_offset -= UNITS_PER_WORD;
14149 if (m->fs.cfa_reg == crtl->drap_reg
14150 && REGNO (reg) == REGNO (crtl->drap_reg))
14152 /* Previously we'd represented the CFA as an expression
14153 like *(%ebp - 8). We've just popped that value from
14154 the stack, which means we need to reset the CFA to
14155 the drap register. This will remain until we restore
14156 the stack pointer. */
14157 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14158 RTX_FRAME_RELATED_P (insn) = 1;
14160 /* This means that the DRAP register is valid for addressing too. */
14161 m->fs.drap_valid = true;
14162 return;
14165 if (m->fs.cfa_reg == stack_pointer_rtx)
14167 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14168 x = gen_rtx_SET (stack_pointer_rtx, x);
14169 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14170 RTX_FRAME_RELATED_P (insn) = 1;
14172 m->fs.cfa_offset -= UNITS_PER_WORD;
14175 /* When the frame pointer is the CFA, and we pop it, we are
14176 swapping back to the stack pointer as the CFA. This happens
14177 for stack frames that don't allocate other data, so we assume
14178 the stack pointer is now pointing at the return address, i.e.
14179 the function entry state, which makes the offset be 1 word. */
14180 if (reg == hard_frame_pointer_rtx)
14182 m->fs.fp_valid = false;
14183 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14185 m->fs.cfa_reg = stack_pointer_rtx;
14186 m->fs.cfa_offset -= UNITS_PER_WORD;
14188 add_reg_note (insn, REG_CFA_DEF_CFA,
14189 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14190 GEN_INT (m->fs.cfa_offset)));
14191 RTX_FRAME_RELATED_P (insn) = 1;
14196 /* Emit code to restore saved registers using POP insns. */
14198 static void
14199 ix86_emit_restore_regs_using_pop (void)
14201 unsigned int regno;
14203 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14204 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
14205 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
14208 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
14209 omit the emit and only attach the notes. */
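/* A sketch of the effect: on 64-bit targets "leave" is equivalent to
	movq	%rbp, %rsp
	popq	%rbp
   so the stack pointer ends up one word above the slot holding the saved
   frame pointer, which is why sp_offset becomes fp_offset - UNITS_PER_WORD
   below.  */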
14211 static void
14212 ix86_emit_leave (rtx_insn *insn)
14214 struct machine_function *m = cfun->machine;
14215 if (!insn)
14216 insn = emit_insn (ix86_gen_leave ());
14218 ix86_add_queued_cfa_restore_notes (insn);
14220 gcc_assert (m->fs.fp_valid);
14221 m->fs.sp_valid = true;
14222 m->fs.sp_realigned = false;
14223 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
14224 m->fs.fp_valid = false;
14226 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14228 m->fs.cfa_reg = stack_pointer_rtx;
14229 m->fs.cfa_offset = m->fs.sp_offset;
14231 add_reg_note (insn, REG_CFA_DEF_CFA,
14232 plus_constant (Pmode, stack_pointer_rtx,
14233 m->fs.sp_offset));
14234 RTX_FRAME_RELATED_P (insn) = 1;
14236 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
14237 m->fs.fp_offset);
14240 /* Emit code to restore saved registers using MOV insns.
14241 First register is restored from CFA - CFA_OFFSET. */
14242 static void
14243 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
14244 bool maybe_eh_return)
14246 struct machine_function *m = cfun->machine;
14247 unsigned int regno;
14249 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14250 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
14252 rtx reg = gen_rtx_REG (word_mode, regno);
14253 rtx mem;
14254 rtx_insn *insn;
14256 mem = choose_baseaddr (cfa_offset, NULL);
14257 mem = gen_frame_mem (word_mode, mem);
14258 insn = emit_move_insn (reg, mem);
14260 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
14262 /* Previously we'd represented the CFA as an expression
14263 like *(%ebp - 8). We've just popped that value from
14264 the stack, which means we need to reset the CFA to
14265 the drap register. This will remain until we restore
14266 the stack pointer. */
14267 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14268 RTX_FRAME_RELATED_P (insn) = 1;
14270 /* This means that the DRAP register is valid for addressing. */
14271 m->fs.drap_valid = true;
14273 else
14274 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14276 cfa_offset -= UNITS_PER_WORD;
14280 /* Emit code to restore saved SSE registers using MOV insns.
14281 First register is restored from CFA - CFA_OFFSET. */
14282 static void
14283 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
14284 bool maybe_eh_return)
14286 unsigned int regno;
14288 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14289 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
14291 rtx reg = gen_rtx_REG (V4SFmode, regno);
14292 rtx mem;
14293 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
14295 mem = choose_baseaddr (cfa_offset, &align);
14296 mem = gen_rtx_MEM (V4SFmode, mem);
14298 /* The location alignment depends upon the base register. */
14299 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
14300 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
14301 set_mem_align (mem, align);
14302 emit_insn (gen_rtx_SET (reg, mem));
14304 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14306 cfa_offset -= GET_MODE_SIZE (V4SFmode);
14310 static void
14311 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
14312 bool use_call, int style)
14314 struct machine_function *m = cfun->machine;
14315 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
14316 + m->call_ms2sysv_extra_regs;
14317 rtvec v;
14318 unsigned int elems_needed, align, i, vi = 0;
14319 rtx_insn *insn;
14320 rtx sym, tmp;
14321 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
14322 rtx r10 = NULL_RTX;
14323 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
14324 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
14325 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
14326 rtx rsi_frame_load = NULL_RTX;
14327 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
14328 enum xlogue_stub stub;
14330 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
14332 /* If using a realigned stack, we should never start with padding. */
14333 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
14335 /* Setup RSI as the stub's base pointer. */
14336 align = GET_MODE_ALIGNMENT (V4SFmode);
14337 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
14338 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
14340 emit_insn (gen_rtx_SET (rsi, tmp));
14342 /* Get a symbol for the stub. */
14343 if (frame_pointer_needed)
14344 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
14345 : XLOGUE_STUB_RESTORE_HFP_TAIL;
14346 else
14347 stub = use_call ? XLOGUE_STUB_RESTORE
14348 : XLOGUE_STUB_RESTORE_TAIL;
14349 sym = xlogue.get_stub_rtx (stub);
14351 elems_needed = ncregs;
14352 if (use_call)
14353 elems_needed += 1;
14354 else
14355 elems_needed += frame_pointer_needed ? 5 : 3;
14356 v = rtvec_alloc (elems_needed);
14358 /* We call the epilogue stub when we need to pop incoming args or we are
14359 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
14360 epilogue stub and it is the tail-call. */
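/* For the tail-call form without a hard frame pointer, the PARALLEL built
   below looks roughly like this sketch (stub name, register numbers and
   offsets purely illustrative):

	(parallel [(return)
		   (use (symbol_ref "restore_stub"))
		   (set (reg:DI sp) (reg:DI r10))
		   (set (reg:V4SF xmm6) (mem:V4SF (plus:DI (reg:DI si)
							   (const_int -16))))
		   ...])
*/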
14361 if (use_call)
14362 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
14363 else
14365 RTVEC_ELT (v, vi++) = ret_rtx;
14366 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
14367 if (frame_pointer_needed)
14369 rtx rbp = gen_rtx_REG (DImode, BP_REG);
14370 gcc_assert (m->fs.fp_valid);
14371 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
14373 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
14374 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
14375 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
14376 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
14377 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
14379 else
14381 /* If no hard frame pointer, we set R10 to the SP restore value. */
14382 gcc_assert (!m->fs.fp_valid);
14383 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14384 gcc_assert (m->fs.sp_valid);
14386 r10 = gen_rtx_REG (DImode, R10_REG);
14387 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
14388 emit_insn (gen_rtx_SET (r10, tmp));
14390 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
14394 /* Generate frame load insns and restore notes. */
14395 for (i = 0; i < ncregs; ++i)
14397 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
14398 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
14399 rtx reg, frame_load;
14401 reg = gen_rtx_REG (mode, r.regno);
14402 frame_load = gen_frame_load (reg, rsi, r.offset);
14404 /* Save RSI frame load insn & note to add last. */
14405 if (r.regno == SI_REG)
14407 gcc_assert (!rsi_frame_load);
14408 rsi_frame_load = frame_load;
14409 rsi_restore_offset = r.offset;
14411 else
14413 RTVEC_ELT (v, vi++) = frame_load;
14414 ix86_add_cfa_restore_note (NULL, reg, r.offset);
14418 /* Add RSI frame load & restore note at the end. */
14419 gcc_assert (rsi_frame_load);
14420 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
14421 RTVEC_ELT (v, vi++) = rsi_frame_load;
14422 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
14423 rsi_restore_offset);
14425 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
14426 if (!use_call && !frame_pointer_needed)
14428 gcc_assert (m->fs.sp_valid);
14429 gcc_assert (!m->fs.sp_realigned);
14431 /* At this point, R10 should point to frame.stack_realign_offset. */
14432 if (m->fs.cfa_reg == stack_pointer_rtx)
14433 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
14434 m->fs.sp_offset = frame.stack_realign_offset;
14437 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
14438 tmp = gen_rtx_PARALLEL (VOIDmode, v);
14439 if (use_call)
14440 insn = emit_insn (tmp);
14441 else
14443 insn = emit_jump_insn (tmp);
14444 JUMP_LABEL (insn) = ret_rtx;
14446 if (frame_pointer_needed)
14447 ix86_emit_leave (insn);
14448 else
14450 /* Need CFA adjust note. */
14451 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
14452 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
14456 RTX_FRAME_RELATED_P (insn) = true;
14457 ix86_add_queued_cfa_restore_notes (insn);
14459 /* If we're not doing a tail-call, we need to adjust the stack. */
14460 if (use_call && m->fs.sp_valid)
14462 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
14463 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14464 GEN_INT (dealloc), style,
14465 m->fs.cfa_reg == stack_pointer_rtx);
14469 /* Restore function stack, frame, and registers. */
14471 void
14472 ix86_expand_epilogue (int style)
14474 struct machine_function *m = cfun->machine;
14475 struct machine_frame_state frame_state_save = m->fs;
14476 bool restore_regs_via_mov;
14477 bool using_drap;
14478 bool restore_stub_is_tail = false;
14480 if (ix86_function_naked (current_function_decl))
14482 /* The program should not reach this point. */
14483 emit_insn (gen_ud2 ());
14484 return;
14487 ix86_finalize_stack_frame_flags ();
14488 const struct ix86_frame &frame = cfun->machine->frame;
14490 m->fs.sp_realigned = stack_realign_fp;
14491 m->fs.sp_valid = stack_realign_fp
14492 || !frame_pointer_needed
14493 || crtl->sp_is_unchanging;
14494 gcc_assert (!m->fs.sp_valid
14495 || m->fs.sp_offset == frame.stack_pointer_offset);
14497 /* The FP must be valid if the frame pointer is present. */
14498 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
14499 gcc_assert (!m->fs.fp_valid
14500 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
14502 /* We must have *some* valid pointer to the stack frame. */
14503 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
14505 /* The DRAP is never valid at this point. */
14506 gcc_assert (!m->fs.drap_valid);
14508 /* See the comment about red zone and frame
14509 pointer usage in ix86_expand_prologue. */
14510 if (frame_pointer_needed && frame.red_zone_size)
14511 emit_insn (gen_memory_blockage ());
14513 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
14514 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
14516 /* Determine the CFA offset of the end of the red-zone. */
14517 m->fs.red_zone_offset = 0;
14518 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
14520 /* The red-zone begins below the return address and, in an exception
14521 handler, below the error code as well. */
14522 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
14524 /* When the register save area is in the aligned portion of
14525 the stack, determine the maximum runtime displacement that
14526 matches up with the aligned frame. */
14527 if (stack_realign_drap)
14528 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
14529 + UNITS_PER_WORD);
14532 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
14534 /* Special care must be taken for the normal return case of a function
14535 using eh_return: the eax and edx registers are marked as saved, but
14536 not restored along this path. Adjust the save location to match. */
14537 if (crtl->calls_eh_return && style != 2)
14538 reg_save_offset -= 2 * UNITS_PER_WORD;
14540 /* EH_RETURN requires the use of moves to function properly. */
14541 if (crtl->calls_eh_return)
14542 restore_regs_via_mov = true;
14543 /* SEH requires the use of pops to identify the epilogue. */
14544 else if (TARGET_SEH)
14545 restore_regs_via_mov = false;
14546 /* If we're only restoring one register and sp cannot be used then
14547 use a move instruction to restore the register, since it's
14548 less work than reloading sp and popping the register. */
14549 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
14550 restore_regs_via_mov = true;
14551 else if (TARGET_EPILOGUE_USING_MOVE
14552 && cfun->machine->use_fast_prologue_epilogue
14553 && (frame.nregs > 1
14554 || m->fs.sp_offset != reg_save_offset))
14555 restore_regs_via_mov = true;
14556 else if (frame_pointer_needed
14557 && !frame.nregs
14558 && m->fs.sp_offset != reg_save_offset)
14559 restore_regs_via_mov = true;
14560 else if (frame_pointer_needed
14561 && TARGET_USE_LEAVE
14562 && cfun->machine->use_fast_prologue_epilogue
14563 && frame.nregs == 1)
14564 restore_regs_via_mov = true;
14565 else
14566 restore_regs_via_mov = false;
14568 if (restore_regs_via_mov || frame.nsseregs)
14570 /* Ensure that the entire register save area is addressable via
14571 the stack pointer, if we will restore SSE regs via sp. */
14572 if (TARGET_64BIT
14573 && m->fs.sp_offset > 0x7fffffff
14574 && sp_valid_at (frame.stack_realign_offset + 1)
14575 && (frame.nsseregs + frame.nregs) != 0)
14577 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14578 GEN_INT (m->fs.sp_offset
14579 - frame.sse_reg_save_offset),
14580 style,
14581 m->fs.cfa_reg == stack_pointer_rtx);
14585 /* If there are any SSE registers to restore, then we have to do it
14586 via moves, since there's obviously no pop for SSE regs. */
14587 if (frame.nsseregs)
14588 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
14589 style == 2);
14591 if (m->call_ms2sysv)
14593 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
14595 /* We cannot use a tail-call for the stub if:
14596 1. We have to pop incoming args,
14597 2. We have additional int regs to restore, or
14598 3. A sibling call will be the tail-call, or
14599 4. We are emitting an eh_return_internal epilogue.
14601 TODO: Item 4 has not yet been tested!
14603 If any of the above are true, we will call the stub rather than
14604 jump to it. */
14605 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
14606 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
14609 /* If using an out-of-line stub that is a tail-call, then... */
14610 if (m->call_ms2sysv && restore_stub_is_tail)
14612 /* TODO: paranoid tests. (remove eventually) */
14613 gcc_assert (m->fs.sp_valid);
14614 gcc_assert (!m->fs.sp_realigned);
14615 gcc_assert (!m->fs.fp_valid);
14616 gcc_assert (!m->fs.realigned);
14617 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
14618 gcc_assert (!crtl->drap_reg);
14619 gcc_assert (!frame.nregs);
14621 else if (restore_regs_via_mov)
14623 rtx t;
14625 if (frame.nregs)
14626 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
14628 /* eh_return epilogues need %ecx added to the stack pointer. */
14629 if (style == 2)
14631 rtx sa = EH_RETURN_STACKADJ_RTX;
14632 rtx_insn *insn;
14634 /* %ecx can't be used for both DRAP register and eh_return. */
14635 if (crtl->drap_reg)
14636 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
14638 /* regparm nested functions don't work with eh_return. */
14639 gcc_assert (!ix86_static_chain_on_stack);
14641 if (frame_pointer_needed)
14643 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
14644 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
14645 emit_insn (gen_rtx_SET (sa, t));
14647 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
14648 insn = emit_move_insn (hard_frame_pointer_rtx, t);
14650 /* Note that we use SA as a temporary CFA, as the return
14651 address is at the proper place relative to it. We
14652 pretend this happens at the FP restore insn because
14653 prior to this insn the FP would be stored at the wrong
14654 offset relative to SA, and after this insn we have no
14655 other reasonable register to use for the CFA. We don't
14656 bother resetting the CFA to the SP for the duration of
14657 the return insn, unless the control flow instrumentation
14658 is done. In this case the SP is used later and we have
14659 to reset CFA to SP. */
14660 add_reg_note (insn, REG_CFA_DEF_CFA,
14661 plus_constant (Pmode, sa, UNITS_PER_WORD));
14662 ix86_add_queued_cfa_restore_notes (insn);
14663 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
14664 RTX_FRAME_RELATED_P (insn) = 1;
14666 m->fs.cfa_reg = sa;
14667 m->fs.cfa_offset = UNITS_PER_WORD;
14668 m->fs.fp_valid = false;
14670 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
14671 const0_rtx, style,
14672 flag_cf_protection);
14674 else
14676 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
14677 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
14678 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
14679 ix86_add_queued_cfa_restore_notes (insn);
14681 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14682 if (m->fs.cfa_offset != UNITS_PER_WORD)
14684 m->fs.cfa_offset = UNITS_PER_WORD;
14685 add_reg_note (insn, REG_CFA_DEF_CFA,
14686 plus_constant (Pmode, stack_pointer_rtx,
14687 UNITS_PER_WORD));
14688 RTX_FRAME_RELATED_P (insn) = 1;
14691 m->fs.sp_offset = UNITS_PER_WORD;
14692 m->fs.sp_valid = true;
14693 m->fs.sp_realigned = false;
14696 else
14698 /* SEH requires that the function end with (1) a stack adjustment
14699 if necessary, (2) a sequence of pops, and (3) a return or
14700 jump instruction. Prevent insns from the function body from
14701 being scheduled into this sequence. */
14702 if (TARGET_SEH)
14704 /* Prevent a catch region from being adjacent to the standard
14705 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
14706 several other flags that would be interesting to test are
14707 set up yet. */
14708 if (flag_non_call_exceptions)
14709 emit_insn (gen_nops (const1_rtx));
14710 else
14711 emit_insn (gen_blockage ());
14714 /* First step is to deallocate the stack frame so that we can
14715 pop the registers. If the stack pointer was realigned, it needs
14716 to be restored now. Also do it on SEH target for very large
14717 frame as the emitted instructions aren't allowed by the ABI
14718 in epilogues. */
14719 if (!m->fs.sp_valid || m->fs.sp_realigned
14720 || (TARGET_SEH
14721 && (m->fs.sp_offset - reg_save_offset
14722 >= SEH_MAX_FRAME_SIZE)))
14724 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14725 GEN_INT (m->fs.fp_offset
14726 - reg_save_offset),
14727 style, false);
14729 else if (m->fs.sp_offset != reg_save_offset)
14731 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14732 GEN_INT (m->fs.sp_offset
14733 - reg_save_offset),
14734 style,
14735 m->fs.cfa_reg == stack_pointer_rtx);
14738 ix86_emit_restore_regs_using_pop ();
14741 /* If we used a frame pointer and haven't already got rid of it,
14742 then do so now. */
14743 if (m->fs.fp_valid)
14745 /* If the stack pointer is valid and pointing at the frame
14746 pointer store address, then we only need a pop. */
14747 if (sp_valid_at (frame.hfp_save_offset)
14748 && m->fs.sp_offset == frame.hfp_save_offset)
14749 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14750 /* Leave results in shorter dependency chains on CPUs that are
14751 able to grok it fast. */
14752 else if (TARGET_USE_LEAVE
14753 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14754 || !cfun->machine->use_fast_prologue_epilogue)
14755 ix86_emit_leave (NULL);
14756 else
14758 pro_epilogue_adjust_stack (stack_pointer_rtx,
14759 hard_frame_pointer_rtx,
14760 const0_rtx, style, !using_drap);
14761 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14765 if (using_drap)
14767 int param_ptr_offset = UNITS_PER_WORD;
14768 rtx_insn *insn;
14770 gcc_assert (stack_realign_drap);
14772 if (ix86_static_chain_on_stack)
14773 param_ptr_offset += UNITS_PER_WORD;
14774 if (!call_used_regs[REGNO (crtl->drap_reg)])
14775 param_ptr_offset += UNITS_PER_WORD;
14777 insn = emit_insn (gen_rtx_SET
14778 (stack_pointer_rtx,
14779 gen_rtx_PLUS (Pmode,
14780 crtl->drap_reg,
14781 GEN_INT (-param_ptr_offset))));
14782 m->fs.cfa_reg = stack_pointer_rtx;
14783 m->fs.cfa_offset = param_ptr_offset;
14784 m->fs.sp_offset = param_ptr_offset;
14785 m->fs.realigned = false;
14787 add_reg_note (insn, REG_CFA_DEF_CFA,
14788 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14789 GEN_INT (param_ptr_offset)));
14790 RTX_FRAME_RELATED_P (insn) = 1;
14792 if (!call_used_regs[REGNO (crtl->drap_reg)])
14793 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14796 /* At this point the stack pointer must be valid, and we must have
14797 restored all of the registers. We may not have deallocated the
14798 entire stack frame. We've delayed this until now because it may
14799 be possible to merge the local stack deallocation with the
14800 deallocation forced by ix86_static_chain_on_stack. */
14801 gcc_assert (m->fs.sp_valid);
14802 gcc_assert (!m->fs.sp_realigned);
14803 gcc_assert (!m->fs.fp_valid);
14804 gcc_assert (!m->fs.realigned);
14805 if (m->fs.sp_offset != UNITS_PER_WORD)
14807 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14808 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14809 style, true);
14811 else
14812 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14814 /* Sibcall epilogues don't want a return instruction. */
14815 if (style == 0)
14817 m->fs = frame_state_save;
14818 return;
14821 if (cfun->machine->func_type != TYPE_NORMAL)
14822 emit_jump_insn (gen_interrupt_return ());
14823 else if (crtl->args.pops_args && crtl->args.size)
14825 rtx popc = GEN_INT (crtl->args.pops_args);
14827 /* i386 can only pop 64K bytes. If asked to pop more, pop return
14828 address, do explicit add, and jump indirectly to the caller. */
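/* Illustrative sketch (hypothetical figure, not the literal emitted text):
   for a function declared to pop, say, 70000 bytes of arguments, the code
   below amounts to
     pop %ecx          # return address -> %ecx
     add $70000, %esp  # drop the arguments
     jmp *%ecx         # return to the caller
   because "ret $imm16" cannot encode an amount that large. */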
14830 if (crtl->args.pops_args >= 65536)
14832 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14833 rtx_insn *insn;
14835 /* There is no "pascal" calling convention in any 64bit ABI. */
14836 gcc_assert (!TARGET_64BIT);
14838 insn = emit_insn (gen_pop (ecx));
14839 m->fs.cfa_offset -= UNITS_PER_WORD;
14840 m->fs.sp_offset -= UNITS_PER_WORD;
14842 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14843 x = gen_rtx_SET (stack_pointer_rtx, x);
14844 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14845 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14846 RTX_FRAME_RELATED_P (insn) = 1;
14848 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14849 popc, -1, true);
14850 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14852 else
14853 emit_jump_insn (gen_simple_return_pop_internal (popc));
14855 else if (!m->call_ms2sysv || !restore_stub_is_tail)
14857 /* In case of return from EH a simple return cannot be used,
14858 as the return address will be compared with a shadow stack
14859 return address. Use an indirect jump instead. */
14860 if (style == 2 && flag_cf_protection)
14862 /* Register used in indirect jump must be in word_mode. But
14863 Pmode may not be the same as word_mode for x32. */
14864 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
14865 rtx_insn *insn;
14867 insn = emit_insn (gen_pop (ecx));
14868 m->fs.cfa_offset -= UNITS_PER_WORD;
14869 m->fs.sp_offset -= UNITS_PER_WORD;
14871 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14872 x = gen_rtx_SET (stack_pointer_rtx, x);
14873 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14874 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14875 RTX_FRAME_RELATED_P (insn) = 1;
14877 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14879 else
14880 emit_jump_insn (gen_simple_return_internal ());
14883 /* Restore the state back to the state from the prologue,
14884 so that it's correct for the next epilogue. */
14885 m->fs = frame_state_save;
14888 /* Undo modifications made while compiling the function, e.g. restore the hard PIC register number. */
14890 static void
14891 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14893 if (pic_offset_table_rtx
14894 && !ix86_use_pseudo_pic_reg ())
14895 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14897 if (TARGET_MACHO)
14899 rtx_insn *insn = get_last_insn ();
14900 rtx_insn *deleted_debug_label = NULL;
14902 /* Mach-O doesn't support labels at the end of objects, so if
14903 it looks like we might want one, take special action.
14904 First, collect any sequence of deleted debug labels. */
14905 while (insn
14906 && NOTE_P (insn)
14907 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14909 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
14910 a nop; instead set their CODE_LABEL_NUMBER to -1, otherwise
14911 there would be code generation differences between -g and
14912 -g0. */
14913 if (NOTE_P (insn) && NOTE_KIND (insn)
14914 == NOTE_INSN_DELETED_DEBUG_LABEL)
14915 deleted_debug_label = insn;
14916 insn = PREV_INSN (insn);
14919 /* If we have:
14920 label:
14921 barrier
14922 then this needs to be detected, so skip past the barrier. */
14924 if (insn && BARRIER_P (insn))
14925 insn = PREV_INSN (insn);
14927 /* Up to now we've only seen notes or barriers. */
14928 if (insn)
14930 if (LABEL_P (insn)
14931 || (NOTE_P (insn)
14932 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14933 /* Trailing label. */
14934 fputs ("\tnop\n", file);
14935 else if (cfun && ! cfun->is_thunk)
14937 /* See if we have a completely empty function body, skipping
14938 the special case of the picbase thunk emitted as asm. */
14939 while (insn && ! INSN_P (insn))
14940 insn = PREV_INSN (insn);
14941 /* If we don't find any insns, we've got an empty function body,
14942 i.e. completely empty - without a return or branch. This is
14943 taken as the case where a function body has been removed
14944 because it contains an inline __builtin_unreachable(). GCC
14945 declares that reaching __builtin_unreachable() means UB, so
14946 we're not obliged to do anything special; however, we want
14947 non-zero-sized function bodies. To meet this, and to help the
14948 user out, let's trap the case. */
14949 if (insn == NULL)
14950 fputs ("\tud2\n", file);
14953 else if (deleted_debug_label)
14954 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14955 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14956 CODE_LABEL_NUMBER (insn) = -1;
14960 /* Return a scratch register to use in the split stack prologue. The
14961 split stack prologue is used for -fsplit-stack. It consists of the
14962 first instructions in the function, even before the regular prologue.
14963 The scratch register can be any caller-saved register which is not
14964 used for parameters or for the static chain. */
14966 static unsigned int
14967 split_stack_prologue_scratch_regno (void)
14969 if (TARGET_64BIT)
14970 return R11_REG;
14971 else
14973 bool is_fastcall, is_thiscall;
14974 int regparm;
14976 is_fastcall = (lookup_attribute ("fastcall",
14977 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14978 != NULL);
14979 is_thiscall = (lookup_attribute ("thiscall",
14980 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14981 != NULL);
14982 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14984 if (is_fastcall)
14986 if (DECL_STATIC_CHAIN (cfun->decl))
14988 sorry ("-fsplit-stack does not support fastcall with "
14989 "nested function");
14990 return INVALID_REGNUM;
14992 return AX_REG;
14994 else if (is_thiscall)
14996 if (!DECL_STATIC_CHAIN (cfun->decl))
14997 return DX_REG;
14998 return AX_REG;
15000 else if (regparm < 3)
15002 if (!DECL_STATIC_CHAIN (cfun->decl))
15003 return CX_REG;
15004 else
15006 if (regparm >= 2)
15008 sorry ("-fsplit-stack does not support 2 register "
15009 "parameters for a nested function");
15010 return INVALID_REGNUM;
15012 return DX_REG;
15015 else
15017 /* FIXME: We could make this work by pushing a register
15018 around the addition and comparison. */
15019 sorry ("-fsplit-stack does not support 3 register parameters");
15020 return INVALID_REGNUM;
15025 /* A SYMBOL_REF for the function which allocates new stackspace for
15026 -fsplit-stack. */
15028 static GTY(()) rtx split_stack_fn;
15030 /* A SYMBOL_REF for the more stack function when using the large
15031 model. */
15033 static GTY(()) rtx split_stack_fn_large;
15035 /* Return location of the stack guard value in the TLS block. */
15038 ix86_split_stack_guard (void)
15040 int offset;
15041 addr_space_t as = DEFAULT_TLS_SEG_REG;
15042 rtx r;
15044 gcc_assert (flag_split_stack);
15046 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15047 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15048 #else
15049 gcc_unreachable ();
15050 #endif
15052 r = GEN_INT (offset);
15053 r = gen_const_mem (Pmode, r);
15054 set_mem_addr_space (r, as);
15056 return r;
15059 /* Handle -fsplit-stack. These are the first instructions in the
15060 function, even before the regular prologue. */
15062 void
15063 ix86_expand_split_stack_prologue (void)
15065 HOST_WIDE_INT allocate;
15066 unsigned HOST_WIDE_INT args_size;
15067 rtx_code_label *label;
15068 rtx limit, current, allocate_rtx, call_insn, call_fusage;
15069 rtx scratch_reg = NULL_RTX;
15070 rtx_code_label *varargs_label = NULL;
15071 rtx fn;
15073 gcc_assert (flag_split_stack && reload_completed);
15075 ix86_finalize_stack_frame_flags ();
15076 struct ix86_frame &frame = cfun->machine->frame;
15077 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
15079 /* This is the label we will branch to if we have enough stack
15080 space. We expect the basic block reordering pass to reverse this
15081 branch if optimizing, so that we branch in the unlikely case. */
15082 label = gen_label_rtx ();
15084 /* We need to compare the stack pointer minus the frame size with
15085 the stack boundary in the TCB. The stack boundary always gives
15086 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
15087 can compare directly. Otherwise we need to do an addition. */
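/* In rough pseudo-code (illustrative only, names are descriptive):
     current = (allocate < SPLIT_STACK_AVAILABLE) ? sp : sp - allocate;
     if (current >= guard)    // guard read from the TCB, see below
       goto label;            // enough stack, continue normally
     call __morestack;        // otherwise grab a new stack segment  */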
15089 limit = ix86_split_stack_guard ();
15091 if (allocate < SPLIT_STACK_AVAILABLE)
15092 current = stack_pointer_rtx;
15093 else
15095 unsigned int scratch_regno;
15096 rtx offset;
15098 /* We need a scratch register to hold the stack pointer minus
15099 the required frame size. Since this is the very start of the
15100 function, the scratch register can be any caller-saved
15101 register which is not used for parameters. */
15102 offset = GEN_INT (- allocate);
15103 scratch_regno = split_stack_prologue_scratch_regno ();
15104 if (scratch_regno == INVALID_REGNUM)
15105 return;
15106 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
15107 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
15109 /* We don't use ix86_gen_add3 in this case because it will
15110 want to split to lea, but when not optimizing the insn
15111 will not be split after this point. */
15112 emit_insn (gen_rtx_SET (scratch_reg,
15113 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15114 offset)));
15116 else
15118 emit_move_insn (scratch_reg, offset);
15119 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
15120 stack_pointer_rtx));
15122 current = scratch_reg;
15125 ix86_expand_branch (GEU, current, limit, label);
15126 rtx_insn *jump_insn = get_last_insn ();
15127 JUMP_LABEL (jump_insn) = label;
15129 /* Mark the jump as very likely to be taken. */
15130 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
15132 if (split_stack_fn == NULL_RTX)
15134 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
15135 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
15137 fn = split_stack_fn;
15139 /* Get more stack space. We pass in the desired stack space and the
15140 size of the arguments to copy to the new stack. In 32-bit mode
15141 we push the parameters; __morestack will return on a new stack
15142 anyhow. In 64-bit mode we pass the parameters in r10 and
15143 r11. */
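/* Rough shape of the 32-bit case below (sketch, not the exact output):
     pushl $args_size
     pushl $allocate
     call __morestack
   with __morestack modelled as popping both pushed words on return
   (see the "pop" value passed to ix86_expand_call). */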
15144 allocate_rtx = GEN_INT (allocate);
15145 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
15146 call_fusage = NULL_RTX;
15147 rtx pop = NULL_RTX;
15148 if (TARGET_64BIT)
15150 rtx reg10, reg11;
15152 reg10 = gen_rtx_REG (Pmode, R10_REG);
15153 reg11 = gen_rtx_REG (Pmode, R11_REG);
15155 /* If this function uses a static chain, it will be in %r10.
15156 Preserve it across the call to __morestack. */
15157 if (DECL_STATIC_CHAIN (cfun->decl))
15159 rtx rax;
15161 rax = gen_rtx_REG (word_mode, AX_REG);
15162 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
15163 use_reg (&call_fusage, rax);
15166 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
15167 && !TARGET_PECOFF)
15169 HOST_WIDE_INT argval;
15171 gcc_assert (Pmode == DImode);
15172 /* When using the large model we need to load the address
15173 into a register, and we've run out of registers. So we
15174 switch to a different calling convention, and we call a
15175 different function: __morestack_large_model. We pass the
15176 argument size in the upper 32 bits of r10 and pass the
15177 frame size in the lower 32 bits. */
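/* Worked example with hypothetical values: args_size == 0x20 and
   allocate == 0x1000 give argval == 0x0000002000001000 below, i.e.
   %r10 holds the argument size in its upper half and the frame size
   in its lower half. */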
15178 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
15179 gcc_assert ((args_size & 0xffffffff) == args_size);
15181 if (split_stack_fn_large == NULL_RTX)
15183 split_stack_fn_large =
15184 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
15185 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
15187 if (ix86_cmodel == CM_LARGE_PIC)
15189 rtx_code_label *label;
15190 rtx x;
15192 label = gen_label_rtx ();
15193 emit_label (label);
15194 LABEL_PRESERVE_P (label) = 1;
15195 emit_insn (gen_set_rip_rex64 (reg10, label));
15196 emit_insn (gen_set_got_offset_rex64 (reg11, label));
15197 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
15198 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
15199 UNSPEC_GOT);
15200 x = gen_rtx_CONST (Pmode, x);
15201 emit_move_insn (reg11, x);
15202 x = gen_rtx_PLUS (Pmode, reg10, reg11);
15203 x = gen_const_mem (Pmode, x);
15204 emit_move_insn (reg11, x);
15206 else
15207 emit_move_insn (reg11, split_stack_fn_large);
15209 fn = reg11;
15211 argval = ((args_size << 16) << 16) + allocate;
15212 emit_move_insn (reg10, GEN_INT (argval));
15214 else
15216 emit_move_insn (reg10, allocate_rtx);
15217 emit_move_insn (reg11, GEN_INT (args_size));
15218 use_reg (&call_fusage, reg11);
15221 use_reg (&call_fusage, reg10);
15223 else
15225 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
15226 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
15227 insn = emit_insn (gen_push (allocate_rtx));
15228 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
15229 pop = GEN_INT (2 * UNITS_PER_WORD);
15231 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
15232 GEN_INT (UNITS_PER_WORD), constm1_rtx,
15233 pop, false);
15234 add_function_usage_to (call_insn, call_fusage);
15235 if (!TARGET_64BIT)
15236 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
15237 /* Indicate that this function can't jump to non-local gotos. */
15238 make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
15240 /* In order to make call/return prediction work right, we now need
15241 to execute a return instruction. See
15242 libgcc/config/i386/morestack.S for the details on how this works.
15244 For flow purposes gcc must not see this as a return
15245 instruction--we need control flow to continue at the subsequent
15246 label. Therefore, we use an unspec. */
15247 gcc_assert (crtl->args.pops_args < 65536);
15248 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
15250 /* If we are in 64-bit mode and this function uses a static chain,
15251 we saved %r10 in %rax before calling __morestack. */
15252 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
15253 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
15254 gen_rtx_REG (word_mode, AX_REG));
15256 /* If this function calls va_start, we need to store a pointer to
15257 the arguments on the old stack, because they may not have been
15258 all copied to the new stack. At this point the old stack can be
15259 found at the frame pointer value used by __morestack, because
15260 __morestack has set that up before calling back to us. Here we
15261 store that pointer in a scratch register, and in
15262 ix86_expand_prologue we store the scratch register in a stack
15263 slot. */
15264 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15266 unsigned int scratch_regno;
15267 rtx frame_reg;
15268 int words;
15270 scratch_regno = split_stack_prologue_scratch_regno ();
15271 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
15272 frame_reg = gen_rtx_REG (Pmode, BP_REG);
15274 /* 64-bit:
15275 fp -> old fp value
15276 return address within this function
15277 return address of caller of this function
15278 stack arguments
15279 So we add three words to get to the stack arguments.
15281 32-bit:
15282 fp -> old fp value
15283 return address within this function
15284 first argument to __morestack
15285 second argument to __morestack
15286 return address of caller of this function
15287 stack arguments
15288 So we add five words to get to the stack arguments.
15290 words = TARGET_64BIT ? 3 : 5;
15291 emit_insn (gen_rtx_SET (scratch_reg,
15292 gen_rtx_PLUS (Pmode, frame_reg,
15293 GEN_INT (words * UNITS_PER_WORD))));
15295 varargs_label = gen_label_rtx ();
15296 emit_jump_insn (gen_jump (varargs_label));
15297 JUMP_LABEL (get_last_insn ()) = varargs_label;
15299 emit_barrier ();
15302 emit_label (label);
15303 LABEL_NUSES (label) = 1;
15305 /* If this function calls va_start, we now have to set the scratch
15306 register for the case where we do not call __morestack. In this
15307 case we need to set it based on the stack pointer. */
15308 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15310 emit_insn (gen_rtx_SET (scratch_reg,
15311 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15312 GEN_INT (UNITS_PER_WORD))));
15314 emit_label (varargs_label);
15315 LABEL_NUSES (varargs_label) = 1;
15319 /* We may have to tell the dataflow pass that the split stack prologue
15320 is initializing a scratch register. */
15322 static void
15323 ix86_live_on_entry (bitmap regs)
15325 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15327 gcc_assert (flag_split_stack);
15328 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
15332 /* Extract the parts of an RTL expression that is a valid memory address
15333 for an instruction. Return 0 if the structure of the address is
15334 grossly off. Return -1 if the address contains ASHIFT, so it is not
15335 strictly valid but is still used to compute the length of the lea insn. */
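/* Illustrative example (hypothetical operands): the canonical address
     (plus (plus (reg %ebx) (mult (reg %esi) (const_int 4))) (const_int 12))
   i.e. 12(%ebx,%esi,4), decomposes into base = %ebx, index = %esi,
   scale = 4, disp = 12, seg = ADDR_SPACE_GENERIC, with return value 1. */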
15338 ix86_decompose_address (rtx addr, struct ix86_address *out)
15340 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
15341 rtx base_reg, index_reg;
15342 HOST_WIDE_INT scale = 1;
15343 rtx scale_rtx = NULL_RTX;
15344 rtx tmp;
15345 int retval = 1;
15346 addr_space_t seg = ADDR_SPACE_GENERIC;
15348 /* Allow zero-extended SImode addresses,
15349 they will be emitted with addr32 prefix. */
15350 if (TARGET_64BIT && GET_MODE (addr) == DImode)
15352 if (GET_CODE (addr) == ZERO_EXTEND
15353 && GET_MODE (XEXP (addr, 0)) == SImode)
15355 addr = XEXP (addr, 0);
15356 if (CONST_INT_P (addr))
15357 return 0;
15359 else if (GET_CODE (addr) == AND
15360 && const_32bit_mask (XEXP (addr, 1), DImode))
15362 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
15363 if (addr == NULL_RTX)
15364 return 0;
15366 if (CONST_INT_P (addr))
15367 return 0;
15371 /* Allow SImode subregs of DImode addresses,
15372 they will be emitted with addr32 prefix. */
15373 if (TARGET_64BIT && GET_MODE (addr) == SImode)
15375 if (SUBREG_P (addr)
15376 && GET_MODE (SUBREG_REG (addr)) == DImode)
15378 addr = SUBREG_REG (addr);
15379 if (CONST_INT_P (addr))
15380 return 0;
15384 if (REG_P (addr))
15385 base = addr;
15386 else if (SUBREG_P (addr))
15388 if (REG_P (SUBREG_REG (addr)))
15389 base = addr;
15390 else
15391 return 0;
15393 else if (GET_CODE (addr) == PLUS)
15395 rtx addends[4], op;
15396 int n = 0, i;
15398 op = addr;
15401 if (n >= 4)
15402 return 0;
15403 addends[n++] = XEXP (op, 1);
15404 op = XEXP (op, 0);
15406 while (GET_CODE (op) == PLUS);
15407 if (n >= 4)
15408 return 0;
15409 addends[n] = op;
15411 for (i = n; i >= 0; --i)
15413 op = addends[i];
15414 switch (GET_CODE (op))
15416 case MULT:
15417 if (index)
15418 return 0;
15419 index = XEXP (op, 0);
15420 scale_rtx = XEXP (op, 1);
15421 break;
15423 case ASHIFT:
15424 if (index)
15425 return 0;
15426 index = XEXP (op, 0);
15427 tmp = XEXP (op, 1);
15428 if (!CONST_INT_P (tmp))
15429 return 0;
15430 scale = INTVAL (tmp);
15431 if ((unsigned HOST_WIDE_INT) scale > 3)
15432 return 0;
15433 scale = 1 << scale;
15434 break;
15436 case ZERO_EXTEND:
15437 op = XEXP (op, 0);
15438 if (GET_CODE (op) != UNSPEC)
15439 return 0;
15440 /* FALLTHRU */
15442 case UNSPEC:
15443 if (XINT (op, 1) == UNSPEC_TP
15444 && TARGET_TLS_DIRECT_SEG_REFS
15445 && seg == ADDR_SPACE_GENERIC)
15446 seg = DEFAULT_TLS_SEG_REG;
15447 else
15448 return 0;
15449 break;
15451 case SUBREG:
15452 if (!REG_P (SUBREG_REG (op)))
15453 return 0;
15454 /* FALLTHRU */
15456 case REG:
15457 if (!base)
15458 base = op;
15459 else if (!index)
15460 index = op;
15461 else
15462 return 0;
15463 break;
15465 case CONST:
15466 case CONST_INT:
15467 case SYMBOL_REF:
15468 case LABEL_REF:
15469 if (disp)
15470 return 0;
15471 disp = op;
15472 break;
15474 default:
15475 return 0;
15479 else if (GET_CODE (addr) == MULT)
15481 index = XEXP (addr, 0); /* index*scale */
15482 scale_rtx = XEXP (addr, 1);
15484 else if (GET_CODE (addr) == ASHIFT)
15486 /* We're called for lea too, which implements ashift on occasion. */
15487 index = XEXP (addr, 0);
15488 tmp = XEXP (addr, 1);
15489 if (!CONST_INT_P (tmp))
15490 return 0;
15491 scale = INTVAL (tmp);
15492 if ((unsigned HOST_WIDE_INT) scale > 3)
15493 return 0;
15494 scale = 1 << scale;
15495 retval = -1;
15497 else
15498 disp = addr; /* displacement */
15500 if (index)
15502 if (REG_P (index))
15504 else if (SUBREG_P (index)
15505 && REG_P (SUBREG_REG (index)))
15507 else
15508 return 0;
15511 /* Extract the integral value of scale. */
15512 if (scale_rtx)
15514 if (!CONST_INT_P (scale_rtx))
15515 return 0;
15516 scale = INTVAL (scale_rtx);
15519 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
15520 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
15522 /* Avoid useless 0 displacement. */
15523 if (disp == const0_rtx && (base || index))
15524 disp = NULL_RTX;
15526 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
15527 if (base_reg && index_reg && scale == 1
15528 && (REGNO (index_reg) == ARG_POINTER_REGNUM
15529 || REGNO (index_reg) == FRAME_POINTER_REGNUM
15530 || REGNO (index_reg) == SP_REG))
15532 std::swap (base, index);
15533 std::swap (base_reg, index_reg);
15536 /* Special case: %ebp cannot be encoded as a base without a displacement.
15537 Similarly %r13. */
15538 if (!disp && base_reg
15539 && (REGNO (base_reg) == ARG_POINTER_REGNUM
15540 || REGNO (base_reg) == FRAME_POINTER_REGNUM
15541 || REGNO (base_reg) == BP_REG
15542 || REGNO (base_reg) == R13_REG))
15543 disp = const0_rtx;
15545 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
15546 Avoid this by transforming to [%esi+0].
15547 Reload calls address legitimization without cfun defined, so we need
15548 to test cfun for being non-NULL. */
15549 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
15550 && base_reg && !index_reg && !disp
15551 && REGNO (base_reg) == SI_REG)
15552 disp = const0_rtx;
15554 /* Special case: encode reg+reg instead of reg*2. */
15555 if (!base && index && scale == 2)
15556 base = index, base_reg = index_reg, scale = 1;
15558 /* Special case: scaling cannot be encoded without base or displacement. */
15559 if (!base && !disp && index && scale != 1)
15560 disp = const0_rtx;
15562 out->base = base;
15563 out->index = index;
15564 out->disp = disp;
15565 out->scale = scale;
15566 out->seg = seg;
15568 return retval;
15571 /* Return cost of the memory address x.
15572 For i386, it is better to use a complex address than let gcc copy
15573 the address into a reg and make a new pseudo. But not if the address
15574 requires two regs - that would mean more pseudos with longer
15575 lifetimes. */
15576 static int
15577 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
15579 struct ix86_address parts;
15580 int cost = 1;
15581 int ok = ix86_decompose_address (x, &parts);
15583 gcc_assert (ok);
15585 if (parts.base && SUBREG_P (parts.base))
15586 parts.base = SUBREG_REG (parts.base);
15587 if (parts.index && SUBREG_P (parts.index))
15588 parts.index = SUBREG_REG (parts.index);
15590 /* Attempt to minimize the number of registers in the address by
15591 increasing the address cost for each register used. We don't increase
15592 the address cost for "pic_offset_table_rtx". When a memop with
15593 "pic_offset_table_rtx" is not invariant itself, it most likely means
15594 that the base or index is not invariant either. Therefore only
15595 "pic_offset_table_rtx" could be hoisted out, which is not profitable for x86. */
15596 if (parts.base
15597 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
15598 && (current_pass->type == GIMPLE_PASS
15599 || !pic_offset_table_rtx
15600 || !REG_P (parts.base)
15601 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
15602 cost++;
15604 if (parts.index
15605 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
15606 && (current_pass->type == GIMPLE_PASS
15607 || !pic_offset_table_rtx
15608 || !REG_P (parts.index)
15609 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
15610 cost++;
15612 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
15613 since its predecode logic can't detect the length of instructions
15614 and it degenerates to vector decoding. Increase the cost of such
15615 addresses here. The penalty is at least 2 cycles. It may be worthwhile
15616 to split such addresses or even refuse such addresses at all.
15618 The following addressing modes are affected:
15619 [base+scale*index]
15620 [scale*index+disp]
15621 [base+index]
15623 The first and last cases may be avoidable by explicitly coding the zero
15624 in the memory address, but I don't have an AMD-K6 machine handy to check
15625 this theory. */
15627 if (TARGET_K6
15628 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
15629 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
15630 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
15631 cost += 10;
15633 return cost;
15636 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
15637 this is used to form addresses of local data when -fPIC is in
15638 use. */
15640 static bool
15641 darwin_local_data_pic (rtx disp)
15643 return (GET_CODE (disp) == UNSPEC
15644 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
15647 /* True if operand X should be loaded from GOT. */
15649 bool
15650 ix86_force_load_from_GOT_p (rtx x)
15652 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15653 && !TARGET_PECOFF && !TARGET_MACHO
15654 && !flag_plt && !flag_pic
15655 && ix86_cmodel != CM_LARGE
15656 && GET_CODE (x) == SYMBOL_REF
15657 && SYMBOL_REF_FUNCTION_P (x)
15658 && !SYMBOL_REF_LOCAL_P (x));
15661 /* Determine if a given RTX is a valid constant. We already know this
15662 satisfies CONSTANT_P. */
15664 static bool
15665 ix86_legitimate_constant_p (machine_mode mode, rtx x)
15667 /* Pointer bounds constants are not valid. */
15668 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
15669 return false;
15671 switch (GET_CODE (x))
15673 case CONST:
15674 x = XEXP (x, 0);
15676 if (GET_CODE (x) == PLUS)
15678 if (!CONST_INT_P (XEXP (x, 1)))
15679 return false;
15680 x = XEXP (x, 0);
15683 if (TARGET_MACHO && darwin_local_data_pic (x))
15684 return true;
15686 /* Only some unspecs are valid as "constants". */
15687 if (GET_CODE (x) == UNSPEC)
15688 switch (XINT (x, 1))
15690 case UNSPEC_GOT:
15691 case UNSPEC_GOTOFF:
15692 case UNSPEC_PLTOFF:
15693 return TARGET_64BIT;
15694 case UNSPEC_TPOFF:
15695 case UNSPEC_NTPOFF:
15696 x = XVECEXP (x, 0, 0);
15697 return (GET_CODE (x) == SYMBOL_REF
15698 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15699 case UNSPEC_DTPOFF:
15700 x = XVECEXP (x, 0, 0);
15701 return (GET_CODE (x) == SYMBOL_REF
15702 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
15703 default:
15704 return false;
15707 /* We must have drilled down to a symbol. */
15708 if (GET_CODE (x) == LABEL_REF)
15709 return true;
15710 if (GET_CODE (x) != SYMBOL_REF)
15711 return false;
15712 /* FALLTHRU */
15714 case SYMBOL_REF:
15715 /* TLS symbols are never valid. */
15716 if (SYMBOL_REF_TLS_MODEL (x))
15717 return false;
15719 /* DLLIMPORT symbols are never valid. */
15720 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15721 && SYMBOL_REF_DLLIMPORT_P (x))
15722 return false;
15724 #if TARGET_MACHO
15725 /* mdynamic-no-pic */
15726 if (MACHO_DYNAMIC_NO_PIC_P)
15727 return machopic_symbol_defined_p (x);
15728 #endif
15730 /* External function address should be loaded
15731 via the GOT slot to avoid PLT. */
15732 if (ix86_force_load_from_GOT_p (x))
15733 return false;
15735 break;
15737 CASE_CONST_SCALAR_INT:
15738 switch (mode)
15740 case E_TImode:
15741 if (TARGET_64BIT)
15742 return true;
15743 /* FALLTHRU */
15744 case E_OImode:
15745 case E_XImode:
15746 if (!standard_sse_constant_p (x, mode))
15747 return false;
15748 default:
15749 break;
15751 break;
15753 case CONST_VECTOR:
15754 if (!standard_sse_constant_p (x, mode))
15755 return false;
15757 default:
15758 break;
15761 /* Otherwise we handle everything else in the move patterns. */
15762 return true;
15765 /* Determine if it's legal to put X into the constant pool. This
15766 is not possible for the address of thread-local symbols, which
15767 is checked above. */
15769 static bool
15770 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15772 /* We can put any immediate constant in memory. */
15773 switch (GET_CODE (x))
15775 CASE_CONST_ANY:
15776 return false;
15778 default:
15779 break;
15782 return !ix86_legitimate_constant_p (mode, x);
15785 /* Return true if the symbol is marked as dllimport, or as a
15786 stub-variable; otherwise false. */
15788 static bool
15789 is_imported_p (rtx x)
15791 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15792 || GET_CODE (x) != SYMBOL_REF)
15793 return false;
15795 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15799 /* Nonzero if the constant value X is a legitimate general operand
15800 when generating PIC code. It is given that flag_pic is on and
15801 that X satisfies CONSTANT_P. */
15803 bool
15804 legitimate_pic_operand_p (rtx x)
15806 rtx inner;
15808 switch (GET_CODE (x))
15810 case CONST:
15811 inner = XEXP (x, 0);
15812 if (GET_CODE (inner) == PLUS
15813 && CONST_INT_P (XEXP (inner, 1)))
15814 inner = XEXP (inner, 0);
15816 /* Only some unspecs are valid as "constants". */
15817 if (GET_CODE (inner) == UNSPEC)
15818 switch (XINT (inner, 1))
15820 case UNSPEC_GOT:
15821 case UNSPEC_GOTOFF:
15822 case UNSPEC_PLTOFF:
15823 return TARGET_64BIT;
15824 case UNSPEC_TPOFF:
15825 x = XVECEXP (inner, 0, 0);
15826 return (GET_CODE (x) == SYMBOL_REF
15827 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15828 case UNSPEC_MACHOPIC_OFFSET:
15829 return legitimate_pic_address_disp_p (x);
15830 default:
15831 return false;
15833 /* FALLTHRU */
15835 case SYMBOL_REF:
15836 case LABEL_REF:
15837 return legitimate_pic_address_disp_p (x);
15839 default:
15840 return true;
15844 /* Determine if a given CONST RTX is a valid memory displacement
15845 in PIC mode. */
15847 bool
15848 legitimate_pic_address_disp_p (rtx disp)
15850 bool saw_plus;
15852 /* In 64bit mode we can allow direct addresses of symbols and labels
15853 when they are not dynamic symbols. */
15854 if (TARGET_64BIT)
15856 rtx op0 = disp, op1;
15858 switch (GET_CODE (disp))
15860 case LABEL_REF:
15861 return true;
15863 case CONST:
15864 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15865 break;
15866 op0 = XEXP (XEXP (disp, 0), 0);
15867 op1 = XEXP (XEXP (disp, 0), 1);
15868 if (!CONST_INT_P (op1))
15869 break;
15870 if (GET_CODE (op0) == UNSPEC
15871 && (XINT (op0, 1) == UNSPEC_DTPOFF
15872 || XINT (op0, 1) == UNSPEC_NTPOFF)
15873 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15874 return true;
15875 if (INTVAL (op1) >= 16*1024*1024
15876 || INTVAL (op1) < -16*1024*1024)
15877 break;
15878 if (GET_CODE (op0) == LABEL_REF)
15879 return true;
15880 if (GET_CODE (op0) == CONST
15881 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15882 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15883 return true;
15884 if (GET_CODE (op0) == UNSPEC
15885 && XINT (op0, 1) == UNSPEC_PCREL)
15886 return true;
15887 if (GET_CODE (op0) != SYMBOL_REF)
15888 break;
15889 /* FALLTHRU */
15891 case SYMBOL_REF:
15892 /* TLS references should always be enclosed in UNSPEC.
15893 The dllimported symbol always needs to be resolved. */
15894 if (SYMBOL_REF_TLS_MODEL (op0)
15895 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15896 return false;
15898 if (TARGET_PECOFF)
15900 if (is_imported_p (op0))
15901 return true;
15903 if (SYMBOL_REF_FAR_ADDR_P (op0)
15904 || !SYMBOL_REF_LOCAL_P (op0))
15905 break;
15907 /* Function symbols need to be resolved only for
15908 the large model.
15909 For the small model we don't need to resolve anything
15910 here. */
15911 if ((ix86_cmodel != CM_LARGE_PIC
15912 && SYMBOL_REF_FUNCTION_P (op0))
15913 || ix86_cmodel == CM_SMALL_PIC)
15914 return true;
15915 /* Non-external symbols don't need to be resolved for
15916 the large and medium models. */
15917 if ((ix86_cmodel == CM_LARGE_PIC
15918 || ix86_cmodel == CM_MEDIUM_PIC)
15919 && !SYMBOL_REF_EXTERNAL_P (op0))
15920 return true;
15922 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15923 && (SYMBOL_REF_LOCAL_P (op0)
15924 || (HAVE_LD_PIE_COPYRELOC
15925 && flag_pie
15926 && !SYMBOL_REF_WEAK (op0)
15927 && !SYMBOL_REF_FUNCTION_P (op0)))
15928 && ix86_cmodel != CM_LARGE_PIC)
15929 return true;
15930 break;
15932 default:
15933 break;
15936 if (GET_CODE (disp) != CONST)
15937 return false;
15938 disp = XEXP (disp, 0);
15940 if (TARGET_64BIT)
15942 /* It is unsafe to allow PLUS expressions. This limits the allowed
15943 distance of GOT table entries. We should not need these anyway. */
15944 if (GET_CODE (disp) != UNSPEC
15945 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15946 && XINT (disp, 1) != UNSPEC_GOTOFF
15947 && XINT (disp, 1) != UNSPEC_PCREL
15948 && XINT (disp, 1) != UNSPEC_PLTOFF))
15949 return false;
15951 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15952 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15953 return false;
15954 return true;
15957 saw_plus = false;
15958 if (GET_CODE (disp) == PLUS)
15960 if (!CONST_INT_P (XEXP (disp, 1)))
15961 return false;
15962 disp = XEXP (disp, 0);
15963 saw_plus = true;
15966 if (TARGET_MACHO && darwin_local_data_pic (disp))
15967 return true;
15969 if (GET_CODE (disp) != UNSPEC)
15970 return false;
15972 switch (XINT (disp, 1))
15974 case UNSPEC_GOT:
15975 if (saw_plus)
15976 return false;
15977 /* We need to check for both symbols and labels because VxWorks loads
15978 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15979 details. */
15980 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15981 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15982 case UNSPEC_GOTOFF:
15983 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15984 While the ABI also specifies a 32bit relocation, we don't produce
15985 it in the small PIC model at all. */
15986 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15987 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15988 && !TARGET_64BIT)
15989 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15990 return false;
15991 case UNSPEC_GOTTPOFF:
15992 case UNSPEC_GOTNTPOFF:
15993 case UNSPEC_INDNTPOFF:
15994 if (saw_plus)
15995 return false;
15996 disp = XVECEXP (disp, 0, 0);
15997 return (GET_CODE (disp) == SYMBOL_REF
15998 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15999 case UNSPEC_NTPOFF:
16000 disp = XVECEXP (disp, 0, 0);
16001 return (GET_CODE (disp) == SYMBOL_REF
16002 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
16003 case UNSPEC_DTPOFF:
16004 disp = XVECEXP (disp, 0, 0);
16005 return (GET_CODE (disp) == SYMBOL_REF
16006 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
16009 return false;
16012 /* Determine if OP is a suitable RTX for an address register.
16013 Return the naked register if a register or a register subreg is
16014 found, otherwise return NULL_RTX. */
16016 static rtx
16017 ix86_validate_address_register (rtx op)
16019 machine_mode mode = GET_MODE (op);
16021 /* Only SImode or DImode registers can form the address. */
16022 if (mode != SImode && mode != DImode)
16023 return NULL_RTX;
16025 if (REG_P (op))
16026 return op;
16027 else if (SUBREG_P (op))
16029 rtx reg = SUBREG_REG (op);
16031 if (!REG_P (reg))
16032 return NULL_RTX;
16034 mode = GET_MODE (reg);
16036 /* Don't allow SUBREGs that span more than a word. It can
16037 lead to spill failures when the register is one word out
16038 of a two word structure. */
16039 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16040 return NULL_RTX;
16042 /* Allow only SUBREGs of non-eliminable hard registers. */
16043 if (register_no_elim_operand (reg, mode))
16044 return reg;
16047 /* Op is not a register. */
16048 return NULL_RTX;
16051 /* Recognizes RTL expressions that are valid memory addresses for an
16052 instruction. The MODE argument is the machine mode for the MEM
16053 expression that wants to use this address.
16055 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
16056 convert common non-canonical forms to canonical form so that they will
16057 be recognized. */
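/* For instance (hypothetical operands), the canonical form
     (plus (plus (reg) (mult (reg) (const_int 4))) (const_int disp))
   is accepted here, whereas an equivalent ASHIFT-based form is rejected,
   since ix86_decompose_address returns -1 for it; LEGITIMIZE_ADDRESS is
   expected to rewrite such forms first. */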
16059 static bool
16060 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
16062 struct ix86_address parts;
16063 rtx base, index, disp;
16064 HOST_WIDE_INT scale;
16065 addr_space_t seg;
16067 if (ix86_decompose_address (addr, &parts) <= 0)
16068 /* Decomposition failed. */
16069 return false;
16071 base = parts.base;
16072 index = parts.index;
16073 disp = parts.disp;
16074 scale = parts.scale;
16075 seg = parts.seg;
16077 /* Validate base register. */
16078 if (base)
16080 rtx reg = ix86_validate_address_register (base);
16082 if (reg == NULL_RTX)
16083 return false;
16085 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
16086 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
16087 /* Base is not valid. */
16088 return false;
16091 /* Validate index register. */
16092 if (index)
16094 rtx reg = ix86_validate_address_register (index);
16096 if (reg == NULL_RTX)
16097 return false;
16099 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
16100 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
16101 /* Index is not valid. */
16102 return false;
16105 /* Index and base should have the same mode. */
16106 if (base && index
16107 && GET_MODE (base) != GET_MODE (index))
16108 return false;
16110 /* Address override works only on the (%reg) part of %fs:(%reg). */
16111 if (seg != ADDR_SPACE_GENERIC
16112 && ((base && GET_MODE (base) != word_mode)
16113 || (index && GET_MODE (index) != word_mode)))
16114 return false;
16116 /* Validate scale factor. */
16117 if (scale != 1)
16119 if (!index)
16120 /* Scale without index. */
16121 return false;
16123 if (scale != 2 && scale != 4 && scale != 8)
16124 /* Scale is not a valid multiplier. */
16125 return false;
16128 /* Validate displacement. */
16129 if (disp)
16131 if (GET_CODE (disp) == CONST
16132 && GET_CODE (XEXP (disp, 0)) == UNSPEC
16133 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
16134 switch (XINT (XEXP (disp, 0), 1))
16136 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
16137 when used. While the ABI also specifies 32bit relocations, we
16138 don't produce them at all and use IP-relative addressing instead.
16139 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
16140 should be loaded via the GOT. */
16141 case UNSPEC_GOT:
16142 if (!TARGET_64BIT
16143 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
16144 goto is_legitimate_pic;
16145 /* FALLTHRU */
16146 case UNSPEC_GOTOFF:
16147 gcc_assert (flag_pic);
16148 if (!TARGET_64BIT)
16149 goto is_legitimate_pic;
16151 /* 64bit address unspec. */
16152 return false;
16154 case UNSPEC_GOTPCREL:
16155 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
16156 goto is_legitimate_pic;
16157 /* FALLTHRU */
16158 case UNSPEC_PCREL:
16159 gcc_assert (flag_pic);
16160 goto is_legitimate_pic;
16162 case UNSPEC_GOTTPOFF:
16163 case UNSPEC_GOTNTPOFF:
16164 case UNSPEC_INDNTPOFF:
16165 case UNSPEC_NTPOFF:
16166 case UNSPEC_DTPOFF:
16167 break;
16169 default:
16170 /* Invalid address unspec. */
16171 return false;
16174 else if (SYMBOLIC_CONST (disp)
16175 && (flag_pic
16176 || (TARGET_MACHO
16177 #if TARGET_MACHO
16178 && MACHOPIC_INDIRECT
16179 && !machopic_operand_p (disp)
16180 #endif
16184 is_legitimate_pic:
16185 if (TARGET_64BIT && (index || base))
16187 /* foo@dtpoff(%rX) is ok. */
16188 if (GET_CODE (disp) != CONST
16189 || GET_CODE (XEXP (disp, 0)) != PLUS
16190 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
16191 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
16192 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
16193 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
16194 /* Non-constant pic memory reference. */
16195 return false;
16197 else if ((!TARGET_MACHO || flag_pic)
16198 && ! legitimate_pic_address_disp_p (disp))
16199 /* Displacement is an invalid pic construct. */
16200 return false;
16201 #if TARGET_MACHO
16202 else if (MACHO_DYNAMIC_NO_PIC_P
16203 && !ix86_legitimate_constant_p (Pmode, disp))
16204 /* displacement must be referenced via non_lazy_pointer */
16205 return false;
16206 #endif
16208 /* This code used to verify that a symbolic pic displacement
16209 includes the pic_offset_table_rtx register.
16211 While this is a good idea, unfortunately these constructs may
16212 be created by the "adds using lea" optimization for incorrect
16213 code like:
16215 int a;
16216 int foo(int i)
16218 return *(&a+i);
16221 This code is nonsensical, but results in addressing the
16222 GOT table with a pic_offset_table_rtx base. We can't
16223 just refuse it easily, since it gets matched by the
16224 "addsi3" pattern, which later gets split to lea in case
16225 the output register differs from the input. While this
16226 could be handled by a separate addsi pattern for this case
16227 that never results in lea, disabling this test seems to be
16228 the easier and correct fix for the crash. */
16230 else if (GET_CODE (disp) != LABEL_REF
16231 && !CONST_INT_P (disp)
16232 && (GET_CODE (disp) != CONST
16233 || !ix86_legitimate_constant_p (Pmode, disp))
16234 && (GET_CODE (disp) != SYMBOL_REF
16235 || !ix86_legitimate_constant_p (Pmode, disp)))
16236 /* Displacement is not constant. */
16237 return false;
16238 else if (TARGET_64BIT
16239 && !x86_64_immediate_operand (disp, VOIDmode))
16240 /* Displacement is out of range. */
16241 return false;
16242 /* In x32 mode, constant addresses are sign extended to 64bit, so
16243 we have to prevent addresses from 0x80000000 to 0xffffffff. */
16244 else if (TARGET_X32 && !(index || base)
16245 && CONST_INT_P (disp)
16246 && val_signbit_known_set_p (SImode, INTVAL (disp)))
16247 return false;
16250 /* Everything looks valid. */
16251 return true;
16254 /* Determine if a given RTX is a valid constant address. */
16256 bool
16257 constant_address_p (rtx x)
16259 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
16262 /* Return a unique alias set for the GOT. */
16264 static alias_set_type
16265 ix86_GOT_alias_set (void)
16267 static alias_set_type set = -1;
16268 if (set == -1)
16269 set = new_alias_set ();
16270 return set;
16273 /* Return a legitimate reference for ORIG (an address) using the
16274 register REG. If REG is 0, a new pseudo is generated.
16276 There are two types of references that must be handled:
16278 1. Global data references must load the address from the GOT, via
16279 the PIC reg. An insn is emitted to do this load, and the reg is
16280 returned.
16282 2. Static data references, constant pool addresses, and code labels
16283 compute the address as an offset from the GOT, whose base is in
16284 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
16285 differentiate them from global data objects. The returned
16286 address is the PIC reg + an unspec constant.
16288 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
16289 reg also appears in the address. */
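/* Illustrative 32-bit PIC sketch (not the exact output; it depends on
   the target and code model): a global object G is reached through a
   GOT load,
     movl G@GOT(%ebx), %reg
   while a local/static object S is addressed relative to the PIC base,
     leal S@GOTOFF(%ebx), %reg
   corresponding to cases 1 and 2 above. */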
16291 static rtx
16292 legitimize_pic_address (rtx orig, rtx reg)
16294 rtx addr = orig;
16295 rtx new_rtx = orig;
16297 #if TARGET_MACHO
16298 if (TARGET_MACHO && !TARGET_64BIT)
16300 if (reg == 0)
16301 reg = gen_reg_rtx (Pmode);
16302 /* Use the generic Mach-O PIC machinery. */
16303 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
16305 #endif
16307 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16309 rtx tmp = legitimize_pe_coff_symbol (addr, true);
16310 if (tmp)
16311 return tmp;
16314 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
16315 new_rtx = addr;
16316 else if ((!TARGET_64BIT
16317 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
16318 && !TARGET_PECOFF
16319 && gotoff_operand (addr, Pmode))
16321 /* This symbol may be referenced via a displacement
16322 from the PIC base address (@GOTOFF). */
16323 if (GET_CODE (addr) == CONST)
16324 addr = XEXP (addr, 0);
16326 if (GET_CODE (addr) == PLUS)
16328 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
16329 UNSPEC_GOTOFF);
16330 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
16332 else
16333 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
16335 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16337 if (TARGET_64BIT)
16338 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
16340 if (reg != 0)
16342 gcc_assert (REG_P (reg));
16343 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
16344 new_rtx, reg, 1, OPTAB_DIRECT);
16346 else
16347 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16349 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
16350 /* We can't use @GOTOFF for text labels
16351 on VxWorks, see gotoff_operand. */
16352 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
16354 rtx tmp = legitimize_pe_coff_symbol (addr, true);
16355 if (tmp)
16356 return tmp;
16358 /* For x64 PE-COFF there is no GOT table,
16359 so we use the address directly. */
16360 if (TARGET_64BIT && TARGET_PECOFF)
16362 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
16363 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16365 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
16367 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
16368 UNSPEC_GOTPCREL);
16369 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16370 new_rtx = gen_const_mem (Pmode, new_rtx);
16371 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
16373 else
16375 /* This symbol must be referenced via a load
16376 from the Global Offset Table (@GOT). */
16377 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
16378 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16379 if (TARGET_64BIT)
16380 new_rtx = force_reg (Pmode, new_rtx);
16381 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16382 new_rtx = gen_const_mem (Pmode, new_rtx);
16383 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
16386 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
16388 else
16390 if (CONST_INT_P (addr)
16391 && !x86_64_immediate_operand (addr, VOIDmode))
16392 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
16393 else if (GET_CODE (addr) == CONST)
16395 addr = XEXP (addr, 0);
16397 /* We must match stuff we generated before. Assume the only
16398 unspecs that can get here are ours. Not that we could do
16399 anything with them anyway.... */
16400 if (GET_CODE (addr) == UNSPEC
16401 || (GET_CODE (addr) == PLUS
16402 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
16403 return orig;
16404 gcc_assert (GET_CODE (addr) == PLUS);
16407 if (GET_CODE (addr) == PLUS)
16409 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
16411 /* Check first to see if this is a constant
16412 offset from a @GOTOFF symbol reference. */
16413 if (!TARGET_PECOFF
16414 && gotoff_operand (op0, Pmode)
16415 && CONST_INT_P (op1))
16417 if (!TARGET_64BIT)
16419 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
16420 UNSPEC_GOTOFF);
16421 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
16422 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16424 if (reg != 0)
16426 gcc_assert (REG_P (reg));
16427 new_rtx = expand_simple_binop (Pmode, PLUS,
16428 pic_offset_table_rtx,
16429 new_rtx, reg, 1,
16430 OPTAB_DIRECT);
16432 else
16433 new_rtx
16434 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16436 else
16438 if (INTVAL (op1) < -16*1024*1024
16439 || INTVAL (op1) >= 16*1024*1024)
16441 if (!x86_64_immediate_operand (op1, Pmode))
16442 op1 = force_reg (Pmode, op1);
16444 new_rtx
16445 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
16449 else
16451 rtx base = legitimize_pic_address (op0, reg);
16452 machine_mode mode = GET_MODE (base);
16453 new_rtx
16454 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
16456 if (CONST_INT_P (new_rtx))
16458 if (INTVAL (new_rtx) < -16*1024*1024
16459 || INTVAL (new_rtx) >= 16*1024*1024)
16461 if (!x86_64_immediate_operand (new_rtx, mode))
16462 new_rtx = force_reg (mode, new_rtx);
16464 new_rtx
16465 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
16467 else
16468 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
16470 else
16472 /* For %rip addressing, we have to use
16473 just disp32, neither base nor index. */
16474 if (TARGET_64BIT
16475 && (GET_CODE (base) == SYMBOL_REF
16476 || GET_CODE (base) == LABEL_REF))
16477 base = force_reg (mode, base);
16478 if (GET_CODE (new_rtx) == PLUS
16479 && CONSTANT_P (XEXP (new_rtx, 1)))
16481 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
16482 new_rtx = XEXP (new_rtx, 1);
16484 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
16489 return new_rtx;
16492 /* Load the thread pointer. If TO_REG is true, force it into a register. */
16494 static rtx
16495 get_thread_pointer (machine_mode tp_mode, bool to_reg)
16497 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
16499 if (GET_MODE (tp) != tp_mode)
16501 gcc_assert (GET_MODE (tp) == SImode);
16502 gcc_assert (tp_mode == DImode);
16504 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
16507 if (to_reg)
16508 tp = copy_to_mode_reg (tp_mode, tp);
16510 return tp;
16513 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16515 static GTY(()) rtx ix86_tls_symbol;
16517 static rtx
16518 ix86_tls_get_addr (void)
16520 if (!ix86_tls_symbol)
16522 const char *sym
16523 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
16524 ? "___tls_get_addr" : "__tls_get_addr");
16526 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
16529 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
16531 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
16532 UNSPEC_PLTOFF);
16533 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
16534 gen_rtx_CONST (Pmode, unspec));
16537 return ix86_tls_symbol;
16540 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16542 static GTY(()) rtx ix86_tls_module_base_symbol;
16545 ix86_tls_module_base (void)
16547 if (!ix86_tls_module_base_symbol)
16549 ix86_tls_module_base_symbol
16550 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
16552 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16553 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16556 return ix86_tls_module_base_symbol;
16559 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
16560 false if we expect this to be used for a memory address and true if
16561 we expect to load the address into a register. */
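/* Rough shape of the resulting accesses (illustrative, 64-bit GNU TLS;
   details vary with -fpic, TARGET_GNU2_TLS and the code model):
     global/local dynamic: call __tls_get_addr and add an @dtpoff
       offset to its result;
     initial exec: movq x@gottpoff(%rip), %reg then access %fs:(%reg);
     local exec: access %fs:x@tpoff directly. */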
16563 static rtx
16564 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
16566 rtx dest, base, off;
16567 rtx pic = NULL_RTX, tp = NULL_RTX;
16568 machine_mode tp_mode = Pmode;
16569 int type;
16571 /* Fall back to the global dynamic model if the toolchain cannot
16572 support local dynamic. */
16573 if (TARGET_SUN_TLS && !TARGET_64BIT
16574 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
16575 && model == TLS_MODEL_LOCAL_DYNAMIC)
16576 model = TLS_MODEL_GLOBAL_DYNAMIC;
16578 switch (model)
16580 case TLS_MODEL_GLOBAL_DYNAMIC:
16581 dest = gen_reg_rtx (Pmode);
16583 if (!TARGET_64BIT)
16585 if (flag_pic && !TARGET_PECOFF)
16586 pic = pic_offset_table_rtx;
16587 else
16589 pic = gen_reg_rtx (Pmode);
16590 emit_insn (gen_set_got (pic));
16594 if (TARGET_GNU2_TLS)
16596 if (TARGET_64BIT)
16597 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
16598 else
16599 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
16601 tp = get_thread_pointer (Pmode, true);
16602 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
16604 if (GET_MODE (x) != Pmode)
16605 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16607 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16609 else
16611 rtx caddr = ix86_tls_get_addr ();
16613 if (TARGET_64BIT)
16615 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16616 rtx_insn *insns;
16618 start_sequence ();
16619 emit_call_insn
16620 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
16621 insns = get_insns ();
16622 end_sequence ();
16624 if (GET_MODE (x) != Pmode)
16625 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16627 RTL_CONST_CALL_P (insns) = 1;
16628 emit_libcall_block (insns, dest, rax, x);
16630 else
16631 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
16633 break;
16635 case TLS_MODEL_LOCAL_DYNAMIC:
16636 base = gen_reg_rtx (Pmode);
16638 if (!TARGET_64BIT)
16640 if (flag_pic)
16641 pic = pic_offset_table_rtx;
16642 else
16644 pic = gen_reg_rtx (Pmode);
16645 emit_insn (gen_set_got (pic));
16649 if (TARGET_GNU2_TLS)
16651 rtx tmp = ix86_tls_module_base ();
16653 if (TARGET_64BIT)
16654 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
16655 else
16656 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
16658 tp = get_thread_pointer (Pmode, true);
16659 set_unique_reg_note (get_last_insn (), REG_EQUAL,
16660 gen_rtx_MINUS (Pmode, tmp, tp));
16662 else
16664 rtx caddr = ix86_tls_get_addr ();
16666 if (TARGET_64BIT)
16668 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16669 rtx_insn *insns;
16670 rtx eqv;
16672 start_sequence ();
16673 emit_call_insn
16674 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
16675 insns = get_insns ();
16676 end_sequence ();
16678 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
16679 share the LD_BASE result with other LD model accesses. */
16680 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
16681 UNSPEC_TLS_LD_BASE);
16683 RTL_CONST_CALL_P (insns) = 1;
16684 emit_libcall_block (insns, base, rax, eqv);
16686 else
16687 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
16690 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
16691 off = gen_rtx_CONST (Pmode, off);
16693 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
16695 if (TARGET_GNU2_TLS)
16697 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
16699 if (GET_MODE (x) != Pmode)
16700 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16702 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16704 break;
16706 case TLS_MODEL_INITIAL_EXEC:
16707 if (TARGET_64BIT)
16709 if (TARGET_SUN_TLS && !TARGET_X32)
16711 /* The Sun linker took the AMD64 TLS spec literally
16712 and can only handle %rax as destination of the
16713 initial executable code sequence. */
16715 dest = gen_reg_rtx (DImode);
16716 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
16717 return dest;
16720 /* Generate DImode references to avoid %fs:(%reg32)
16721 problems and the linker IE->LE relaxation bug. */
16722 tp_mode = DImode;
16723 pic = NULL;
16724 type = UNSPEC_GOTNTPOFF;
16726 else if (flag_pic)
16728 pic = pic_offset_table_rtx;
16729 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16731 else if (!TARGET_ANY_GNU_TLS)
16733 pic = gen_reg_rtx (Pmode);
16734 emit_insn (gen_set_got (pic));
16735 type = UNSPEC_GOTTPOFF;
16737 else
16739 pic = NULL;
16740 type = UNSPEC_INDNTPOFF;
16743 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16744 off = gen_rtx_CONST (tp_mode, off);
16745 if (pic)
16746 off = gen_rtx_PLUS (tp_mode, pic, off);
16747 off = gen_const_mem (tp_mode, off);
16748 set_mem_alias_set (off, ix86_GOT_alias_set ());
16750 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16752 base = get_thread_pointer (tp_mode,
16753 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16754 off = force_reg (tp_mode, off);
16755 dest = gen_rtx_PLUS (tp_mode, base, off);
16756 if (tp_mode != Pmode)
16757 dest = convert_to_mode (Pmode, dest, 1);
16759 else
16761 base = get_thread_pointer (Pmode, true);
16762 dest = gen_reg_rtx (Pmode);
16763 emit_insn (ix86_gen_sub3 (dest, base, off));
16765 break;
16767 case TLS_MODEL_LOCAL_EXEC:
16768 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16769 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16770 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16771 off = gen_rtx_CONST (Pmode, off);
16773 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16775 base = get_thread_pointer (Pmode,
16776 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16777 return gen_rtx_PLUS (Pmode, base, off);
16779 else
16781 base = get_thread_pointer (Pmode, true);
16782 dest = gen_reg_rtx (Pmode);
16783 emit_insn (ix86_gen_sub3 (dest, base, off));
16785 break;
16787 default:
16788 gcc_unreachable ();
16791 return dest;
16794 /* Return true if OP refers to a TLS address. */
16795 bool
16796 ix86_tls_address_pattern_p (rtx op)
16798 subrtx_var_iterator::array_type array;
16799 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16801 rtx op = *iter;
16802 if (MEM_P (op))
16804 rtx *x = &XEXP (op, 0);
16805 while (GET_CODE (*x) == PLUS)
16807 int i;
16808 for (i = 0; i < 2; i++)
16810 rtx u = XEXP (*x, i);
16811 if (GET_CODE (u) == ZERO_EXTEND)
16812 u = XEXP (u, 0);
16813 if (GET_CODE (u) == UNSPEC
16814 && XINT (u, 1) == UNSPEC_TP)
16815 return true;
16817 x = &XEXP (*x, 0);
16820 iter.skip_subrtxes ();
16824 return false;
16827 /* Rewrite *LOC so that it refers to a default TLS address space. */
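/* As a sketch: a memory reference whose address contains the thread
   pointer UNSPEC, e.g.

     (mem (plus (unspec [(const_int 0)] UNSPEC_TP) (reg)))

   is rewritten so that the UNSPEC_TP term is dropped and the MEM
   instead carries the DEFAULT_TLS_SEG_REG (%fs or %gs) address
   space.  */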
16828 void
16829 ix86_rewrite_tls_address_1 (rtx *loc)
16831 subrtx_ptr_iterator::array_type array;
16832 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16834 rtx *loc = *iter;
16835 if (MEM_P (*loc))
16837 rtx addr = XEXP (*loc, 0);
16838 rtx *x = &addr;
16839 while (GET_CODE (*x) == PLUS)
16841 int i;
16842 for (i = 0; i < 2; i++)
16844 rtx u = XEXP (*x, i);
16845 if (GET_CODE (u) == ZERO_EXTEND)
16846 u = XEXP (u, 0);
16847 if (GET_CODE (u) == UNSPEC
16848 && XINT (u, 1) == UNSPEC_TP)
16850 addr_space_t as = DEFAULT_TLS_SEG_REG;
16852 *x = XEXP (*x, 1 - i);
16854 *loc = replace_equiv_address_nv (*loc, addr, true);
16855 set_mem_addr_space (*loc, as);
16856 return;
16859 x = &XEXP (*x, 0);
16862 iter.skip_subrtxes ();
16867 /* Rewrite an instruction pattern involving a TLS address
16868 so that it refers to the default TLS address space. */
16870 ix86_rewrite_tls_address (rtx pattern)
16872 pattern = copy_insn (pattern);
16873 ix86_rewrite_tls_address_1 (&pattern);
16874 return pattern;
16877 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16878 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16879 unique refptr-DECL symbol corresponding to symbol DECL. */
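/* Illustrative example only: a reference to a dllimport'ed variable
   FOO is not addressed directly; it is loaded through the import
   pointer that the linker fills in, roughly

     movl  __imp__foo, %eax    # fetch &foo from the import table
     movl  (%eax), %eax        # fetch foo itself

   (the exact __imp_ spelling depends on the user label prefix).  The
   artificial VAR_DECL built below stands for that __imp_ slot.  */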
16881 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16883 static inline hashval_t hash (tree_map *m) { return m->hash; }
16884 static inline bool
16885 equal (tree_map *a, tree_map *b)
16887 return a->base.from == b->base.from;
16890 static int
16891 keep_cache_entry (tree_map *&m)
16893 return ggc_marked_p (m->base.from);
16897 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16899 static tree
16900 get_dllimport_decl (tree decl, bool beimport)
16902 struct tree_map *h, in;
16903 const char *name;
16904 const char *prefix;
16905 size_t namelen, prefixlen;
16906 char *imp_name;
16907 tree to;
16908 rtx rtl;
16910 if (!dllimport_map)
16911 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16913 in.hash = htab_hash_pointer (decl);
16914 in.base.from = decl;
16915 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16916 h = *loc;
16917 if (h)
16918 return h->to;
16920 *loc = h = ggc_alloc<tree_map> ();
16921 h->hash = in.hash;
16922 h->base.from = decl;
16923 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16924 VAR_DECL, NULL, ptr_type_node);
16925 DECL_ARTIFICIAL (to) = 1;
16926 DECL_IGNORED_P (to) = 1;
16927 DECL_EXTERNAL (to) = 1;
16928 TREE_READONLY (to) = 1;
16930 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16931 name = targetm.strip_name_encoding (name);
16932 if (beimport)
16933 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16934 ? "*__imp_" : "*__imp__";
16935 else
16936 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16937 namelen = strlen (name);
16938 prefixlen = strlen (prefix);
16939 imp_name = (char *) alloca (namelen + prefixlen + 1);
16940 memcpy (imp_name, prefix, prefixlen);
16941 memcpy (imp_name + prefixlen, name, namelen + 1);
16943 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16944 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16945 SET_SYMBOL_REF_DECL (rtl, to);
16946 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16947 if (!beimport)
16949 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16950 #ifdef SUB_TARGET_RECORD_STUB
16951 SUB_TARGET_RECORD_STUB (name);
16952 #endif
16955 rtl = gen_const_mem (Pmode, rtl);
16956 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16958 SET_DECL_RTL (to, rtl);
16959 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16961 return to;
16964 /* Expand SYMBOL into its corresponding far-address symbol.
16965 WANT_REG is true if we require the result be a register. */
16967 static rtx
16968 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16970 tree imp_decl;
16971 rtx x;
16973 gcc_assert (SYMBOL_REF_DECL (symbol));
16974 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16976 x = DECL_RTL (imp_decl);
16977 if (want_reg)
16978 x = force_reg (Pmode, x);
16979 return x;
16982 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16983 true if we require the result be a register. */
16985 static rtx
16986 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16988 tree imp_decl;
16989 rtx x;
16991 gcc_assert (SYMBOL_REF_DECL (symbol));
16992 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16994 x = DECL_RTL (imp_decl);
16995 if (want_reg)
16996 x = force_reg (Pmode, x);
16997 return x;
17000 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
17001 is true if we require the result be a register. */
17003 static rtx
17004 legitimize_pe_coff_symbol (rtx addr, bool inreg)
17006 if (!TARGET_PECOFF)
17007 return NULL_RTX;
17009 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
17011 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
17012 return legitimize_dllimport_symbol (addr, inreg);
17013 if (GET_CODE (addr) == CONST
17014 && GET_CODE (XEXP (addr, 0)) == PLUS
17015 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
17016 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
17018 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
17019 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
17023 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
17024 return NULL_RTX;
17025 if (GET_CODE (addr) == SYMBOL_REF
17026 && !is_imported_p (addr)
17027 && SYMBOL_REF_EXTERNAL_P (addr)
17028 && SYMBOL_REF_DECL (addr))
17029 return legitimize_pe_coff_extern_decl (addr, inreg);
17031 if (GET_CODE (addr) == CONST
17032 && GET_CODE (XEXP (addr, 0)) == PLUS
17033 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
17034 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
17035 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
17036 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
17038 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
17039 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
17041 return NULL_RTX;
17044 /* Try machine-dependent ways of modifying an illegitimate address
17045 to be legitimate. If we find one, return the new, valid address.
17046 This macro is used in only one place: `memory_address' in explow.c.
17048 OLDX is the address as it was before break_out_memory_refs was called.
17049 In some cases it is useful to look at this to decide what needs to be done.
17051 It is always safe for this macro to do nothing. It exists to recognize
17052 opportunities to optimize the output.
17054 For the 80386, we handle X+REG by loading X into a register R and
17055 using R+REG. R will go in a general reg and indexing will be used.
17056 However, if REG is a broken-out memory address or multiplication,
17057 nothing needs to be done because REG can certainly go in a general reg.
17059 When -fpic is used, special handling is needed for symbolic references.
17060 See comments by legitimize_pic_address in i386.c for details. */
17062 static rtx
17063 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
17065 bool changed = false;
17066 unsigned log;
17068 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
17069 if (log)
17070 return legitimize_tls_address (x, (enum tls_model) log, false);
17071 if (GET_CODE (x) == CONST
17072 && GET_CODE (XEXP (x, 0)) == PLUS
17073 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
17074 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
17076 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
17077 (enum tls_model) log, false);
17078 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
17081 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
17083 rtx tmp = legitimize_pe_coff_symbol (x, true);
17084 if (tmp)
17085 return tmp;
17088 if (flag_pic && SYMBOLIC_CONST (x))
17089 return legitimize_pic_address (x, 0);
17091 #if TARGET_MACHO
17092 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
17093 return machopic_indirect_data_reference (x, 0);
17094 #endif
17096 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
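/* E.g. an address of the form (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), the canonical shape of a scaled index.  */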
17097 if (GET_CODE (x) == ASHIFT
17098 && CONST_INT_P (XEXP (x, 1))
17099 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
17101 changed = true;
17102 log = INTVAL (XEXP (x, 1));
17103 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
17104 GEN_INT (1 << log));
17107 if (GET_CODE (x) == PLUS)
17109 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
17111 if (GET_CODE (XEXP (x, 0)) == ASHIFT
17112 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17113 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
17115 changed = true;
17116 log = INTVAL (XEXP (XEXP (x, 0), 1));
17117 XEXP (x, 0) = gen_rtx_MULT (Pmode,
17118 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
17119 GEN_INT (1 << log));
17122 if (GET_CODE (XEXP (x, 1)) == ASHIFT
17123 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
17124 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
17126 changed = true;
17127 log = INTVAL (XEXP (XEXP (x, 1), 1));
17128 XEXP (x, 1) = gen_rtx_MULT (Pmode,
17129 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
17130 GEN_INT (1 << log));
17133 /* Put multiply first if it isn't already. */
17134 if (GET_CODE (XEXP (x, 1)) == MULT)
17136 std::swap (XEXP (x, 0), XEXP (x, 1));
17137 changed = true;
17140 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
17141 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
17142 created by virtual register instantiation, register elimination, and
17143 similar optimizations. */
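/* For example,
     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   becomes
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   i.e. the base + index*scale + disp shape the address decomposer
   expects.  */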
17144 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
17146 changed = true;
17147 x = gen_rtx_PLUS (Pmode,
17148 gen_rtx_PLUS (Pmode, XEXP (x, 0),
17149 XEXP (XEXP (x, 1), 0)),
17150 XEXP (XEXP (x, 1), 1));
17153 /* Canonicalize
17154 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
17155 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
17156 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
17157 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17158 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
17159 && CONSTANT_P (XEXP (x, 1)))
17161 rtx constant;
17162 rtx other = NULL_RTX;
17164 if (CONST_INT_P (XEXP (x, 1)))
17166 constant = XEXP (x, 1);
17167 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
17169 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
17171 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
17172 other = XEXP (x, 1);
17174 else
17175 constant = 0;
17177 if (constant)
17179 changed = true;
17180 x = gen_rtx_PLUS (Pmode,
17181 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
17182 XEXP (XEXP (XEXP (x, 0), 1), 0)),
17183 plus_constant (Pmode, other,
17184 INTVAL (constant)));
17188 if (changed && ix86_legitimate_address_p (mode, x, false))
17189 return x;
17191 if (GET_CODE (XEXP (x, 0)) == MULT)
17193 changed = true;
17194 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
17197 if (GET_CODE (XEXP (x, 1)) == MULT)
17199 changed = true;
17200 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
17203 if (changed
17204 && REG_P (XEXP (x, 1))
17205 && REG_P (XEXP (x, 0)))
17206 return x;
17208 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
17210 changed = true;
17211 x = legitimize_pic_address (x, 0);
17214 if (changed && ix86_legitimate_address_p (mode, x, false))
17215 return x;
17217 if (REG_P (XEXP (x, 0)))
17219 rtx temp = gen_reg_rtx (Pmode);
17220 rtx val = force_operand (XEXP (x, 1), temp);
17221 if (val != temp)
17223 val = convert_to_mode (Pmode, val, 1);
17224 emit_move_insn (temp, val);
17227 XEXP (x, 1) = temp;
17228 return x;
17231 else if (REG_P (XEXP (x, 1)))
17233 rtx temp = gen_reg_rtx (Pmode);
17234 rtx val = force_operand (XEXP (x, 0), temp);
17235 if (val != temp)
17237 val = convert_to_mode (Pmode, val, 1);
17238 emit_move_insn (temp, val);
17241 XEXP (x, 0) = temp;
17242 return x;
17246 return x;
17249 /* Print an integer constant expression in assembler syntax. Addition
17250 and subtraction are the only arithmetic that may appear in these
17251 expressions. FILE is the stdio stream to write to, X is the rtx, and
17252 CODE is the operand print code from the output string. */
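/* Typical outputs (illustrative): "foo@GOT", "foo@GOTOFF", "foo@PLTOFF",
   "foo@GOTPCREL(%rip)", "foo@tpoff", "foo@dtpoff", or "foo@PLT" when
   the 'P' code is applied to a non-local symbol.  */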
17254 static void
17255 output_pic_addr_const (FILE *file, rtx x, int code)
17257 char buf[256];
17259 switch (GET_CODE (x))
17261 case PC:
17262 gcc_assert (flag_pic);
17263 putc ('.', file);
17264 break;
17266 case SYMBOL_REF:
17267 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
17268 output_addr_const (file, x);
17269 else
17271 const char *name = XSTR (x, 0);
17273 /* Mark the decl as referenced so that cgraph will
17274 output the function. */
17275 if (SYMBOL_REF_DECL (x))
17276 mark_decl_referenced (SYMBOL_REF_DECL (x));
17278 #if TARGET_MACHO
17279 if (MACHOPIC_INDIRECT
17280 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
17281 name = machopic_indirection_name (x, /*stub_p=*/true);
17282 #endif
17283 assemble_name (file, name);
17285 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
17286 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
17287 fputs ("@PLT", file);
17288 break;
17290 case LABEL_REF:
17291 x = XEXP (x, 0);
17292 /* FALLTHRU */
17293 case CODE_LABEL:
17294 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
17295 assemble_name (asm_out_file, buf);
17296 break;
17298 case CONST_INT:
17299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17300 break;
17302 case CONST:
17303 /* This used to output parentheses around the expression,
17304 but that does not work on the 386 (either ATT or BSD assembler). */
17305 output_pic_addr_const (file, XEXP (x, 0), code);
17306 break;
17308 case CONST_DOUBLE:
17309 /* We can't handle floating point constants;
17310 TARGET_PRINT_OPERAND must handle them. */
17311 output_operand_lossage ("floating constant misused");
17312 break;
17314 case PLUS:
17315 /* Some assemblers need integer constants to appear first. */
17316 if (CONST_INT_P (XEXP (x, 0)))
17318 output_pic_addr_const (file, XEXP (x, 0), code);
17319 putc ('+', file);
17320 output_pic_addr_const (file, XEXP (x, 1), code);
17322 else
17324 gcc_assert (CONST_INT_P (XEXP (x, 1)));
17325 output_pic_addr_const (file, XEXP (x, 1), code);
17326 putc ('+', file);
17327 output_pic_addr_const (file, XEXP (x, 0), code);
17329 break;
17331 case MINUS:
17332 if (!TARGET_MACHO)
17333 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
17334 output_pic_addr_const (file, XEXP (x, 0), code);
17335 putc ('-', file);
17336 output_pic_addr_const (file, XEXP (x, 1), code);
17337 if (!TARGET_MACHO)
17338 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
17339 break;
17341 case UNSPEC:
17342 gcc_assert (XVECLEN (x, 0) == 1);
17343 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
17344 switch (XINT (x, 1))
17346 case UNSPEC_GOT:
17347 fputs ("@GOT", file);
17348 break;
17349 case UNSPEC_GOTOFF:
17350 fputs ("@GOTOFF", file);
17351 break;
17352 case UNSPEC_PLTOFF:
17353 fputs ("@PLTOFF", file);
17354 break;
17355 case UNSPEC_PCREL:
17356 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17357 "(%rip)" : "[rip]", file);
17358 break;
17359 case UNSPEC_GOTPCREL:
17360 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17361 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
17362 break;
17363 case UNSPEC_GOTTPOFF:
17364 /* FIXME: This might be @TPOFF in Sun ld too. */
17365 fputs ("@gottpoff", file);
17366 break;
17367 case UNSPEC_TPOFF:
17368 fputs ("@tpoff", file);
17369 break;
17370 case UNSPEC_NTPOFF:
17371 if (TARGET_64BIT)
17372 fputs ("@tpoff", file);
17373 else
17374 fputs ("@ntpoff", file);
17375 break;
17376 case UNSPEC_DTPOFF:
17377 fputs ("@dtpoff", file);
17378 break;
17379 case UNSPEC_GOTNTPOFF:
17380 if (TARGET_64BIT)
17381 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17382 "@gottpoff(%rip)": "@gottpoff[rip]", file);
17383 else
17384 fputs ("@gotntpoff", file);
17385 break;
17386 case UNSPEC_INDNTPOFF:
17387 fputs ("@indntpoff", file);
17388 break;
17389 #if TARGET_MACHO
17390 case UNSPEC_MACHOPIC_OFFSET:
17391 putc ('-', file);
17392 machopic_output_function_base_name (file);
17393 break;
17394 #endif
17395 default:
17396 output_operand_lossage ("invalid UNSPEC as operand");
17397 break;
17399 break;
17401 default:
17402 output_operand_lossage ("invalid expression as operand");
17406 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
17407 We need to emit DTP-relative relocations. */
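/* For a 4-byte entry this emits roughly ".long foo@dtpoff"; for an
   8-byte entry a ", 0" upper half is appended (assuming ASM_LONG
   expands to the 32-bit data directive).  */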
17409 static void ATTRIBUTE_UNUSED
17410 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
17412 fputs (ASM_LONG, file);
17413 output_addr_const (file, x);
17414 fputs ("@dtpoff", file);
17415 switch (size)
17417 case 4:
17418 break;
17419 case 8:
17420 fputs (", 0", file);
17421 break;
17422 default:
17423 gcc_unreachable ();
17427 /* Return true if X is a representation of the PIC register. This copes
17428 with calls from ix86_find_base_term, where the register might have
17429 been replaced by a cselib value. */
17431 static bool
17432 ix86_pic_register_p (rtx x)
17434 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
17435 return (pic_offset_table_rtx
17436 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
17437 else if (!REG_P (x))
17438 return false;
17439 else if (pic_offset_table_rtx)
17441 if (REGNO (x) == REGNO (pic_offset_table_rtx))
17442 return true;
17443 if (HARD_REGISTER_P (x)
17444 && !HARD_REGISTER_P (pic_offset_table_rtx)
17445 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
17446 return true;
17447 return false;
17449 else
17450 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
17453 /* Helper function for ix86_delegitimize_address.
17454 Attempt to delegitimize TLS local-exec accesses. */
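/* Sketch: a local-exec access such as %fs:foo@tpoff is represented as
   a MEM in the TLS segment whose displacement wraps
   (const (unspec [foo] UNSPEC_NTPOFF)); this routine unwraps that and
   hands back plain `foo', re-adding any base, index or constant
   offset that was folded into the address.  */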
17456 static rtx
17457 ix86_delegitimize_tls_address (rtx orig_x)
17459 rtx x = orig_x, unspec;
17460 struct ix86_address addr;
17462 if (!TARGET_TLS_DIRECT_SEG_REFS)
17463 return orig_x;
17464 if (MEM_P (x))
17465 x = XEXP (x, 0);
17466 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
17467 return orig_x;
17468 if (ix86_decompose_address (x, &addr) == 0
17469 || addr.seg != DEFAULT_TLS_SEG_REG
17470 || addr.disp == NULL_RTX
17471 || GET_CODE (addr.disp) != CONST)
17472 return orig_x;
17473 unspec = XEXP (addr.disp, 0);
17474 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
17475 unspec = XEXP (unspec, 0);
17476 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
17477 return orig_x;
17478 x = XVECEXP (unspec, 0, 0);
17479 gcc_assert (GET_CODE (x) == SYMBOL_REF);
17480 if (unspec != XEXP (addr.disp, 0))
17481 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
17482 if (addr.index)
17484 rtx idx = addr.index;
17485 if (addr.scale != 1)
17486 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
17487 x = gen_rtx_PLUS (Pmode, idx, x);
17489 if (addr.base)
17490 x = gen_rtx_PLUS (Pmode, addr.base, x);
17491 if (MEM_P (orig_x))
17492 x = replace_equiv_address_nv (orig_x, x);
17493 return x;
17496 /* In the name of slightly smaller debug output, and to cater to
17497 general assembler lossage, recognize PIC+GOTOFF and turn it back
17498 into a direct symbol reference.
17500 On Darwin, this is necessary to avoid a crash, because Darwin
17501 has a different PIC label for each routine but the DWARF debugging
17502 information is not associated with any particular routine, so it's
17503 necessary to remove references to the PIC label from RTL stored by
17504 the DWARF output code.
17506 This helper is used in the normal ix86_delegitimize_address
17507 entrypoint (e.g. used in the target delegitimization hook) and
17508 in ix86_find_base_term. As compile time memory optimization, we
17509 avoid allocating rtxes that will not change anything on the outcome
17510 of the callers (find_base_value and find_base_term). */
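/* Illustrative example: with -fpic on ia32, an access like
     movl foo@GOTOFF(%ebx), %eax
   is represented as
     (mem (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF))))
   and for debug output we want to recover plain `foo' from it.  */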
17512 static inline rtx
17513 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
17515 rtx orig_x = delegitimize_mem_from_attrs (x);
17516 /* addend is NULL or some rtx if x is something+GOTOFF where
17517 something doesn't include the PIC register. */
17518 rtx addend = NULL_RTX;
17519 /* reg_addend is NULL or a multiple of some register. */
17520 rtx reg_addend = NULL_RTX;
17521 /* const_addend is NULL or a const_int. */
17522 rtx const_addend = NULL_RTX;
17523 /* This is the result, or NULL. */
17524 rtx result = NULL_RTX;
17526 x = orig_x;
17528 if (MEM_P (x))
17529 x = XEXP (x, 0);
17531 if (TARGET_64BIT)
17533 if (GET_CODE (x) == CONST
17534 && GET_CODE (XEXP (x, 0)) == PLUS
17535 && GET_MODE (XEXP (x, 0)) == Pmode
17536 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17537 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
17538 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
17540 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
17541 base. A CONST can't be arg_pointer_rtx based. */
17542 if (base_term_p && MEM_P (orig_x))
17543 return orig_x;
17544 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
17545 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
17546 if (MEM_P (orig_x))
17547 x = replace_equiv_address_nv (orig_x, x);
17548 return x;
17551 if (GET_CODE (x) == CONST
17552 && GET_CODE (XEXP (x, 0)) == UNSPEC
17553 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
17554 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
17555 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
17557 x = XVECEXP (XEXP (x, 0), 0, 0);
17558 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
17560 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
17561 if (x == NULL_RTX)
17562 return orig_x;
17564 return x;
17567 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
17568 return ix86_delegitimize_tls_address (orig_x);
17570 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
17571 and -mcmodel=medium -fpic. */
17574 if (GET_CODE (x) != PLUS
17575 || GET_CODE (XEXP (x, 1)) != CONST)
17576 return ix86_delegitimize_tls_address (orig_x);
17578 if (ix86_pic_register_p (XEXP (x, 0)))
17579 /* %ebx + GOT/GOTOFF */
17581 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17583 /* %ebx + %reg * scale + GOT/GOTOFF */
17584 reg_addend = XEXP (x, 0);
17585 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
17586 reg_addend = XEXP (reg_addend, 1);
17587 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
17588 reg_addend = XEXP (reg_addend, 0);
17589 else
17591 reg_addend = NULL_RTX;
17592 addend = XEXP (x, 0);
17595 else
17596 addend = XEXP (x, 0);
17598 x = XEXP (XEXP (x, 1), 0);
17599 if (GET_CODE (x) == PLUS
17600 && CONST_INT_P (XEXP (x, 1)))
17602 const_addend = XEXP (x, 1);
17603 x = XEXP (x, 0);
17606 if (GET_CODE (x) == UNSPEC
17607 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
17608 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
17609 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
17610 && !MEM_P (orig_x) && !addend)))
17611 result = XVECEXP (x, 0, 0);
17613 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
17614 && !MEM_P (orig_x))
17615 result = XVECEXP (x, 0, 0);
17617 if (! result)
17618 return ix86_delegitimize_tls_address (orig_x);
17620 /* For (PLUS something CONST_INT) both find_base_{value,term} just
17621 recurse on the first operand. */
17622 if (const_addend && !base_term_p)
17623 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
17624 if (reg_addend)
17625 result = gen_rtx_PLUS (Pmode, reg_addend, result);
17626 if (addend)
17628 /* If the rest of original X doesn't involve the PIC register, add
17629 addend and subtract pic_offset_table_rtx. This can happen e.g.
17630 for code like:
17631 leal (%ebx, %ecx, 4), %ecx
17633 movl foo@GOTOFF(%ecx), %edx
17634 in which case we return (%ecx - %ebx) + foo
17635 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
17636 and reload has completed. Don't do the latter for debug,
17637 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
17638 if (pic_offset_table_rtx
17639 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
17640 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
17641 pic_offset_table_rtx),
17642 result);
17643 else if (base_term_p
17644 && pic_offset_table_rtx
17645 && !TARGET_MACHO
17646 && !TARGET_VXWORKS_RTP)
17648 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
17649 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
17650 result = gen_rtx_PLUS (Pmode, tmp, result);
17652 else
17653 return orig_x;
17655 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
17657 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
17658 if (result == NULL_RTX)
17659 return orig_x;
17661 return result;
17664 /* The normal instantiation of the above template. */
17666 static rtx
17667 ix86_delegitimize_address (rtx x)
17669 return ix86_delegitimize_address_1 (x, false);
17672 /* If X is a machine specific address (i.e. a symbol or label being
17673 referenced as a displacement from the GOT implemented using an
17674 UNSPEC), then return the base term. Otherwise return X. */
17677 ix86_find_base_term (rtx x)
17679 rtx term;
17681 if (TARGET_64BIT)
17683 if (GET_CODE (x) != CONST)
17684 return x;
17685 term = XEXP (x, 0);
17686 if (GET_CODE (term) == PLUS
17687 && CONST_INT_P (XEXP (term, 1)))
17688 term = XEXP (term, 0);
17689 if (GET_CODE (term) != UNSPEC
17690 || (XINT (term, 1) != UNSPEC_GOTPCREL
17691 && XINT (term, 1) != UNSPEC_PCREL))
17692 return x;
17694 return XVECEXP (term, 0, 0);
17697 return ix86_delegitimize_address_1 (x, true);
17700 /* Return true if X shouldn't be emitted into the debug info.
17701 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
17702 symbol easily into the .debug_info section, so we need not
17703 delegitimize it, but can instead assemble it as @gotoff.
17704 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
17705 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
17707 static bool
17708 ix86_const_not_ok_for_debug_p (rtx x)
17710 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
17711 return true;
17713 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
17714 return true;
17716 return false;
17719 static void
17720 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
17721 bool fp, FILE *file)
17723 const char *suffix;
17725 if (mode == CCFPmode)
17727 code = ix86_fp_compare_code_to_integer (code);
17728 mode = CCmode;
17730 if (reverse)
17731 code = reverse_condition (code);
17733 switch (code)
17735 case EQ:
17736 gcc_assert (mode != CCGZmode);
17737 switch (mode)
17739 case E_CCAmode:
17740 suffix = "a";
17741 break;
17742 case E_CCCmode:
17743 suffix = "c";
17744 break;
17745 case E_CCOmode:
17746 suffix = "o";
17747 break;
17748 case E_CCPmode:
17749 suffix = "p";
17750 break;
17751 case E_CCSmode:
17752 suffix = "s";
17753 break;
17754 default:
17755 suffix = "e";
17756 break;
17758 break;
17759 case NE:
17760 gcc_assert (mode != CCGZmode);
17761 switch (mode)
17763 case E_CCAmode:
17764 suffix = "na";
17765 break;
17766 case E_CCCmode:
17767 suffix = "nc";
17768 break;
17769 case E_CCOmode:
17770 suffix = "no";
17771 break;
17772 case E_CCPmode:
17773 suffix = "np";
17774 break;
17775 case E_CCSmode:
17776 suffix = "ns";
17777 break;
17778 default:
17779 suffix = "ne";
17780 break;
17782 break;
17783 case GT:
17784 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17785 suffix = "g";
17786 break;
17787 case GTU:
17788 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17789 Those same assemblers have the same but opposite lossage on cmov. */
17790 if (mode == CCmode)
17791 suffix = fp ? "nbe" : "a";
17792 else
17793 gcc_unreachable ();
17794 break;
17795 case LT:
17796 switch (mode)
17798 case E_CCNOmode:
17799 case E_CCGOCmode:
17800 suffix = "s";
17801 break;
17803 case E_CCmode:
17804 case E_CCGCmode:
17805 case E_CCGZmode:
17806 suffix = "l";
17807 break;
17809 default:
17810 gcc_unreachable ();
17812 break;
17813 case LTU:
17814 if (mode == CCmode || mode == CCGZmode)
17815 suffix = "b";
17816 else if (mode == CCCmode)
17817 suffix = fp ? "b" : "c";
17818 else
17819 gcc_unreachable ();
17820 break;
17821 case GE:
17822 switch (mode)
17824 case E_CCNOmode:
17825 case E_CCGOCmode:
17826 suffix = "ns";
17827 break;
17829 case E_CCmode:
17830 case E_CCGCmode:
17831 case E_CCGZmode:
17832 suffix = "ge";
17833 break;
17835 default:
17836 gcc_unreachable ();
17838 break;
17839 case GEU:
17840 if (mode == CCmode || mode == CCGZmode)
17841 suffix = "nb";
17842 else if (mode == CCCmode)
17843 suffix = fp ? "nb" : "nc";
17844 else
17845 gcc_unreachable ();
17846 break;
17847 case LE:
17848 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17849 suffix = "le";
17850 break;
17851 case LEU:
17852 if (mode == CCmode)
17853 suffix = "be";
17854 else
17855 gcc_unreachable ();
17856 break;
17857 case UNORDERED:
17858 suffix = fp ? "u" : "p";
17859 break;
17860 case ORDERED:
17861 suffix = fp ? "nu" : "np";
17862 break;
17863 default:
17864 gcc_unreachable ();
17866 fputs (suffix, file);
17869 /* Print the name of register X to FILE based on its machine mode and number.
17870 If CODE is 'w', pretend the mode is HImode.
17871 If CODE is 'b', pretend the mode is QImode.
17872 If CODE is 'k', pretend the mode is SImode.
17873 If CODE is 'q', pretend the mode is DImode.
17874 If CODE is 'x', pretend the mode is V4SFmode.
17875 If CODE is 't', pretend the mode is V8SFmode.
17876 If CODE is 'g', pretend the mode is V16SFmode.
17877 If CODE is 'h', pretend the reg is the 'high' byte register.
17878 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
17879 If CODE is 'd', duplicate the operand for an AVX instruction.
17880 If CODE is 'V', print naked full integer register name without %.
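/* For example (AT&T syntax, illustrative): given (reg:DI 0), code 'b'
   prints "%al", 'w' "%ax", 'k' "%eax", 'q' "%rax" and 'h' "%ah";
   for an SSE register, 't' selects the "%ymm" form and 'g' the
   "%zmm" form.  */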
17883 void
17884 print_reg (rtx x, int code, FILE *file)
17886 const char *reg;
17887 int msize;
17888 unsigned int regno;
17889 bool duplicated;
17891 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
17892 putc ('%', file);
17894 if (x == pc_rtx)
17896 gcc_assert (TARGET_64BIT);
17897 fputs ("rip", file);
17898 return;
17901 if (code == 'y' && STACK_TOP_P (x))
17903 fputs ("st(0)", file);
17904 return;
17907 if (code == 'w')
17908 msize = 2;
17909 else if (code == 'b')
17910 msize = 1;
17911 else if (code == 'k')
17912 msize = 4;
17913 else if (code == 'q')
17914 msize = 8;
17915 else if (code == 'h')
17916 msize = 0;
17917 else if (code == 'x')
17918 msize = 16;
17919 else if (code == 't')
17920 msize = 32;
17921 else if (code == 'g')
17922 msize = 64;
17923 else
17924 msize = GET_MODE_SIZE (GET_MODE (x));
17926 regno = REGNO (x);
17928 if (regno == ARG_POINTER_REGNUM
17929 || regno == FRAME_POINTER_REGNUM
17930 || regno == FPSR_REG
17931 || regno == FPCR_REG)
17933 output_operand_lossage
17934 ("invalid use of register '%s'", reg_names[regno]);
17935 return;
17937 else if (regno == FLAGS_REG)
17939 output_operand_lossage ("invalid use of asm flag output");
17940 return;
17943 if (code == 'V')
17945 if (GENERAL_REGNO_P (regno))
17946 msize = GET_MODE_SIZE (word_mode);
17947 else
17948 error ("'V' modifier on non-integer register");
17951 duplicated = code == 'd' && TARGET_AVX;
17953 switch (msize)
17955 case 16:
17956 case 12:
17957 case 8:
17958 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17959 warning (0, "unsupported size for integer register");
17960 /* FALLTHRU */
17961 case 4:
17962 if (LEGACY_INT_REGNO_P (regno))
17963 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17964 /* FALLTHRU */
17965 case 2:
17966 normal:
17967 reg = hi_reg_name[regno];
17968 break;
17969 case 1:
17970 if (regno >= ARRAY_SIZE (qi_reg_name))
17971 goto normal;
17972 if (!ANY_QI_REGNO_P (regno))
17973 error ("unsupported size for integer register");
17974 reg = qi_reg_name[regno];
17975 break;
17976 case 0:
17977 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17978 goto normal;
17979 reg = qi_high_reg_name[regno];
17980 break;
17981 case 32:
17982 case 64:
17983 if (SSE_REGNO_P (regno))
17985 gcc_assert (!duplicated);
17986 putc (msize == 32 ? 'y' : 'z', file);
17987 reg = hi_reg_name[regno] + 1;
17988 break;
17990 goto normal;
17991 default:
17992 gcc_unreachable ();
17995 fputs (reg, file);
17997 /* Irritatingly, the AMD extended registers use a
17998 different naming convention: "r%d[bwd]". */
17999 if (REX_INT_REGNO_P (regno))
18001 gcc_assert (TARGET_64BIT);
18002 switch (msize)
18004 case 0:
18005 error ("extended registers have no high halves");
18006 break;
18007 case 1:
18008 putc ('b', file);
18009 break;
18010 case 2:
18011 putc ('w', file);
18012 break;
18013 case 4:
18014 putc ('d', file);
18015 break;
18016 case 8:
18017 /* no suffix */
18018 break;
18019 default:
18020 error ("unsupported operand size for extended register");
18021 break;
18023 return;
18026 if (duplicated)
18028 if (ASSEMBLER_DIALECT == ASM_ATT)
18029 fprintf (file, ", %%%s", reg);
18030 else
18031 fprintf (file, ", %s", reg);
18035 /* Meaning of CODE:
18036 L,W,B,Q,S,T -- print the opcode suffix for the specified size of operand.
18037 C -- print opcode suffix for set/cmov insn.
18038 c -- like C, but print reversed condition
18039 F,f -- likewise, but for floating-point.
18040 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
18041 otherwise nothing
18042 R -- print embedded rounding and sae.
18043 r -- print only sae.
18044 z -- print the opcode suffix for the size of the current operand.
18045 Z -- likewise, with special suffixes for x87 instructions.
18046 * -- print a star (in certain assembler syntax)
18047 A -- print an absolute memory reference.
18048 E -- print address with DImode register names if TARGET_64BIT.
18049 w -- print the operand as if it's a "word" (HImode) even if it isn't.
18050 s -- print a shift double count, followed by the assembler's argument
18051 delimiter.
18052 b -- print the QImode name of the register for the indicated operand.
18053 %b0 would print %al if operands[0] is reg 0.
18054 w -- likewise, print the HImode name of the register.
18055 k -- likewise, print the SImode name of the register.
18056 q -- likewise, print the DImode name of the register.
18057 x -- likewise, print the V4SFmode name of the register.
18058 t -- likewise, print the V8SFmode name of the register.
18059 g -- likewise, print the V16SFmode name of the register.
18060 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
18061 y -- print "st(0)" instead of "st" as a register.
18062 d -- print the duplicated register operand for an AVX instruction.
18063 D -- print condition for SSE cmp instruction.
18064 P -- if PIC, print an @PLT suffix.
18065 p -- print raw symbol name.
18066 X -- don't print any sort of PIC '@' suffix for a symbol.
18067 & -- print some in-use local-dynamic symbol name.
18068 H -- print a memory address offset by 8; used for sse high-parts
18069 Y -- print condition for XOP pcom* instruction.
18070 V -- print naked full integer register name without %.
18071 + -- print a branch hint as 'cs' or 'ds' prefix
18072 ; -- print a semicolon (after prefixes, due to a bug in older gas).
18073 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
18074 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
18075 ! -- print MPX prefix for jxx/call/ret instructions if required.
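/* For example (illustrative, AT&T syntax): in an asm template "%k0"
   prints the SImode name of operand 0 (e.g. "%eax"), "%b0" its QImode
   name ("%al"), and "%z0" the size suffix matching operand 0's mode.  */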
18078 void
18079 ix86_print_operand (FILE *file, rtx x, int code)
18081 if (code)
18083 switch (code)
18085 case 'A':
18086 switch (ASSEMBLER_DIALECT)
18088 case ASM_ATT:
18089 putc ('*', file);
18090 break;
18092 case ASM_INTEL:
18093 /* Intel syntax. For absolute addresses, registers should not
18094 be surrounded by brackets. */
18095 if (!REG_P (x))
18097 putc ('[', file);
18098 ix86_print_operand (file, x, 0);
18099 putc (']', file);
18100 return;
18102 break;
18104 default:
18105 gcc_unreachable ();
18108 ix86_print_operand (file, x, 0);
18109 return;
18111 case 'E':
18112 /* Wrap address in an UNSPEC to declare special handling. */
18113 if (TARGET_64BIT)
18114 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
18116 output_address (VOIDmode, x);
18117 return;
18119 case 'L':
18120 if (ASSEMBLER_DIALECT == ASM_ATT)
18121 putc ('l', file);
18122 return;
18124 case 'W':
18125 if (ASSEMBLER_DIALECT == ASM_ATT)
18126 putc ('w', file);
18127 return;
18129 case 'B':
18130 if (ASSEMBLER_DIALECT == ASM_ATT)
18131 putc ('b', file);
18132 return;
18134 case 'Q':
18135 if (ASSEMBLER_DIALECT == ASM_ATT)
18136 putc ('l', file);
18137 return;
18139 case 'S':
18140 if (ASSEMBLER_DIALECT == ASM_ATT)
18141 putc ('s', file);
18142 return;
18144 case 'T':
18145 if (ASSEMBLER_DIALECT == ASM_ATT)
18146 putc ('t', file);
18147 return;
18149 case 'O':
18150 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
18151 if (ASSEMBLER_DIALECT != ASM_ATT)
18152 return;
18154 switch (GET_MODE_SIZE (GET_MODE (x)))
18156 case 2:
18157 putc ('w', file);
18158 break;
18160 case 4:
18161 putc ('l', file);
18162 break;
18164 case 8:
18165 putc ('q', file);
18166 break;
18168 default:
18169 output_operand_lossage ("invalid operand size for operand "
18170 "code 'O'");
18171 return;
18174 putc ('.', file);
18175 #endif
18176 return;
18178 case 'z':
18179 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
18181 /* Opcodes don't get size suffixes when using Intel syntax. */
18182 if (ASSEMBLER_DIALECT == ASM_INTEL)
18183 return;
18185 switch (GET_MODE_SIZE (GET_MODE (x)))
18187 case 1:
18188 putc ('b', file);
18189 return;
18191 case 2:
18192 putc ('w', file);
18193 return;
18195 case 4:
18196 putc ('l', file);
18197 return;
18199 case 8:
18200 putc ('q', file);
18201 return;
18203 default:
18204 output_operand_lossage ("invalid operand size for operand "
18205 "code 'z'");
18206 return;
18210 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
18211 warning (0, "non-integer operand used with operand code 'z'");
18212 /* FALLTHRU */
18214 case 'Z':
18215 /* 387 opcodes don't get size suffixes when using Intel syntax. */
18216 if (ASSEMBLER_DIALECT == ASM_INTEL)
18217 return;
18219 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
18221 switch (GET_MODE_SIZE (GET_MODE (x)))
18223 case 2:
18224 #ifdef HAVE_AS_IX86_FILDS
18225 putc ('s', file);
18226 #endif
18227 return;
18229 case 4:
18230 putc ('l', file);
18231 return;
18233 case 8:
18234 #ifdef HAVE_AS_IX86_FILDQ
18235 putc ('q', file);
18236 #else
18237 fputs ("ll", file);
18238 #endif
18239 return;
18241 default:
18242 break;
18245 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
18247 /* 387 opcodes don't get size suffixes
18248 if the operands are registers. */
18249 if (STACK_REG_P (x))
18250 return;
18252 switch (GET_MODE_SIZE (GET_MODE (x)))
18254 case 4:
18255 putc ('s', file);
18256 return;
18258 case 8:
18259 putc ('l', file);
18260 return;
18262 case 12:
18263 case 16:
18264 putc ('t', file);
18265 return;
18267 default:
18268 break;
18271 else
18273 output_operand_lossage ("invalid operand type used with "
18274 "operand code 'Z'");
18275 return;
18278 output_operand_lossage ("invalid operand size for operand code 'Z'");
18279 return;
18281 case 'd':
18282 case 'b':
18283 case 'w':
18284 case 'k':
18285 case 'q':
18286 case 'h':
18287 case 't':
18288 case 'g':
18289 case 'y':
18290 case 'x':
18291 case 'X':
18292 case 'P':
18293 case 'p':
18294 case 'V':
18295 break;
18297 case 's':
18298 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
18300 ix86_print_operand (file, x, 0);
18301 fputs (", ", file);
18303 return;
18305 case 'Y':
18306 switch (GET_CODE (x))
18308 case NE:
18309 fputs ("neq", file);
18310 break;
18311 case EQ:
18312 fputs ("eq", file);
18313 break;
18314 case GE:
18315 case GEU:
18316 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
18317 break;
18318 case GT:
18319 case GTU:
18320 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
18321 break;
18322 case LE:
18323 case LEU:
18324 fputs ("le", file);
18325 break;
18326 case LT:
18327 case LTU:
18328 fputs ("lt", file);
18329 break;
18330 case UNORDERED:
18331 fputs ("unord", file);
18332 break;
18333 case ORDERED:
18334 fputs ("ord", file);
18335 break;
18336 case UNEQ:
18337 fputs ("ueq", file);
18338 break;
18339 case UNGE:
18340 fputs ("nlt", file);
18341 break;
18342 case UNGT:
18343 fputs ("nle", file);
18344 break;
18345 case UNLE:
18346 fputs ("ule", file);
18347 break;
18348 case UNLT:
18349 fputs ("ult", file);
18350 break;
18351 case LTGT:
18352 fputs ("une", file);
18353 break;
18354 default:
18355 output_operand_lossage ("operand is not a condition code, "
18356 "invalid operand code 'Y'");
18357 return;
18359 return;
18361 case 'D':
18362 /* A little bit of braindamage here.  The SSE compare instructions
18363 use completely different names for the comparisons than the
18364 fp conditional moves do. */
18365 switch (GET_CODE (x))
18367 case UNEQ:
18368 if (TARGET_AVX)
18370 fputs ("eq_us", file);
18371 break;
18373 /* FALLTHRU */
18374 case EQ:
18375 fputs ("eq", file);
18376 break;
18377 case UNLT:
18378 if (TARGET_AVX)
18380 fputs ("nge", file);
18381 break;
18383 /* FALLTHRU */
18384 case LT:
18385 fputs ("lt", file);
18386 break;
18387 case UNLE:
18388 if (TARGET_AVX)
18390 fputs ("ngt", file);
18391 break;
18393 /* FALLTHRU */
18394 case LE:
18395 fputs ("le", file);
18396 break;
18397 case UNORDERED:
18398 fputs ("unord", file);
18399 break;
18400 case LTGT:
18401 if (TARGET_AVX)
18403 fputs ("neq_oq", file);
18404 break;
18406 /* FALLTHRU */
18407 case NE:
18408 fputs ("neq", file);
18409 break;
18410 case GE:
18411 if (TARGET_AVX)
18413 fputs ("ge", file);
18414 break;
18416 /* FALLTHRU */
18417 case UNGE:
18418 fputs ("nlt", file);
18419 break;
18420 case GT:
18421 if (TARGET_AVX)
18423 fputs ("gt", file);
18424 break;
18426 /* FALLTHRU */
18427 case UNGT:
18428 fputs ("nle", file);
18429 break;
18430 case ORDERED:
18431 fputs ("ord", file);
18432 break;
18433 default:
18434 output_operand_lossage ("operand is not a condition code, "
18435 "invalid operand code 'D'");
18436 return;
18438 return;
18440 case 'F':
18441 case 'f':
18442 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
18443 if (ASSEMBLER_DIALECT == ASM_ATT)
18444 putc ('.', file);
18445 gcc_fallthrough ();
18446 #endif
18448 case 'C':
18449 case 'c':
18450 if (!COMPARISON_P (x))
18452 output_operand_lossage ("operand is not a condition code, "
18453 "invalid operand code '%c'", code);
18454 return;
18456 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
18457 code == 'c' || code == 'f',
18458 code == 'F' || code == 'f',
18459 file);
18460 return;
18462 case 'H':
18463 if (!offsettable_memref_p (x))
18465 output_operand_lossage ("operand is not an offsettable memory "
18466 "reference, invalid operand code 'H'");
18467 return;
18469 /* It doesn't actually matter what mode we use here, as we're
18470 only going to use this for printing. */
18471 x = adjust_address_nv (x, DImode, 8);
18472 /* Output 'qword ptr' for intel assembler dialect. */
18473 if (ASSEMBLER_DIALECT == ASM_INTEL)
18474 code = 'q';
18475 break;
18477 case 'K':
18478 if (!CONST_INT_P (x))
18480 output_operand_lossage ("operand is not an integer, invalid "
18481 "operand code 'K'");
18482 return;
18485 if (INTVAL (x) & IX86_HLE_ACQUIRE)
18486 #ifdef HAVE_AS_IX86_HLE
18487 fputs ("xacquire ", file);
18488 #else
18489 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
18490 #endif
18491 else if (INTVAL (x) & IX86_HLE_RELEASE)
18492 #ifdef HAVE_AS_IX86_HLE
18493 fputs ("xrelease ", file);
18494 #else
18495 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
18496 #endif
18497 /* We do not want to print value of the operand. */
18498 return;
18500 case 'N':
18501 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
18502 fputs ("{z}", file);
18503 return;
18505 case 'r':
18506 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
18508 output_operand_lossage ("operand is not a specific integer, "
18509 "invalid operand code 'r'");
18510 return;
18513 if (ASSEMBLER_DIALECT == ASM_INTEL)
18514 fputs (", ", file);
18516 fputs ("{sae}", file);
18518 if (ASSEMBLER_DIALECT == ASM_ATT)
18519 fputs (", ", file);
18521 return;
18523 case 'R':
18524 if (!CONST_INT_P (x))
18526 output_operand_lossage ("operand is not an integer, invalid "
18527 "operand code 'R'");
18528 return;
18531 if (ASSEMBLER_DIALECT == ASM_INTEL)
18532 fputs (", ", file);
18534 switch (INTVAL (x))
18536 case ROUND_NEAREST_INT | ROUND_SAE:
18537 fputs ("{rn-sae}", file);
18538 break;
18539 case ROUND_NEG_INF | ROUND_SAE:
18540 fputs ("{rd-sae}", file);
18541 break;
18542 case ROUND_POS_INF | ROUND_SAE:
18543 fputs ("{ru-sae}", file);
18544 break;
18545 case ROUND_ZERO | ROUND_SAE:
18546 fputs ("{rz-sae}", file);
18547 break;
18548 default:
18549 output_operand_lossage ("operand is not a specific integer, "
18550 "invalid operand code 'R'");
18553 if (ASSEMBLER_DIALECT == ASM_ATT)
18554 fputs (", ", file);
18556 return;
18558 case '*':
18559 if (ASSEMBLER_DIALECT == ASM_ATT)
18560 putc ('*', file);
18561 return;
18563 case '&':
18565 const char *name = get_some_local_dynamic_name ();
18566 if (name == NULL)
18567 output_operand_lossage ("'%%&' used without any "
18568 "local dynamic TLS references");
18569 else
18570 assemble_name (file, name);
18571 return;
18574 case '+':
18576 rtx x;
18578 if (!optimize
18579 || optimize_function_for_size_p (cfun)
18580 || !TARGET_BRANCH_PREDICTION_HINTS)
18581 return;
18583 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
18584 if (x)
18586 int pred_val = profile_probability::from_reg_br_prob_note
18587 (XINT (x, 0)).to_reg_br_prob_base ();
18589 if (pred_val < REG_BR_PROB_BASE * 45 / 100
18590 || pred_val > REG_BR_PROB_BASE * 55 / 100)
18592 bool taken = pred_val > REG_BR_PROB_BASE / 2;
18593 bool cputaken
18594 = final_forward_branch_p (current_output_insn) == 0;
18596 /* Emit hints only when the default branch prediction
18597 heuristics would fail. */
18598 if (taken != cputaken)
18600 /* We use 3e (DS) prefix for taken branches and
18601 2e (CS) prefix for not taken branches. */
18602 if (taken)
18603 fputs ("ds ; ", file);
18604 else
18605 fputs ("cs ; ", file);
18609 return;
18612 case ';':
18613 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
18614 putc (';', file);
18615 #endif
18616 return;
18618 case '~':
18619 putc (TARGET_AVX2 ? 'i' : 'f', file);
18620 return;
18622 case '^':
18623 if (TARGET_64BIT && Pmode != word_mode)
18624 fputs ("addr32 ", file);
18625 return;
18627 case '!':
18628 if (ix86_bnd_prefixed_insn_p (current_output_insn))
18629 fputs ("bnd ", file);
18630 if (ix86_notrack_prefixed_insn_p (current_output_insn))
18631 fputs ("notrack ", file);
18632 return;
18634 default:
18635 output_operand_lossage ("invalid operand code '%c'", code);
18639 if (REG_P (x))
18640 print_reg (x, code, file);
18642 else if (MEM_P (x))
18644 rtx addr = XEXP (x, 0);
18646 /* No `byte ptr' prefix for call instructions ... */
18647 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
18649 machine_mode mode = GET_MODE (x);
18650 const char *size;
18652 /* Check for explicit size override codes. */
18653 if (code == 'b')
18654 size = "BYTE";
18655 else if (code == 'w')
18656 size = "WORD";
18657 else if (code == 'k')
18658 size = "DWORD";
18659 else if (code == 'q')
18660 size = "QWORD";
18661 else if (code == 'x')
18662 size = "XMMWORD";
18663 else if (code == 't')
18664 size = "YMMWORD";
18665 else if (code == 'g')
18666 size = "ZMMWORD";
18667 else if (mode == BLKmode)
18668 /* ... or BLKmode operands, when not overridden. */
18669 size = NULL;
18670 else
18671 switch (GET_MODE_SIZE (mode))
18673 case 1: size = "BYTE"; break;
18674 case 2: size = "WORD"; break;
18675 case 4: size = "DWORD"; break;
18676 case 8: size = "QWORD"; break;
18677 case 12: size = "TBYTE"; break;
18678 case 16:
18679 if (mode == XFmode)
18680 size = "TBYTE";
18681 else
18682 size = "XMMWORD";
18683 break;
18684 case 32: size = "YMMWORD"; break;
18685 case 64: size = "ZMMWORD"; break;
18686 default:
18687 gcc_unreachable ();
18689 if (size)
18691 fputs (size, file);
18692 fputs (" PTR ", file);
18696 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
18697 output_operand_lossage ("invalid constraints for operand");
18698 else
18699 ix86_print_operand_address_as
18700 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
18703 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
18705 long l;
18707 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18709 if (ASSEMBLER_DIALECT == ASM_ATT)
18710 putc ('$', file);
18711 /* Sign extend 32bit SFmode immediate to 8 bytes. */
18712 if (code == 'q')
18713 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
18714 (unsigned long long) (int) l);
18715 else
18716 fprintf (file, "0x%08x", (unsigned int) l);
18719 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
18721 long l[2];
18723 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18725 if (ASSEMBLER_DIALECT == ASM_ATT)
18726 putc ('$', file);
18727 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
18730 /* These float cases don't actually occur as immediate operands. */
18731 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
18733 char dstr[30];
18735 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
18736 fputs (dstr, file);
18739 else
18741 /* We have patterns that allow zero sets of memory, for instance.
18742 In 64-bit mode, we should probably support all 8-byte vectors,
18743 since we can in fact encode that into an immediate. */
18744 if (GET_CODE (x) == CONST_VECTOR)
18746 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
18747 x = const0_rtx;
18750 if (code != 'P' && code != 'p')
18752 if (CONST_INT_P (x))
18754 if (ASSEMBLER_DIALECT == ASM_ATT)
18755 putc ('$', file);
18757 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18758 || GET_CODE (x) == LABEL_REF)
18760 if (ASSEMBLER_DIALECT == ASM_ATT)
18761 putc ('$', file);
18762 else
18763 fputs ("OFFSET FLAT:", file);
18766 if (CONST_INT_P (x))
18767 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18768 else if (flag_pic || MACHOPIC_INDIRECT)
18769 output_pic_addr_const (file, x, code);
18770 else
18771 output_addr_const (file, x);
18775 static bool
18776 ix86_print_operand_punct_valid_p (unsigned char code)
18778 return (code == '*' || code == '+' || code == '&' || code == ';'
18779 || code == '~' || code == '^' || code == '!');
18782 /* Print a memory operand whose address is ADDR. */
18784 static void
18785 ix86_print_operand_address_as (FILE *file, rtx addr,
18786 addr_space_t as, bool no_rip)
18788 struct ix86_address parts;
18789 rtx base, index, disp;
18790 int scale;
18791 int ok;
18792 bool vsib = false;
18793 int code = 0;
18795 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18797 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18798 gcc_assert (parts.index == NULL_RTX);
18799 parts.index = XVECEXP (addr, 0, 1);
18800 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18801 addr = XVECEXP (addr, 0, 0);
18802 vsib = true;
18804 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18806 gcc_assert (TARGET_64BIT);
18807 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18808 code = 'q';
18810 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18812 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18813 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18814 if (parts.base != NULL_RTX)
18816 parts.index = parts.base;
18817 parts.scale = 1;
18819 parts.base = XVECEXP (addr, 0, 0);
18820 addr = XVECEXP (addr, 0, 0);
18822 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18824 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18825 gcc_assert (parts.index == NULL_RTX);
18826 parts.index = XVECEXP (addr, 0, 1);
18827 addr = XVECEXP (addr, 0, 0);
18829 else
18830 ok = ix86_decompose_address (addr, &parts);
18832 gcc_assert (ok);
18834 base = parts.base;
18835 index = parts.index;
18836 disp = parts.disp;
18837 scale = parts.scale;
18839 if (ADDR_SPACE_GENERIC_P (as))
18840 as = parts.seg;
18841 else
18842 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18844 if (!ADDR_SPACE_GENERIC_P (as))
18846 const char *string;
18848 if (as == ADDR_SPACE_SEG_FS)
18849 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18850 else if (as == ADDR_SPACE_SEG_GS)
18851 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18852 else
18853 gcc_unreachable ();
18854 fputs (string, file);
18857 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
18858 if (TARGET_64BIT && !base && !index && !no_rip)
18860 rtx symbol = disp;
18862 if (GET_CODE (disp) == CONST
18863 && GET_CODE (XEXP (disp, 0)) == PLUS
18864 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18865 symbol = XEXP (XEXP (disp, 0), 0);
18867 if (GET_CODE (symbol) == LABEL_REF
18868 || (GET_CODE (symbol) == SYMBOL_REF
18869 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18870 base = pc_rtx;
18873 if (!base && !index)
18875 /* A displacement-only address requires special attention. */
18876 if (CONST_INT_P (disp))
18878 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18879 fputs ("ds:", file);
18880 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18882 /* Load the external function address via the GOT slot to avoid PLT. */
18883 else if (GET_CODE (disp) == CONST
18884 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18885 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18886 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18887 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18888 output_pic_addr_const (file, disp, 0);
18889 else if (flag_pic)
18890 output_pic_addr_const (file, disp, 0);
18891 else
18892 output_addr_const (file, disp);
18894 else
18896 /* Print SImode register names to force addr32 prefix. */
18897 if (SImode_address_operand (addr, VOIDmode))
18899 if (flag_checking)
18901 gcc_assert (TARGET_64BIT);
18902 switch (GET_CODE (addr))
18904 case SUBREG:
18905 gcc_assert (GET_MODE (addr) == SImode);
18906 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18907 break;
18908 case ZERO_EXTEND:
18909 case AND:
18910 gcc_assert (GET_MODE (addr) == DImode);
18911 break;
18912 default:
18913 gcc_unreachable ();
18916 gcc_assert (!code);
18917 code = 'k';
18919 else if (code == 0
18920 && TARGET_X32
18921 && disp
18922 && CONST_INT_P (disp)
18923 && INTVAL (disp) < -16*1024*1024)
18925 /* X32 runs in 64-bit mode, where displacement, DISP, in
18926 address DISP(%r64), is encoded as 32-bit immediate sign-
18927 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18928 address is %r64 + 0xffffffffbffffd00. When %r64 <
18929 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18930 which is invalid for x32. The correct address is %r64
18931 - 0x40000300 == 0xf7ffdd64. To properly encode
18932 -0x40000300(%r64) for x32, we zero-extend negative
18933 displacement by forcing addr32 prefix which truncates
18934 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18935 zero-extend all negative displacements, including -1(%rsp).
18936 However, for small negative displacements, sign-extension
18937 won't cause overflow. We only zero-extend negative
18938 displacements if they are < -16*1024*1024, which is also used
18939 to check legitimate address displacements for PIC. */
18940 code = 'k';
18943 /* Since the upper 32 bits of RSP are always zero for x32,
18944 we can encode %esp as %rsp to avoid 0x67 prefix if
18945 there is no index register. */
18946 if (TARGET_X32 && Pmode == SImode
18947 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18948 code = 'q';
18950 if (ASSEMBLER_DIALECT == ASM_ATT)
18952 if (disp)
18954 if (flag_pic)
18955 output_pic_addr_const (file, disp, 0);
18956 else if (GET_CODE (disp) == LABEL_REF)
18957 output_asm_label (disp);
18958 else
18959 output_addr_const (file, disp);
18962 putc ('(', file);
18963 if (base)
18964 print_reg (base, code, file);
18965 if (index)
18967 putc (',', file);
18968 print_reg (index, vsib ? 0 : code, file);
18969 if (scale != 1 || vsib)
18970 fprintf (file, ",%d", scale);
18972 putc (')', file);
18974 else
18976 rtx offset = NULL_RTX;
18978 if (disp)
18980 /* Pull out the offset of a symbol; print any symbol itself. */
18981 if (GET_CODE (disp) == CONST
18982 && GET_CODE (XEXP (disp, 0)) == PLUS
18983 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18985 offset = XEXP (XEXP (disp, 0), 1);
18986 disp = gen_rtx_CONST (VOIDmode,
18987 XEXP (XEXP (disp, 0), 0));
18990 if (flag_pic)
18991 output_pic_addr_const (file, disp, 0);
18992 else if (GET_CODE (disp) == LABEL_REF)
18993 output_asm_label (disp);
18994 else if (CONST_INT_P (disp))
18995 offset = disp;
18996 else
18997 output_addr_const (file, disp);
19000 putc ('[', file);
19001 if (base)
19003 print_reg (base, code, file);
19004 if (offset)
19006 if (INTVAL (offset) >= 0)
19007 putc ('+', file);
19008 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
19011 else if (offset)
19012 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
19013 else
19014 putc ('0', file);
19016 if (index)
19018 putc ('+', file);
19019 print_reg (index, vsib ? 0 : code, file);
19020 if (scale != 1 || vsib)
19021 fprintf (file, "*%d", scale);
19023 putc (']', file);
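/* Illustrative output (editor's sketch, derived from the two branches
   above): for base %rax, index %rbx, scale 4 and displacement 16, the
   AT&T branch prints "16(%rax,%rbx,4)" while the Intel branch prints
   "[rax+16+rbx*4]".  */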
19028 static void
19029 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
19031 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
19034 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
19036 static bool
19037 i386_asm_output_addr_const_extra (FILE *file, rtx x)
19039 rtx op;
19041 if (GET_CODE (x) != UNSPEC)
19042 return false;
19044 op = XVECEXP (x, 0, 0);
19045 switch (XINT (x, 1))
19047 case UNSPEC_GOTOFF:
19048 output_addr_const (file, op);
19049 fputs ("@gotoff", file);
19050 break;
19051 case UNSPEC_GOTTPOFF:
19052 output_addr_const (file, op);
19053 /* FIXME: This might be @TPOFF in Sun ld. */
19054 fputs ("@gottpoff", file);
19055 break;
19056 case UNSPEC_TPOFF:
19057 output_addr_const (file, op);
19058 fputs ("@tpoff", file);
19059 break;
19060 case UNSPEC_NTPOFF:
19061 output_addr_const (file, op);
19062 if (TARGET_64BIT)
19063 fputs ("@tpoff", file);
19064 else
19065 fputs ("@ntpoff", file);
19066 break;
19067 case UNSPEC_DTPOFF:
19068 output_addr_const (file, op);
19069 fputs ("@dtpoff", file);
19070 break;
19071 case UNSPEC_GOTNTPOFF:
19072 output_addr_const (file, op);
19073 if (TARGET_64BIT)
19074 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
19075 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
19076 else
19077 fputs ("@gotntpoff", file);
19078 break;
19079 case UNSPEC_INDNTPOFF:
19080 output_addr_const (file, op);
19081 fputs ("@indntpoff", file);
19082 break;
19083 #if TARGET_MACHO
19084 case UNSPEC_MACHOPIC_OFFSET:
19085 output_addr_const (file, op);
19086 putc ('-', file);
19087 machopic_output_function_base_name (file);
19088 break;
19089 #endif
19091 default:
19092 return false;
19095 return true;
19098 /* Split one or more double-mode RTL references into pairs of half-mode
19099 references. The RTL can be REG, offsettable MEM, integer constant, or
19100 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
19101 split and "num" is its length. lo_half and hi_half are output arrays
19102 that parallel "operands". */
19104 void
19105 split_double_mode (machine_mode mode, rtx operands[],
19106 int num, rtx lo_half[], rtx hi_half[])
19108 machine_mode half_mode;
19109 unsigned int byte;
19111 switch (mode)
19113 case E_TImode:
19114 half_mode = DImode;
19115 break;
19116 case E_DImode:
19117 half_mode = SImode;
19118 break;
19119 default:
19120 gcc_unreachable ();
19123 byte = GET_MODE_SIZE (half_mode);
19125 while (num--)
19127 rtx op = operands[num];
19129 /* simplify_subreg refuses to split volatile memory addresses,
19130 but we still have to handle them. */
19131 if (MEM_P (op))
19133 lo_half[num] = adjust_address (op, half_mode, 0);
19134 hi_half[num] = adjust_address (op, half_mode, byte);
19136 else
19138 lo_half[num] = simplify_gen_subreg (half_mode, op,
19139 GET_MODE (op) == VOIDmode
19140 ? mode : GET_MODE (op), 0);
19141 hi_half[num] = simplify_gen_subreg (half_mode, op,
19142 GET_MODE (op) == VOIDmode
19143 ? mode : GET_MODE (op), byte);
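/* Worked example (editor's note): for a DImode operand the halves are
   SImode at byte offsets 0 and 4; for TImode they are DImode at offsets
   0 and 8.  A DImode MEM therefore splits into

     lo_half[num] = adjust_address (op, SImode, 0);
     hi_half[num] = adjust_address (op, SImode, 4);

   as done in the memory case above.  */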
19148 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
19149 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
19150 is the expression of the binary operation. The output may either be
19151 emitted here, or returned to the caller, like all output_* functions.
19153 There is no guarantee that the operands are the same mode, as they
19154 might be within FLOAT or FLOAT_EXTEND expressions. */
19156 #ifndef SYSV386_COMPAT
19157 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
19158 wants to fix the assemblers because that causes incompatibility
19159 with gcc. No-one wants to fix gcc because that causes
19160 incompatibility with assemblers... You can use the option of
19161 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
19162 #define SYSV386_COMPAT 1
19163 #endif
19165 const char *
19166 output_387_binary_op (rtx_insn *insn, rtx *operands)
19168 static char buf[40];
19169 const char *p;
19170 bool is_sse
19171 = (SSE_REG_P (operands[0])
19172 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
19174 if (is_sse)
19175 p = "%v";
19176 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
19177 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
19178 p = "fi";
19179 else
19180 p = "f";
19182 strcpy (buf, p);
19184 switch (GET_CODE (operands[3]))
19186 case PLUS:
19187 p = "add"; break;
19188 case MINUS:
19189 p = "sub"; break;
19190 case MULT:
19191 p = "mul"; break;
19192 case DIV:
19193 p = "div"; break;
19194 default:
19195 gcc_unreachable ();
19198 strcat (buf, p);
19200 if (is_sse)
19202 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
19203 strcat (buf, p);
19205 if (TARGET_AVX)
19206 p = "\t{%2, %1, %0|%0, %1, %2}";
19207 else
19208 p = "\t{%2, %0|%0, %2}";
19210 strcat (buf, p);
19211 return buf;
19214 /* Even if we do not want to check the inputs, this documents the input
19215 constraints, which helps in understanding the following code. */
19216 if (flag_checking)
19218 if (STACK_REG_P (operands[0])
19219 && ((REG_P (operands[1])
19220 && REGNO (operands[0]) == REGNO (operands[1])
19221 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
19222 || (REG_P (operands[2])
19223 && REGNO (operands[0]) == REGNO (operands[2])
19224 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
19225 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
19226 ; /* ok */
19227 else
19228 gcc_unreachable ();
19231 switch (GET_CODE (operands[3]))
19233 case MULT:
19234 case PLUS:
19235 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
19236 std::swap (operands[1], operands[2]);
19238 /* We know operands[0] == operands[1]. */
19240 if (MEM_P (operands[2]))
19242 p = "%Z2\t%2";
19243 break;
19246 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
19248 if (STACK_TOP_P (operands[0]))
19249 /* How is it that we are storing to a dead operand[2]?
19250 Well, presumably operands[1] is dead too. We can't
19251 store the result to st(0) as st(0) gets popped on this
19252 instruction. Instead store to operands[2] (which I
19253 think has to be st(1)). st(1) will be popped later.
19254 gcc <= 2.8.1 didn't have this check and generated
19255 assembly code that the Unixware assembler rejected. */
19256 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
19257 else
19258 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
19259 break;
19262 if (STACK_TOP_P (operands[0]))
19263 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
19264 else
19265 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
19266 break;
19268 case MINUS:
19269 case DIV:
19270 if (MEM_P (operands[1]))
19272 p = "r%Z1\t%1";
19273 break;
19276 if (MEM_P (operands[2]))
19278 p = "%Z2\t%2";
19279 break;
19282 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
19284 #if SYSV386_COMPAT
19285 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
19286 derived assemblers, confusingly reverse the direction of
19287 the operation for fsub{r} and fdiv{r} when the
19288 destination register is not st(0). The Intel assembler
19289 doesn't have this brain damage. Read !SYSV386_COMPAT to
19290 figure out what the hardware really does. */
19291 if (STACK_TOP_P (operands[0]))
19292 p = "{p\t%0, %2|rp\t%2, %0}";
19293 else
19294 p = "{rp\t%2, %0|p\t%0, %2}";
19295 #else
19296 if (STACK_TOP_P (operands[0]))
19297 /* As above for fmul/fadd, we can't store to st(0). */
19298 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
19299 else
19300 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
19301 #endif
19302 break;
19305 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19307 #if SYSV386_COMPAT
19308 if (STACK_TOP_P (operands[0]))
19309 p = "{rp\t%0, %1|p\t%1, %0}";
19310 else
19311 p = "{p\t%1, %0|rp\t%0, %1}";
19312 #else
19313 if (STACK_TOP_P (operands[0]))
19314 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
19315 else
19316 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
19317 #endif
19318 break;
19321 if (STACK_TOP_P (operands[0]))
19323 if (STACK_TOP_P (operands[1]))
19324 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
19325 else
19326 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
19327 break;
19329 else if (STACK_TOP_P (operands[1]))
19331 #if SYSV386_COMPAT
19332 p = "{\t%1, %0|r\t%0, %1}";
19333 #else
19334 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
19335 #endif
19337 else
19339 #if SYSV386_COMPAT
19340 p = "{r\t%2, %0|\t%0, %2}";
19341 #else
19342 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
19343 #endif
19345 break;
19347 default:
19348 gcc_unreachable ();
19351 strcat (buf, p);
19352 return buf;
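/* Illustrative outputs (editor's note): for an SFmode SSE addition the
   template built above is "%vaddss\t{%2, %1, %0|%0, %1, %2}" with
   TARGET_AVX (three-operand form) and "%vaddss\t{%2, %0|%0, %2}" without
   it; the x87 paths above instead assemble variants of "fadd"/"fiadd"
   according to the register/memory cases.  */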
19355 /* Return needed mode for entity in optimize_mode_switching pass. */
19357 static int
19358 ix86_dirflag_mode_needed (rtx_insn *insn)
19360 if (CALL_P (insn))
19362 if (cfun->machine->func_type == TYPE_NORMAL)
19363 return X86_DIRFLAG_ANY;
19364 else
19365 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
19366 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
19369 if (recog_memoized (insn) < 0)
19370 return X86_DIRFLAG_ANY;
19372 if (get_attr_type (insn) == TYPE_STR)
19374 /* Emit cld instruction if stringops are used in the function. */
19375 if (cfun->machine->func_type == TYPE_NORMAL)
19376 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
19377 else
19378 return X86_DIRFLAG_RESET;
19381 return X86_DIRFLAG_ANY;
19384 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
19386 static bool
19387 ix86_check_avx_upper_register (const_rtx exp)
19389 if (SUBREG_P (exp))
19390 exp = SUBREG_REG (exp);
19392 return (REG_P (exp)
19393 && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
19394 || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
19397 /* Return needed mode for entity in optimize_mode_switching pass. */
19399 static int
19400 ix86_avx_u128_mode_needed (rtx_insn *insn)
19402 if (CALL_P (insn))
19404 rtx link;
19406 /* Needed mode is set to AVX_U128_CLEAN if there are
19407 no 256bit or 512bit modes used in function arguments. */
19408 for (link = CALL_INSN_FUNCTION_USAGE (insn);
19409 link;
19410 link = XEXP (link, 1))
19412 if (GET_CODE (XEXP (link, 0)) == USE)
19414 rtx arg = XEXP (XEXP (link, 0), 0);
19416 if (ix86_check_avx_upper_register (arg))
19417 return AVX_U128_DIRTY;
19421 return AVX_U128_CLEAN;
19424 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
19425 Hardware changes state only when a 256bit register is written to,
19426 but we need to prevent the compiler from moving the optimal insertion
19427 point above an eventual read from a 256bit or 512bit register. */
19428 subrtx_iterator::array_type array;
19429 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
19430 if (ix86_check_avx_upper_register (*iter))
19431 return AVX_U128_DIRTY;
19433 return AVX_U128_ANY;
19436 /* Return mode that i387 must be switched into
19437 prior to the execution of insn. */
19439 static int
19440 ix86_i387_mode_needed (int entity, rtx_insn *insn)
19442 enum attr_i387_cw mode;
19444 /* The mode UNINITIALIZED is used to store the control word after a
19445 function call or ASM pattern. The mode ANY specifies that the function
19446 has no requirements on the control word and makes no changes to the
19447 bits we are interested in. */
19449 if (CALL_P (insn)
19450 || (NONJUMP_INSN_P (insn)
19451 && (asm_noperands (PATTERN (insn)) >= 0
19452 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
19453 return I387_CW_UNINITIALIZED;
19455 if (recog_memoized (insn) < 0)
19456 return I387_CW_ANY;
19458 mode = get_attr_i387_cw (insn);
19460 switch (entity)
19462 case I387_TRUNC:
19463 if (mode == I387_CW_TRUNC)
19464 return mode;
19465 break;
19467 case I387_FLOOR:
19468 if (mode == I387_CW_FLOOR)
19469 return mode;
19470 break;
19472 case I387_CEIL:
19473 if (mode == I387_CW_CEIL)
19474 return mode;
19475 break;
19477 case I387_MASK_PM:
19478 if (mode == I387_CW_MASK_PM)
19479 return mode;
19480 break;
19482 default:
19483 gcc_unreachable ();
19486 return I387_CW_ANY;
19489 /* Return mode that entity must be switched into
19490 prior to the execution of insn. */
19492 static int
19493 ix86_mode_needed (int entity, rtx_insn *insn)
19495 switch (entity)
19497 case X86_DIRFLAG:
19498 return ix86_dirflag_mode_needed (insn);
19499 case AVX_U128:
19500 return ix86_avx_u128_mode_needed (insn);
19501 case I387_TRUNC:
19502 case I387_FLOOR:
19503 case I387_CEIL:
19504 case I387_MASK_PM:
19505 return ix86_i387_mode_needed (entity, insn);
19506 default:
19507 gcc_unreachable ();
19509 return 0;
19512 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
19514 static void
19515 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
19517 if (ix86_check_avx_upper_register (dest))
19519 bool *used = (bool *) data;
19520 *used = true;
19524 /* Calculate mode of upper 128bit AVX registers after the insn. */
19526 static int
19527 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
19529 rtx pat = PATTERN (insn);
19531 if (vzeroupper_operation (pat, VOIDmode)
19532 || vzeroall_operation (pat, VOIDmode))
19533 return AVX_U128_CLEAN;
19535 /* We know that the state is clean after a CALL insn if there are no
19536 256bit or 512bit registers used in the function return register. */
19537 if (CALL_P (insn))
19539 bool avx_upper_reg_found = false;
19540 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
19542 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
19545 /* Otherwise, return current mode. Remember that if insn
19546 references AVX 256bit or 512bit registers, the mode was already
19547 changed to DIRTY from MODE_NEEDED. */
19548 return mode;
19551 /* Return the mode that an insn results in. */
19553 static int
19554 ix86_mode_after (int entity, int mode, rtx_insn *insn)
19556 switch (entity)
19558 case X86_DIRFLAG:
19559 return mode;
19560 case AVX_U128:
19561 return ix86_avx_u128_mode_after (mode, insn);
19562 case I387_TRUNC:
19563 case I387_FLOOR:
19564 case I387_CEIL:
19565 case I387_MASK_PM:
19566 return mode;
19567 default:
19568 gcc_unreachable ();
19572 static int
19573 ix86_dirflag_mode_entry (void)
19575 /* For TARGET_CLD or in the interrupt handler we can't assume
19576 direction flag state at function entry. */
19577 if (TARGET_CLD
19578 || cfun->machine->func_type != TYPE_NORMAL)
19579 return X86_DIRFLAG_ANY;
19581 return X86_DIRFLAG_RESET;
19584 static int
19585 ix86_avx_u128_mode_entry (void)
19587 tree arg;
19589 /* Entry mode is set to AVX_U128_DIRTY if there are
19590 256bit or 512bit modes used in function arguments. */
19591 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
19592 arg = TREE_CHAIN (arg))
19594 rtx incoming = DECL_INCOMING_RTL (arg);
19596 if (incoming && ix86_check_avx_upper_register (incoming))
19597 return AVX_U128_DIRTY;
19600 return AVX_U128_CLEAN;
19603 /* Return a mode that ENTITY is assumed to be
19604 switched to at function entry. */
19606 static int
19607 ix86_mode_entry (int entity)
19609 switch (entity)
19611 case X86_DIRFLAG:
19612 return ix86_dirflag_mode_entry ();
19613 case AVX_U128:
19614 return ix86_avx_u128_mode_entry ();
19615 case I387_TRUNC:
19616 case I387_FLOOR:
19617 case I387_CEIL:
19618 case I387_MASK_PM:
19619 return I387_CW_ANY;
19620 default:
19621 gcc_unreachable ();
19625 static int
19626 ix86_avx_u128_mode_exit (void)
19628 rtx reg = crtl->return_rtx;
19630 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
19631 or 512bit modes used in the function return register. */
19632 if (reg && ix86_check_avx_upper_register (reg))
19633 return AVX_U128_DIRTY;
19635 return AVX_U128_CLEAN;
19638 /* Return a mode that ENTITY is assumed to be
19639 switched to at function exit. */
19641 static int
19642 ix86_mode_exit (int entity)
19644 switch (entity)
19646 case X86_DIRFLAG:
19647 return X86_DIRFLAG_ANY;
19648 case AVX_U128:
19649 return ix86_avx_u128_mode_exit ();
19650 case I387_TRUNC:
19651 case I387_FLOOR:
19652 case I387_CEIL:
19653 case I387_MASK_PM:
19654 return I387_CW_ANY;
19655 default:
19656 gcc_unreachable ();
19660 static int
19661 ix86_mode_priority (int, int n)
19663 return n;
19666 /* Output code to initialize control word copies used by trunc?f?i and
19667 rounding patterns. CURRENT_MODE is set to current control word,
19668 while NEW_MODE is set to new control word. */
19670 static void
19671 emit_i387_cw_initialization (int mode)
19673 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
19674 rtx new_mode;
19676 enum ix86_stack_slot slot;
19678 rtx reg = gen_reg_rtx (HImode);
19680 emit_insn (gen_x86_fnstcw_1 (stored_mode));
19681 emit_move_insn (reg, copy_rtx (stored_mode));
19683 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
19684 || optimize_insn_for_size_p ())
19686 switch (mode)
19688 case I387_CW_TRUNC:
19689 /* round toward zero (truncate) */
19690 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
19691 slot = SLOT_CW_TRUNC;
19692 break;
19694 case I387_CW_FLOOR:
19695 /* round down toward -oo */
19696 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19697 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
19698 slot = SLOT_CW_FLOOR;
19699 break;
19701 case I387_CW_CEIL:
19702 /* round up toward +oo */
19703 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19704 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
19705 slot = SLOT_CW_CEIL;
19706 break;
19708 case I387_CW_MASK_PM:
19709 /* mask precision exception for nearbyint() */
19710 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19711 slot = SLOT_CW_MASK_PM;
19712 break;
19714 default:
19715 gcc_unreachable ();
19718 else
19720 switch (mode)
19722 case I387_CW_TRUNC:
19723 /* round toward zero (truncate) */
19724 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
19725 slot = SLOT_CW_TRUNC;
19726 break;
19728 case I387_CW_FLOOR:
19729 /* round down toward -oo */
19730 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
19731 slot = SLOT_CW_FLOOR;
19732 break;
19734 case I387_CW_CEIL:
19735 /* round up toward +oo */
19736 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
19737 slot = SLOT_CW_CEIL;
19738 break;
19740 case I387_CW_MASK_PM:
19741 /* mask precision exception for nearbyint() */
19742 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19743 slot = SLOT_CW_MASK_PM;
19744 break;
19746 default:
19747 gcc_unreachable ();
19751 gcc_assert (slot < MAX_386_STACK_LOCALS);
19753 new_mode = assign_386_stack_local (HImode, slot);
19754 emit_move_insn (new_mode, reg);
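/* For reference (editor's note, derived from the masks used above): the
   x87 control word keeps the rounding-control field in bits 10-11 and the
   precision-mask flag in bit 5, so the stored copies differ from the
   original control word as follows:

     I387_CW_TRUNC    RC = 11b  (or  0x0c00)                round toward zero
     I387_CW_FLOOR    RC = 01b  (and ~0x0c00, or 0x0400)    round down
     I387_CW_CEIL     RC = 10b  (and ~0x0c00, or 0x0800)    round up
     I387_CW_MASK_PM  PM set    (or  0x0020)                mask precision exception  */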
19757 /* Emit vzeroupper. */
19759 void
19760 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
19762 int i;
19764 /* Cancel automatic vzeroupper insertion if there are
19765 live call-saved SSE registers at the insertion point. */
19767 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19768 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19769 return;
19771 if (TARGET_64BIT)
19772 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19773 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19774 return;
19776 emit_insn (gen_avx_vzeroupper ());
19781 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
19782 is the set of hard registers live at the point where the insn(s)
19783 are to be inserted. */
19785 static void
19786 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19787 HARD_REG_SET regs_live)
19789 switch (entity)
19791 case X86_DIRFLAG:
19792 if (mode == X86_DIRFLAG_RESET)
19793 emit_insn (gen_cld ());
19794 break;
19795 case AVX_U128:
19796 if (mode == AVX_U128_CLEAN)
19797 ix86_avx_emit_vzeroupper (regs_live);
19798 break;
19799 case I387_TRUNC:
19800 case I387_FLOOR:
19801 case I387_CEIL:
19802 case I387_MASK_PM:
19803 if (mode != I387_CW_ANY
19804 && mode != I387_CW_UNINITIALIZED)
19805 emit_i387_cw_initialization (mode);
19806 break;
19807 default:
19808 gcc_unreachable ();
19812 /* Output code for INSN to convert a float to a signed int. OPERANDS
19813 are the insn operands. The output may be [HSD]Imode and the input
19814 operand may be [SDX]Fmode. */
19816 const char *
19817 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19819 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19820 bool dimode_p = GET_MODE (operands[0]) == DImode;
19821 int round_mode = get_attr_i387_cw (insn);
19823 static char buf[40];
19824 const char *p;
19826 /* Jump through a hoop or two for DImode, since the hardware has no
19827 non-popping instruction. We used to do this a different way, but
19828 that was somewhat fragile and broke with post-reload splitters. */
19829 if ((dimode_p || fisttp) && !stack_top_dies)
19830 output_asm_insn ("fld\t%y1", operands);
19832 gcc_assert (STACK_TOP_P (operands[1]));
19833 gcc_assert (MEM_P (operands[0]));
19834 gcc_assert (GET_MODE (operands[1]) != TFmode);
19836 if (fisttp)
19837 return "fisttp%Z0\t%0";
19839 strcpy (buf, "fist");
19841 if (round_mode != I387_CW_ANY)
19842 output_asm_insn ("fldcw\t%3", operands);
19844 p = "p%Z0\t%0";
19845 strcat (buf, p + !(stack_top_dies || dimode_p));
19847 output_asm_insn (buf, operands);
19849 if (round_mode != I387_CW_ANY)
19850 output_asm_insn ("fldcw\t%2", operands);
19852 return "";
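/* Editor's note on the sequence above: without SSE3's fisttp, the x87 can
   only truncate by temporarily switching its rounding control, so a DImode
   conversion is typically emitted as "fldcw %3" (a control word with
   round-toward-zero), then "fistp %0", then "fldcw %2" to restore the
   original control word.  */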
19855 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19856 have the values zero or one, indicates the ffreep insn's operand
19857 from the OPERANDS array. */
19859 static const char *
19860 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19862 if (TARGET_USE_FFREEP)
19863 #ifdef HAVE_AS_IX86_FFREEP
19864 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19865 #else
19867 static char retval[32];
19868 int regno = REGNO (operands[opno]);
19870 gcc_assert (STACK_REGNO_P (regno));
19872 regno -= FIRST_STACK_REG;
19874 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
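/* Editor's note: for %st(N) the emitted word 0xcNdf is stored
   little-endian as the bytes 0xdf 0xcN, which is exactly the machine
   encoding of "ffreep %st(N)"; this lets the insn be used even when the
   assembler does not accept the mnemonic.  */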
19875 return retval;
19877 #endif
19879 return opno ? "fstp\t%y1" : "fstp\t%y0";
19883 /* Output code for INSN to compare OPERANDS. EFLAGS_P is true when fcomi
19884 should be used. UNORDERED_P is true when fucom should be used. */
19886 const char *
19887 output_fp_compare (rtx_insn *insn, rtx *operands,
19888 bool eflags_p, bool unordered_p)
19890 rtx *xops = eflags_p ? &operands[0] : &operands[1];
19891 bool stack_top_dies;
19893 static char buf[40];
19894 const char *p;
19896 gcc_assert (STACK_TOP_P (xops[0]));
19898 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19900 if (eflags_p)
19902 p = unordered_p ? "fucomi" : "fcomi";
19903 strcpy (buf, p);
19905 p = "p\t{%y1, %0|%0, %y1}";
19906 strcat (buf, p + !stack_top_dies);
19908 return buf;
19911 if (STACK_REG_P (xops[1])
19912 && stack_top_dies
19913 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19915 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19917 /* If the top of the 387 stack dies, and the other operand
19918 is also a stack register that dies, then this must be a
19919 `fcompp' float compare. */
19920 p = unordered_p ? "fucompp" : "fcompp";
19921 strcpy (buf, p);
19923 else if (const0_operand (xops[1], VOIDmode))
19925 gcc_assert (!unordered_p);
19926 strcpy (buf, "ftst");
19928 else
19930 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19932 gcc_assert (!unordered_p);
19933 p = "ficom";
19935 else
19936 p = unordered_p ? "fucom" : "fcom";
19938 strcpy (buf, p);
19940 p = "p%Z2\t%y2";
19941 strcat (buf, p + !stack_top_dies);
19944 output_asm_insn (buf, operands);
19945 return "fnstsw\t%0";
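/* Editor's illustration: when %st(0) is compared with %st(1) and both die,
   the code above picks "fcompp"; the "fnstsw" returned here then copies the
   FPU status word into the output operand (normally %ax) so the flags can
   be tested afterwards.  With fcomi available, the earlier eflags path
   writes EFLAGS directly instead.  */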
19948 void
19949 ix86_output_addr_vec_elt (FILE *file, int value)
19951 const char *directive = ASM_LONG;
19953 #ifdef ASM_QUAD
19954 if (TARGET_LP64)
19955 directive = ASM_QUAD;
19956 #else
19957 gcc_assert (!TARGET_64BIT);
19958 #endif
19960 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19963 void
19964 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19966 const char *directive = ASM_LONG;
19968 #ifdef ASM_QUAD
19969 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19970 directive = ASM_QUAD;
19971 #else
19972 gcc_assert (!TARGET_64BIT);
19973 #endif
19974 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19975 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19976 fprintf (file, "%s%s%d-%s%d\n",
19977 directive, LPREFIX, value, LPREFIX, rel);
19978 else if (HAVE_AS_GOTOFF_IN_DATA)
19979 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19980 #if TARGET_MACHO
19981 else if (TARGET_MACHO)
19983 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19984 machopic_output_function_base_name (file);
19985 putc ('\n', file);
19987 #endif
19988 else
19989 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19990 GOT_SYMBOL_NAME, LPREFIX, value);
19993 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19994 for the target. */
19996 void
19997 ix86_expand_clear (rtx dest)
19999 rtx tmp;
20001 /* We play register width games, which are only valid after reload. */
20002 gcc_assert (reload_completed);
20004 /* Avoid HImode and its attendant prefix byte. */
20005 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
20006 dest = gen_rtx_REG (SImode, REGNO (dest));
20007 tmp = gen_rtx_SET (dest, const0_rtx);
20009 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
20011 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20012 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
20015 emit_insn (tmp);
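/* Illustrative result (editor's sketch): clearing %eax normally becomes
   "xorl %eax, %eax", which clobbers the flags - hence the PARALLEL with
   the FLAGS_REG clobber above.  Only when TARGET_USE_MOV0 is set and we
   are not optimizing for size is the flag-preserving "movl $0, %eax"
   emitted instead.  */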
20018 void
20019 ix86_expand_move (machine_mode mode, rtx operands[])
20021 rtx op0, op1;
20022 rtx tmp, addend = NULL_RTX;
20023 enum tls_model model;
20025 op0 = operands[0];
20026 op1 = operands[1];
20028 switch (GET_CODE (op1))
20030 case CONST:
20031 tmp = XEXP (op1, 0);
20033 if (GET_CODE (tmp) != PLUS
20034 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
20035 break;
20037 op1 = XEXP (tmp, 0);
20038 addend = XEXP (tmp, 1);
20039 /* FALLTHRU */
20041 case SYMBOL_REF:
20042 model = SYMBOL_REF_TLS_MODEL (op1);
20044 if (model)
20045 op1 = legitimize_tls_address (op1, model, true);
20046 else if (ix86_force_load_from_GOT_p (op1))
20048 /* Load the external function address via GOT slot to avoid PLT. */
20049 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
20050 (TARGET_64BIT
20051 ? UNSPEC_GOTPCREL
20052 : UNSPEC_GOT));
20053 op1 = gen_rtx_CONST (Pmode, op1);
20054 op1 = gen_const_mem (Pmode, op1);
20055 set_mem_alias_set (op1, ix86_GOT_alias_set ());
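/* Editor's illustration: on 64-bit targets the memory reference built
   above typically assembles to something like
   "movq foo@GOTPCREL(%rip), %reg", fetching the callee's address from
   the GOT instead of going through the PLT.  */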
20057 else
20059 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
20060 if (tmp)
20062 op1 = tmp;
20063 if (!addend)
20064 break;
20066 else
20068 op1 = operands[1];
20069 break;
20073 if (addend)
20075 op1 = force_operand (op1, NULL_RTX);
20076 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
20077 op0, 1, OPTAB_DIRECT);
20079 else
20080 op1 = force_operand (op1, op0);
20082 if (op1 == op0)
20083 return;
20085 op1 = convert_to_mode (mode, op1, 1);
20087 default:
20088 break;
20091 if ((flag_pic || MACHOPIC_INDIRECT)
20092 && symbolic_operand (op1, mode))
20094 if (TARGET_MACHO && !TARGET_64BIT)
20096 #if TARGET_MACHO
20097 /* dynamic-no-pic */
20098 if (MACHOPIC_INDIRECT)
20100 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
20101 ? op0 : gen_reg_rtx (Pmode);
20102 op1 = machopic_indirect_data_reference (op1, temp);
20103 if (MACHOPIC_PURE)
20104 op1 = machopic_legitimize_pic_address (op1, mode,
20105 temp == op1 ? 0 : temp);
20107 if (op0 != op1 && GET_CODE (op0) != MEM)
20109 rtx insn = gen_rtx_SET (op0, op1);
20110 emit_insn (insn);
20111 return;
20113 if (GET_CODE (op0) == MEM)
20114 op1 = force_reg (Pmode, op1);
20115 else
20117 rtx temp = op0;
20118 if (GET_CODE (temp) != REG)
20119 temp = gen_reg_rtx (Pmode);
20120 temp = legitimize_pic_address (op1, temp);
20121 if (temp == op0)
20122 return;
20123 op1 = temp;
20125 /* dynamic-no-pic */
20126 #endif
20128 else
20130 if (MEM_P (op0))
20131 op1 = force_reg (mode, op1);
20132 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
20134 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
20135 op1 = legitimize_pic_address (op1, reg);
20136 if (op0 == op1)
20137 return;
20138 op1 = convert_to_mode (mode, op1, 1);
20142 else
20144 if (MEM_P (op0)
20145 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
20146 || !push_operand (op0, mode))
20147 && MEM_P (op1))
20148 op1 = force_reg (mode, op1);
20150 if (push_operand (op0, mode)
20151 && ! general_no_elim_operand (op1, mode))
20152 op1 = copy_to_mode_reg (mode, op1);
20154 /* Force large constants in 64bit compilation into a register
20155 to get them CSEed. */
20156 if (can_create_pseudo_p ()
20157 && (mode == DImode) && TARGET_64BIT
20158 && immediate_operand (op1, mode)
20159 && !x86_64_zext_immediate_operand (op1, VOIDmode)
20160 && !register_operand (op0, mode)
20161 && optimize)
20162 op1 = copy_to_mode_reg (mode, op1);
20164 if (can_create_pseudo_p ()
20165 && CONST_DOUBLE_P (op1))
20167 /* If we are loading a floating point constant to a register,
20168 force the value to memory now, since we'll get better code
20169 out of the back end. */
20171 op1 = validize_mem (force_const_mem (mode, op1));
20172 if (!register_operand (op0, mode))
20174 rtx temp = gen_reg_rtx (mode);
20175 emit_insn (gen_rtx_SET (temp, op1));
20176 emit_move_insn (op0, temp);
20177 return;
20182 emit_insn (gen_rtx_SET (op0, op1));
20185 void
20186 ix86_expand_vector_move (machine_mode mode, rtx operands[])
20188 rtx op0 = operands[0], op1 = operands[1];
20189 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
20190 psABI since the biggest alignment is 4 bytes for the IA MCU psABI. */
20191 unsigned int align = (TARGET_IAMCU
20192 ? GET_MODE_BITSIZE (mode)
20193 : GET_MODE_ALIGNMENT (mode));
20195 if (push_operand (op0, VOIDmode))
20196 op0 = emit_move_resolve_push (mode, op0);
20198 /* Force constants other than zero into memory. We do not know how
20199 the instructions used to build constants modify the upper 64 bits
20200 of the register; once we have that information we may be able
20201 to handle some of them more efficiently. */
20202 if (can_create_pseudo_p ()
20203 && (CONSTANT_P (op1)
20204 || (SUBREG_P (op1)
20205 && CONSTANT_P (SUBREG_REG (op1))))
20206 && ((register_operand (op0, mode)
20207 && !standard_sse_constant_p (op1, mode))
20208 /* ix86_expand_vector_move_misalign() does not like constants. */
20209 || (SSE_REG_MODE_P (mode)
20210 && MEM_P (op0)
20211 && MEM_ALIGN (op0) < align)))
20213 if (SUBREG_P (op1))
20215 machine_mode imode = GET_MODE (SUBREG_REG (op1));
20216 rtx r = force_const_mem (imode, SUBREG_REG (op1));
20217 if (r)
20218 r = validize_mem (r);
20219 else
20220 r = force_reg (imode, SUBREG_REG (op1));
20221 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
20223 else
20224 op1 = validize_mem (force_const_mem (mode, op1));
20227 /* We need to check memory alignment for SSE modes since attributes
20228 can make operands unaligned. */
20229 if (can_create_pseudo_p ()
20230 && SSE_REG_MODE_P (mode)
20231 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
20232 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
20234 rtx tmp[2];
20236 /* ix86_expand_vector_move_misalign() does not like both
20237 arguments in memory. */
20238 if (!register_operand (op0, mode)
20239 && !register_operand (op1, mode))
20240 op1 = force_reg (mode, op1);
20242 tmp[0] = op0; tmp[1] = op1;
20243 ix86_expand_vector_move_misalign (mode, tmp);
20244 return;
20247 /* Make operand1 a register if it isn't already. */
20248 if (can_create_pseudo_p ()
20249 && !register_operand (op0, mode)
20250 && !register_operand (op1, mode))
20252 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
20253 return;
20256 emit_insn (gen_rtx_SET (op0, op1));
20259 /* Split 32-byte AVX unaligned load and store if needed. */
20261 static void
20262 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
20264 rtx m;
20265 rtx (*extract) (rtx, rtx, rtx);
20266 machine_mode mode;
20268 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
20269 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
20271 emit_insn (gen_rtx_SET (op0, op1));
20272 return;
20275 rtx orig_op0 = NULL_RTX;
20276 mode = GET_MODE (op0);
20277 switch (GET_MODE_CLASS (mode))
20279 case MODE_VECTOR_INT:
20280 case MODE_INT:
20281 if (mode != V32QImode)
20283 if (!MEM_P (op0))
20285 orig_op0 = op0;
20286 op0 = gen_reg_rtx (V32QImode);
20288 else
20289 op0 = gen_lowpart (V32QImode, op0);
20290 op1 = gen_lowpart (V32QImode, op1);
20291 mode = V32QImode;
20293 break;
20294 case MODE_VECTOR_FLOAT:
20295 break;
20296 default:
20297 gcc_unreachable ();
20300 switch (mode)
20302 default:
20303 gcc_unreachable ();
20304 case E_V32QImode:
20305 extract = gen_avx_vextractf128v32qi;
20306 mode = V16QImode;
20307 break;
20308 case E_V8SFmode:
20309 extract = gen_avx_vextractf128v8sf;
20310 mode = V4SFmode;
20311 break;
20312 case E_V4DFmode:
20313 extract = gen_avx_vextractf128v4df;
20314 mode = V2DFmode;
20315 break;
20318 if (MEM_P (op1))
20320 rtx r = gen_reg_rtx (mode);
20321 m = adjust_address (op1, mode, 0);
20322 emit_move_insn (r, m);
20323 m = adjust_address (op1, mode, 16);
20324 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
20325 emit_move_insn (op0, r);
20327 else if (MEM_P (op0))
20329 m = adjust_address (op0, mode, 0);
20330 emit_insn (extract (m, op1, const0_rtx));
20331 m = adjust_address (op0, mode, 16);
20332 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
20334 else
20335 gcc_unreachable ();
20337 if (orig_op0)
20338 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
20341 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
20342 straight to ix86_expand_vector_move. */
20343 /* Code generation for scalar reg-reg moves of single and double precision data:
20344 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
20345 movaps reg, reg
20346 else
20347 movss reg, reg
20348 if (x86_sse_partial_reg_dependency == true)
20349 movapd reg, reg
20350 else
20351 movsd reg, reg
20353 Code generation for scalar loads of double precision data:
20354 if (x86_sse_split_regs == true)
20355 movlpd mem, reg (gas syntax)
20356 else
20357 movsd mem, reg
20359 Code generation for unaligned packed loads of single precision data
20360 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
20361 if (x86_sse_unaligned_move_optimal)
20362 movups mem, reg
20364 if (x86_sse_partial_reg_dependency == true)
20366 xorps reg, reg
20367 movlps mem, reg
20368 movhps mem+8, reg
20370 else
20372 movlps mem, reg
20373 movhps mem+8, reg
20376 Code generation for unaligned packed loads of double precision data
20377 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
20378 if (x86_sse_unaligned_move_optimal)
20379 movupd mem, reg
20381 if (x86_sse_split_regs == true)
20383 movlpd mem, reg
20384 movhpd mem+8, reg
20386 else
20388 movsd mem, reg
20389 movhpd mem+8, reg
20393 void
20394 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
20396 rtx op0, op1, m;
20398 op0 = operands[0];
20399 op1 = operands[1];
20401 /* Use unaligned load/store for AVX512 or when optimizing for size. */
20402 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
20404 emit_insn (gen_rtx_SET (op0, op1));
20405 return;
20408 if (TARGET_AVX)
20410 if (GET_MODE_SIZE (mode) == 32)
20411 ix86_avx256_split_vector_move_misalign (op0, op1);
20412 else
20413 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
20414 emit_insn (gen_rtx_SET (op0, op1));
20415 return;
20418 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
20419 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
20421 emit_insn (gen_rtx_SET (op0, op1));
20422 return;
20425 /* ??? If we have typed data, then it would appear that using
20426 movdqu is the only way to get unaligned data loaded with
20427 integer type. */
20428 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20430 emit_insn (gen_rtx_SET (op0, op1));
20431 return;
20434 if (MEM_P (op1))
20436 if (TARGET_SSE2 && mode == V2DFmode)
20438 rtx zero;
20440 /* When SSE registers are split into halves, we can avoid
20441 writing to the top half twice. */
20442 if (TARGET_SSE_SPLIT_REGS)
20444 emit_clobber (op0);
20445 zero = op0;
20447 else
20449 /* ??? Not sure about the best option for the Intel chips.
20450 The following would seem to satisfy; the register is
20451 entirely cleared, breaking the dependency chain. We
20452 then store to the upper half, with a dependency depth
20453 of one. A rumor has it that Intel recommends two movsd
20454 followed by an unpacklpd, but this is unconfirmed. And
20455 given that the dependency depth of the unpacklpd would
20456 still be one, I'm not sure why this would be better. */
20457 zero = CONST0_RTX (V2DFmode);
20460 m = adjust_address (op1, DFmode, 0);
20461 emit_insn (gen_sse2_loadlpd (op0, zero, m));
20462 m = adjust_address (op1, DFmode, 8);
20463 emit_insn (gen_sse2_loadhpd (op0, op0, m));
20465 else
20467 rtx t;
20469 if (mode != V4SFmode)
20470 t = gen_reg_rtx (V4SFmode);
20471 else
20472 t = op0;
20474 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
20475 emit_move_insn (t, CONST0_RTX (V4SFmode));
20476 else
20477 emit_clobber (t);
20479 m = adjust_address (op1, V2SFmode, 0);
20480 emit_insn (gen_sse_loadlps (t, t, m));
20481 m = adjust_address (op1, V2SFmode, 8);
20482 emit_insn (gen_sse_loadhps (t, t, m));
20483 if (mode != V4SFmode)
20484 emit_move_insn (op0, gen_lowpart (mode, t));
20487 else if (MEM_P (op0))
20489 if (TARGET_SSE2 && mode == V2DFmode)
20491 m = adjust_address (op0, DFmode, 0);
20492 emit_insn (gen_sse2_storelpd (m, op1));
20493 m = adjust_address (op0, DFmode, 8);
20494 emit_insn (gen_sse2_storehpd (m, op1));
20496 else
20498 if (mode != V4SFmode)
20499 op1 = gen_lowpart (V4SFmode, op1);
20501 m = adjust_address (op0, V2SFmode, 0);
20502 emit_insn (gen_sse_storelps (m, op1));
20503 m = adjust_address (op0, V2SFmode, 8);
20504 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
20507 else
20508 gcc_unreachable ();
20511 /* Helper function of ix86_fixup_binary_operands to canonicalize
20512 operand order. Returns true if the operands should be swapped. */
20514 static bool
20515 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
20516 rtx operands[])
20518 rtx dst = operands[0];
20519 rtx src1 = operands[1];
20520 rtx src2 = operands[2];
20522 /* If the operation is not commutative, we can't do anything. */
20523 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
20524 && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
20525 return false;
20527 /* Highest priority is that src1 should match dst. */
20528 if (rtx_equal_p (dst, src1))
20529 return false;
20530 if (rtx_equal_p (dst, src2))
20531 return true;
20533 /* Next highest priority is that immediate constants come second. */
20534 if (immediate_operand (src2, mode))
20535 return false;
20536 if (immediate_operand (src1, mode))
20537 return true;
20539 /* Lowest priority is that memory references should come second. */
20540 if (MEM_P (src2))
20541 return false;
20542 if (MEM_P (src1))
20543 return true;
20545 return false;
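/* Worked example (editor's note): for a commutative PLUS where
   operands[0] and operands[2] are the same register and operands[1] is a
   constant, both rules above ask for a swap, so src1 ends up matching the
   destination and the two-operand "add" form needs no extra copy.  */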
20549 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
20550 destination to use for the operation. If different from the true
20551 destination in operands[0], a copy operation will be required. */
20553 rtx
20554 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
20555 rtx operands[])
20557 rtx dst = operands[0];
20558 rtx src1 = operands[1];
20559 rtx src2 = operands[2];
20561 /* Canonicalize operand order. */
20562 if (ix86_swap_binary_operands_p (code, mode, operands))
20564 /* It is invalid to swap operands of different modes. */
20565 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
20567 std::swap (src1, src2);
20570 /* Both source operands cannot be in memory. */
20571 if (MEM_P (src1) && MEM_P (src2))
20573 /* Optimization: Only read from memory once. */
20574 if (rtx_equal_p (src1, src2))
20576 src2 = force_reg (mode, src2);
20577 src1 = src2;
20579 else if (rtx_equal_p (dst, src1))
20580 src2 = force_reg (mode, src2);
20581 else
20582 src1 = force_reg (mode, src1);
20585 /* If the destination is memory, and we do not have matching source
20586 operands, do things in registers. */
20587 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20588 dst = gen_reg_rtx (mode);
20590 /* Source 1 cannot be a constant. */
20591 if (CONSTANT_P (src1))
20592 src1 = force_reg (mode, src1);
20594 /* Source 1 cannot be a non-matching memory. */
20595 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20596 src1 = force_reg (mode, src1);
20598 /* Improve address combine. */
20599 if (code == PLUS
20600 && GET_MODE_CLASS (mode) == MODE_INT
20601 && MEM_P (src2))
20602 src2 = force_reg (mode, src2);
20604 operands[1] = src1;
20605 operands[2] = src2;
20606 return dst;
20609 /* Similarly, but assume that the destination has already been
20610 set up properly. */
20612 void
20613 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
20614 machine_mode mode, rtx operands[])
20616 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
20617 gcc_assert (dst == operands[0]);
20620 /* Attempt to expand a binary operator. Make the expansion closer to the
20621 actual machine, than just general_operand, which will allow 3 separate
20622 memory references (one output, two input) in a single insn. */
20624 void
20625 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
20626 rtx operands[])
20628 rtx src1, src2, dst, op, clob;
20630 dst = ix86_fixup_binary_operands (code, mode, operands);
20631 src1 = operands[1];
20632 src2 = operands[2];
20634 /* Emit the instruction. */
20636 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
20638 if (reload_completed
20639 && code == PLUS
20640 && !rtx_equal_p (dst, src1))
20642 /* This is going to be an LEA; avoid splitting it later. */
20643 emit_insn (op);
20645 else
20647 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20648 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20651 /* Fix up the destination if needed. */
20652 if (dst != operands[0])
20653 emit_move_insn (operands[0], dst);
20656 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
20657 the given OPERANDS. */
20659 void
20660 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
20661 rtx operands[])
20663 rtx op1 = NULL_RTX, op2 = NULL_RTX;
20664 if (SUBREG_P (operands[1]))
20666 op1 = operands[1];
20667 op2 = operands[2];
20669 else if (SUBREG_P (operands[2]))
20671 op1 = operands[2];
20672 op2 = operands[1];
20674 /* Optimize (__m128i) d | (__m128i) e and similar code
20675 when d and e are float vectors into float vector logical
20676 insn. In C/C++ without using intrinsics there is no other way
20677 to express vector logical operation on float vectors than
20678 to cast them temporarily to integer vectors. */
20679 if (op1
20680 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
20681 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
20682 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
20683 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
20684 && SUBREG_BYTE (op1) == 0
20685 && (GET_CODE (op2) == CONST_VECTOR
20686 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
20687 && SUBREG_BYTE (op2) == 0))
20688 && can_create_pseudo_p ())
20690 rtx dst;
20691 switch (GET_MODE (SUBREG_REG (op1)))
20693 case E_V4SFmode:
20694 case E_V8SFmode:
20695 case E_V16SFmode:
20696 case E_V2DFmode:
20697 case E_V4DFmode:
20698 case E_V8DFmode:
20699 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
20700 if (GET_CODE (op2) == CONST_VECTOR)
20702 op2 = gen_lowpart (GET_MODE (dst), op2);
20703 op2 = force_reg (GET_MODE (dst), op2);
20705 else
20707 op1 = operands[1];
20708 op2 = SUBREG_REG (operands[2]);
20709 if (!vector_operand (op2, GET_MODE (dst)))
20710 op2 = force_reg (GET_MODE (dst), op2);
20712 op1 = SUBREG_REG (op1);
20713 if (!vector_operand (op1, GET_MODE (dst)))
20714 op1 = force_reg (GET_MODE (dst), op1);
20715 emit_insn (gen_rtx_SET (dst,
20716 gen_rtx_fmt_ee (code, GET_MODE (dst),
20717 op1, op2)));
20718 emit_move_insn (operands[0], gen_lowpart (mode, dst));
20719 return;
20720 default:
20721 break;
20724 if (!vector_operand (operands[1], mode))
20725 operands[1] = force_reg (mode, operands[1]);
20726 if (!vector_operand (operands[2], mode))
20727 operands[2] = force_reg (mode, operands[2]);
20728 ix86_fixup_binary_operands_no_copy (code, mode, operands);
20729 emit_insn (gen_rtx_SET (operands[0],
20730 gen_rtx_fmt_ee (code, mode, operands[1],
20731 operands[2])));
20734 /* Return TRUE or FALSE depending on whether the binary operator meets the
20735 appropriate constraints. */
20737 bool
20738 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
20739 rtx operands[3])
20741 rtx dst = operands[0];
20742 rtx src1 = operands[1];
20743 rtx src2 = operands[2];
20745 /* Both source operands cannot be in memory. */
20746 if (MEM_P (src1) && MEM_P (src2))
20747 return false;
20749 /* Canonicalize operand order for commutative operators. */
20750 if (ix86_swap_binary_operands_p (code, mode, operands))
20751 std::swap (src1, src2);
20753 /* If the destination is memory, we must have a matching source operand. */
20754 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20755 return false;
20757 /* Source 1 cannot be a constant. */
20758 if (CONSTANT_P (src1))
20759 return false;
20761 /* Source 1 cannot be a non-matching memory. */
20762 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20763 /* Support "andhi/andsi/anddi" as a zero-extending move. */
20764 return (code == AND
20765 && (mode == HImode
20766 || mode == SImode
20767 || (TARGET_64BIT && mode == DImode))
20768 && satisfies_constraint_L (src2));
20770 return true;
20773 /* Attempt to expand a unary operator. Make the expansion closer to the
20774 actual machine, than just general_operand, which will allow 2 separate
20775 memory references (one output, one input) in a single insn. */
20777 void
20778 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20779 rtx operands[])
20781 bool matching_memory = false;
20782 rtx src, dst, op, clob;
20784 dst = operands[0];
20785 src = operands[1];
20787 /* If the destination is memory, and we do not have matching source
20788 operands, do things in registers. */
20789 if (MEM_P (dst))
20791 if (rtx_equal_p (dst, src))
20792 matching_memory = true;
20793 else
20794 dst = gen_reg_rtx (mode);
20797 /* When source operand is memory, destination must match. */
20798 if (MEM_P (src) && !matching_memory)
20799 src = force_reg (mode, src);
20801 /* Emit the instruction. */
20803 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20805 if (code == NOT)
20806 emit_insn (op);
20807 else
20809 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20810 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20813 /* Fix up the destination if needed. */
20814 if (dst != operands[0])
20815 emit_move_insn (operands[0], dst);
20818 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
20819 divisor are within the range [0-255]. */
20821 void
20822 ix86_split_idivmod (machine_mode mode, rtx operands[],
20823 bool signed_p)
20825 rtx_code_label *end_label, *qimode_label;
20826 rtx div, mod;
20827 rtx_insn *insn;
20828 rtx scratch, tmp0, tmp1, tmp2;
20829 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20830 rtx (*gen_zero_extend) (rtx, rtx);
20831 rtx (*gen_test_ccno_1) (rtx, rtx);
20833 switch (mode)
20835 case E_SImode:
20836 if (GET_MODE (operands[0]) == SImode)
20838 if (GET_MODE (operands[1]) == SImode)
20839 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20840 else
20841 gen_divmod4_1
20842 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20843 gen_zero_extend = gen_zero_extendqisi2;
20845 else
20847 gen_divmod4_1
20848 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20849 gen_zero_extend = gen_zero_extendqidi2;
20851 gen_test_ccno_1 = gen_testsi_ccno_1;
20852 break;
20853 case E_DImode:
20854 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20855 gen_test_ccno_1 = gen_testdi_ccno_1;
20856 gen_zero_extend = gen_zero_extendqidi2;
20857 break;
20858 default:
20859 gcc_unreachable ();
20862 end_label = gen_label_rtx ();
20863 qimode_label = gen_label_rtx ();
20865 scratch = gen_reg_rtx (mode);
20867 /* Use 8bit unsigned divmod if dividend and divisor are within
20868 the range [0-255]. */
20869 emit_move_insn (scratch, operands[2]);
20870 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20871 scratch, 1, OPTAB_DIRECT);
20872 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20873 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20874 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20875 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20876 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20877 pc_rtx);
20878 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20879 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20880 JUMP_LABEL (insn) = qimode_label;
20882 /* Generate original signed/unsigned divmod. */
20883 div = gen_divmod4_1 (operands[0], operands[1],
20884 operands[2], operands[3]);
20885 emit_insn (div);
20887 /* Branch to the end. */
20888 emit_jump_insn (gen_jump (end_label));
20889 emit_barrier ();
20891 /* Generate 8bit unsigned divide. */
20892 emit_label (qimode_label);
20893 /* Don't use operands[0] for result of 8bit divide since not all
20894 registers support QImode ZERO_EXTRACT. */
20895 tmp0 = lowpart_subreg (HImode, scratch, mode);
20896 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20897 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20898 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20900 if (signed_p)
20902 div = gen_rtx_DIV (mode, operands[2], operands[3]);
20903 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20905 else
20907 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20908 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20910 if (mode == SImode)
20912 if (GET_MODE (operands[0]) != SImode)
20913 div = gen_rtx_ZERO_EXTEND (DImode, div);
20914 if (GET_MODE (operands[1]) != SImode)
20915 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20918 /* Extract remainder from AH. */
20919 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20920 tmp0, GEN_INT (8), GEN_INT (8));
20921 if (REG_P (operands[1]))
20922 insn = emit_move_insn (operands[1], tmp1);
20923 else
20925 /* Need a new scratch register since the old one has result
20926 of 8bit divide. */
20927 scratch = gen_reg_rtx (GET_MODE (operands[1]));
20928 emit_move_insn (scratch, tmp1);
20929 insn = emit_move_insn (operands[1], scratch);
20931 set_unique_reg_note (insn, REG_EQUAL, mod);
20933 /* Zero extend quotient from AL. */
20934 tmp1 = gen_lowpart (QImode, tmp0);
20935 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20936 set_unique_reg_note (insn, REG_EQUAL, div);
20938 emit_label (end_label);
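/* Editor's sketch of the fast path created above (32-bit unsigned case;
   register names are illustrative only): the scratch register receives
   dividend | divisor and is checked with

       testl  $-0x100, %scratch
       je     .Lqimode

   so that when neither value has a bit set above the low 8 bits, a single
   "divb" is used instead of the full "divl", with the quotient taken from
   AL and the remainder from AH.  */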
20941 #define LEA_MAX_STALL (3)
20942 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20944 /* Increase given DISTANCE in half-cycles according to
20945 dependencies between PREV and NEXT instructions.
20946 Add 1 half-cycle if there is no dependency and
20947 go to the next cycle if there is some dependency. */
20949 static unsigned int
20950 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20952 df_ref def, use;
20954 if (!prev || !next)
20955 return distance + (distance & 1) + 2;
20957 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20958 return distance + 1;
20960 FOR_EACH_INSN_USE (use, next)
20961 FOR_EACH_INSN_DEF (def, prev)
20962 if (!DF_REF_IS_ARTIFICIAL (def)
20963 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20964 return distance + (distance & 1) + 2;
20966 return distance + 1;
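/* Worked example (editor's note): with distance == 3 (half-cycles) and a
   true dependency between PREV and NEXT, the result is 3 + (3 & 1) + 2 = 6,
   i.e. the use is pushed to the start of the next full cycle; without a
   dependency it simply becomes 4.  */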
20969 /* Function checks if instruction INSN defines register number
20970 REGNO1 or REGNO2. */
20972 static bool
20973 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20974 rtx_insn *insn)
20976 df_ref def;
20978 FOR_EACH_INSN_DEF (def, insn)
20979 if (DF_REF_REG_DEF_P (def)
20980 && !DF_REF_IS_ARTIFICIAL (def)
20981 && (regno1 == DF_REF_REGNO (def)
20982 || regno2 == DF_REF_REGNO (def)))
20983 return true;
20985 return false;
20988 /* Function checks if instruction INSN uses register number
20989 REGNO as a part of address expression. */
20991 static bool
20992 insn_uses_reg_mem (unsigned int regno, rtx insn)
20994 df_ref use;
20996 FOR_EACH_INSN_USE (use, insn)
20997 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20998 return true;
21000 return false;
21003 /* Search backward for non-agu definition of register number REGNO1
21004 or register number REGNO2 in basic block starting from instruction
21005 START up to head of basic block or instruction INSN.
21007 Function puts true value into *FOUND var if definition was found
21008 and false otherwise.
21010 Distance in half-cycles between START and found instruction or head
21011 of BB is added to DISTANCE and returned. */
21013 static int
21014 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
21015 rtx_insn *insn, int distance,
21016 rtx_insn *start, bool *found)
21018 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
21019 rtx_insn *prev = start;
21020 rtx_insn *next = NULL;
21022 *found = false;
21024 while (prev
21025 && prev != insn
21026 && distance < LEA_SEARCH_THRESHOLD)
21028 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
21030 distance = increase_distance (prev, next, distance);
21031 if (insn_defines_reg (regno1, regno2, prev))
21033 if (recog_memoized (prev) < 0
21034 || get_attr_type (prev) != TYPE_LEA)
21036 *found = true;
21037 return distance;
21041 next = prev;
21043 if (prev == BB_HEAD (bb))
21044 break;
21046 prev = PREV_INSN (prev);
21049 return distance;
21052 /* Search backward for non-agu definition of register number REGNO1
21053 or register number REGNO2 in INSN's basic block until
21054 1. Pass LEA_SEARCH_THRESHOLD instructions, or
21055 2. Reach neighbor BBs boundary, or
21056 3. Reach agu definition.
21057 Returns the distance between the non-agu definition point and INSN.
21058 If no definition point, returns -1. */
21060 static int
21061 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
21062 rtx_insn *insn)
21064 basic_block bb = BLOCK_FOR_INSN (insn);
21065 int distance = 0;
21066 bool found = false;
21068 if (insn != BB_HEAD (bb))
21069 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
21070 distance, PREV_INSN (insn),
21071 &found);
21073 if (!found && distance < LEA_SEARCH_THRESHOLD)
21075 edge e;
21076 edge_iterator ei;
21077 bool simple_loop = false;
21079 FOR_EACH_EDGE (e, ei, bb->preds)
21080 if (e->src == bb)
21082 simple_loop = true;
21083 break;
21086 if (simple_loop)
21087 distance = distance_non_agu_define_in_bb (regno1, regno2,
21088 insn, distance,
21089 BB_END (bb), &found);
21090 else
21092 int shortest_dist = -1;
21093 bool found_in_bb = false;
21095 FOR_EACH_EDGE (e, ei, bb->preds)
21097 int bb_dist
21098 = distance_non_agu_define_in_bb (regno1, regno2,
21099 insn, distance,
21100 BB_END (e->src),
21101 &found_in_bb);
21102 if (found_in_bb)
21104 if (shortest_dist < 0)
21105 shortest_dist = bb_dist;
21106 else if (bb_dist > 0)
21107 shortest_dist = MIN (bb_dist, shortest_dist);
21109 found = true;
21113 distance = shortest_dist;
21117 /* get_attr_type may modify recog data. We want to make sure
21118 that recog data is valid for instruction INSN, on which
21119 distance_non_agu_define is called. INSN is unchanged here. */
21120 extract_insn_cached (insn);
21122 if (!found)
21123 return -1;
21125 return distance >> 1;
21128 /* Return the distance in half-cycles between INSN and the next
21129 insn that uses register number REGNO in a memory address, added
21130 to DISTANCE. Return -1 if REGNO is set.
21132 Put true value into *FOUND if register usage was found and
21133 false otherwise.
21134 Put true value into *REDEFINED if register redefinition was
21135 found and false otherwise. */
21137 static int
21138 distance_agu_use_in_bb (unsigned int regno,
21139 rtx_insn *insn, int distance, rtx_insn *start,
21140 bool *found, bool *redefined)
21142 basic_block bb = NULL;
21143 rtx_insn *next = start;
21144 rtx_insn *prev = NULL;
21146 *found = false;
21147 *redefined = false;
21149 if (start != NULL_RTX)
21151 bb = BLOCK_FOR_INSN (start);
21152 if (start != BB_HEAD (bb))
21153 /* If insn and start belong to the same bb, set prev to insn,
21154 so the call to increase_distance will increase the distance
21155 between insns by 1. */
21156 prev = insn;
21159 while (next
21160 && next != insn
21161 && distance < LEA_SEARCH_THRESHOLD)
21163 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
21165 distance = increase_distance(prev, next, distance);
21166 if (insn_uses_reg_mem (regno, next))
21168 /* Return DISTANCE if OP0 is used in memory
21169 address in NEXT. */
21170 *found = true;
21171 return distance;
21174 if (insn_defines_reg (regno, INVALID_REGNUM, next))
21176 /* Return -1 if OP0 is set in NEXT. */
21177 *redefined = true;
21178 return -1;
21181 prev = next;
21184 if (next == BB_END (bb))
21185 break;
21187 next = NEXT_INSN (next);
21190 return distance;
21193 /* Return the distance between INSN and the next insn that uses
21194 register number REGNO0 in a memory address. Return -1 if no
21195 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
21197 static int
21198 distance_agu_use (unsigned int regno0, rtx_insn *insn)
21200 basic_block bb = BLOCK_FOR_INSN (insn);
21201 int distance = 0;
21202 bool found = false;
21203 bool redefined = false;
21205 if (insn != BB_END (bb))
21206 distance = distance_agu_use_in_bb (regno0, insn, distance,
21207 NEXT_INSN (insn),
21208 &found, &redefined);
21210 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
21212 edge e;
21213 edge_iterator ei;
21214 bool simple_loop = false;
21216 FOR_EACH_EDGE (e, ei, bb->succs)
21217 if (e->dest == bb)
21219 simple_loop = true;
21220 break;
21223 if (simple_loop)
21224 distance = distance_agu_use_in_bb (regno0, insn,
21225 distance, BB_HEAD (bb),
21226 &found, &redefined);
21227 else
21229 int shortest_dist = -1;
21230 bool found_in_bb = false;
21231 bool redefined_in_bb = false;
21233 FOR_EACH_EDGE (e, ei, bb->succs)
21235 int bb_dist
21236 = distance_agu_use_in_bb (regno0, insn,
21237 distance, BB_HEAD (e->dest),
21238 &found_in_bb, &redefined_in_bb);
21239 if (found_in_bb)
21241 if (shortest_dist < 0)
21242 shortest_dist = bb_dist;
21243 else if (bb_dist > 0)
21244 shortest_dist = MIN (bb_dist, shortest_dist);
21246 found = true;
21250 distance = shortest_dist;
21254 if (!found || redefined)
21255 return -1;
21257 return distance >> 1;
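/* Note: the distances above are accumulated in half-cycles; the final
   ">> 1" converts them to whole cycles before callers compare them
   against LEA_MAX_STALL.  */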
21260 /* Define this macro to tune LEA priority vs ADD; it takes effect when
21261 there is a dilemma of choosing LEA or ADD.
21262 Negative value: ADD is preferred over LEA.
21263 Zero: Neutral.
21264 Positive value: LEA is preferred over ADD. */
21265 #define IX86_LEA_PRIORITY 0
21267 /* Return true if using the lea INSN has a performance advantage
21268 over a sequence of instructions. The instruction sequence has
21269 SPLIT_COST cycles higher latency than the lea latency. */
21271 static bool
21272 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
21273 unsigned int regno2, int split_cost, bool has_scale)
21275 int dist_define, dist_use;
21277 /* For Silvermont, if a 2-source or 3-source LEA is used for a
21278 non-destructive destination, or because the ability to use
21279 SCALE is wanted, the use of LEA is justified. */
21280 if (TARGET_SILVERMONT || TARGET_INTEL)
21282 if (has_scale)
21283 return true;
21284 if (split_cost < 1)
21285 return false;
21286 if (regno0 == regno1 || regno0 == regno2)
21287 return false;
21288 return true;
21291 dist_define = distance_non_agu_define (regno1, regno2, insn);
21292 dist_use = distance_agu_use (regno0, insn);
21294 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
21296 /* If there is no non-AGU operand definition, no AGU
21297 operand usage and the split cost is 0, then both the lea
21298 and non-lea variants have the same priority. Currently
21299 we prefer lea for 64-bit code and non-lea for 32-bit
21300 code. */
21301 if (dist_use < 0 && split_cost == 0)
21302 return TARGET_64BIT || IX86_LEA_PRIORITY;
21303 else
21304 return true;
21307 /* The longer the definition distance, the more preferable lea is.
21308 Here we adjust it to take into account the splitting cost and
21309 lea priority. */
21310 dist_define += split_cost + IX86_LEA_PRIORITY;
21312 /* If there is no use in a memory address then we just check
21313 that the split cost exceeds the AGU stall. */
21314 if (dist_use < 0)
21315 return dist_define > LEA_MAX_STALL;
21317 /* If this insn has both backward non-agu dependence and forward
21318 agu dependence, the one with the shorter distance takes effect. */
21319 return dist_define >= dist_use;
21322 /* Return true if it is legal to clobber flags by INSN and
21323 false otherwise. */
21325 static bool
21326 ix86_ok_to_clobber_flags (rtx_insn *insn)
21328 basic_block bb = BLOCK_FOR_INSN (insn);
21329 df_ref use;
21330 bitmap live;
21332 while (insn)
21334 if (NONDEBUG_INSN_P (insn))
21336 FOR_EACH_INSN_USE (use, insn)
21337 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
21338 return false;
21340 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
21341 return true;
21344 if (insn == BB_END (bb))
21345 break;
21347 insn = NEXT_INSN (insn);
21350 live = df_get_live_out(bb);
21351 return !REGNO_REG_SET_P (live, FLAGS_REG);
21354 /* Return true if we need to split op0 = op1 + op2 into a sequence of
21355 move and add to avoid AGU stalls. */
21357 bool
21358 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
21360 unsigned int regno0, regno1, regno2;
21362 /* Check if we need to optimize. */
21363 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21364 return false;
21366 /* Check it is correct to split here. */
21367 if (!ix86_ok_to_clobber_flags(insn))
21368 return false;
21370 regno0 = true_regnum (operands[0]);
21371 regno1 = true_regnum (operands[1]);
21372 regno2 = true_regnum (operands[2]);
21374 /* We need to split only adds with a non-destructive
21375 destination operand. */
21376 if (regno0 == regno1 || regno0 == regno2)
21377 return false;
21378 else
21379 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
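/* Illustrative example: when the function above returns true for an add
   that would otherwise be emitted as "lea (%rdi,%rsi), %rax", the insn is
   split into roughly "mov %rdi, %rax" followed by "add %rsi, %rax" by the
   corresponding splitter.  */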
21382 /* Return true if we should emit lea instruction instead of mov
21383 instruction. */
21385 bool
21386 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
21388 unsigned int regno0, regno1;
21390 /* Check if we need to optimize. */
21391 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21392 return false;
21394 /* Use lea for reg to reg moves only. */
21395 if (!REG_P (operands[0]) || !REG_P (operands[1]))
21396 return false;
21398 regno0 = true_regnum (operands[0]);
21399 regno1 = true_regnum (operands[1]);
21401 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
21404 /* Return true if we need to split lea into a sequence of
21405 instructions to avoid AGU stalls. */
21407 bool
21408 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
21410 unsigned int regno0, regno1, regno2;
21411 int split_cost;
21412 struct ix86_address parts;
21413 int ok;
21415 /* Check if we need to optimize. */
21416 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
21417 return false;
21419 /* The "at least two components" test below might not catch simple
21420 move or zero extension insns if parts.base is non-NULL and parts.disp
21421 is const0_rtx as the only components in the address, e.g. if the
21422 register is %rbp or %r13. As this test is much cheaper and moves or
21423 zero extensions are the common case, do this check first. */
21424 if (REG_P (operands[1])
21425 || (SImode_address_operand (operands[1], VOIDmode)
21426 && REG_P (XEXP (operands[1], 0))))
21427 return false;
21429 /* Check if it is OK to split here. */
21430 if (!ix86_ok_to_clobber_flags (insn))
21431 return false;
21433 ok = ix86_decompose_address (operands[1], &parts);
21434 gcc_assert (ok);
21436 /* There should be at least two components in the address. */
21437 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
21438 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
21439 return false;
21441 /* We should not split into add if a non-legitimate PIC
21442 operand is used as the displacement. */
21443 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
21444 return false;
21446 regno0 = true_regnum (operands[0]);
21447 regno1 = INVALID_REGNUM;
21448 regno2 = INVALID_REGNUM;
21450 if (parts.base)
21451 regno1 = true_regnum (parts.base);
21452 if (parts.index)
21453 regno2 = true_regnum (parts.index);
21455 split_cost = 0;
21457 /* Compute how many cycles we will add to the execution time
21458 if we split the lea into a sequence of instructions. */
21459 if (parts.base || parts.index)
21461 /* Have to use a mov instruction if the non-destructive
21462 destination form is used. */
21463 if (regno1 != regno0 && regno2 != regno0)
21464 split_cost += 1;
21466 /* Have to add index to base if both exist. */
21467 if (parts.base && parts.index)
21468 split_cost += 1;
21470 /* Have to use shift and adds if scale is 2 or greater. */
21471 if (parts.scale > 1)
21473 if (regno0 != regno1)
21474 split_cost += 1;
21475 else if (regno2 == regno0)
21476 split_cost += 4;
21477 else
21478 split_cost += parts.scale;
21481 /* Have to use an add instruction with an immediate if
21482 disp is nonzero. */
21483 if (parts.disp && parts.disp != const0_rtx)
21484 split_cost += 1;
21486 /* Subtract the price of lea. */
21487 split_cost -= 1;
21490 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
21491 parts.scale > 1);
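/* Illustrative cost accounting for the code above: for
   "lea 8(%rbx,%rcx,4), %rax" (base, index, scale 4, nonzero disp,
   destination distinct from both sources) split_cost is
   1 (mov) + 1 (add of base and index) + 1 (shift for the scale)
   + 1 (add of disp) - 1 (the lea itself) = 3, i.e. the split sequence is
   estimated to be 3 cycles slower than the lea.  */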
21494 /* Emit x86 binary operand CODE in mode MODE, where the first operand
21495 matches destination. RTX includes clobber of FLAGS_REG. */
21497 static void
21498 ix86_emit_binop (enum rtx_code code, machine_mode mode,
21499 rtx dst, rtx src)
21501 rtx op, clob;
21503 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
21504 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21506 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
21509 /* Return true if regno1 def is nearest to the insn. */
21511 static bool
21512 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
21514 rtx_insn *prev = insn;
21515 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
21517 if (insn == start)
21518 return false;
21519 while (prev && prev != start)
21521 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
21523 prev = PREV_INSN (prev);
21524 continue;
21526 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
21527 return true;
21528 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
21529 return false;
21530 prev = PREV_INSN (prev);
21533 /* None of the regs is defined in the bb. */
21534 return false;
21537 /* Split lea instructions into a sequence of instructions
21538 which are executed on the ALU to avoid AGU stalls.
21539 It is assumed that it is allowed to clobber the flags register
21540 at the lea position. */
21542 void
21543 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
21545 unsigned int regno0, regno1, regno2;
21546 struct ix86_address parts;
21547 rtx target, tmp;
21548 int ok, adds;
21550 ok = ix86_decompose_address (operands[1], &parts);
21551 gcc_assert (ok);
21553 target = gen_lowpart (mode, operands[0]);
21555 regno0 = true_regnum (target);
21556 regno1 = INVALID_REGNUM;
21557 regno2 = INVALID_REGNUM;
21559 if (parts.base)
21561 parts.base = gen_lowpart (mode, parts.base);
21562 regno1 = true_regnum (parts.base);
21565 if (parts.index)
21567 parts.index = gen_lowpart (mode, parts.index);
21568 regno2 = true_regnum (parts.index);
21571 if (parts.disp)
21572 parts.disp = gen_lowpart (mode, parts.disp);
21574 if (parts.scale > 1)
21576 /* Case r1 = r1 + ... */
21577 if (regno1 == regno0)
21579 /* If we have the case r1 = r1 + C * r2 then we
21580 would have to use multiplication, which is very
21581 expensive. Assume the cost model is wrong if we
21582 get such a case here. */
21583 gcc_assert (regno2 != regno0);
21585 for (adds = parts.scale; adds > 0; adds--)
21586 ix86_emit_binop (PLUS, mode, target, parts.index);
21588 else
21590 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
21591 if (regno0 != regno2)
21592 emit_insn (gen_rtx_SET (target, parts.index));
21594 /* Use shift for scaling. */
21595 ix86_emit_binop (ASHIFT, mode, target,
21596 GEN_INT (exact_log2 (parts.scale)));
21598 if (parts.base)
21599 ix86_emit_binop (PLUS, mode, target, parts.base);
21601 if (parts.disp && parts.disp != const0_rtx)
21602 ix86_emit_binop (PLUS, mode, target, parts.disp);
21605 else if (!parts.base && !parts.index)
21607 gcc_assert(parts.disp);
21608 emit_insn (gen_rtx_SET (target, parts.disp));
21610 else
21612 if (!parts.base)
21614 if (regno0 != regno2)
21615 emit_insn (gen_rtx_SET (target, parts.index));
21617 else if (!parts.index)
21619 if (regno0 != regno1)
21620 emit_insn (gen_rtx_SET (target, parts.base));
21622 else
21624 if (regno0 == regno1)
21625 tmp = parts.index;
21626 else if (regno0 == regno2)
21627 tmp = parts.base;
21628 else
21630 rtx tmp1;
21632 /* Find better operand for SET instruction, depending
21633 on which definition is farther from the insn. */
21634 if (find_nearest_reg_def (insn, regno1, regno2))
21635 tmp = parts.index, tmp1 = parts.base;
21636 else
21637 tmp = parts.base, tmp1 = parts.index;
21639 emit_insn (gen_rtx_SET (target, tmp));
21641 if (parts.disp && parts.disp != const0_rtx)
21642 ix86_emit_binop (PLUS, mode, target, parts.disp);
21644 ix86_emit_binop (PLUS, mode, target, tmp1);
21645 return;
21648 ix86_emit_binop (PLUS, mode, target, tmp);
21651 if (parts.disp && parts.disp != const0_rtx)
21652 ix86_emit_binop (PLUS, mode, target, parts.disp);
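/* Illustrative example of the split performed above:
   "lea 4(%rbx,%rcx,4), %rax" (destination distinct from base and index)
   becomes roughly
       mov %rcx, %rax
       shl $2, %rax
       add %rbx, %rax
       add $4, %rax
   i.e. move the index, shift by log2 of the scale, then add the base and
   the displacement.  */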
21656 /* Return true if it is ok to optimize an ADD operation to LEA
21657 operation to avoid flag register consumption. For most processors,
21658 ADD is faster than LEA. For processors like BONNELL, if the
21659 destination register of LEA holds an actual address which will be
21660 used soon, LEA is better, otherwise ADD is better. */
21662 bool
21663 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
21665 unsigned int regno0 = true_regnum (operands[0]);
21666 unsigned int regno1 = true_regnum (operands[1]);
21667 unsigned int regno2 = true_regnum (operands[2]);
21669 /* If a = b + c, (a!=b && a!=c), must use lea form. */
21670 if (regno0 != regno1 && regno0 != regno2)
21671 return true;
21673 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21674 return false;
21676 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
21679 /* Return true if destination reg of SET_BODY is shift count of
21680 USE_BODY. */
21682 static bool
21683 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
21685 rtx set_dest;
21686 rtx shift_rtx;
21687 int i;
21689 /* Retrieve destination of SET_BODY. */
21690 switch (GET_CODE (set_body))
21692 case SET:
21693 set_dest = SET_DEST (set_body);
21694 if (!set_dest || !REG_P (set_dest))
21695 return false;
21696 break;
21697 case PARALLEL:
21698 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
21699 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
21700 use_body))
21701 return true;
21702 /* FALLTHROUGH */
21703 default:
21704 return false;
21707 /* Retrieve shift count of USE_BODY. */
21708 switch (GET_CODE (use_body))
21710 case SET:
21711 shift_rtx = XEXP (use_body, 1);
21712 break;
21713 case PARALLEL:
21714 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
21715 if (ix86_dep_by_shift_count_body (set_body,
21716 XVECEXP (use_body, 0, i)))
21717 return true;
21718 /* FALLTHROUGH */
21719 default:
21720 return false;
21723 if (shift_rtx
21724 && (GET_CODE (shift_rtx) == ASHIFT
21725 || GET_CODE (shift_rtx) == LSHIFTRT
21726 || GET_CODE (shift_rtx) == ASHIFTRT
21727 || GET_CODE (shift_rtx) == ROTATE
21728 || GET_CODE (shift_rtx) == ROTATERT))
21730 rtx shift_count = XEXP (shift_rtx, 1);
21732 /* Return true if shift count is dest of SET_BODY. */
21733 if (REG_P (shift_count))
21735 /* Add check since it can be invoked before register
21736 allocation in pre-reload schedule. */
21737 if (reload_completed
21738 && true_regnum (set_dest) == true_regnum (shift_count))
21739 return true;
21740 else if (REGNO(set_dest) == REGNO(shift_count))
21741 return true;
21745 return false;
21748 /* Return true if destination reg of SET_INSN is shift count of
21749 USE_INSN. */
21751 bool
21752 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21754 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21755 PATTERN (use_insn));
21758 /* Return TRUE or FALSE depending on whether the unary operator meets the
21759 appropriate constraints. */
21761 bool
21762 ix86_unary_operator_ok (enum rtx_code,
21763 machine_mode,
21764 rtx operands[2])
21766 /* If one of operands is memory, source and destination must match. */
21767 if ((MEM_P (operands[0])
21768 || MEM_P (operands[1]))
21769 && ! rtx_equal_p (operands[0], operands[1]))
21770 return false;
21771 return true;
21774 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21775 are ok, keeping in mind the possible movddup alternative. */
21777 bool
21778 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21780 if (MEM_P (operands[0]))
21781 return rtx_equal_p (operands[0], operands[1 + high]);
21782 if (MEM_P (operands[1]) && MEM_P (operands[2]))
21783 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21784 return true;
21787 /* Post-reload splitter for converting an SF or DFmode value in an
21788 SSE register into an unsigned SImode. */
21790 void
21791 ix86_split_convert_uns_si_sse (rtx operands[])
21793 machine_mode vecmode;
21794 rtx value, large, zero_or_two31, input, two31, x;
21796 large = operands[1];
21797 zero_or_two31 = operands[2];
21798 input = operands[3];
21799 two31 = operands[4];
21800 vecmode = GET_MODE (large);
21801 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21803 /* Load up the value into the low element. We must ensure that the other
21804 elements are valid floats -- zero is the easiest such value. */
21805 if (MEM_P (input))
21807 if (vecmode == V4SFmode)
21808 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21809 else
21810 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21812 else
21814 input = gen_rtx_REG (vecmode, REGNO (input));
21815 emit_move_insn (value, CONST0_RTX (vecmode));
21816 if (vecmode == V4SFmode)
21817 emit_insn (gen_sse_movss (value, value, input));
21818 else
21819 emit_insn (gen_sse2_movsd (value, value, input));
21822 emit_move_insn (large, two31);
21823 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21825 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21826 emit_insn (gen_rtx_SET (large, x));
21828 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21829 emit_insn (gen_rtx_SET (zero_or_two31, x));
21831 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21832 emit_insn (gen_rtx_SET (value, x));
21834 large = gen_rtx_REG (V4SImode, REGNO (large));
21835 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21837 x = gen_rtx_REG (V4SImode, REGNO (value));
21838 if (vecmode == V4SFmode)
21839 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21840 else
21841 emit_insn (gen_sse2_cvttpd2dq (x, value));
21842 value = x;
21844 emit_insn (gen_xorv4si3 (value, value, large));
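/* The sequence above implements unsigned FP->SI conversion with the usual
   2**31 bias trick: lanes whose value is >= 2**31 have 2**31 subtracted
   before the signed truncating conversion, and the matching sign bit
   (0x80000000) is xor-ed back into the integer result afterwards; smaller
   values are converted directly.  */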
21847 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21848 Expects the 64-bit DImode to be supplied in a pair of integral
21849 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21850 -mfpmath=sse, !optimize_size only. */
21852 void
21853 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21855 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21856 rtx int_xmm, fp_xmm;
21857 rtx biases, exponents;
21858 rtx x;
21860 int_xmm = gen_reg_rtx (V4SImode);
21861 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21862 emit_insn (gen_movdi_to_sse (int_xmm, input));
21863 else if (TARGET_SSE_SPLIT_REGS)
21865 emit_clobber (int_xmm);
21866 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21868 else
21870 x = gen_reg_rtx (V2DImode);
21871 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21872 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21875 x = gen_rtx_CONST_VECTOR (V4SImode,
21876 gen_rtvec (4, GEN_INT (0x43300000UL),
21877 GEN_INT (0x45300000UL),
21878 const0_rtx, const0_rtx));
21879 exponents = validize_mem (force_const_mem (V4SImode, x));
21881 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21882 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21884 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21885 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21886 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21887 (0x1.0p84 + double(fp_value_hi_xmm)).
21888 Note these exponents differ by 32. */
21890 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21892 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21893 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21894 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21895 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21896 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21897 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21898 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21899 biases = validize_mem (force_const_mem (V2DFmode, biases));
21900 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21902 /* Add the upper and lower DFmode values together. */
21903 if (TARGET_SSE3)
21904 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21905 else
21907 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21908 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21909 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21912 ix86_expand_vector_extract (false, target, fp_xmm, 0);
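/* Worked example of the exponent-bias trick above: with lo and hi the two
   32-bit halves of the input, the interleave builds the doubles
   2**52 + lo (exponent word 0x43300000) and 2**84 + hi * 2**32 (exponent
   word 0x45300000).  Subtracting the constant biases 2**52 and 2**84 is
   exact, and the final addition gives lo + hi * 2**32 with a single
   rounding, i.e. the original unsigned 64-bit value as a double.  */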
21915 /* Not used, but eases macroization of patterns. */
21916 void
21917 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21919 gcc_unreachable ();
21922 /* Convert an unsigned SImode value into a DFmode. Only currently used
21923 for SSE, but applicable anywhere. */
21925 void
21926 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21928 REAL_VALUE_TYPE TWO31r;
21929 rtx x, fp;
21931 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21932 NULL, 1, OPTAB_DIRECT);
21934 fp = gen_reg_rtx (DFmode);
21935 emit_insn (gen_floatsidf2 (fp, x));
21937 real_ldexp (&TWO31r, &dconst1, 31);
21938 x = const_double_from_real_value (TWO31r, DFmode);
21940 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21941 if (x != target)
21942 emit_move_insn (target, x);
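/* The conversion above biases the unsigned input by subtracting 2**31 (the
   PLUS of -2147483648 wraps modulo 2**32), converts the result as a signed
   SImode value, and then adds 2**31 back as a DFmode constant; this is
   exact because DFmode can represent every 32-bit integer.  */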
21945 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21946 32-bit mode; otherwise we have a direct convert instruction. */
21948 void
21949 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21951 REAL_VALUE_TYPE TWO32r;
21952 rtx fp_lo, fp_hi, x;
21954 fp_lo = gen_reg_rtx (DFmode);
21955 fp_hi = gen_reg_rtx (DFmode);
21957 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21959 real_ldexp (&TWO32r, &dconst1, 32);
21960 x = const_double_from_real_value (TWO32r, DFmode);
21961 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21963 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21965 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21966 0, OPTAB_DIRECT);
21967 if (x != target)
21968 emit_move_insn (target, x);
21971 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21972 For x86_32, -mfpmath=sse, !optimize_size only. */
21973 void
21974 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21976 REAL_VALUE_TYPE ONE16r;
21977 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21979 real_ldexp (&ONE16r, &dconst1, 16);
21980 x = const_double_from_real_value (ONE16r, SFmode);
21981 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
21982 NULL, 0, OPTAB_DIRECT);
21983 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
21984 NULL, 0, OPTAB_DIRECT);
21985 fp_hi = gen_reg_rtx (SFmode);
21986 fp_lo = gen_reg_rtx (SFmode);
21987 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21988 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21989 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21990 0, OPTAB_DIRECT);
21991 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21992 0, OPTAB_DIRECT);
21993 if (!rtx_equal_p (target, fp_hi))
21994 emit_move_insn (target, fp_hi);
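/* The SImode -> SFmode sequence above splits the input into 16-bit halves,
   converts each half exactly, scales the high half by 2**16 and sums the
   parts; only the final addition can round, so the result is the correctly
   rounded single-precision value.  */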
21997 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21998 a vector of unsigned ints VAL to vector of floats TARGET. */
22000 void
22001 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
22003 rtx tmp[8];
22004 REAL_VALUE_TYPE TWO16r;
22005 machine_mode intmode = GET_MODE (val);
22006 machine_mode fltmode = GET_MODE (target);
22007 rtx (*cvt) (rtx, rtx);
22009 if (intmode == V4SImode)
22010 cvt = gen_floatv4siv4sf2;
22011 else
22012 cvt = gen_floatv8siv8sf2;
22013 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
22014 tmp[0] = force_reg (intmode, tmp[0]);
22015 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
22016 OPTAB_DIRECT);
22017 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
22018 NULL_RTX, 1, OPTAB_DIRECT);
22019 tmp[3] = gen_reg_rtx (fltmode);
22020 emit_insn (cvt (tmp[3], tmp[1]));
22021 tmp[4] = gen_reg_rtx (fltmode);
22022 emit_insn (cvt (tmp[4], tmp[2]));
22023 real_ldexp (&TWO16r, &dconst1, 16);
22024 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
22025 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
22026 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
22027 OPTAB_DIRECT);
22028 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
22029 OPTAB_DIRECT);
22030 if (tmp[7] != target)
22031 emit_move_insn (target, tmp[7]);
22034 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
22035 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
22036 This is done by doing just signed conversion if < 0x1p31, and otherwise by
22037 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
22039 rtx
22040 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
22042 REAL_VALUE_TYPE TWO31r;
22043 rtx two31r, tmp[4];
22044 machine_mode mode = GET_MODE (val);
22045 machine_mode scalarmode = GET_MODE_INNER (mode);
22046 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
22047 rtx (*cmp) (rtx, rtx, rtx, rtx);
22048 int i;
22050 for (i = 0; i < 3; i++)
22051 tmp[i] = gen_reg_rtx (mode);
22052 real_ldexp (&TWO31r, &dconst1, 31);
22053 two31r = const_double_from_real_value (TWO31r, scalarmode);
22054 two31r = ix86_build_const_vector (mode, 1, two31r);
22055 two31r = force_reg (mode, two31r);
22056 switch (mode)
22058 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
22059 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
22060 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
22061 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
22062 default: gcc_unreachable ();
22064 tmp[3] = gen_rtx_LE (mode, two31r, val);
22065 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
22066 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
22067 0, OPTAB_DIRECT);
22068 if (intmode == V4SImode || TARGET_AVX2)
22069 *xorp = expand_simple_binop (intmode, ASHIFT,
22070 gen_lowpart (intmode, tmp[0]),
22071 GEN_INT (31), NULL_RTX, 0,
22072 OPTAB_DIRECT);
22073 else
22075 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
22076 two31 = ix86_build_const_vector (intmode, 1, two31);
22077 *xorp = expand_simple_binop (intmode, AND,
22078 gen_lowpart (intmode, tmp[0]),
22079 two31, NULL_RTX, 0,
22080 OPTAB_DIRECT);
22082 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
22083 0, OPTAB_DIRECT);
22086 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
22087 then replicate the value for all elements of the vector
22088 register. */
22090 rtx
22091 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
22093 int i, n_elt;
22094 rtvec v;
22095 machine_mode scalar_mode;
22097 switch (mode)
22099 case E_V64QImode:
22100 case E_V32QImode:
22101 case E_V16QImode:
22102 case E_V32HImode:
22103 case E_V16HImode:
22104 case E_V8HImode:
22105 case E_V16SImode:
22106 case E_V8SImode:
22107 case E_V4SImode:
22108 case E_V8DImode:
22109 case E_V4DImode:
22110 case E_V2DImode:
22111 gcc_assert (vect);
22112 /* FALLTHRU */
22113 case E_V16SFmode:
22114 case E_V8SFmode:
22115 case E_V4SFmode:
22116 case E_V8DFmode:
22117 case E_V4DFmode:
22118 case E_V2DFmode:
22119 n_elt = GET_MODE_NUNITS (mode);
22120 v = rtvec_alloc (n_elt);
22121 scalar_mode = GET_MODE_INNER (mode);
22123 RTVEC_ELT (v, 0) = value;
22125 for (i = 1; i < n_elt; ++i)
22126 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
22128 return gen_rtx_CONST_VECTOR (mode, v);
22130 default:
22131 gcc_unreachable ();
22135 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
22136 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
22137 for an SSE register. If VECT is true, then replicate the mask for
22138 all elements of the vector register. If INVERT is true, then create
22139 a mask excluding the sign bit. */
22141 rtx
22142 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
22144 machine_mode vec_mode, imode;
22145 wide_int w;
22146 rtx mask, v;
22148 switch (mode)
22150 case E_V16SImode:
22151 case E_V16SFmode:
22152 case E_V8SImode:
22153 case E_V4SImode:
22154 case E_V8SFmode:
22155 case E_V4SFmode:
22156 vec_mode = mode;
22157 imode = SImode;
22158 break;
22160 case E_V8DImode:
22161 case E_V4DImode:
22162 case E_V2DImode:
22163 case E_V8DFmode:
22164 case E_V4DFmode:
22165 case E_V2DFmode:
22166 vec_mode = mode;
22167 imode = DImode;
22168 break;
22170 case E_TImode:
22171 case E_TFmode:
22172 vec_mode = VOIDmode;
22173 imode = TImode;
22174 break;
22176 default:
22177 gcc_unreachable ();
22180 machine_mode inner_mode = GET_MODE_INNER (mode);
22181 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
22182 GET_MODE_BITSIZE (inner_mode));
22183 if (invert)
22184 w = wi::bit_not (w);
22186 /* Force this value into the low part of a fp vector constant. */
22187 mask = immed_wide_int_const (w, imode);
22188 mask = gen_lowpart (inner_mode, mask);
22190 if (vec_mode == VOIDmode)
22191 return force_reg (inner_mode, mask);
22193 v = ix86_build_const_vector (vec_mode, vect, mask);
22194 return force_reg (vec_mode, v);
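/* For example, for V4SFmode with VECT set this yields the constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } reinterpreted as
   floats, or its complement { 0x7fffffff, ... } when INVERT is true.  */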
22197 /* Generate code for floating point ABS or NEG. */
22199 void
22200 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
22201 rtx operands[])
22203 rtx mask, set, dst, src;
22204 bool use_sse = false;
22205 bool vector_mode = VECTOR_MODE_P (mode);
22206 machine_mode vmode = mode;
22208 if (vector_mode)
22209 use_sse = true;
22210 else if (mode == TFmode)
22211 use_sse = true;
22212 else if (TARGET_SSE_MATH)
22214 use_sse = SSE_FLOAT_MODE_P (mode);
22215 if (mode == SFmode)
22216 vmode = V4SFmode;
22217 else if (mode == DFmode)
22218 vmode = V2DFmode;
22221 /* NEG and ABS performed with SSE use bitwise mask operations.
22222 Create the appropriate mask now. */
22223 if (use_sse)
22224 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
22225 else
22226 mask = NULL_RTX;
22228 dst = operands[0];
22229 src = operands[1];
22231 set = gen_rtx_fmt_e (code, mode, src);
22232 set = gen_rtx_SET (dst, set);
22234 if (mask)
22236 rtx use, clob;
22237 rtvec par;
22239 use = gen_rtx_USE (VOIDmode, mask);
22240 if (vector_mode)
22241 par = gen_rtvec (2, set, use);
22242 else
22244 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
22245 par = gen_rtvec (3, set, use, clob);
22247 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
22249 else
22250 emit_insn (set);
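/* Sketch of the intent behind the mask above: with SSE, NEG is performed
   as an XOR with the sign-bit mask and ABS as an AND with the inverted
   mask.  The pattern emitted here only records the mask via a USE; the
   actual bitwise operation is generated when the insn is split.  */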
22253 /* Expand a copysign operation. Special case operand 0 being a constant. */
22255 void
22256 ix86_expand_copysign (rtx operands[])
22258 machine_mode mode, vmode;
22259 rtx dest, op0, op1, mask, nmask;
22261 dest = operands[0];
22262 op0 = operands[1];
22263 op1 = operands[2];
22265 mode = GET_MODE (dest);
22267 if (mode == SFmode)
22268 vmode = V4SFmode;
22269 else if (mode == DFmode)
22270 vmode = V2DFmode;
22271 else
22272 vmode = mode;
22274 if (CONST_DOUBLE_P (op0))
22276 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
22278 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
22279 op0 = simplify_unary_operation (ABS, mode, op0, mode);
22281 if (mode == SFmode || mode == DFmode)
22283 if (op0 == CONST0_RTX (mode))
22284 op0 = CONST0_RTX (vmode);
22285 else
22287 rtx v = ix86_build_const_vector (vmode, false, op0);
22289 op0 = force_reg (vmode, v);
22292 else if (op0 != CONST0_RTX (mode))
22293 op0 = force_reg (mode, op0);
22295 mask = ix86_build_signbit_mask (vmode, 0, 0);
22297 if (mode == SFmode)
22298 copysign_insn = gen_copysignsf3_const;
22299 else if (mode == DFmode)
22300 copysign_insn = gen_copysigndf3_const;
22301 else
22302 copysign_insn = gen_copysigntf3_const;
22304 emit_insn (copysign_insn (dest, op0, op1, mask));
22306 else
22308 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
22310 nmask = ix86_build_signbit_mask (vmode, 0, 1);
22311 mask = ix86_build_signbit_mask (vmode, 0, 0);
22313 if (mode == SFmode)
22314 copysign_insn = gen_copysignsf3_var;
22315 else if (mode == DFmode)
22316 copysign_insn = gen_copysigndf3_var;
22317 else
22318 copysign_insn = gen_copysigntf3_var;
22320 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
22324 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
22325 be a constant, and so has already been expanded into a vector constant. */
22327 void
22328 ix86_split_copysign_const (rtx operands[])
22330 machine_mode mode, vmode;
22331 rtx dest, op0, mask, x;
22333 dest = operands[0];
22334 op0 = operands[1];
22335 mask = operands[3];
22337 mode = GET_MODE (dest);
22338 vmode = GET_MODE (mask);
22340 dest = lowpart_subreg (vmode, dest, mode);
22341 x = gen_rtx_AND (vmode, dest, mask);
22342 emit_insn (gen_rtx_SET (dest, x));
22344 if (op0 != CONST0_RTX (vmode))
22346 x = gen_rtx_IOR (vmode, dest, op0);
22347 emit_insn (gen_rtx_SET (dest, x));
22351 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
22352 so we have to do two masks. */
22354 void
22355 ix86_split_copysign_var (rtx operands[])
22357 machine_mode mode, vmode;
22358 rtx dest, scratch, op0, op1, mask, nmask, x;
22360 dest = operands[0];
22361 scratch = operands[1];
22362 op0 = operands[2];
22363 op1 = operands[3];
22364 nmask = operands[4];
22365 mask = operands[5];
22367 mode = GET_MODE (dest);
22368 vmode = GET_MODE (mask);
22370 if (rtx_equal_p (op0, op1))
22372 /* Shouldn't happen often (it's useless, obviously), but when it does
22373 we'd generate incorrect code if we continue below. */
22374 emit_move_insn (dest, op0);
22375 return;
22378 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
22380 gcc_assert (REGNO (op1) == REGNO (scratch));
22382 x = gen_rtx_AND (vmode, scratch, mask);
22383 emit_insn (gen_rtx_SET (scratch, x));
22385 dest = mask;
22386 op0 = lowpart_subreg (vmode, op0, mode);
22387 x = gen_rtx_NOT (vmode, dest);
22388 x = gen_rtx_AND (vmode, x, op0);
22389 emit_insn (gen_rtx_SET (dest, x));
22391 else
22393 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
22395 x = gen_rtx_AND (vmode, scratch, mask);
22397 else /* alternative 2,4 */
22399 gcc_assert (REGNO (mask) == REGNO (scratch));
22400 op1 = lowpart_subreg (vmode, op1, mode);
22401 x = gen_rtx_AND (vmode, scratch, op1);
22403 emit_insn (gen_rtx_SET (scratch, x));
22405 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
22407 dest = lowpart_subreg (vmode, op0, mode);
22408 x = gen_rtx_AND (vmode, dest, nmask);
22410 else /* alternative 3,4 */
22412 gcc_assert (REGNO (nmask) == REGNO (dest));
22413 dest = nmask;
22414 op0 = lowpart_subreg (vmode, op0, mode);
22415 x = gen_rtx_AND (vmode, dest, op0);
22417 emit_insn (gen_rtx_SET (dest, x));
22420 x = gen_rtx_IOR (vmode, dest, scratch);
22421 emit_insn (gen_rtx_SET (dest, x));
22424 /* Return TRUE or FALSE depending on whether the first SET in INSN
22425 has source and destination with matching CC modes, and whether the
22426 CC mode is at least as constrained as REQ_MODE. */
22428 bool
22429 ix86_match_ccmode (rtx insn, machine_mode req_mode)
22431 rtx set;
22432 machine_mode set_mode;
22434 set = PATTERN (insn);
22435 if (GET_CODE (set) == PARALLEL)
22436 set = XVECEXP (set, 0, 0);
22437 gcc_assert (GET_CODE (set) == SET);
22438 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
22440 set_mode = GET_MODE (SET_DEST (set));
22441 switch (set_mode)
22443 case E_CCNOmode:
22444 if (req_mode != CCNOmode
22445 && (req_mode != CCmode
22446 || XEXP (SET_SRC (set), 1) != const0_rtx))
22447 return false;
22448 break;
22449 case E_CCmode:
22450 if (req_mode == CCGCmode)
22451 return false;
22452 /* FALLTHRU */
22453 case E_CCGCmode:
22454 if (req_mode == CCGOCmode || req_mode == CCNOmode)
22455 return false;
22456 /* FALLTHRU */
22457 case E_CCGOCmode:
22458 if (req_mode == CCZmode)
22459 return false;
22460 /* FALLTHRU */
22461 case E_CCZmode:
22462 break;
22464 case E_CCGZmode:
22466 case E_CCAmode:
22467 case E_CCCmode:
22468 case E_CCOmode:
22469 case E_CCPmode:
22470 case E_CCSmode:
22471 if (set_mode != req_mode)
22472 return false;
22473 break;
22475 default:
22476 gcc_unreachable ();
22479 return GET_MODE (SET_SRC (set)) == set_mode;
22482 /* Generate insn patterns to do an integer compare of OPERANDS. */
22484 static rtx
22485 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
22487 machine_mode cmpmode;
22488 rtx tmp, flags;
22490 cmpmode = SELECT_CC_MODE (code, op0, op1);
22491 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
22493 /* This is very simple, but making the interface the same as in the
22494 FP case makes the rest of the code easier. */
22495 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
22496 emit_insn (gen_rtx_SET (flags, tmp));
22498 /* Return the test that should be put into the flags user, i.e.
22499 the bcc, scc, or cmov instruction. */
22500 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
22503 /* Figure out whether to use unordered fp comparisons. */
22505 static bool
22506 ix86_unordered_fp_compare (enum rtx_code code)
22508 if (!TARGET_IEEE_FP)
22509 return false;
22511 switch (code)
22513 case GT:
22514 case GE:
22515 case LT:
22516 case LE:
22517 return false;
22519 case EQ:
22520 case NE:
22522 case LTGT:
22523 case UNORDERED:
22524 case ORDERED:
22525 case UNLT:
22526 case UNLE:
22527 case UNGT:
22528 case UNGE:
22529 case UNEQ:
22530 return true;
22532 default:
22533 gcc_unreachable ();
22537 machine_mode
22538 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
22540 machine_mode mode = GET_MODE (op0);
22542 if (SCALAR_FLOAT_MODE_P (mode))
22544 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22545 return CCFPmode;
22548 switch (code)
22550 /* Only zero flag is needed. */
22551 case EQ: /* ZF=0 */
22552 case NE: /* ZF!=0 */
22553 return CCZmode;
22554 /* Codes needing carry flag. */
22555 case GEU: /* CF=0 */
22556 case LTU: /* CF=1 */
22557 /* Detect overflow checks. They need just the carry flag. */
22558 if (GET_CODE (op0) == PLUS
22559 && (rtx_equal_p (op1, XEXP (op0, 0))
22560 || rtx_equal_p (op1, XEXP (op0, 1))))
22561 return CCCmode;
22562 else
22563 return CCmode;
22564 case GTU: /* CF=0 & ZF=0 */
22565 case LEU: /* CF=1 | ZF=1 */
22566 return CCmode;
22567 /* Codes possibly doable only with sign flag when
22568 comparing against zero. */
22569 case GE: /* SF=OF or SF=0 */
22570 case LT: /* SF<>OF or SF=1 */
22571 if (op1 == const0_rtx)
22572 return CCGOCmode;
22573 else
22574 /* For other cases Carry flag is not required. */
22575 return CCGCmode;
22576 /* Codes doable only with the sign flag when comparing
22577 against zero, but we lack a jump instruction for it,
22578 so we need to use relational tests against overflow,
22579 which thus needs to be zero. */
22580 case GT: /* ZF=0 & SF=OF */
22581 case LE: /* ZF=1 | SF<>OF */
22582 if (op1 == const0_rtx)
22583 return CCNOmode;
22584 else
22585 return CCGCmode;
22586 /* The strcmp pattern does (use flags), and combine may ask us for the
22587 proper mode. */
22588 case USE:
22589 return CCmode;
22590 default:
22591 gcc_unreachable ();
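/* Example of the overflow-check detection above: for an unsigned test such
   as "a + b < a" (op0 is a PLUS and op1 equals one of the addends) only the
   carry flag is needed, so CCCmode is chosen instead of the full CCmode.  */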
22595 /* Return the fixed registers used for condition codes. */
22597 static bool
22598 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22600 *p1 = FLAGS_REG;
22601 *p2 = FPSR_REG;
22602 return true;
22605 /* If two condition code modes are compatible, return a condition code
22606 mode which is compatible with both. Otherwise, return
22607 VOIDmode. */
22609 static machine_mode
22610 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
22612 if (m1 == m2)
22613 return m1;
22615 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
22616 return VOIDmode;
22618 if ((m1 == CCGCmode && m2 == CCGOCmode)
22619 || (m1 == CCGOCmode && m2 == CCGCmode))
22620 return CCGCmode;
22622 if ((m1 == CCNOmode && m2 == CCGOCmode)
22623 || (m1 == CCGOCmode && m2 == CCNOmode))
22624 return CCNOmode;
22626 if (m1 == CCZmode
22627 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
22628 return m2;
22629 else if (m2 == CCZmode
22630 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
22631 return m1;
22633 switch (m1)
22635 default:
22636 gcc_unreachable ();
22638 case E_CCmode:
22639 case E_CCGCmode:
22640 case E_CCGOCmode:
22641 case E_CCNOmode:
22642 case E_CCAmode:
22643 case E_CCCmode:
22644 case E_CCOmode:
22645 case E_CCPmode:
22646 case E_CCSmode:
22647 case E_CCZmode:
22648 switch (m2)
22650 default:
22651 return VOIDmode;
22653 case E_CCmode:
22654 case E_CCGCmode:
22655 case E_CCGOCmode:
22656 case E_CCNOmode:
22657 case E_CCAmode:
22658 case E_CCCmode:
22659 case E_CCOmode:
22660 case E_CCPmode:
22661 case E_CCSmode:
22662 case E_CCZmode:
22663 return CCmode;
22666 case E_CCFPmode:
22667 /* These are only compatible with themselves, which we already
22668 checked above. */
22669 return VOIDmode;
22674 /* Return a comparison we can do that is equivalent to
22675 swap_condition (code), apart possibly from orderedness.
22676 But never change orderedness if TARGET_IEEE_FP, returning
22677 UNKNOWN in that case if necessary. */
22679 static enum rtx_code
22680 ix86_fp_swap_condition (enum rtx_code code)
22682 switch (code)
22684 case GT: /* GTU - CF=0 & ZF=0 */
22685 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
22686 case GE: /* GEU - CF=0 */
22687 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
22688 case UNLT: /* LTU - CF=1 */
22689 return TARGET_IEEE_FP ? UNKNOWN : GT;
22690 case UNLE: /* LEU - CF=1 | ZF=1 */
22691 return TARGET_IEEE_FP ? UNKNOWN : GE;
22692 default:
22693 return swap_condition (code);
22697 /* Return the cost of comparison CODE using the best strategy for performance.
22698 All of the following functions use the number of instructions as a cost metric.
22699 In future this should be tweaked to compute bytes for optimize_size and
22700 take into account the performance of various instructions on various CPUs. */
22702 static int
22703 ix86_fp_comparison_cost (enum rtx_code code)
22705 int arith_cost;
22707 /* The cost of code using bit-twiddling on %ah. */
22708 switch (code)
22710 case UNLE:
22711 case UNLT:
22712 case LTGT:
22713 case GT:
22714 case GE:
22715 case UNORDERED:
22716 case ORDERED:
22717 case UNEQ:
22718 arith_cost = 4;
22719 break;
22720 case LT:
22721 case NE:
22722 case EQ:
22723 case UNGE:
22724 arith_cost = TARGET_IEEE_FP ? 5 : 4;
22725 break;
22726 case LE:
22727 case UNGT:
22728 arith_cost = TARGET_IEEE_FP ? 6 : 4;
22729 break;
22730 default:
22731 gcc_unreachable ();
22734 switch (ix86_fp_comparison_strategy (code))
22736 case IX86_FPCMP_COMI:
22737 return arith_cost > 4 ? 3 : 2;
22738 case IX86_FPCMP_SAHF:
22739 return arith_cost > 4 ? 4 : 3;
22740 default:
22741 return arith_cost;
22745 /* Return the strategy to use for floating-point. We assume that fcomi is
22746 always preferable where available, since that is also true when looking at
22747 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
22749 enum ix86_fpcmp_strategy
22750 ix86_fp_comparison_strategy (enum rtx_code)
22752 /* Do fcomi/sahf based test when profitable. */
22754 if (TARGET_CMOVE)
22755 return IX86_FPCMP_COMI;
22757 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22758 return IX86_FPCMP_SAHF;
22760 return IX86_FPCMP_ARITH;
22763 /* Swap, force into registers, or otherwise massage the two operands
22764 to a fp comparison. The operands are updated in place; the new
22765 comparison code is returned. */
22767 static enum rtx_code
22768 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22770 bool unordered_compare = ix86_unordered_fp_compare (code);
22771 rtx op0 = *pop0, op1 = *pop1;
22772 machine_mode op_mode = GET_MODE (op0);
22773 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22775 /* All of the unordered compare instructions only work on registers.
22776 The same is true of the fcomi compare instructions. The XFmode
22777 compare instructions require registers except when comparing
22778 against zero or when converting operand 1 from fixed point to
22779 floating point. */
22781 if (!is_sse
22782 && (unordered_compare
22783 || (op_mode == XFmode
22784 && ! (standard_80387_constant_p (op0) == 1
22785 || standard_80387_constant_p (op1) == 1)
22786 && GET_CODE (op1) != FLOAT)
22787 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22789 op0 = force_reg (op_mode, op0);
22790 op1 = force_reg (op_mode, op1);
22792 else
22794 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
22795 things around if they appear profitable, otherwise force op0
22796 into a register. */
22798 if (standard_80387_constant_p (op0) == 0
22799 || (MEM_P (op0)
22800 && ! (standard_80387_constant_p (op1) == 0
22801 || MEM_P (op1))))
22803 enum rtx_code new_code = ix86_fp_swap_condition (code);
22804 if (new_code != UNKNOWN)
22806 std::swap (op0, op1);
22807 code = new_code;
22811 if (!REG_P (op0))
22812 op0 = force_reg (op_mode, op0);
22814 if (CONSTANT_P (op1))
22816 int tmp = standard_80387_constant_p (op1);
22817 if (tmp == 0)
22818 op1 = validize_mem (force_const_mem (op_mode, op1));
22819 else if (tmp == 1)
22821 if (TARGET_CMOVE)
22822 op1 = force_reg (op_mode, op1);
22824 else
22825 op1 = force_reg (op_mode, op1);
22829 /* Try to rearrange the comparison to make it cheaper. */
22830 if (ix86_fp_comparison_cost (code)
22831 > ix86_fp_comparison_cost (swap_condition (code))
22832 && (REG_P (op1) || can_create_pseudo_p ()))
22834 std::swap (op0, op1);
22835 code = swap_condition (code);
22836 if (!REG_P (op0))
22837 op0 = force_reg (op_mode, op0);
22840 *pop0 = op0;
22841 *pop1 = op1;
22842 return code;
22845 /* Convert comparison codes we use to represent FP comparison to integer
22846 code that will result in a proper branch. Return UNKNOWN if no such code
22847 is available. */
22849 enum rtx_code
22850 ix86_fp_compare_code_to_integer (enum rtx_code code)
22852 switch (code)
22854 case GT:
22855 return GTU;
22856 case GE:
22857 return GEU;
22858 case ORDERED:
22859 case UNORDERED:
22860 return code;
22861 case UNEQ:
22862 return EQ;
22863 case UNLT:
22864 return LTU;
22865 case UNLE:
22866 return LEU;
22867 case LTGT:
22868 return NE;
22869 default:
22870 return UNKNOWN;
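/* The mapping above reflects how fcomi (or fnstsw + sahf) places the FP
   condition in the integer flags (C0 -> CF, C2 -> PF, C3 -> ZF, all set
   when unordered): e.g. GT becomes GTU ("ja": CF=0 && ZF=0) and UNLT
   becomes LTU ("jb": CF=1).  */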
22874 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22876 static rtx
22877 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22879 bool unordered_compare = ix86_unordered_fp_compare (code);
22880 machine_mode intcmp_mode;
22881 rtx tmp, tmp2;
22883 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22885 /* Do fcomi/sahf based test when profitable. */
22886 switch (ix86_fp_comparison_strategy (code))
22888 case IX86_FPCMP_COMI:
22889 intcmp_mode = CCFPmode;
22890 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22891 if (unordered_compare)
22892 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22893 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22894 break;
22896 case IX86_FPCMP_SAHF:
22897 intcmp_mode = CCFPmode;
22898 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22899 if (unordered_compare)
22900 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22901 tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
22902 if (!scratch)
22903 scratch = gen_reg_rtx (HImode);
22904 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22905 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22906 break;
22908 case IX86_FPCMP_ARITH:
22909 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
22910 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22911 if (unordered_compare)
22912 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22913 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22914 if (!scratch)
22915 scratch = gen_reg_rtx (HImode);
22916 emit_insn (gen_rtx_SET (scratch, tmp));
22918 /* In the unordered case, we have to check C2 for NaN's, which
22919 doesn't happen to work out to anything nice combination-wise.
22920 So do some bit twiddling on the value we've got in AH to come
22921 up with an appropriate set of condition codes. */
22923 intcmp_mode = CCNOmode;
22924 switch (code)
22926 case GT:
22927 case UNGT:
22928 if (code == GT || !TARGET_IEEE_FP)
22930 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22931 code = EQ;
22933 else
22935 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22936 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22937 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22938 intcmp_mode = CCmode;
22939 code = GEU;
22941 break;
22942 case LT:
22943 case UNLT:
22944 if (code == LT && TARGET_IEEE_FP)
22946 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22947 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22948 intcmp_mode = CCmode;
22949 code = EQ;
22951 else
22953 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22954 code = NE;
22956 break;
22957 case GE:
22958 case UNGE:
22959 if (code == GE || !TARGET_IEEE_FP)
22961 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22962 code = EQ;
22964 else
22966 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22967 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22968 code = NE;
22970 break;
22971 case LE:
22972 case UNLE:
22973 if (code == LE && TARGET_IEEE_FP)
22975 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22976 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22977 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22978 intcmp_mode = CCmode;
22979 code = LTU;
22981 else
22983 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22984 code = NE;
22986 break;
22987 case EQ:
22988 case UNEQ:
22989 if (code == EQ && TARGET_IEEE_FP)
22991 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22992 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22993 intcmp_mode = CCmode;
22994 code = EQ;
22996 else
22998 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22999 code = NE;
23001 break;
23002 case NE:
23003 case LTGT:
23004 if (code == NE && TARGET_IEEE_FP)
23006 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
23007 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
23008 GEN_INT (0x40)));
23009 code = NE;
23011 else
23013 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
23014 code = EQ;
23016 break;
23018 case UNORDERED:
23019 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
23020 code = NE;
23021 break;
23022 case ORDERED:
23023 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
23024 code = EQ;
23025 break;
23027 default:
23028 gcc_unreachable ();
23030 break;
23032 default:
23033 gcc_unreachable();
23036 /* Return the test that should be put into the flags user, i.e.
23037 the bcc, scc, or cmov instruction. */
23038 return gen_rtx_fmt_ee (code, VOIDmode,
23039 gen_rtx_REG (intcmp_mode, FLAGS_REG),
23040 const0_rtx);
23043 static rtx
23044 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
23046 rtx ret;
23048 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
23049 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
23051 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
23053 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
23054 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
23056 else
23057 ret = ix86_expand_int_compare (code, op0, op1);
23059 return ret;
23062 void
23063 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
23065 machine_mode mode = GET_MODE (op0);
23066 rtx tmp;
23068 /* Handle special case - vector comparsion with boolean result, transform
23069 it using ptest instruction. */
23070 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23072 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
23073 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
23075 gcc_assert (code == EQ || code == NE);
23076 /* Generate XOR since we can't check that one operand is zero vector. */
23077 tmp = gen_reg_rtx (mode);
23078 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
23079 tmp = gen_lowpart (p_mode, tmp);
23080 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
23081 gen_rtx_UNSPEC (CCmode,
23082 gen_rtvec (2, tmp, tmp),
23083 UNSPEC_PTEST)));
23084 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
23085 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23086 gen_rtx_LABEL_REF (VOIDmode, label),
23087 pc_rtx);
23088 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
23089 return;
23092 switch (mode)
23094 case E_SFmode:
23095 case E_DFmode:
23096 case E_XFmode:
23097 case E_QImode:
23098 case E_HImode:
23099 case E_SImode:
23100 simple:
23101 tmp = ix86_expand_compare (code, op0, op1);
23102 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23103 gen_rtx_LABEL_REF (VOIDmode, label),
23104 pc_rtx);
23105 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
23106 return;
23108 case E_DImode:
23109 if (TARGET_64BIT)
23110 goto simple;
23111 /* For a 32-bit target, a DImode comparison may be performed in
23112 SSE registers. To allow this we must avoid splitting into
23113 SImode, which is achieved by doing the xor in DImode and then
23114 comparing against zero (a form that the STV pass recognizes).
23115 We don't use the xor form when optimizing
23116 for size. */
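      /* E.g. a 32-bit "if (a == b)" on DImode operands is rewritten as
	 "if ((a ^ b) == 0)", so the STV pass can keep the whole test in an
	 SSE register instead of splitting it into two SImode compares.  */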
23117 if (!optimize_insn_for_size_p ()
23118 && TARGET_STV
23119 && (code == EQ || code == NE))
23121 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
23122 op1 = const0_rtx;
23124 /* FALLTHRU */
23125 case E_TImode:
23126 /* Expand a double-word branch into multiple compare+branch sequences. */
23128 rtx lo[2], hi[2];
23129 rtx_code_label *label2;
23130 enum rtx_code code1, code2, code3;
23131 machine_mode submode;
23133 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
23135 std::swap (op0, op1);
23136 code = swap_condition (code);
23139 split_double_mode (mode, &op0, 1, lo+0, hi+0);
23140 split_double_mode (mode, &op1, 1, lo+1, hi+1);
23142 submode = mode == DImode ? SImode : DImode;
23144 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
23145 avoid two branches. This costs one extra insn, so disable when
23146 optimizing for size. */
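	/* Sketch of the sequence for a 32-bit DImode "a == b" (registers
	   and syntax are illustrative only):
	     t1 = hi(a) ^ hi(b)
	     t0 = lo(a) ^ lo(b)
	     or  t1, t0
	     jz  label  */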
23148 if ((code == EQ || code == NE)
23149 && (!optimize_insn_for_size_p ()
23150 || hi[1] == const0_rtx || lo[1] == const0_rtx))
23152 rtx xor0, xor1;
23154 xor1 = hi[0];
23155 if (hi[1] != const0_rtx)
23156 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
23157 NULL_RTX, 0, OPTAB_WIDEN);
23159 xor0 = lo[0];
23160 if (lo[1] != const0_rtx)
23161 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
23162 NULL_RTX, 0, OPTAB_WIDEN);
23164 tmp = expand_binop (submode, ior_optab, xor1, xor0,
23165 NULL_RTX, 0, OPTAB_WIDEN);
23167 ix86_expand_branch (code, tmp, const0_rtx, label);
23168 return;
23171 /* Otherwise, if we are doing a less-than or greater-than-or-equal
23172 comparison, op1 is a constant and its low word is zero, then we can
23173 just examine the high word. Similarly for a low word of -1 and
23174 less-than-or-equal or greater-than. */
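	/* For example, with op1 == 0x100000000 (low word 0), "a < op1" holds
	   exactly when hi(a) < 1, so comparing only the high words decides
	   the branch.  */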
23176 if (CONST_INT_P (hi[1]))
23177 switch (code)
23179 case LT: case LTU: case GE: case GEU:
23180 if (lo[1] == const0_rtx)
23182 ix86_expand_branch (code, hi[0], hi[1], label);
23183 return;
23185 break;
23186 case LE: case LEU: case GT: case GTU:
23187 if (lo[1] == constm1_rtx)
23189 ix86_expand_branch (code, hi[0], hi[1], label);
23190 return;
23192 break;
23193 default:
23194 break;
23197 /* Emulate comparisons that do not depend on Zero flag with
23198 double-word subtraction. Note that only Overflow, Sign
23199 and Carry flags are valid, so swap arguments and condition
23200 of comparisons that would otherwise test Zero flag. */
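	/* Illustrative sketch for a signed 64-bit "a < b" on a 32-bit target:
	   compare the low words (producing a borrow), sbb the high words into
	   a scratch (result discarded, flags kept), then branch on the
	   resulting O/S flags (CCGZ); the unsigned variants branch on the
	   carry flag (CCC) instead.  */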
23202 switch (code)
23204 case LE: case LEU: case GT: case GTU:
23205 std::swap (lo[0], lo[1]);
23206 std::swap (hi[0], hi[1]);
23207 code = swap_condition (code);
23208 /* FALLTHRU */
23210 case LT: case LTU: case GE: case GEU:
23212 rtx (*cmp_insn) (rtx, rtx);
23213 rtx (*sbb_insn) (rtx, rtx, rtx);
23214 bool uns = (code == LTU || code == GEU);
23216 if (TARGET_64BIT)
23218 cmp_insn = gen_cmpdi_1;
23219 sbb_insn
23220 = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
23222 else
23224 cmp_insn = gen_cmpsi_1;
23225 sbb_insn
23226 = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
23229 if (!nonimmediate_operand (lo[0], submode))
23230 lo[0] = force_reg (submode, lo[0]);
23231 if (!x86_64_general_operand (lo[1], submode))
23232 lo[1] = force_reg (submode, lo[1]);
23234 if (!register_operand (hi[0], submode))
23235 hi[0] = force_reg (submode, hi[0]);
23236 if ((uns && !nonimmediate_operand (hi[1], submode))
23237 || (!uns && !x86_64_general_operand (hi[1], submode)))
23238 hi[1] = force_reg (submode, hi[1]);
23240 emit_insn (cmp_insn (lo[0], lo[1]));
23241 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
23243 tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
23245 ix86_expand_branch (code, tmp, const0_rtx, label);
23246 return;
23249 default:
23250 break;
23253 /* Otherwise, we need two or three jumps. */
23255 label2 = gen_label_rtx ();
23257 code1 = code;
23258 code2 = swap_condition (code);
23259 code3 = unsigned_condition (code);
23261 switch (code)
23263 case LT: case GT: case LTU: case GTU:
23264 break;
23266 case LE: code1 = LT; code2 = GT; break;
23267 case GE: code1 = GT; code2 = LT; break;
23268 case LEU: code1 = LTU; code2 = GTU; break;
23269 case GEU: code1 = GTU; code2 = LTU; break;
23271 case EQ: code1 = UNKNOWN; code2 = NE; break;
23272 case NE: code2 = UNKNOWN; break;
23274 default:
23275 gcc_unreachable ();
23279 * a < b =>
23280 * if (hi(a) < hi(b)) goto true;
23281 * if (hi(a) > hi(b)) goto false;
23282 * if (lo(a) < lo(b)) goto true;
23283 * false:
23286 if (code1 != UNKNOWN)
23287 ix86_expand_branch (code1, hi[0], hi[1], label);
23288 if (code2 != UNKNOWN)
23289 ix86_expand_branch (code2, hi[0], hi[1], label2);
23291 ix86_expand_branch (code3, lo[0], lo[1], label);
23293 if (code2 != UNKNOWN)
23294 emit_label (label2);
23295 return;
23298 default:
23299 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
23300 goto simple;
23304 void
23305 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23307 rtx ret;
23309 gcc_assert (GET_MODE (dest) == QImode);
23311 ret = ix86_expand_compare (code, op0, op1);
23312 PUT_MODE (ret, QImode);
23313 emit_insn (gen_rtx_SET (dest, ret));
23316 /* Expand comparison setting or clearing carry flag. Return true when
23317 successful and set pop for the operation. */
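/* On success *POP is an LTU or GEU test of the carry flag, which callers
   such as ix86_expand_int_movcc can turn into an sbb-based sequence
   (e.g. "cmp; sbb dest,dest").  */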
23318 static bool
23319 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
23321 machine_mode mode =
23322 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
23324 /* Do not handle double-mode compares, which go through a special path. */
23325 if (mode == (TARGET_64BIT ? TImode : DImode))
23326 return false;
23328 if (SCALAR_FLOAT_MODE_P (mode))
23330 rtx compare_op;
23331 rtx_insn *compare_seq;
23333 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
23335 /* Shortcut: the following common codes never translate
23336 into carry-flag compares. */
23337 if (code == EQ || code == NE || code == UNEQ || code == LTGT
23338 || code == ORDERED || code == UNORDERED)
23339 return false;
23341 /* These comparisons require the zero flag; swap the operands so they won't. */
23342 if ((code == GT || code == UNLE || code == LE || code == UNGT)
23343 && !TARGET_IEEE_FP)
23345 std::swap (op0, op1);
23346 code = swap_condition (code);
23349 /* Try to expand the comparison and verify that we end up with
23350 a carry-flag-based comparison. This fails only when we decide
23351 to expand the comparison using arithmetic, which is not a
23352 common scenario. */
23353 start_sequence ();
23354 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
23355 compare_seq = get_insns ();
23356 end_sequence ();
23358 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
23359 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
23360 else
23361 code = GET_CODE (compare_op);
23363 if (code != LTU && code != GEU)
23364 return false;
23366 emit_insn (compare_seq);
23367 *pop = compare_op;
23368 return true;
23371 if (!INTEGRAL_MODE_P (mode))
23372 return false;
23374 switch (code)
23376 case LTU:
23377 case GEU:
23378 break;
23380 /* Convert a==0 into (unsigned)a<1. */
23381 case EQ:
23382 case NE:
23383 if (op1 != const0_rtx)
23384 return false;
23385 op1 = const1_rtx;
23386 code = (code == EQ ? LTU : GEU);
23387 break;
23389 /* Convert a>b into b<a or a>=b-1. */
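    /* E.g. "a > 5" becomes "a >= 6" (GEU), while a non-constant "a > b"
       becomes "b < a" (LTU); both map directly onto the carry flag.  */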
23390 case GTU:
23391 case LEU:
23392 if (CONST_INT_P (op1))
23394 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
23395 /* Bail out on overflow. We could still swap the operands, but that
23396 would force loading the constant into a register. */
23397 if (op1 == const0_rtx
23398 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
23399 return false;
23400 code = (code == GTU ? GEU : LTU);
23402 else
23404 std::swap (op0, op1);
23405 code = (code == GTU ? LTU : GEU);
23407 break;
23409 /* Convert a>=0 into (unsigned)a<0x80000000. */
23410 case LT:
23411 case GE:
23412 if (mode == DImode || op1 != const0_rtx)
23413 return false;
23414 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
23415 code = (code == LT ? GEU : LTU);
23416 break;
23417 case LE:
23418 case GT:
23419 if (mode == DImode || op1 != constm1_rtx)
23420 return false;
23421 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
23422 code = (code == LE ? GEU : LTU);
23423 break;
23425 default:
23426 return false;
23428 /* Swapping the operands may cause a constant to appear as the first operand. */
23429 if (!nonimmediate_operand (op0, VOIDmode))
23431 if (!can_create_pseudo_p ())
23432 return false;
23433 op0 = force_reg (mode, op0);
23435 *pop = ix86_expand_compare (code, op0, op1);
23436 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
23437 return true;
23440 bool
23441 ix86_expand_int_movcc (rtx operands[])
23443 enum rtx_code code = GET_CODE (operands[1]), compare_code;
23444 rtx_insn *compare_seq;
23445 rtx compare_op;
23446 machine_mode mode = GET_MODE (operands[0]);
23447 bool sign_bit_compare_p = false;
23448 rtx op0 = XEXP (operands[1], 0);
23449 rtx op1 = XEXP (operands[1], 1);
23451 if (GET_MODE (op0) == TImode
23452 || (GET_MODE (op0) == DImode
23453 && !TARGET_64BIT))
23454 return false;
23456 start_sequence ();
23457 compare_op = ix86_expand_compare (code, op0, op1);
23458 compare_seq = get_insns ();
23459 end_sequence ();
23461 compare_code = GET_CODE (compare_op);
23463 if ((op1 == const0_rtx && (code == GE || code == LT))
23464 || (op1 == constm1_rtx && (code == GT || code == LE)))
23465 sign_bit_compare_p = true;
23467 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
23468 HImode insns, we'd be swallowed in word prefix ops. */
23470 if ((mode != HImode || TARGET_FAST_PREFIX)
23471 && (mode != (TARGET_64BIT ? TImode : DImode))
23472 && CONST_INT_P (operands[2])
23473 && CONST_INT_P (operands[3]))
23475 rtx out = operands[0];
23476 HOST_WIDE_INT ct = INTVAL (operands[2]);
23477 HOST_WIDE_INT cf = INTVAL (operands[3]);
23478 HOST_WIDE_INT diff;
23480 diff = ct - cf;
23481 /* Sign-bit compares are better done using shifts than by using
23482 sbb. */
23483 if (sign_bit_compare_p
23484 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
23486 /* Detect overlap between destination and compare sources. */
23487 rtx tmp = out;
23489 if (!sign_bit_compare_p)
23491 rtx flags;
23492 bool fpcmp = false;
23494 compare_code = GET_CODE (compare_op);
23496 flags = XEXP (compare_op, 0);
23498 if (GET_MODE (flags) == CCFPmode)
23500 fpcmp = true;
23501 compare_code
23502 = ix86_fp_compare_code_to_integer (compare_code);
23505 /* To simplify the rest of the code, restrict to the GEU case. */
23506 if (compare_code == LTU)
23508 std::swap (ct, cf);
23509 compare_code = reverse_condition (compare_code);
23510 code = reverse_condition (code);
23512 else
23514 if (fpcmp)
23515 PUT_CODE (compare_op,
23516 reverse_condition_maybe_unordered
23517 (GET_CODE (compare_op)));
23518 else
23519 PUT_CODE (compare_op,
23520 reverse_condition (GET_CODE (compare_op)));
23522 diff = ct - cf;
23524 if (reg_overlap_mentioned_p (out, op0)
23525 || reg_overlap_mentioned_p (out, op1))
23526 tmp = gen_reg_rtx (mode);
23528 if (mode == DImode)
23529 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
23530 else
23531 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
23532 flags, compare_op));
23534 else
23536 if (code == GT || code == GE)
23537 code = reverse_condition (code);
23538 else
23540 std::swap (ct, cf);
23541 diff = ct - cf;
23543 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
23546 if (diff == 1)
23549 * cmpl op0,op1
23550 * sbbl dest,dest
23551 * [addl dest, ct]
23553 * Size 5 - 8.
23555 if (ct)
23556 tmp = expand_simple_binop (mode, PLUS,
23557 tmp, GEN_INT (ct),
23558 copy_rtx (tmp), 1, OPTAB_DIRECT);
23560 else if (cf == -1)
23563 * cmpl op0,op1
23564 * sbbl dest,dest
23565 * orl $ct, dest
23567 * Size 8.
23569 tmp = expand_simple_binop (mode, IOR,
23570 tmp, GEN_INT (ct),
23571 copy_rtx (tmp), 1, OPTAB_DIRECT);
23573 else if (diff == -1 && ct)
23576 * cmpl op0,op1
23577 * sbbl dest,dest
23578 * notl dest
23579 * [addl dest, cf]
23581 * Size 8 - 11.
23583 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23584 if (cf)
23585 tmp = expand_simple_binop (mode, PLUS,
23586 copy_rtx (tmp), GEN_INT (cf),
23587 copy_rtx (tmp), 1, OPTAB_DIRECT);
23589 else
23592 * cmpl op0,op1
23593 * sbbl dest,dest
23594 * [notl dest]
23595 * andl cf - ct, dest
23596 * [addl dest, ct]
23598 * Size 8 - 11.
23601 if (cf == 0)
23603 cf = ct;
23604 ct = 0;
23605 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23608 tmp = expand_simple_binop (mode, AND,
23609 copy_rtx (tmp),
23610 gen_int_mode (cf - ct, mode),
23611 copy_rtx (tmp), 1, OPTAB_DIRECT);
23612 if (ct)
23613 tmp = expand_simple_binop (mode, PLUS,
23614 copy_rtx (tmp), GEN_INT (ct),
23615 copy_rtx (tmp), 1, OPTAB_DIRECT);
23618 if (!rtx_equal_p (tmp, out))
23619 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
23621 return true;
23624 if (diff < 0)
23626 machine_mode cmp_mode = GET_MODE (op0);
23627 enum rtx_code new_code;
23629 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23631 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23633 /* We may be reversing an unordered compare to a normal compare, which
23634 is not valid in general (we may convert a non-trapping condition
23635 to a trapping one); however, on i386 we currently emit all
23636 comparisons unordered. */
23637 new_code = reverse_condition_maybe_unordered (code);
23639 else
23640 new_code = ix86_reverse_condition (code, cmp_mode);
23641 if (new_code != UNKNOWN)
23643 std::swap (ct, cf);
23644 diff = -diff;
23645 code = new_code;
23649 compare_code = UNKNOWN;
23650 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
23651 && CONST_INT_P (op1))
23653 if (op1 == const0_rtx
23654 && (code == LT || code == GE))
23655 compare_code = code;
23656 else if (op1 == constm1_rtx)
23658 if (code == LE)
23659 compare_code = LT;
23660 else if (code == GT)
23661 compare_code = GE;
23665 /* Optimize dest = (op0 < 0) ? -1 : cf. */
23666 if (compare_code != UNKNOWN
23667 && GET_MODE (op0) == GET_MODE (out)
23668 && (cf == -1 || ct == -1))
23670 /* If lea code below could be used, only optimize
23671 if it results in a 2 insn sequence. */
23673 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
23674 || diff == 3 || diff == 5 || diff == 9)
23675 || (compare_code == LT && ct == -1)
23676 || (compare_code == GE && cf == -1))
23679 * notl op1 (if necessary)
23680 * sarl $31, op1
23681 * orl cf, op1
23683 if (ct != -1)
23685 cf = ct;
23686 ct = -1;
23687 code = reverse_condition (code);
23690 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23692 out = expand_simple_binop (mode, IOR,
23693 out, GEN_INT (cf),
23694 out, 1, OPTAB_DIRECT);
23695 if (out != operands[0])
23696 emit_move_insn (operands[0], out);
23698 return true;
23703 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
23704 || diff == 3 || diff == 5 || diff == 9)
23705 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
23706 && (mode != DImode
23707 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
23710 * xorl dest,dest
23711 * cmpl op1,op2
23712 * setcc dest
23713 * lea cf(dest*(ct-cf)),dest
23715 * Size 14.
23717 * This also catches the degenerate setcc-only case.
23720 rtx tmp;
23721 int nops;
23723 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23725 nops = 0;
23726 /* On x86_64 the lea instruction operates on Pmode, so we need
23727 to get the arithmetic done in the proper mode to match. */
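	  /* Worked example (a sketch): with ct == 5 and cf == 2 (diff == 3),
	     the 0/1 setcc result in dest becomes
	       lea 2(dest,dest,2), dest
	     i.e. dest*3 + 2, yielding 2 or 5.  */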
23728 if (diff == 1)
23729 tmp = copy_rtx (out);
23730 else
23732 rtx out1;
23733 out1 = copy_rtx (out);
23734 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
23735 nops++;
23736 if (diff & 1)
23738 tmp = gen_rtx_PLUS (mode, tmp, out1);
23739 nops++;
23742 if (cf != 0)
23744 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23745 nops++;
23747 if (!rtx_equal_p (tmp, out))
23749 if (nops == 1)
23750 out = force_operand (tmp, copy_rtx (out));
23751 else
23752 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23754 if (!rtx_equal_p (out, operands[0]))
23755 emit_move_insn (operands[0], copy_rtx (out));
23757 return true;
23761 * General case: Jumpful:
23762 * xorl dest,dest cmpl op1, op2
23763 * cmpl op1, op2 movl ct, dest
23764 * setcc dest jcc 1f
23765 * decl dest movl cf, dest
23766 * andl (cf-ct),dest 1:
23767 * addl ct,dest
23769 * Size 20. Size 14.
23771 * This is reasonably steep, but branch mispredict costs are
23772 * high on modern cpus, so consider failing only if optimizing
23773 * for space.
23776 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23777 && BRANCH_COST (optimize_insn_for_speed_p (),
23778 false) >= 2)
23780 if (cf == 0)
23782 machine_mode cmp_mode = GET_MODE (op0);
23783 enum rtx_code new_code;
23785 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23787 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23789 /* We may be reversing an unordered compare to a normal compare,
23790 which is not valid in general (we may convert a non-trapping
23791 condition to a trapping one); however, on i386 we currently
23792 emit all comparisons unordered. */
23793 new_code = reverse_condition_maybe_unordered (code);
23795 else
23797 new_code = ix86_reverse_condition (code, cmp_mode);
23798 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23799 compare_code = reverse_condition (compare_code);
23802 if (new_code != UNKNOWN)
23804 cf = ct;
23805 ct = 0;
23806 code = new_code;
23810 if (compare_code != UNKNOWN)
23812 /* notl op1 (if needed)
23813 sarl $31, op1
23814 andl (cf-ct), op1
23815 addl ct, op1
23817 For x < 0 (resp. x <= -1) there will be no notl,
23818 so if possible swap the constants to get rid of the
23819 complement.
23820 True/false will be -1/0 while code below (store flag
23821 followed by decrement) is 0/-1, so the constants need
23822 to be exchanged once more. */
23824 if (compare_code == GE || !cf)
23826 code = reverse_condition (code);
23827 compare_code = LT;
23829 else
23830 std::swap (ct, cf);
23832 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23834 else
23836 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23838 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23839 constm1_rtx,
23840 copy_rtx (out), 1, OPTAB_DIRECT);
23843 out = expand_simple_binop (mode, AND, copy_rtx (out),
23844 gen_int_mode (cf - ct, mode),
23845 copy_rtx (out), 1, OPTAB_DIRECT);
23846 if (ct)
23847 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23848 copy_rtx (out), 1, OPTAB_DIRECT);
23849 if (!rtx_equal_p (out, operands[0]))
23850 emit_move_insn (operands[0], copy_rtx (out));
23852 return true;
23856 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23858 /* Try a few things more with specific constants and a variable. */
23860 optab op;
23861 rtx var, orig_out, out, tmp;
23863 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23864 return false;
23866 /* If one of the two operands is an interesting constant, load a
23867 constant with the above and mask it in with a logical operation. */
23869 if (CONST_INT_P (operands[2]))
23871 var = operands[3];
23872 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23873 operands[3] = constm1_rtx, op = and_optab;
23874 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23875 operands[3] = const0_rtx, op = ior_optab;
23876 else
23877 return false;
23879 else if (CONST_INT_P (operands[3]))
23881 var = operands[2];
23882 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23883 operands[2] = constm1_rtx, op = and_optab;
23884 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
23885 operands[2] = const0_rtx, op = ior_optab;
23886 else
23887 return false;
23889 else
23890 return false;
23892 orig_out = operands[0];
23893 tmp = gen_reg_rtx (mode);
23894 operands[0] = tmp;
23896 /* Recurse to get the constant loaded. */
23897 if (!ix86_expand_int_movcc (operands))
23898 return false;
23900 /* Mask in the interesting variable. */
23901 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23902 OPTAB_WIDEN);
23903 if (!rtx_equal_p (out, orig_out))
23904 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23906 return true;
23910 * For comparison with above,
23912 * movl cf,dest
23913 * movl ct,tmp
23914 * cmpl op1,op2
23915 * cmovcc tmp,dest
23917 * Size 15.
23920 if (! nonimmediate_operand (operands[2], mode))
23921 operands[2] = force_reg (mode, operands[2]);
23922 if (! nonimmediate_operand (operands[3], mode))
23923 operands[3] = force_reg (mode, operands[3]);
23925 if (! register_operand (operands[2], VOIDmode)
23926 && (mode == QImode
23927 || ! register_operand (operands[3], VOIDmode)))
23928 operands[2] = force_reg (mode, operands[2]);
23930 if (mode == QImode
23931 && ! register_operand (operands[3], VOIDmode))
23932 operands[3] = force_reg (mode, operands[3]);
23934 emit_insn (compare_seq);
23935 emit_insn (gen_rtx_SET (operands[0],
23936 gen_rtx_IF_THEN_ELSE (mode,
23937 compare_op, operands[2],
23938 operands[3])));
23939 return true;
23942 /* Swap, force into registers, or otherwise massage the two operands
23943 to an sse comparison with a mask result. Thus we differ a bit from
23944 ix86_prepare_fp_compare_args which expects to produce a flags result.
23946 The DEST operand exists to help determine whether to commute commutative
23947 operators. The POP0/POP1 operands are updated in place. The new
23948 comparison code is returned, or UNKNOWN if not implementable. */
23950 static enum rtx_code
23951 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23952 rtx *pop0, rtx *pop1)
23954 switch (code)
23956 case LTGT:
23957 case UNEQ:
23958 /* AVX supports all the needed comparisons. */
23959 if (TARGET_AVX)
23960 break;
23961 /* We have no LTGT as an operator. We could implement it with
23962 NE & ORDERED, but this requires an extra temporary. It's
23963 not clear that it's worth it. */
23964 return UNKNOWN;
23966 case LT:
23967 case LE:
23968 case UNGT:
23969 case UNGE:
23970 /* These are supported directly. */
23971 break;
23973 case EQ:
23974 case NE:
23975 case UNORDERED:
23976 case ORDERED:
23977 /* AVX has 3 operand comparisons, no need to swap anything. */
23978 if (TARGET_AVX)
23979 break;
23980 /* For commutative operators, try to canonicalize the destination
23981 operand to be first in the comparison - this helps reload to
23982 avoid extra moves. */
23983 if (!dest || !rtx_equal_p (dest, *pop1))
23984 break;
23985 /* FALLTHRU */
23987 case GE:
23988 case GT:
23989 case UNLE:
23990 case UNLT:
23991 /* These are not supported directly before AVX, and furthermore
23992 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23993 comparison operands to transform into something that is
23994 supported. */
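      /* E.g. "a > b" is rewritten as "b < a", so pre-AVX code can use the
	 directly supported LT/LE/UNGT/UNGE forms (cmpltps and friends).  */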
23995 std::swap (*pop0, *pop1);
23996 code = swap_condition (code);
23997 break;
23999 default:
24000 gcc_unreachable ();
24003 return code;
24006 /* Detect conditional moves that exactly match min/max operational
24007 semantics. Note that this is IEEE safe, as long as we don't
24008 interchange the operands.
24010 Returns FALSE if this conditional move doesn't match a MIN/MAX,
24011 and TRUE if the operation is successful and instructions are emitted. */
24013 static bool
24014 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
24015 rtx cmp_op1, rtx if_true, rtx if_false)
24017 machine_mode mode;
24018 bool is_min;
24019 rtx tmp;
24021 if (code == LT)
24023 else if (code == UNGE)
24024 std::swap (if_true, if_false);
24025 else
24026 return false;
24028 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
24029 is_min = true;
24030 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
24031 is_min = false;
24032 else
24033 return false;
24035 mode = GET_MODE (dest);
24037 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
24038 but MODE may be a vector mode and thus not appropriate. */
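  /* When NaNs or signed zeros must be honored, the UNSPEC below keeps the
     operand order fixed, since the SSE min/max instructions are not
     commutative for NaNs or for -0.0 vs +0.0; otherwise a plain SMIN/SMAX
     is emitted.  */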
24039 if (!flag_finite_math_only || flag_signed_zeros)
24041 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
24042 rtvec v;
24044 if_true = force_reg (mode, if_true);
24045 v = gen_rtvec (2, if_true, if_false);
24046 tmp = gen_rtx_UNSPEC (mode, v, u);
24048 else
24050 code = is_min ? SMIN : SMAX;
24051 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
24054 emit_insn (gen_rtx_SET (dest, tmp));
24055 return true;
24058 /* Expand an sse vector comparison. Return the register with the result. */
24060 static rtx
24061 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
24062 rtx op_true, rtx op_false)
24064 machine_mode mode = GET_MODE (dest);
24065 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
24067 /* In the general case the result of the comparison can differ from the operands' type. */
24068 machine_mode cmp_mode;
24070 /* In AVX512F the result of comparison is an integer mask. */
24071 bool maskcmp = false;
24072 rtx x;
24074 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
24076 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
24077 cmp_mode = int_mode_for_size (nbits, 0).require ();
24078 maskcmp = true;
24080 else
24081 cmp_mode = cmp_ops_mode;
24084 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
24085 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
24086 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
24088 if (optimize
24089 || (maskcmp && cmp_mode != mode)
24090 || (op_true && reg_overlap_mentioned_p (dest, op_true))
24091 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
24092 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
24094 /* Compare patterns for int modes are unspec in AVX512F only. */
24095 if (maskcmp && (code == GT || code == EQ))
24097 rtx (*gen)(rtx, rtx, rtx);
24099 switch (cmp_ops_mode)
24101 case E_V64QImode:
24102 gcc_assert (TARGET_AVX512BW);
24103 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
24104 break;
24105 case E_V32HImode:
24106 gcc_assert (TARGET_AVX512BW);
24107 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
24108 break;
24109 case E_V16SImode:
24110 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
24111 break;
24112 case E_V8DImode:
24113 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
24114 break;
24115 default:
24116 gen = NULL;
24119 if (gen)
24121 emit_insn (gen (dest, cmp_op0, cmp_op1));
24122 return dest;
24125 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
24127 if (cmp_mode != mode && !maskcmp)
24129 x = force_reg (cmp_ops_mode, x);
24130 convert_move (dest, x, false);
24132 else
24133 emit_insn (gen_rtx_SET (dest, x));
24135 return dest;
24138 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
24139 operations. This is used for both scalar and vector conditional moves. */
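/* When no suitable blend instruction is available, the fallback at the end
   computes dest = (cmp & op_true) | (~cmp & op_false).  */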
24141 void
24142 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
24144 machine_mode mode = GET_MODE (dest);
24145 machine_mode cmpmode = GET_MODE (cmp);
24147 /* In AVX512F the result of comparison is an integer mask. */
24148 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
24150 rtx t2, t3, x;
24152 /* If we have an integer mask and FP value then we need
24153 to cast mask to FP mode. */
24154 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
24156 cmp = force_reg (cmpmode, cmp);
24157 cmp = gen_rtx_SUBREG (mode, cmp, 0);
24160 if (vector_all_ones_operand (op_true, mode)
24161 && rtx_equal_p (op_false, CONST0_RTX (mode))
24162 && !maskcmp)
24164 emit_insn (gen_rtx_SET (dest, cmp));
24166 else if (op_false == CONST0_RTX (mode)
24167 && !maskcmp)
24169 op_true = force_reg (mode, op_true);
24170 x = gen_rtx_AND (mode, cmp, op_true);
24171 emit_insn (gen_rtx_SET (dest, x));
24173 else if (op_true == CONST0_RTX (mode)
24174 && !maskcmp)
24176 op_false = force_reg (mode, op_false);
24177 x = gen_rtx_NOT (mode, cmp);
24178 x = gen_rtx_AND (mode, x, op_false);
24179 emit_insn (gen_rtx_SET (dest, x));
24181 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
24182 && !maskcmp)
24184 op_false = force_reg (mode, op_false);
24185 x = gen_rtx_IOR (mode, cmp, op_false);
24186 emit_insn (gen_rtx_SET (dest, x));
24188 else if (TARGET_XOP
24189 && !maskcmp)
24191 op_true = force_reg (mode, op_true);
24193 if (!nonimmediate_operand (op_false, mode))
24194 op_false = force_reg (mode, op_false);
24196 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
24197 op_true,
24198 op_false)));
24200 else
24202 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24203 rtx d = dest;
24205 if (!nonimmediate_operand (op_true, mode))
24206 op_true = force_reg (mode, op_true);
24208 op_false = force_reg (mode, op_false);
24210 switch (mode)
24212 case E_V4SFmode:
24213 if (TARGET_SSE4_1)
24214 gen = gen_sse4_1_blendvps;
24215 break;
24216 case E_V2DFmode:
24217 if (TARGET_SSE4_1)
24218 gen = gen_sse4_1_blendvpd;
24219 break;
24220 case E_V16QImode:
24221 case E_V8HImode:
24222 case E_V4SImode:
24223 case E_V2DImode:
24224 if (TARGET_SSE4_1)
24226 gen = gen_sse4_1_pblendvb;
24227 if (mode != V16QImode)
24228 d = gen_reg_rtx (V16QImode);
24229 op_false = gen_lowpart (V16QImode, op_false);
24230 op_true = gen_lowpart (V16QImode, op_true);
24231 cmp = gen_lowpart (V16QImode, cmp);
24233 break;
24234 case E_V8SFmode:
24235 if (TARGET_AVX)
24236 gen = gen_avx_blendvps256;
24237 break;
24238 case E_V4DFmode:
24239 if (TARGET_AVX)
24240 gen = gen_avx_blendvpd256;
24241 break;
24242 case E_V32QImode:
24243 case E_V16HImode:
24244 case E_V8SImode:
24245 case E_V4DImode:
24246 if (TARGET_AVX2)
24248 gen = gen_avx2_pblendvb;
24249 if (mode != V32QImode)
24250 d = gen_reg_rtx (V32QImode);
24251 op_false = gen_lowpart (V32QImode, op_false);
24252 op_true = gen_lowpart (V32QImode, op_true);
24253 cmp = gen_lowpart (V32QImode, cmp);
24255 break;
24257 case E_V64QImode:
24258 gen = gen_avx512bw_blendmv64qi;
24259 break;
24260 case E_V32HImode:
24261 gen = gen_avx512bw_blendmv32hi;
24262 break;
24263 case E_V16SImode:
24264 gen = gen_avx512f_blendmv16si;
24265 break;
24266 case E_V8DImode:
24267 gen = gen_avx512f_blendmv8di;
24268 break;
24269 case E_V8DFmode:
24270 gen = gen_avx512f_blendmv8df;
24271 break;
24272 case E_V16SFmode:
24273 gen = gen_avx512f_blendmv16sf;
24274 break;
24276 default:
24277 break;
24280 if (gen != NULL)
24282 emit_insn (gen (d, op_false, op_true, cmp));
24283 if (d != dest)
24284 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
24286 else
24288 op_true = force_reg (mode, op_true);
24290 t2 = gen_reg_rtx (mode);
24291 if (optimize)
24292 t3 = gen_reg_rtx (mode);
24293 else
24294 t3 = dest;
24296 x = gen_rtx_AND (mode, op_true, cmp);
24297 emit_insn (gen_rtx_SET (t2, x));
24299 x = gen_rtx_NOT (mode, cmp);
24300 x = gen_rtx_AND (mode, x, op_false);
24301 emit_insn (gen_rtx_SET (t3, x));
24303 x = gen_rtx_IOR (mode, t3, t2);
24304 emit_insn (gen_rtx_SET (dest, x));
24309 /* Expand a floating-point conditional move. Return true if successful. */
24311 bool
24312 ix86_expand_fp_movcc (rtx operands[])
24314 machine_mode mode = GET_MODE (operands[0]);
24315 enum rtx_code code = GET_CODE (operands[1]);
24316 rtx tmp, compare_op;
24317 rtx op0 = XEXP (operands[1], 0);
24318 rtx op1 = XEXP (operands[1], 1);
24320 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24322 machine_mode cmode;
24324 /* Since we have no cmove for SSE registers, don't force bad register
24325 allocation just to gain access to one. Deny the movcc when the
24326 comparison mode doesn't match the move mode. */
24327 cmode = GET_MODE (op0);
24328 if (cmode == VOIDmode)
24329 cmode = GET_MODE (op1);
24330 if (cmode != mode)
24331 return false;
24333 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
24334 if (code == UNKNOWN)
24335 return false;
24337 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
24338 operands[2], operands[3]))
24339 return true;
24341 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
24342 operands[2], operands[3]);
24343 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
24344 return true;
24347 if (GET_MODE (op0) == TImode
24348 || (GET_MODE (op0) == DImode
24349 && !TARGET_64BIT))
24350 return false;
24352 /* The floating point conditional move instructions don't directly
24353 support conditions resulting from a signed integer comparison. */
24355 compare_op = ix86_expand_compare (code, op0, op1);
24356 if (!fcmov_comparison_operator (compare_op, VOIDmode))
24358 tmp = gen_reg_rtx (QImode);
24359 ix86_expand_setcc (tmp, code, op0, op1);
24361 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
24364 emit_insn (gen_rtx_SET (operands[0],
24365 gen_rtx_IF_THEN_ELSE (mode, compare_op,
24366 operands[2], operands[3])));
24368 return true;
24371 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
24373 static int
24374 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
24376 switch (code)
24378 case EQ:
24379 return 0;
24380 case LT:
24381 case LTU:
24382 return 1;
24383 case LE:
24384 case LEU:
24385 return 2;
24386 case NE:
24387 return 4;
24388 case GE:
24389 case GEU:
24390 return 5;
24391 case GT:
24392 case GTU:
24393 return 6;
24394 default:
24395 gcc_unreachable ();
24399 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
24401 static int
24402 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
24404 switch (code)
24406 case EQ:
24407 return 0x00;
24408 case NE:
24409 return 0x04;
24410 case GT:
24411 return 0x0e;
24412 case LE:
24413 return 0x02;
24414 case GE:
24415 return 0x0d;
24416 case LT:
24417 return 0x01;
24418 case UNLE:
24419 return 0x0a;
24420 case UNLT:
24421 return 0x09;
24422 case UNGE:
24423 return 0x05;
24424 case UNGT:
24425 return 0x06;
24426 case UNEQ:
24427 return 0x18;
24428 case LTGT:
24429 return 0x0c;
24430 case ORDERED:
24431 return 0x07;
24432 case UNORDERED:
24433 return 0x03;
24434 default:
24435 gcc_unreachable ();
24439 /* Return immediate value to be used in UNSPEC_PCMP
24440 for comparison CODE in MODE. */
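/* The values follow the VCMPPS/VCMPPD and VPCMP predicate encodings: e.g.
   LT yields 0x01 (LT_OS) for float modes and 1 for integer modes, while GT
   yields 0x0e (GT_OS) resp. 6 (not-less-or-equal).  */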
24442 static int
24443 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
24445 if (FLOAT_MODE_P (mode))
24446 return ix86_fp_cmp_code_to_pcmp_immediate (code);
24447 return ix86_int_cmp_code_to_pcmp_immediate (code);
24450 /* Expand AVX-512 vector comparison. */
24452 bool
24453 ix86_expand_mask_vec_cmp (rtx operands[])
24455 machine_mode mask_mode = GET_MODE (operands[0]);
24456 machine_mode cmp_mode = GET_MODE (operands[2]);
24457 enum rtx_code code = GET_CODE (operands[1]);
24458 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
24459 int unspec_code;
24460 rtx unspec;
24462 switch (code)
24464 case LEU:
24465 case GTU:
24466 case GEU:
24467 case LTU:
24468 unspec_code = UNSPEC_UNSIGNED_PCMP;
24469 break;
24471 default:
24472 unspec_code = UNSPEC_PCMP;
24475 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
24476 operands[3], imm),
24477 unspec_code);
24478 emit_insn (gen_rtx_SET (operands[0], unspec));
24480 return true;
24483 /* Expand fp vector comparison. */
24485 bool
24486 ix86_expand_fp_vec_cmp (rtx operands[])
24488 enum rtx_code code = GET_CODE (operands[1]);
24489 rtx cmp;
24491 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24492 &operands[2], &operands[3]);
24493 if (code == UNKNOWN)
24495 rtx temp;
24496 switch (GET_CODE (operands[1]))
24498 case LTGT:
24499 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
24500 operands[3], NULL, NULL);
24501 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
24502 operands[3], NULL, NULL);
24503 code = AND;
24504 break;
24505 case UNEQ:
24506 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
24507 operands[3], NULL, NULL);
24508 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
24509 operands[3], NULL, NULL);
24510 code = IOR;
24511 break;
24512 default:
24513 gcc_unreachable ();
24515 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24516 OPTAB_DIRECT);
24518 else
24519 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
24520 operands[1], operands[2]);
24522 if (operands[0] != cmp)
24523 emit_move_insn (operands[0], cmp);
24525 return true;
24528 static rtx
24529 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
24530 rtx op_true, rtx op_false, bool *negate)
24532 machine_mode data_mode = GET_MODE (dest);
24533 machine_mode mode = GET_MODE (cop0);
24534 rtx x;
24536 *negate = false;
24538 /* XOP supports all of the comparisons on all 128-bit vector int types. */
24539 if (TARGET_XOP
24540 && (mode == V16QImode || mode == V8HImode
24541 || mode == V4SImode || mode == V2DImode))
24543 else
24545 /* Canonicalize the comparison to EQ, GT, GTU. */
24546 switch (code)
24548 case EQ:
24549 case GT:
24550 case GTU:
24551 break;
24553 case NE:
24554 case LE:
24555 case LEU:
24556 code = reverse_condition (code);
24557 *negate = true;
24558 break;
24560 case GE:
24561 case GEU:
24562 code = reverse_condition (code);
24563 *negate = true;
24564 /* FALLTHRU */
24566 case LT:
24567 case LTU:
24568 std::swap (cop0, cop1);
24569 code = swap_condition (code);
24570 break;
24572 default:
24573 gcc_unreachable ();
24576 /* Only SSE4.1/SSE4.2 supports V2DImode. */
24577 if (mode == V2DImode)
24579 switch (code)
24581 case EQ:
24582 /* SSE4.1 supports EQ. */
24583 if (!TARGET_SSE4_1)
24584 return NULL;
24585 break;
24587 case GT:
24588 case GTU:
24589 /* SSE4.2 supports GT/GTU. */
24590 if (!TARGET_SSE4_2)
24591 return NULL;
24592 break;
24594 default:
24595 gcc_unreachable ();
24599 /* Unsigned parallel compare is not supported by the hardware.
24600 Play some tricks to turn this into a signed comparison
24601 against 0. */
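      /* For example, a V4SImode "x >u y" becomes
	 "(x - 0x80000000) >s (y - 0x80000000)", while for byte and word
	 elements it becomes "(x -satu y) != 0" using the unsigned
	 saturating subtraction emitted below.  */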
24602 if (code == GTU)
24604 cop0 = force_reg (mode, cop0);
24606 switch (mode)
24608 case E_V16SImode:
24609 case E_V8DImode:
24610 case E_V8SImode:
24611 case E_V4DImode:
24612 case E_V4SImode:
24613 case E_V2DImode:
24615 rtx t1, t2, mask;
24616 rtx (*gen_sub3) (rtx, rtx, rtx);
24618 switch (mode)
24620 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
24621 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
24622 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
24623 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
24624 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
24625 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
24626 default:
24627 gcc_unreachable ();
24629 /* Subtract (-(INT MAX) - 1) from both operands to make
24630 them signed. */
24631 mask = ix86_build_signbit_mask (mode, true, false);
24632 t1 = gen_reg_rtx (mode);
24633 emit_insn (gen_sub3 (t1, cop0, mask));
24635 t2 = gen_reg_rtx (mode);
24636 emit_insn (gen_sub3 (t2, cop1, mask));
24638 cop0 = t1;
24639 cop1 = t2;
24640 code = GT;
24642 break;
24644 case E_V64QImode:
24645 case E_V32HImode:
24646 case E_V32QImode:
24647 case E_V16HImode:
24648 case E_V16QImode:
24649 case E_V8HImode:
24650 /* Perform a parallel unsigned saturating subtraction. */
24651 x = gen_reg_rtx (mode);
24652 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
24653 cop1)));
24655 cop0 = x;
24656 cop1 = CONST0_RTX (mode);
24657 code = EQ;
24658 *negate = !*negate;
24659 break;
24661 default:
24662 gcc_unreachable ();
24667 if (*negate)
24668 std::swap (op_true, op_false);
24670 /* Allow the comparison to be done in one mode, but the movcc to
24671 happen in another mode. */
24672 if (data_mode == mode)
24674 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
24675 op_true, op_false);
24677 else
24679 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
24680 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
24681 op_true, op_false);
24682 if (GET_MODE (x) == mode)
24683 x = gen_lowpart (data_mode, x);
24686 return x;
24689 /* Expand integer vector comparison. */
24691 bool
24692 ix86_expand_int_vec_cmp (rtx operands[])
24694 rtx_code code = GET_CODE (operands[1]);
24695 bool negate = false;
24696 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
24697 operands[3], NULL, NULL, &negate);
24699 if (!cmp)
24700 return false;
24702 if (negate)
24703 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
24704 CONST0_RTX (GET_MODE (cmp)),
24705 NULL, NULL, &negate);
24707 gcc_assert (!negate);
24709 if (operands[0] != cmp)
24710 emit_move_insn (operands[0], cmp);
24712 return true;
24715 /* Expand a floating-point vector conditional move; a vcond operation
24716 rather than a movcc operation. */
24718 bool
24719 ix86_expand_fp_vcond (rtx operands[])
24721 enum rtx_code code = GET_CODE (operands[3]);
24722 rtx cmp;
24724 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24725 &operands[4], &operands[5]);
24726 if (code == UNKNOWN)
24728 rtx temp;
24729 switch (GET_CODE (operands[3]))
24731 case LTGT:
24732 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
24733 operands[5], operands[0], operands[0]);
24734 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
24735 operands[5], operands[1], operands[2]);
24736 code = AND;
24737 break;
24738 case UNEQ:
24739 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
24740 operands[5], operands[0], operands[0]);
24741 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24742 operands[5], operands[1], operands[2]);
24743 code = IOR;
24744 break;
24745 default:
24746 gcc_unreachable ();
24748 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24749 OPTAB_DIRECT);
24750 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24751 return true;
24754 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24755 operands[5], operands[1], operands[2]))
24756 return true;
24758 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24759 operands[1], operands[2]);
24760 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24761 return true;
24764 /* Expand a signed/unsigned integral vector conditional move. */
24766 bool
24767 ix86_expand_int_vcond (rtx operands[])
24769 machine_mode data_mode = GET_MODE (operands[0]);
24770 machine_mode mode = GET_MODE (operands[4]);
24771 enum rtx_code code = GET_CODE (operands[3]);
24772 bool negate = false;
24773 rtx x, cop0, cop1;
24775 cop0 = operands[4];
24776 cop1 = operands[5];
24778 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24779 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
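  /* E.g. for V4SImode, "x < 0 ? -1 : 0" becomes an arithmetic shift
     (psrad $31) and "x < 0 ? 1 : 0" a logical shift (psrld $31).  */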
24780 if ((code == LT || code == GE)
24781 && data_mode == mode
24782 && cop1 == CONST0_RTX (mode)
24783 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24784 && GET_MODE_UNIT_SIZE (data_mode) > 1
24785 && GET_MODE_UNIT_SIZE (data_mode) <= 8
24786 && (GET_MODE_SIZE (data_mode) == 16
24787 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24789 rtx negop = operands[2 - (code == LT)];
24790 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24791 if (negop == CONST1_RTX (data_mode))
24793 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24794 operands[0], 1, OPTAB_DIRECT);
24795 if (res != operands[0])
24796 emit_move_insn (operands[0], res);
24797 return true;
24799 else if (GET_MODE_INNER (data_mode) != DImode
24800 && vector_all_ones_operand (negop, data_mode))
24802 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24803 operands[0], 0, OPTAB_DIRECT);
24804 if (res != operands[0])
24805 emit_move_insn (operands[0], res);
24806 return true;
24810 if (!nonimmediate_operand (cop1, mode))
24811 cop1 = force_reg (mode, cop1);
24812 if (!general_operand (operands[1], data_mode))
24813 operands[1] = force_reg (data_mode, operands[1]);
24814 if (!general_operand (operands[2], data_mode))
24815 operands[2] = force_reg (data_mode, operands[2]);
24817 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24818 operands[1], operands[2], &negate);
24820 if (!x)
24821 return false;
24823 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24824 operands[2-negate]);
24825 return true;
24828 /* AVX512F does support 64-byte integer vector operations,
24829 thus the longest vector we are faced with is V64QImode. */
24830 #define MAX_VECT_LEN 64
24832 struct expand_vec_perm_d
24834 rtx target, op0, op1;
24835 unsigned char perm[MAX_VECT_LEN];
24836 machine_mode vmode;
24837 unsigned char nelt;
24838 bool one_operand_p;
24839 bool testing_p;
24842 static bool
24843 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24844 struct expand_vec_perm_d *d)
24846 /* ix86_expand_vec_perm_vpermt2 is called from both the const and non-const
24847 expanders, so the arguments are either in d, or in op0, op1, etc. */
24848 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24849 machine_mode maskmode = mode;
24850 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24852 switch (mode)
24854 case E_V8HImode:
24855 if (TARGET_AVX512VL && TARGET_AVX512BW)
24856 gen = gen_avx512vl_vpermt2varv8hi3;
24857 break;
24858 case E_V16HImode:
24859 if (TARGET_AVX512VL && TARGET_AVX512BW)
24860 gen = gen_avx512vl_vpermt2varv16hi3;
24861 break;
24862 case E_V64QImode:
24863 if (TARGET_AVX512VBMI)
24864 gen = gen_avx512bw_vpermt2varv64qi3;
24865 break;
24866 case E_V32HImode:
24867 if (TARGET_AVX512BW)
24868 gen = gen_avx512bw_vpermt2varv32hi3;
24869 break;
24870 case E_V4SImode:
24871 if (TARGET_AVX512VL)
24872 gen = gen_avx512vl_vpermt2varv4si3;
24873 break;
24874 case E_V8SImode:
24875 if (TARGET_AVX512VL)
24876 gen = gen_avx512vl_vpermt2varv8si3;
24877 break;
24878 case E_V16SImode:
24879 if (TARGET_AVX512F)
24880 gen = gen_avx512f_vpermt2varv16si3;
24881 break;
24882 case E_V4SFmode:
24883 if (TARGET_AVX512VL)
24885 gen = gen_avx512vl_vpermt2varv4sf3;
24886 maskmode = V4SImode;
24888 break;
24889 case E_V8SFmode:
24890 if (TARGET_AVX512VL)
24892 gen = gen_avx512vl_vpermt2varv8sf3;
24893 maskmode = V8SImode;
24895 break;
24896 case E_V16SFmode:
24897 if (TARGET_AVX512F)
24899 gen = gen_avx512f_vpermt2varv16sf3;
24900 maskmode = V16SImode;
24902 break;
24903 case E_V2DImode:
24904 if (TARGET_AVX512VL)
24905 gen = gen_avx512vl_vpermt2varv2di3;
24906 break;
24907 case E_V4DImode:
24908 if (TARGET_AVX512VL)
24909 gen = gen_avx512vl_vpermt2varv4di3;
24910 break;
24911 case E_V8DImode:
24912 if (TARGET_AVX512F)
24913 gen = gen_avx512f_vpermt2varv8di3;
24914 break;
24915 case E_V2DFmode:
24916 if (TARGET_AVX512VL)
24918 gen = gen_avx512vl_vpermt2varv2df3;
24919 maskmode = V2DImode;
24921 break;
24922 case E_V4DFmode:
24923 if (TARGET_AVX512VL)
24925 gen = gen_avx512vl_vpermt2varv4df3;
24926 maskmode = V4DImode;
24928 break;
24929 case E_V8DFmode:
24930 if (TARGET_AVX512F)
24932 gen = gen_avx512f_vpermt2varv8df3;
24933 maskmode = V8DImode;
24935 break;
24936 default:
24937 break;
24940 if (gen == NULL)
24941 return false;
24943 /* ix86_expand_vec_perm_vpermt2 is called from both the const and non-const
24944 expanders, so the arguments are either in d, or in op0, op1, etc. */
24945 if (d)
24947 rtx vec[64];
24948 target = d->target;
24949 op0 = d->op0;
24950 op1 = d->op1;
24951 for (int i = 0; i < d->nelt; ++i)
24952 vec[i] = GEN_INT (d->perm[i]);
24953 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24956 emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24957 return true;
24960 /* Expand a variable vector permutation. */
24962 void
24963 ix86_expand_vec_perm (rtx operands[])
24965 rtx target = operands[0];
24966 rtx op0 = operands[1];
24967 rtx op1 = operands[2];
24968 rtx mask = operands[3];
24969 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24970 machine_mode mode = GET_MODE (op0);
24971 machine_mode maskmode = GET_MODE (mask);
24972 int w, e, i;
24973 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24975 /* Number of elements in the vector. */
24976 w = GET_MODE_NUNITS (mode);
24977 e = GET_MODE_UNIT_SIZE (mode);
24978 gcc_assert (w <= 64);
24980 if (TARGET_AVX512F && one_operand_shuffle)
24982 rtx (*gen) (rtx, rtx, rtx) = NULL;
24983 switch (mode)
24985 case E_V16SImode:
24986 gen = gen_avx512f_permvarv16si;
24987 break;
24988 case E_V16SFmode:
24989 gen = gen_avx512f_permvarv16sf;
24990 break;
24991 case E_V8DImode:
24992 gen = gen_avx512f_permvarv8di;
24993 break;
24994 case E_V8DFmode:
24995 gen = gen_avx512f_permvarv8df;
24996 break;
24997 default:
24998 break;
25000 if (gen != NULL)
25002 emit_insn (gen (target, op0, mask));
25003 return;
25007 if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
25008 return;
25010 if (TARGET_AVX2)
25012 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
25014 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
25015 a constant shuffle operand. With a tiny bit of effort we can
25016 use VPERMD instead. A re-interpretation stall for V4DFmode is
25017 unfortunate but there's no avoiding it.
25018 Similarly, for V16HImode we don't have instructions for variable
25019 shuffling, while for V32QImode we can, after preparing suitable
25020 masks, use vpshufb; vpshufb; vpermq; vpor. */
25022 if (mode == V16HImode)
25024 maskmode = mode = V32QImode;
25025 w = 32;
25026 e = 1;
25028 else
25030 maskmode = mode = V8SImode;
25031 w = 8;
25032 e = 4;
25034 t1 = gen_reg_rtx (maskmode);
25036 /* Replicate the low bits of the V4DImode mask into V8SImode:
25037 mask = { A B C D }
25038 t1 = { A A B B C C D D }. */
25039 for (i = 0; i < w / 2; ++i)
25040 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
25041 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
25042 vt = force_reg (maskmode, vt);
25043 mask = gen_lowpart (maskmode, mask);
25044 if (maskmode == V8SImode)
25045 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
25046 else
25047 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
25049 /* Multiply the shuffle indices by two. */
25050 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
25051 OPTAB_DIRECT);
25053 /* Add one to the odd shuffle indices:
25054 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
25055 for (i = 0; i < w / 2; ++i)
25057 vec[i * 2] = const0_rtx;
25058 vec[i * 2 + 1] = const1_rtx;
25060 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
25061 vt = validize_mem (force_const_mem (maskmode, vt));
25062 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
25063 OPTAB_DIRECT);
25065 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
25066 operands[3] = mask = t1;
25067 target = gen_reg_rtx (mode);
25068 op0 = gen_lowpart (mode, op0);
25069 op1 = gen_lowpart (mode, op1);
25072 switch (mode)
25074 case E_V8SImode:
25075 /* The VPERMD and VPERMPS instructions already properly ignore
25076 the high bits of the shuffle elements. No need for us to
25077 perform an AND ourselves. */
25078 if (one_operand_shuffle)
25080 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
25081 if (target != operands[0])
25082 emit_move_insn (operands[0],
25083 gen_lowpart (GET_MODE (operands[0]), target));
25085 else
25087 t1 = gen_reg_rtx (V8SImode);
25088 t2 = gen_reg_rtx (V8SImode);
25089 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
25090 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
25091 goto merge_two;
25093 return;
25095 case E_V8SFmode:
25096 mask = gen_lowpart (V8SImode, mask);
25097 if (one_operand_shuffle)
25098 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
25099 else
25101 t1 = gen_reg_rtx (V8SFmode);
25102 t2 = gen_reg_rtx (V8SFmode);
25103 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
25104 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
25105 goto merge_two;
25107 return;
25109 case E_V4SImode:
25110 /* By combining the two 128-bit input vectors into one 256-bit
25111 input vector, we can use VPERMD and VPERMPS for the full
25112 two-operand shuffle. */
25113 t1 = gen_reg_rtx (V8SImode);
25114 t2 = gen_reg_rtx (V8SImode);
25115 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
25116 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
25117 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
25118 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
25119 return;
25121 case E_V4SFmode:
25122 t1 = gen_reg_rtx (V8SFmode);
25123 t2 = gen_reg_rtx (V8SImode);
25124 mask = gen_lowpart (V4SImode, mask);
25125 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
25126 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
25127 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
25128 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
25129 return;
25131 case E_V32QImode:
25132 t1 = gen_reg_rtx (V32QImode);
25133 t2 = gen_reg_rtx (V32QImode);
25134 t3 = gen_reg_rtx (V32QImode);
25135 vt2 = GEN_INT (-128);
25136 vt = gen_const_vec_duplicate (V32QImode, vt2);
25137 vt = force_reg (V32QImode, vt);
25138 for (i = 0; i < 32; i++)
25139 vec[i] = i < 16 ? vt2 : const0_rtx;
25140 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
25141 vt2 = force_reg (V32QImode, vt2);
25142 /* From mask create two adjusted masks, which contain the same
25143 bits as mask in the low 7 bits of each vector element.
25144 The first mask will have the most significant bit clear
25145 if it requests element from the same 128-bit lane
25146 and MSB set if it requests element from the other 128-bit lane.
25147 The second mask will have the opposite values of the MSB,
25148 and additionally will have its 128-bit lanes swapped.
25149 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
25150 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
25151 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
25152 stands for other 12 bytes. */
25153 /* The bit that says whether an element comes from the same lane or the
25154 other lane is bit 4, so shift it up by 3 to the MSB position. */
25155 t5 = gen_reg_rtx (V4DImode);
25156 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
25157 GEN_INT (3)));
25158 /* Clear MSB bits from the mask just in case it had them set. */
25159 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
25160 /* After this t1 will have MSB set for elements from other lane. */
25161 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
25162 /* Clear bits other than MSB. */
25163 emit_insn (gen_andv32qi3 (t1, t1, vt));
25164 /* Or in the lower bits from mask into t3. */
25165 emit_insn (gen_iorv32qi3 (t3, t1, t2));
25166 /* And invert MSB bits in t1, so MSB is set for elements from the same
25167 lane. */
25168 emit_insn (gen_xorv32qi3 (t1, t1, vt));
25169 /* Swap 128-bit lanes in t3. */
25170 t6 = gen_reg_rtx (V4DImode);
25171 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
25172 const2_rtx, GEN_INT (3),
25173 const0_rtx, const1_rtx));
25174 /* And or in the lower bits from mask into t1. */
25175 emit_insn (gen_iorv32qi3 (t1, t1, t2));
25176 if (one_operand_shuffle)
25178 /* Each of these shuffles will put 0s in places where
25179 element from the other 128-bit lane is needed, otherwise
25180 will shuffle in the requested value. */
25181 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
25182 gen_lowpart (V32QImode, t6)));
25183 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
25184 /* For t3 the 128-bit lanes are swapped again. */
25185 t7 = gen_reg_rtx (V4DImode);
25186 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
25187 const2_rtx, GEN_INT (3),
25188 const0_rtx, const1_rtx));
25189 /* And oring both together leads to the result. */
25190 emit_insn (gen_iorv32qi3 (target, t1,
25191 gen_lowpart (V32QImode, t7)));
25192 if (target != operands[0])
25193 emit_move_insn (operands[0],
25194 gen_lowpart (GET_MODE (operands[0]), target));
25195 return;
25198 t4 = gen_reg_rtx (V32QImode);
25199 /* Similar to the one_operand_shuffle code above, just
25200 repeated twice, once for each operand. The merge_two:
25201 code will merge the two results together. */
25202 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
25203 gen_lowpart (V32QImode, t6)));
25204 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
25205 gen_lowpart (V32QImode, t6)));
25206 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
25207 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
25208 t7 = gen_reg_rtx (V4DImode);
25209 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
25210 const2_rtx, GEN_INT (3),
25211 const0_rtx, const1_rtx));
25212 t8 = gen_reg_rtx (V4DImode);
25213 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
25214 const2_rtx, GEN_INT (3),
25215 const0_rtx, const1_rtx));
25216 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
25217 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
25218 t1 = t4;
25219 t2 = t3;
25220 goto merge_two;
25222 default:
25223 gcc_assert (GET_MODE_SIZE (mode) <= 16);
25224 break;
25228 if (TARGET_XOP)
25230 /* The XOP VPPERM insn supports three inputs. By ignoring the
25231 one_operand_shuffle special case, we avoid creating another
25232 set of constant vectors in memory. */
25233 one_operand_shuffle = false;
25235 /* mask = mask & {2*w-1, ...} */
25236 vt = GEN_INT (2*w - 1);
25238 else
25240 /* mask = mask & {w-1, ...} */
25241 vt = GEN_INT (w - 1);
25244 vt = gen_const_vec_duplicate (maskmode, vt);
25245 mask = expand_simple_binop (maskmode, AND, mask, vt,
25246 NULL_RTX, 0, OPTAB_DIRECT);
25248 /* For non-QImode operations, convert the word permutation control
25249 into a byte permutation control. */
25250 if (mode != V16QImode)
25252 mask = expand_simple_binop (maskmode, ASHIFT, mask,
25253 GEN_INT (exact_log2 (e)),
25254 NULL_RTX, 0, OPTAB_DIRECT);
25256 /* Convert mask to vector of chars. */
25257 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
25259 /* Replicate each of the input bytes into byte positions:
25260 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
25261 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
25262 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
25263 for (i = 0; i < 16; ++i)
25264 vec[i] = GEN_INT (i/e * e);
25265 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
25266 vt = validize_mem (force_const_mem (V16QImode, vt));
25267 if (TARGET_XOP)
25268 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
25269 else
25270 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
25272 /* Convert it into the byte positions by doing
25273 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
25274 for (i = 0; i < 16; ++i)
25275 vec[i] = GEN_INT (i % e);
25276 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
25277 vt = validize_mem (force_const_mem (V16QImode, vt));
25278 emit_insn (gen_addv16qi3 (mask, mask, vt));
25281 /* The actual shuffle operations all operate on V16QImode. */
25282 op0 = gen_lowpart (V16QImode, op0);
25283 op1 = gen_lowpart (V16QImode, op1);
25285 if (TARGET_XOP)
25287 if (GET_MODE (target) != V16QImode)
25288 target = gen_reg_rtx (V16QImode);
25289 emit_insn (gen_xop_pperm (target, op0, op1, mask));
25290 if (target != operands[0])
25291 emit_move_insn (operands[0],
25292 gen_lowpart (GET_MODE (operands[0]), target));
25294 else if (one_operand_shuffle)
25296 if (GET_MODE (target) != V16QImode)
25297 target = gen_reg_rtx (V16QImode);
25298 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
25299 if (target != operands[0])
25300 emit_move_insn (operands[0],
25301 gen_lowpart (GET_MODE (operands[0]), target));
25303 else
25305 rtx xops[6];
25306 bool ok;
25308 /* Shuffle the two input vectors independently. */
25309 t1 = gen_reg_rtx (V16QImode);
25310 t2 = gen_reg_rtx (V16QImode);
25311 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
25312 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
25314 merge_two:
25315 /* Then merge them together. The key is whether any given control
25316 element contained a bit set that indicates the second word. */
25317 mask = operands[3];
25318 vt = GEN_INT (w);
25319 if (maskmode == V2DImode && !TARGET_SSE4_1)
25321 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
25322 more shuffle to convert the V2DI input mask into a V4SI
25323 input mask, at which point the masking that expand_int_vcond
25324 performs will work as desired. */
25325 rtx t3 = gen_reg_rtx (V4SImode);
25326 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
25327 const0_rtx, const0_rtx,
25328 const2_rtx, const2_rtx));
25329 mask = t3;
25330 maskmode = V4SImode;
25331 e = w = 4;
25334 vt = gen_const_vec_duplicate (maskmode, vt);
25335 vt = force_reg (maskmode, vt);
25336 mask = expand_simple_binop (maskmode, AND, mask, vt,
25337 NULL_RTX, 0, OPTAB_DIRECT);
25339 if (GET_MODE (target) != mode)
25340 target = gen_reg_rtx (mode);
25341 xops[0] = target;
25342 xops[1] = gen_lowpart (mode, t2);
25343 xops[2] = gen_lowpart (mode, t1);
25344 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
25345 xops[4] = mask;
25346 xops[5] = vt;
25347 ok = ix86_expand_int_vcond (xops);
25348 gcc_assert (ok);
25349 if (target != operands[0])
25350 emit_move_insn (operands[0],
25351 gen_lowpart (GET_MODE (operands[0]), target));
25355 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
25356 true if we should do zero extension, else sign extension. HIGH_P is
25357 true if we want the N/2 high elements, else the low elements. */
25359 void
25360 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
25362 machine_mode imode = GET_MODE (src);
25363 rtx tmp;
25365 if (TARGET_SSE4_1)
25367 rtx (*unpack)(rtx, rtx);
25368 rtx (*extract)(rtx, rtx) = NULL;
25369 machine_mode halfmode = BLKmode;
25371 switch (imode)
25373 case E_V64QImode:
25374 if (unsigned_p)
25375 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
25376 else
25377 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
25378 halfmode = V32QImode;
25379 extract
25380 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
25381 break;
25382 case E_V32QImode:
25383 if (unsigned_p)
25384 unpack = gen_avx2_zero_extendv16qiv16hi2;
25385 else
25386 unpack = gen_avx2_sign_extendv16qiv16hi2;
25387 halfmode = V16QImode;
25388 extract
25389 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
25390 break;
25391 case E_V32HImode:
25392 if (unsigned_p)
25393 unpack = gen_avx512f_zero_extendv16hiv16si2;
25394 else
25395 unpack = gen_avx512f_sign_extendv16hiv16si2;
25396 halfmode = V16HImode;
25397 extract
25398 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
25399 break;
25400 case E_V16HImode:
25401 if (unsigned_p)
25402 unpack = gen_avx2_zero_extendv8hiv8si2;
25403 else
25404 unpack = gen_avx2_sign_extendv8hiv8si2;
25405 halfmode = V8HImode;
25406 extract
25407 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
25408 break;
25409 case E_V16SImode:
25410 if (unsigned_p)
25411 unpack = gen_avx512f_zero_extendv8siv8di2;
25412 else
25413 unpack = gen_avx512f_sign_extendv8siv8di2;
25414 halfmode = V8SImode;
25415 extract
25416 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
25417 break;
25418 case E_V8SImode:
25419 if (unsigned_p)
25420 unpack = gen_avx2_zero_extendv4siv4di2;
25421 else
25422 unpack = gen_avx2_sign_extendv4siv4di2;
25423 halfmode = V4SImode;
25424 extract
25425 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
25426 break;
25427 case E_V16QImode:
25428 if (unsigned_p)
25429 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
25430 else
25431 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
25432 break;
25433 case E_V8HImode:
25434 if (unsigned_p)
25435 unpack = gen_sse4_1_zero_extendv4hiv4si2;
25436 else
25437 unpack = gen_sse4_1_sign_extendv4hiv4si2;
25438 break;
25439 case E_V4SImode:
25440 if (unsigned_p)
25441 unpack = gen_sse4_1_zero_extendv2siv2di2;
25442 else
25443 unpack = gen_sse4_1_sign_extendv2siv2di2;
25444 break;
25445 default:
25446 gcc_unreachable ();
25449 if (GET_MODE_SIZE (imode) >= 32)
25451 tmp = gen_reg_rtx (halfmode);
25452 emit_insn (extract (tmp, src));
25454 else if (high_p)
25456 /* Shift higher 8 bytes to lower 8 bytes. */
25457 tmp = gen_reg_rtx (V1TImode);
25458 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
25459 GEN_INT (64)));
25460 tmp = gen_lowpart (imode, tmp);
25462 else
25463 tmp = src;
25465 emit_insn (unpack (dest, tmp));
25467 else
25469 rtx (*unpack)(rtx, rtx, rtx);
25471 switch (imode)
25473 case E_V16QImode:
25474 if (high_p)
25475 unpack = gen_vec_interleave_highv16qi;
25476 else
25477 unpack = gen_vec_interleave_lowv16qi;
25478 break;
25479 case E_V8HImode:
25480 if (high_p)
25481 unpack = gen_vec_interleave_highv8hi;
25482 else
25483 unpack = gen_vec_interleave_lowv8hi;
25484 break;
25485 case E_V4SImode:
25486 if (high_p)
25487 unpack = gen_vec_interleave_highv4si;
25488 else
25489 unpack = gen_vec_interleave_lowv4si;
25490 break;
25491 default:
25492 gcc_unreachable ();
25495 if (unsigned_p)
25496 tmp = force_reg (imode, CONST0_RTX (imode));
25497 else
25498 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
25499 src, pc_rtx, pc_rtx);
25501 rtx tmp2 = gen_reg_rtx (imode);
25502 emit_insn (unpack (tmp2, src, tmp));
25503 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
25507 /* Expand conditional increment or decrement using adc/sbb instructions.
25508 The default case using setcc followed by the conditional move can be
25509 done by generic code. */
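/* An illustrative example of what this expands to: for an unsigned
   comparison, "cmp a, b" sets the carry flag iff a < b, after which
   "adc x, 0" implements x += (a < b) and "sbb x, 0" implements
   x -= (a < b) with no setcc or cmov; the choice of adc vs. sbb and of
   the 0 vs. -1 immediate below covers both the increment and the
   decrement case for either sense of the condition.  */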
25510 bool
25511 ix86_expand_int_addcc (rtx operands[])
25513 enum rtx_code code = GET_CODE (operands[1]);
25514 rtx flags;
25515 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
25516 rtx compare_op;
25517 rtx val = const0_rtx;
25518 bool fpcmp = false;
25519 machine_mode mode;
25520 rtx op0 = XEXP (operands[1], 0);
25521 rtx op1 = XEXP (operands[1], 1);
25523 if (operands[3] != const1_rtx
25524 && operands[3] != constm1_rtx)
25525 return false;
25526 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
25527 return false;
25528 code = GET_CODE (compare_op);
25530 flags = XEXP (compare_op, 0);
25532 if (GET_MODE (flags) == CCFPmode)
25534 fpcmp = true;
25535 code = ix86_fp_compare_code_to_integer (code);
25538 if (code != LTU)
25540 val = constm1_rtx;
25541 if (fpcmp)
25542 PUT_CODE (compare_op,
25543 reverse_condition_maybe_unordered
25544 (GET_CODE (compare_op)));
25545 else
25546 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
25549 mode = GET_MODE (operands[0]);
25551 /* Construct either adc or sbb insn. */
25552 if ((code == LTU) == (operands[3] == constm1_rtx))
25554 switch (mode)
25556 case E_QImode:
25557 insn = gen_subqi3_carry;
25558 break;
25559 case E_HImode:
25560 insn = gen_subhi3_carry;
25561 break;
25562 case E_SImode:
25563 insn = gen_subsi3_carry;
25564 break;
25565 case E_DImode:
25566 insn = gen_subdi3_carry;
25567 break;
25568 default:
25569 gcc_unreachable ();
25572 else
25574 switch (mode)
25576 case E_QImode:
25577 insn = gen_addqi3_carry;
25578 break;
25579 case E_HImode:
25580 insn = gen_addhi3_carry;
25581 break;
25582 case E_SImode:
25583 insn = gen_addsi3_carry;
25584 break;
25585 case E_DImode:
25586 insn = gen_adddi3_carry;
25587 break;
25588 default:
25589 gcc_unreachable ();
25592 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
25594 return true;
25598 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
25599 but works for floating point parameters and non-offsettable memories.
25600 For pushes, it returns just stack offsets; the values will be saved
25601 in the right order. Maximally four parts are generated. */
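/* For example, on ia32 a DFmode operand is returned as two SImode parts
   (least significant word first) and an XFmode operand as three, while a
   TFmode operand on x86-64 is returned as two DImode parts.  */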
25603 static int
25604 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
25606 int size;
25608 if (!TARGET_64BIT)
25609 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
25610 else
25611 size = (GET_MODE_SIZE (mode) + 4) / 8;
25613 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
25614 gcc_assert (size >= 2 && size <= 4);
25616 /* Optimize constant pool reference to immediates. This is used by fp
25617 moves, that force all constants to memory to allow combining. */
25618 if (MEM_P (operand) && MEM_READONLY_P (operand))
25619 operand = avoid_constant_pool_reference (operand);
25621 if (MEM_P (operand) && !offsettable_memref_p (operand))
25623 /* The only non-offsettable memories we handle are pushes. */
25624 int ok = push_operand (operand, VOIDmode);
25626 gcc_assert (ok);
25628 operand = copy_rtx (operand);
25629 PUT_MODE (operand, word_mode);
25630 parts[0] = parts[1] = parts[2] = parts[3] = operand;
25631 return size;
25634 if (GET_CODE (operand) == CONST_VECTOR)
25636 scalar_int_mode imode = int_mode_for_mode (mode).require ();
25637 /* Caution: if we looked through a constant pool memory above,
25638 the operand may actually have a different mode now. That's
25639 ok, since we want to pun this all the way back to an integer. */
25640 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
25641 gcc_assert (operand != NULL);
25642 mode = imode;
25645 if (!TARGET_64BIT)
25647 if (mode == DImode)
25648 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25649 else
25651 int i;
25653 if (REG_P (operand))
25655 gcc_assert (reload_completed);
25656 for (i = 0; i < size; i++)
25657 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
25659 else if (offsettable_memref_p (operand))
25661 operand = adjust_address (operand, SImode, 0);
25662 parts[0] = operand;
25663 for (i = 1; i < size; i++)
25664 parts[i] = adjust_address (operand, SImode, 4 * i);
25666 else if (CONST_DOUBLE_P (operand))
25668 const REAL_VALUE_TYPE *r;
25669 long l[4];
25671 r = CONST_DOUBLE_REAL_VALUE (operand);
25672 switch (mode)
25674 case E_TFmode:
25675 real_to_target (l, r, mode);
25676 parts[3] = gen_int_mode (l[3], SImode);
25677 parts[2] = gen_int_mode (l[2], SImode);
25678 break;
25679 case E_XFmode:
25680 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
25681 long double may not be 80-bit. */
25682 real_to_target (l, r, mode);
25683 parts[2] = gen_int_mode (l[2], SImode);
25684 break;
25685 case E_DFmode:
25686 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
25687 break;
25688 default:
25689 gcc_unreachable ();
25691 parts[1] = gen_int_mode (l[1], SImode);
25692 parts[0] = gen_int_mode (l[0], SImode);
25694 else
25695 gcc_unreachable ();
25698 else
25700 if (mode == TImode)
25701 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25702 if (mode == XFmode || mode == TFmode)
25704 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
25705 if (REG_P (operand))
25707 gcc_assert (reload_completed);
25708 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
25709 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
25711 else if (offsettable_memref_p (operand))
25713 operand = adjust_address (operand, DImode, 0);
25714 parts[0] = operand;
25715 parts[1] = adjust_address (operand, upper_mode, 8);
25717 else if (CONST_DOUBLE_P (operand))
25719 long l[4];
25721 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
25723 /* real_to_target puts 32-bit pieces in each long. */
25724 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
25725 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
25726 << 32), DImode);
25728 if (upper_mode == SImode)
25729 parts[1] = gen_int_mode (l[2], SImode);
25730 else
25731 parts[1]
25732 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
25733 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
25734 << 32), DImode);
25736 else
25737 gcc_unreachable ();
25741 return size;
25744 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
25745 All required insns are emitted here, so callers need no fallback
25746 move sequence. Operands 2-5 are used to hold the destination parts
25747 in the correct order and operands 6-9 the matching source parts. */
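/* For example, a DImode register-to-memory move on ia32 is rewritten here
   as two SImode moves; the ordering logic below makes sure that when a
   destination register also serves as an address register of the source,
   it is not clobbered before the overlapping part has been read.  */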
25749 void
25750 ix86_split_long_move (rtx operands[])
25752 rtx part[2][4];
25753 int nparts, i, j;
25754 int push = 0;
25755 int collisions = 0;
25756 machine_mode mode = GET_MODE (operands[0]);
25757 bool collisionparts[4];
25759 /* The DFmode expanders may ask us to move a double.
25760 For a 64-bit target this is a single move. By hiding that fact
25761 here we simplify the i386.md splitters. */
25762 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
25764 /* Optimize constant pool reference to immediates. This is used by
25765 fp moves, that force all constants to memory to allow combining. */
25767 if (MEM_P (operands[1])
25768 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
25769 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
25770 operands[1] = get_pool_constant (XEXP (operands[1], 0));
25771 if (push_operand (operands[0], VOIDmode))
25773 operands[0] = copy_rtx (operands[0]);
25774 PUT_MODE (operands[0], word_mode);
25776 else
25777 operands[0] = gen_lowpart (DImode, operands[0]);
25778 operands[1] = gen_lowpart (DImode, operands[1]);
25779 emit_move_insn (operands[0], operands[1]);
25780 return;
25783 /* The only non-offsettable memory we handle is push. */
25784 if (push_operand (operands[0], VOIDmode))
25785 push = 1;
25786 else
25787 gcc_assert (!MEM_P (operands[0])
25788 || offsettable_memref_p (operands[0]));
25790 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
25791 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
25793 /* When emitting a push, take care of source operands that live on the stack. */
25794 if (push && MEM_P (operands[1])
25795 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
25797 rtx src_base = XEXP (part[1][nparts - 1], 0);
25799 /* Compensate for the stack decrement by 4. */
25800 if (!TARGET_64BIT && nparts == 3
25801 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25802 src_base = plus_constant (Pmode, src_base, 4);
25804 /* src_base refers to the stack pointer and is
25805 automatically decreased by emitted push. */
25806 for (i = 0; i < nparts; i++)
25807 part[1][i] = change_address (part[1][i],
25808 GET_MODE (part[1][i]), src_base);
25811 /* We need to do copy in the right order in case an address register
25812 of the source overlaps the destination. */
25813 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25815 rtx tmp;
25817 for (i = 0; i < nparts; i++)
25819 collisionparts[i]
25820 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25821 if (collisionparts[i])
25822 collisions++;
25825 /* Collision in the middle part can be handled by reordering. */
25826 if (collisions == 1 && nparts == 3 && collisionparts [1])
25828 std::swap (part[0][1], part[0][2]);
25829 std::swap (part[1][1], part[1][2]);
25831 else if (collisions == 1
25832 && nparts == 4
25833 && (collisionparts [1] || collisionparts [2]))
25835 if (collisionparts [1])
25837 std::swap (part[0][1], part[0][2]);
25838 std::swap (part[1][1], part[1][2]);
25840 else
25842 std::swap (part[0][2], part[0][3]);
25843 std::swap (part[1][2], part[1][3]);
25847 /* If there are more collisions, we can't handle it by reordering.
25848 Do an lea to the last part and use only one colliding move. */
25849 else if (collisions > 1)
25851 rtx base, addr;
25853 collisions = 1;
25855 base = part[0][nparts - 1];
25857 /* Handle the case when the last part isn't valid for lea.
25858 Happens in 64-bit mode storing the 12-byte XFmode. */
25859 if (GET_MODE (base) != Pmode)
25860 base = gen_rtx_REG (Pmode, REGNO (base));
25862 addr = XEXP (part[1][0], 0);
25863 if (TARGET_TLS_DIRECT_SEG_REFS)
25865 struct ix86_address parts;
25866 int ok = ix86_decompose_address (addr, &parts);
25867 gcc_assert (ok);
25868 /* It is not valid to use %gs: or %fs: in lea. */
25869 gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
25871 emit_insn (gen_rtx_SET (base, addr));
25872 part[1][0] = replace_equiv_address (part[1][0], base);
25873 for (i = 1; i < nparts; i++)
25875 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25876 part[1][i] = replace_equiv_address (part[1][i], tmp);
25881 if (push)
25883 if (!TARGET_64BIT)
25885 if (nparts == 3)
25887 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25888 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25889 stack_pointer_rtx, GEN_INT (-4)));
25890 emit_move_insn (part[0][2], part[1][2]);
25892 else if (nparts == 4)
25894 emit_move_insn (part[0][3], part[1][3]);
25895 emit_move_insn (part[0][2], part[1][2]);
25898 else
25900 /* In 64-bit mode we don't have a 32-bit push available. If the operand
25901 is a register, that is OK - we just use the larger counterpart. We also
25902 retype the memory - this comes from an attempt to avoid a REX prefix
25903 when moving the second half of a TFmode value. */
25904 if (GET_MODE (part[1][1]) == SImode)
25906 switch (GET_CODE (part[1][1]))
25908 case MEM:
25909 part[1][1] = adjust_address (part[1][1], DImode, 0);
25910 break;
25912 case REG:
25913 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25914 break;
25916 default:
25917 gcc_unreachable ();
25920 if (GET_MODE (part[1][0]) == SImode)
25921 part[1][0] = part[1][1];
25924 emit_move_insn (part[0][1], part[1][1]);
25925 emit_move_insn (part[0][0], part[1][0]);
25926 return;
25929 /* Choose correct order to not overwrite the source before it is copied. */
25930 if ((REG_P (part[0][0])
25931 && REG_P (part[1][1])
25932 && (REGNO (part[0][0]) == REGNO (part[1][1])
25933 || (nparts == 3
25934 && REGNO (part[0][0]) == REGNO (part[1][2]))
25935 || (nparts == 4
25936 && REGNO (part[0][0]) == REGNO (part[1][3]))))
25937 || (collisions > 0
25938 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25940 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25942 operands[2 + i] = part[0][j];
25943 operands[6 + i] = part[1][j];
25946 else
25948 for (i = 0; i < nparts; i++)
25950 operands[2 + i] = part[0][i];
25951 operands[6 + i] = part[1][i];
25955 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
25956 if (optimize_insn_for_size_p ())
25958 for (j = 0; j < nparts - 1; j++)
25959 if (CONST_INT_P (operands[6 + j])
25960 && operands[6 + j] != const0_rtx
25961 && REG_P (operands[2 + j]))
25962 for (i = j; i < nparts - 1; i++)
25963 if (CONST_INT_P (operands[7 + i])
25964 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25965 operands[7 + i] = operands[2 + j];
25968 for (i = 0; i < nparts; i++)
25969 emit_move_insn (operands[2 + i], operands[6 + i]);
25971 return;
25974 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25975 left shift by a constant, either using a single shift or
25976 a sequence of add instructions. */
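/* E.g. a left shift by 1 (or by a small count when adds are cheap enough)
   is emitted as repeated "add reg, reg" instead of a single shift; the
   cost comparison below makes that choice.  */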
25978 static void
25979 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25981 rtx (*insn)(rtx, rtx, rtx);
25983 if (count == 1
25984 || (count * ix86_cost->add <= ix86_cost->shift_const
25985 && !optimize_insn_for_size_p ()))
25987 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25988 while (count-- > 0)
25989 emit_insn (insn (operand, operand, operand));
25991 else
25993 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25994 emit_insn (insn (operand, operand, GEN_INT (count)));
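/* Split a double-word (DImode on ia32, TImode on x86-64) left shift into
   operations on the two half-word registers.  Constant counts are handled
   directly; variable counts use shld plus a shift, followed by a
   conditional-move based fixup (using SCRATCH) when CMOV is available, or
   an alternative fixup sequence otherwise.  */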
25998 void
25999 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
26001 rtx (*gen_ashl3)(rtx, rtx, rtx);
26002 rtx (*gen_shld)(rtx, rtx, rtx);
26003 int half_width = GET_MODE_BITSIZE (mode) >> 1;
26005 rtx low[2], high[2];
26006 int count;
26008 if (CONST_INT_P (operands[2]))
26010 split_double_mode (mode, operands, 2, low, high);
26011 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26013 if (count >= half_width)
26015 emit_move_insn (high[0], low[1]);
26016 emit_move_insn (low[0], const0_rtx);
26018 if (count > half_width)
26019 ix86_expand_ashl_const (high[0], count - half_width, mode);
26021 else
26023 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
26025 if (!rtx_equal_p (operands[0], operands[1]))
26026 emit_move_insn (operands[0], operands[1]);
26028 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
26029 ix86_expand_ashl_const (low[0], count, mode);
26031 return;
26034 split_double_mode (mode, operands, 1, low, high);
26036 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
26038 if (operands[1] == const1_rtx)
26040 /* Assuming we've chosen QImode-capable registers, 1 << N
26041 can be done with two 32/64-bit shifts, no branches, no cmoves. */
26042 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
26044 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
26046 ix86_expand_clear (low[0]);
26047 ix86_expand_clear (high[0]);
26048 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
26050 d = gen_lowpart (QImode, low[0]);
26051 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
26052 s = gen_rtx_EQ (QImode, flags, const0_rtx);
26053 emit_insn (gen_rtx_SET (d, s));
26055 d = gen_lowpart (QImode, high[0]);
26056 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
26057 s = gen_rtx_NE (QImode, flags, const0_rtx);
26058 emit_insn (gen_rtx_SET (d, s));
26061 /* Otherwise, we can get the same results by manually performing
26062 a bit extract operation on bit 5/6, and then performing the two
26063 shifts. The two methods of getting 0/1 into low/high are exactly
26064 the same size. Avoiding the shift in the bit extract case helps
26065 pentium4 a bit; no one else seems to care much either way. */
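/* Concretely, for a DImode 1 << cl on ia32 this emits roughly:
       high = (cl >> 5) & 1;   low = high ^ 1;
       low <<= cl;             high <<= cl;
   Because the hardware shift only uses the low 5 bits of the count,
   exactly one of low/high ends up holding 1 << (cl & 31) and the other
   holds 0, giving the full double-word result without branches.  */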
26066 else
26068 machine_mode half_mode;
26069 rtx (*gen_lshr3)(rtx, rtx, rtx);
26070 rtx (*gen_and3)(rtx, rtx, rtx);
26071 rtx (*gen_xor3)(rtx, rtx, rtx);
26072 HOST_WIDE_INT bits;
26073 rtx x;
26075 if (mode == DImode)
26077 half_mode = SImode;
26078 gen_lshr3 = gen_lshrsi3;
26079 gen_and3 = gen_andsi3;
26080 gen_xor3 = gen_xorsi3;
26081 bits = 5;
26083 else
26085 half_mode = DImode;
26086 gen_lshr3 = gen_lshrdi3;
26087 gen_and3 = gen_anddi3;
26088 gen_xor3 = gen_xordi3;
26089 bits = 6;
26092 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
26093 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
26094 else
26095 x = gen_lowpart (half_mode, operands[2]);
26096 emit_insn (gen_rtx_SET (high[0], x));
26098 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
26099 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
26100 emit_move_insn (low[0], high[0]);
26101 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
26104 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
26105 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
26106 return;
26109 if (operands[1] == constm1_rtx)
26111 /* For -1 << N, we can avoid the shld instruction, because we
26112 know that we're shifting 0...31/63 ones into a -1. */
26113 emit_move_insn (low[0], constm1_rtx);
26114 if (optimize_insn_for_size_p ())
26115 emit_move_insn (high[0], low[0]);
26116 else
26117 emit_move_insn (high[0], constm1_rtx);
26119 else
26121 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
26123 if (!rtx_equal_p (operands[0], operands[1]))
26124 emit_move_insn (operands[0], operands[1]);
26126 split_double_mode (mode, operands, 1, low, high);
26127 emit_insn (gen_shld (high[0], low[0], operands[2]));
26130 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
26132 if (TARGET_CMOVE && scratch)
26134 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26135 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26137 ix86_expand_clear (scratch);
26138 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
26140 else
26142 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
26143 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
26145 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
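/* Split a double-word arithmetic right shift into operations on the two
   half-word registers.  Constant counts are handled directly (counts of
   half the width or more reduce to a sign-copy plus a smaller shift);
   variable counts use shrd plus sar, followed by a cmov-based fixup when
   CMOV and a scratch register are available, or an alternative fixup
   sequence otherwise.  */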
26149 void
26150 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
26152 rtx (*gen_ashr3)(rtx, rtx, rtx)
26153 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
26154 rtx (*gen_shrd)(rtx, rtx, rtx);
26155 int half_width = GET_MODE_BITSIZE (mode) >> 1;
26157 rtx low[2], high[2];
26158 int count;
26160 if (CONST_INT_P (operands[2]))
26162 split_double_mode (mode, operands, 2, low, high);
26163 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26165 if (count == GET_MODE_BITSIZE (mode) - 1)
26167 emit_move_insn (high[0], high[1]);
26168 emit_insn (gen_ashr3 (high[0], high[0],
26169 GEN_INT (half_width - 1)));
26170 emit_move_insn (low[0], high[0]);
26173 else if (count >= half_width)
26175 emit_move_insn (low[0], high[1]);
26176 emit_move_insn (high[0], low[0]);
26177 emit_insn (gen_ashr3 (high[0], high[0],
26178 GEN_INT (half_width - 1)));
26180 if (count > half_width)
26181 emit_insn (gen_ashr3 (low[0], low[0],
26182 GEN_INT (count - half_width)));
26184 else
26186 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26188 if (!rtx_equal_p (operands[0], operands[1]))
26189 emit_move_insn (operands[0], operands[1]);
26191 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
26192 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
26195 else
26197 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26199 if (!rtx_equal_p (operands[0], operands[1]))
26200 emit_move_insn (operands[0], operands[1]);
26202 split_double_mode (mode, operands, 1, low, high);
26204 emit_insn (gen_shrd (low[0], high[0], operands[2]));
26205 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
26207 if (TARGET_CMOVE && scratch)
26209 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26210 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26212 emit_move_insn (scratch, high[0]);
26213 emit_insn (gen_ashr3 (scratch, scratch,
26214 GEN_INT (half_width - 1)));
26215 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
26216 scratch));
26218 else
26220 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
26221 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
26223 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
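/* Split a double-word logical right shift into operations on the two
   half-word registers, analogously to ix86_split_ashr but shifting in
   zeros: for counts of half the width or more the high half is cleared
   rather than filled with sign bits.  */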
26228 void
26229 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
26231 rtx (*gen_lshr3)(rtx, rtx, rtx)
26232 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
26233 rtx (*gen_shrd)(rtx, rtx, rtx);
26234 int half_width = GET_MODE_BITSIZE (mode) >> 1;
26236 rtx low[2], high[2];
26237 int count;
26239 if (CONST_INT_P (operands[2]))
26241 split_double_mode (mode, operands, 2, low, high);
26242 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26244 if (count >= half_width)
26246 emit_move_insn (low[0], high[1]);
26247 ix86_expand_clear (high[0]);
26249 if (count > half_width)
26250 emit_insn (gen_lshr3 (low[0], low[0],
26251 GEN_INT (count - half_width)));
26253 else
26255 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26257 if (!rtx_equal_p (operands[0], operands[1]))
26258 emit_move_insn (operands[0], operands[1]);
26260 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
26261 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
26264 else
26266 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26268 if (!rtx_equal_p (operands[0], operands[1]))
26269 emit_move_insn (operands[0], operands[1]);
26271 split_double_mode (mode, operands, 1, low, high);
26273 emit_insn (gen_shrd (low[0], high[0], operands[2]));
26274 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
26276 if (TARGET_CMOVE && scratch)
26278 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26279 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26281 ix86_expand_clear (scratch);
26282 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
26283 scratch));
26285 else
26287 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
26288 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
26290 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
26295 /* Predict just emitted jump instruction to be taken with probability PROB. */
26296 static void
26297 predict_jump (int prob)
26299 rtx_insn *insn = get_last_insn ();
26300 gcc_assert (JUMP_P (insn));
26301 add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
26304 /* Helper function for the string operations below. Test whether VARIABLE
26305 has any of the VALUE bits set; if none are set (i.e. it is suitably
26306 aligned), jump to the returned label. */
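/* Typical use (see the epilogue expanders below):
     label = ix86_expand_aligntest (count, 4, true);
     ... emit the code handling the "bit 4 of COUNT is set" case ...
     emit_label (label);
   i.e. the code emitted in between is skipped when the tested bit is
   clear.  */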
26306 static rtx_code_label *
26307 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
26309 rtx_code_label *label = gen_label_rtx ();
26310 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
26311 if (GET_MODE (variable) == DImode)
26312 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
26313 else
26314 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
26315 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
26316 1, label);
26317 if (epilogue)
26318 predict_jump (REG_BR_PROB_BASE * 50 / 100);
26319 else
26320 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26321 return label;
26324 /* Decrease COUNTREG by VALUE. */
26325 static void
26326 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
26328 rtx (*gen_add)(rtx, rtx, rtx)
26329 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
26331 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
26334 /* Zero extend possibly SImode EXP to Pmode register. */
26335 rtx
26336 ix86_zero_extend_to_Pmode (rtx exp)
26338 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
26341 /* Divide COUNTREG by SCALE. */
26342 static rtx
26343 scale_counter (rtx countreg, int scale)
26345 rtx sc;
26347 if (scale == 1)
26348 return countreg;
26349 if (CONST_INT_P (countreg))
26350 return GEN_INT (INTVAL (countreg) / scale);
26351 gcc_assert (REG_P (countreg));
26353 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
26354 GEN_INT (exact_log2 (scale)),
26355 NULL, 1, OPTAB_DIRECT);
26356 return sc;
26359 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
26360 DImode for constant loop counts. */
26362 static machine_mode
26363 counter_mode (rtx count_exp)
26365 if (GET_MODE (count_exp) != VOIDmode)
26366 return GET_MODE (count_exp);
26367 if (!CONST_INT_P (count_exp))
26368 return Pmode;
26369 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
26370 return DImode;
26371 return SImode;
26374 /* Copy the address to a Pmode register. This is used for x32 to
26375 truncate DImode TLS address to a SImode register. */
26377 static rtx
26378 ix86_copy_addr_to_reg (rtx addr)
26380 rtx reg;
26381 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
26383 reg = copy_addr_to_reg (addr);
26384 REG_POINTER (reg) = 1;
26385 return reg;
26387 else
26389 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
26390 reg = copy_to_mode_reg (DImode, addr);
26391 REG_POINTER (reg) = 1;
26392 return gen_rtx_SUBREG (SImode, reg, 0);
26396 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
26397 by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall
26398 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
26399 equivalent loop to set the memory to VALUE (supposed to be in MODE).
26401 The size is rounded down to whole number of chunk size moved at once.
26402 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
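/* The generated code has roughly this shape (a sketch, not the exact RTL):
     size = COUNT & -(chunk * UNROLL);
     if (size == 0) goto out;   (only emitted when a chunk may be 1 byte)
     iter = 0;
   top:
     copy or set UNROLL chunks of MODE at DEST + iter (and SRC + iter);
     iter += chunk * UNROLL;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;
   out:  */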
26405 static void
26406 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
26407 rtx destptr, rtx srcptr, rtx value,
26408 rtx count, machine_mode mode, int unroll,
26409 int expected_size, bool issetmem)
26411 rtx_code_label *out_label, *top_label;
26412 rtx iter, tmp;
26413 machine_mode iter_mode = counter_mode (count);
26414 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
26415 rtx piece_size = GEN_INT (piece_size_n);
26416 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
26417 rtx size;
26418 int i;
26420 top_label = gen_label_rtx ();
26421 out_label = gen_label_rtx ();
26422 iter = gen_reg_rtx (iter_mode);
26424 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
26425 NULL, 1, OPTAB_DIRECT);
26426 /* Those two should combine. */
26427 if (piece_size == const1_rtx)
26429 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
26430 true, out_label);
26431 predict_jump (REG_BR_PROB_BASE * 10 / 100);
26433 emit_move_insn (iter, const0_rtx);
26435 emit_label (top_label);
26437 tmp = convert_modes (Pmode, iter_mode, iter, true);
26439 /* This assert could be relaxed - in that case we'd need to compute
26440 the smallest power of two containing PIECE_SIZE_N and pass it to
26441 offset_address. */
26442 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
26443 destmem = offset_address (destmem, tmp, piece_size_n);
26444 destmem = adjust_address (destmem, mode, 0);
26446 if (!issetmem)
26448 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
26449 srcmem = adjust_address (srcmem, mode, 0);
26451 /* When unrolling for chips that reorder memory reads and writes,
26452 we can save registers by using a single temporary.
26453 Also, using 4 temporaries is overkill in 32-bit mode. */
26454 if (!TARGET_64BIT && 0)
26456 for (i = 0; i < unroll; i++)
26458 if (i)
26460 destmem =
26461 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26462 srcmem =
26463 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
26465 emit_move_insn (destmem, srcmem);
26468 else
26470 rtx tmpreg[4];
26471 gcc_assert (unroll <= 4);
26472 for (i = 0; i < unroll; i++)
26474 tmpreg[i] = gen_reg_rtx (mode);
26475 if (i)
26477 srcmem =
26478 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
26480 emit_move_insn (tmpreg[i], srcmem);
26482 for (i = 0; i < unroll; i++)
26484 if (i)
26486 destmem =
26487 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26489 emit_move_insn (destmem, tmpreg[i]);
26493 else
26494 for (i = 0; i < unroll; i++)
26496 if (i)
26497 destmem =
26498 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26499 emit_move_insn (destmem, value);
26502 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
26503 true, OPTAB_LIB_WIDEN);
26504 if (tmp != iter)
26505 emit_move_insn (iter, tmp);
26507 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
26508 true, top_label);
26509 if (expected_size != -1)
26511 expected_size /= GET_MODE_SIZE (mode) * unroll;
26512 if (expected_size == 0)
26513 predict_jump (0);
26514 else if (expected_size > REG_BR_PROB_BASE)
26515 predict_jump (REG_BR_PROB_BASE - 1);
26516 else
26517 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
26519 else
26520 predict_jump (REG_BR_PROB_BASE * 80 / 100);
26521 iter = ix86_zero_extend_to_Pmode (iter);
26522 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
26523 true, OPTAB_LIB_WIDEN);
26524 if (tmp != destptr)
26525 emit_move_insn (destptr, tmp);
26526 if (!issetmem)
26528 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
26529 true, OPTAB_LIB_WIDEN);
26530 if (tmp != srcptr)
26531 emit_move_insn (srcptr, tmp);
26533 emit_label (out_label);
26536 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
26537 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
26538 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
26539 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
26540 ORIG_VALUE is the original value passed to memset to fill the memory with.
26541 Other arguments have same meaning as for previous function. */
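/* The emitted RTL corresponds to a single "rep movs" or "rep stos" of the
   chosen element size: the byte count is pre-scaled into the count
   register, and DESTEXP/SRCEXP describe the final pointer values so that
   the dataflow of the string instruction is visible to the optimizers.  */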
26543 static void
26544 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
26545 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
26546 rtx count,
26547 machine_mode mode, bool issetmem)
26549 rtx destexp;
26550 rtx srcexp;
26551 rtx countreg;
26552 HOST_WIDE_INT rounded_count;
26554 /* If possible, it is shorter to use rep movs.
26555 TODO: Maybe it is better to move this logic to decide_alg. */
26556 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
26557 && (!issetmem || orig_value == const0_rtx))
26558 mode = SImode;
26560 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
26561 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
26563 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
26564 GET_MODE_SIZE (mode)));
26565 if (mode != QImode)
26567 destexp = gen_rtx_ASHIFT (Pmode, countreg,
26568 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26569 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
26571 else
26572 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
26573 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
26575 rounded_count
26576 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26577 destmem = shallow_copy_rtx (destmem);
26578 set_mem_size (destmem, rounded_count);
26580 else if (MEM_SIZE_KNOWN_P (destmem))
26581 clear_mem_size (destmem);
26583 if (issetmem)
26585 value = force_reg (mode, gen_lowpart (mode, value));
26586 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
26588 else
26590 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
26591 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
26592 if (mode != QImode)
26594 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
26595 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26596 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
26598 else
26599 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
26600 if (CONST_INT_P (count))
26602 rounded_count
26603 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26604 srcmem = shallow_copy_rtx (srcmem);
26605 set_mem_size (srcmem, rounded_count);
26607 else
26609 if (MEM_SIZE_KNOWN_P (srcmem))
26610 clear_mem_size (srcmem);
26612 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
26613 destexp, srcexp));
26617 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
26618 DESTMEM.
26619 SRCMEM is passed by pointer so that it can be updated on return.
26620 The return value is the updated DESTMEM. */
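/* For instance (assuming SSE is available), a 16-byte SIZE_TO_MOVE is
   typically done as a single V2DImode load and store through a temporary
   register, after which DESTPTR and SRCPTR have both been advanced by 16;
   without suitable vector moves the chunk falls back to word-sized
   pieces.  */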
26621 static rtx
26622 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
26623 HOST_WIDE_INT size_to_move)
26625 rtx dst = destmem, src = *srcmem, adjust, tempreg;
26626 enum insn_code code;
26627 machine_mode move_mode;
26628 int piece_size, i;
26630 /* Find the widest mode in which we could perform moves.
26631 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
26632 it until a move of that size is supported.
26633 piece_size = 1 << floor_log2 (size_to_move);
26634 while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
26635 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26637 gcc_assert (piece_size > 1);
26638 piece_size >>= 1;
26641 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26642 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26643 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26645 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26646 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
26647 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26649 move_mode = word_mode;
26650 piece_size = GET_MODE_SIZE (move_mode);
26651 code = optab_handler (mov_optab, move_mode);
26654 gcc_assert (code != CODE_FOR_nothing);
26656 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26657 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
26659 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
26660 gcc_assert (size_to_move % piece_size == 0);
26661 adjust = GEN_INT (piece_size);
26662 for (i = 0; i < size_to_move; i += piece_size)
26664 /* We move from memory to memory, so we'll need to do it via
26665 a temporary register. */
26666 tempreg = gen_reg_rtx (move_mode);
26667 emit_insn (GEN_FCN (code) (tempreg, src));
26668 emit_insn (GEN_FCN (code) (dst, tempreg));
26670 emit_move_insn (destptr,
26671 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26672 emit_move_insn (srcptr,
26673 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
26675 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26676 piece_size);
26677 src = adjust_automodify_address_nv (src, move_mode, srcptr,
26678 piece_size);
26681 /* Update DST and SRC rtx. */
26682 *srcmem = src;
26683 return dst;
26686 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
26687 static void
26688 expand_movmem_epilogue (rtx destmem, rtx srcmem,
26689 rtx destptr, rtx srcptr, rtx count, int max_size)
26691 rtx src, dest;
26692 if (CONST_INT_P (count))
26694 HOST_WIDE_INT countval = INTVAL (count);
26695 HOST_WIDE_INT epilogue_size = countval % max_size;
26696 int i;
26698 /* For now MAX_SIZE should be a power of 2. This assert could be
26699 relaxed, but it'll require a bit more complicated epilogue
26700 expanding. */
26701 gcc_assert ((max_size & (max_size - 1)) == 0);
26702 for (i = max_size; i >= 1; i >>= 1)
26704 if (epilogue_size & i)
26705 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26707 return;
26709 if (max_size > 8)
26711 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
26712 count, 1, OPTAB_DIRECT);
26713 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
26714 count, QImode, 1, 4, false);
26715 return;
26718 /* When there are stringops, we can cheaply increase dest and src pointers.
26719 Otherwise we save code size by maintaining offset (zero is readily
26720 available from the preceding rep operation) and using x86 addressing modes. */
26722 if (TARGET_SINGLE_STRINGOP)
26724 if (max_size > 4)
26726 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26727 src = change_address (srcmem, SImode, srcptr);
26728 dest = change_address (destmem, SImode, destptr);
26729 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26730 emit_label (label);
26731 LABEL_NUSES (label) = 1;
26733 if (max_size > 2)
26735 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26736 src = change_address (srcmem, HImode, srcptr);
26737 dest = change_address (destmem, HImode, destptr);
26738 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26739 emit_label (label);
26740 LABEL_NUSES (label) = 1;
26742 if (max_size > 1)
26744 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26745 src = change_address (srcmem, QImode, srcptr);
26746 dest = change_address (destmem, QImode, destptr);
26747 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26748 emit_label (label);
26749 LABEL_NUSES (label) = 1;
26752 else
26754 rtx offset = force_reg (Pmode, const0_rtx);
26755 rtx tmp;
26757 if (max_size > 4)
26759 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26760 src = change_address (srcmem, SImode, srcptr);
26761 dest = change_address (destmem, SImode, destptr);
26762 emit_move_insn (dest, src);
26763 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
26764 true, OPTAB_LIB_WIDEN);
26765 if (tmp != offset)
26766 emit_move_insn (offset, tmp);
26767 emit_label (label);
26768 LABEL_NUSES (label) = 1;
26770 if (max_size > 2)
26772 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26773 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26774 src = change_address (srcmem, HImode, tmp);
26775 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26776 dest = change_address (destmem, HImode, tmp);
26777 emit_move_insn (dest, src);
26778 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
26779 true, OPTAB_LIB_WIDEN);
26780 if (tmp != offset)
26781 emit_move_insn (offset, tmp);
26782 emit_label (label);
26783 LABEL_NUSES (label) = 1;
26785 if (max_size > 1)
26787 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26788 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26789 src = change_address (srcmem, QImode, tmp);
26790 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26791 dest = change_address (destmem, QImode, tmp);
26792 emit_move_insn (dest, src);
26793 emit_label (label);
26794 LABEL_NUSES (label) = 1;
26799 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
26800 with value PROMOTED_VAL.
26801 Return value is the updated DST. */
26803 static rtx
26804 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26805 HOST_WIDE_INT size_to_move)
26807 rtx dst = destmem, adjust;
26808 enum insn_code code;
26809 machine_mode move_mode;
26810 int piece_size, i;
26812 /* Find the widest mode in which we could perform moves.
26813 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
26814 it until a move of that size is supported.
26815 move_mode = GET_MODE (promoted_val);
26816 if (move_mode == VOIDmode)
26817 move_mode = QImode;
26818 if (size_to_move < GET_MODE_SIZE (move_mode))
26820 unsigned int move_bits = size_to_move * BITS_PER_UNIT;
26821 move_mode = int_mode_for_size (move_bits, 0).require ();
26822 promoted_val = gen_lowpart (move_mode, promoted_val);
26824 piece_size = GET_MODE_SIZE (move_mode);
26825 code = optab_handler (mov_optab, move_mode);
26826 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26828 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26830 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
26831 gcc_assert (size_to_move % piece_size == 0);
26832 adjust = GEN_INT (piece_size);
26833 for (i = 0; i < size_to_move; i += piece_size)
26835 if (piece_size <= GET_MODE_SIZE (word_mode))
26837 emit_insn (gen_strset (destptr, dst, promoted_val));
26838 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26839 piece_size);
26840 continue;
26843 emit_insn (GEN_FCN (code) (dst, promoted_val));
26845 emit_move_insn (destptr,
26846 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26848 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26849 piece_size);
26852 /* Update DST rtx. */
26853 return dst;
26855 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26856 static void
26857 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26858 rtx count, int max_size)
26860 count =
26861 expand_simple_binop (counter_mode (count), AND, count,
26862 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26863 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26864 gen_lowpart (QImode, value), count, QImode,
26865 1, max_size / 2, true);
26868 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26869 static void
26870 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26871 rtx count, int max_size)
26873 rtx dest;
26875 if (CONST_INT_P (count))
26877 HOST_WIDE_INT countval = INTVAL (count);
26878 HOST_WIDE_INT epilogue_size = countval % max_size;
26879 int i;
26881 /* For now MAX_SIZE should be a power of 2. This assert could be
26882 relaxed, but it'll require a bit more complicated epilogue
26883 expanding. */
26884 gcc_assert ((max_size & (max_size - 1)) == 0);
26885 for (i = max_size; i >= 1; i >>= 1)
26887 if (epilogue_size & i)
26889 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26890 destmem = emit_memset (destmem, destptr, vec_value, i);
26891 else
26892 destmem = emit_memset (destmem, destptr, value, i);
26895 return;
26897 if (max_size > 32)
26899 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26900 return;
26902 if (max_size > 16)
26904 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26905 if (TARGET_64BIT)
26907 dest = change_address (destmem, DImode, destptr);
26908 emit_insn (gen_strset (destptr, dest, value));
26909 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26910 emit_insn (gen_strset (destptr, dest, value));
26912 else
26914 dest = change_address (destmem, SImode, destptr);
26915 emit_insn (gen_strset (destptr, dest, value));
26916 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26917 emit_insn (gen_strset (destptr, dest, value));
26918 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26919 emit_insn (gen_strset (destptr, dest, value));
26920 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26921 emit_insn (gen_strset (destptr, dest, value));
26923 emit_label (label);
26924 LABEL_NUSES (label) = 1;
26926 if (max_size > 8)
26928 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26929 if (TARGET_64BIT)
26931 dest = change_address (destmem, DImode, destptr);
26932 emit_insn (gen_strset (destptr, dest, value));
26934 else
26936 dest = change_address (destmem, SImode, destptr);
26937 emit_insn (gen_strset (destptr, dest, value));
26938 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26939 emit_insn (gen_strset (destptr, dest, value));
26941 emit_label (label);
26942 LABEL_NUSES (label) = 1;
26944 if (max_size > 4)
26946 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26947 dest = change_address (destmem, SImode, destptr);
26948 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26949 emit_label (label);
26950 LABEL_NUSES (label) = 1;
26952 if (max_size > 2)
26954 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26955 dest = change_address (destmem, HImode, destptr);
26956 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26957 emit_label (label);
26958 LABEL_NUSES (label) = 1;
26960 if (max_size > 1)
26962 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26963 dest = change_address (destmem, QImode, destptr);
26964 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26965 emit_label (label);
26966 LABEL_NUSES (label) = 1;
26970 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
26971 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
26972 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
26973 ignored.
26974 Return value is updated DESTMEM. */
26975 static rtx
26976 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26977 rtx destptr, rtx srcptr, rtx value,
26978 rtx vec_value, rtx count, int align,
26979 int desired_alignment, bool issetmem)
26981 int i;
26982 for (i = 1; i < desired_alignment; i <<= 1)
26984 if (align <= i)
26986 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26987 if (issetmem)
26989 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26990 destmem = emit_memset (destmem, destptr, vec_value, i);
26991 else
26992 destmem = emit_memset (destmem, destptr, value, i);
26994 else
26995 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26996 ix86_adjust_counter (count, i);
26997 emit_label (label);
26998 LABEL_NUSES (label) = 1;
26999 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
27002 return destmem;
27005 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
27006 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
27007 and jump to DONE_LABEL. */
27008 static void
27009 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
27010 rtx destptr, rtx srcptr,
27011 rtx value, rtx vec_value,
27012 rtx count, int size,
27013 rtx done_label, bool issetmem)
27015 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
27016 machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
27017 rtx modesize;
27018 int n;
27020 /* If we do not have a vector value to copy, we must reduce the size. */
27021 if (issetmem)
27023 if (!vec_value)
27025 if (GET_MODE (value) == VOIDmode && size > 8)
27026 mode = Pmode;
27027 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
27028 mode = GET_MODE (value);
27030 else
27031 mode = GET_MODE (vec_value), value = vec_value;
27033 else
27035 /* Choose appropriate vector mode. */
27036 if (size >= 32)
27037 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
27038 else if (size >= 16)
27039 mode = TARGET_SSE ? V16QImode : DImode;
27040 srcmem = change_address (srcmem, mode, srcptr);
27042 destmem = change_address (destmem, mode, destptr);
27043 modesize = GEN_INT (GET_MODE_SIZE (mode));
27044 gcc_assert (GET_MODE_SIZE (mode) <= size);
27045 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
27047 if (issetmem)
27048 emit_move_insn (destmem, gen_lowpart (mode, value));
27049 else
27051 emit_move_insn (destmem, srcmem);
27052 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27054 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27057 destmem = offset_address (destmem, count, 1);
27058 destmem = offset_address (destmem, GEN_INT (-2 * size),
27059 GET_MODE_SIZE (mode));
27060 if (!issetmem)
27062 srcmem = offset_address (srcmem, count, 1);
27063 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
27064 GET_MODE_SIZE (mode));
27066 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
27068 if (issetmem)
27069 emit_move_insn (destmem, gen_lowpart (mode, value));
27070 else
27072 emit_move_insn (destmem, srcmem);
27073 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27075 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27077 emit_jump_insn (gen_jump (done_label));
27078 emit_barrier ();
27080 emit_label (label);
27081 LABEL_NUSES (label) = 1;
27084 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
27085 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
27086 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
27087 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
27088 DONE_LABEL is a label after the whole copying sequence. The label is created
27089 on demand if *DONE_LABEL is NULL.
27090 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
27091 bounds after the initial copies.
27093 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
27094 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
27095 we will dispatch to a library call for large blocks.
27097 In pseudocode we do:
27099 if (COUNT < SIZE)
27101 Assume that SIZE is 4. Bigger sizes are handled analogously
27102 if (COUNT & 4)
27104 copy 4 bytes from SRCPTR to DESTPTR
27105 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
27106 goto done_label
27108 if (!COUNT)
27109 goto done_label;
27110 copy 1 byte from SRCPTR to DESTPTR
27111 if (COUNT & 2)
27113 copy 2 bytes from SRCPTR to DESTPTR
27114 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
27117 else
27119 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
27120 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
27122 OLD_DESTPTR = DESTPTR;
27123 Align DESTPTR up to DESIRED_ALIGN
27124 SRCPTR += DESTPTR - OLD_DESTPTR
27125 COUNT -= DESTPTR - OLD_DESTPTR
27126 if (DYNAMIC_CHECK)
27127 Round COUNT down to multiple of SIZE
27128 << optional caller supplied zero size guard is here >>
27129 << optional caller supplied dynamic check is here >>
27130 << caller supplied main copy loop is here >>
27132 done_label:
27134 static void
27135 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
27136 rtx *destptr, rtx *srcptr,
27137 machine_mode mode,
27138 rtx value, rtx vec_value,
27139 rtx *count,
27140 rtx_code_label **done_label,
27141 int size,
27142 int desired_align,
27143 int align,
27144 unsigned HOST_WIDE_INT *min_size,
27145 bool dynamic_check,
27146 bool issetmem)
27148 rtx_code_label *loop_label = NULL, *label;
27149 int n;
27150 rtx modesize;
27151 int prolog_size = 0;
27152 rtx mode_value;
27154 /* Choose the proper value to copy. */
27155 if (issetmem && VECTOR_MODE_P (mode))
27156 mode_value = vec_value;
27157 else
27158 mode_value = value;
27159 gcc_assert (GET_MODE_SIZE (mode) <= size);
27161 /* See if block is big or small, handle small blocks. */
27162 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
27164 int size2 = size;
27165 loop_label = gen_label_rtx ();
27167 if (!*done_label)
27168 *done_label = gen_label_rtx ();
27170 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
27171 1, loop_label);
27172 size2 >>= 1;
27174 /* Handle sizes > 3. */
27175 for (;size2 > 2; size2 >>= 1)
27176 expand_small_movmem_or_setmem (destmem, srcmem,
27177 *destptr, *srcptr,
27178 value, vec_value,
27179 *count,
27180 size2, *done_label, issetmem);
27181 /* Nothing to copy? Jump to DONE_LABEL if so */
27182 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
27183 1, *done_label);
27185 /* Do a byte copy. */
27186 destmem = change_address (destmem, QImode, *destptr);
27187 if (issetmem)
27188 emit_move_insn (destmem, gen_lowpart (QImode, value));
27189 else
27191 srcmem = change_address (srcmem, QImode, *srcptr);
27192 emit_move_insn (destmem, srcmem);
27195 /* Handle sizes 2 and 3. */
27196 label = ix86_expand_aligntest (*count, 2, false);
27197 destmem = change_address (destmem, HImode, *destptr);
27198 destmem = offset_address (destmem, *count, 1);
27199 destmem = offset_address (destmem, GEN_INT (-2), 2);
27200 if (issetmem)
27201 emit_move_insn (destmem, gen_lowpart (HImode, value));
27202 else
27204 srcmem = change_address (srcmem, HImode, *srcptr);
27205 srcmem = offset_address (srcmem, *count, 1);
27206 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
27207 emit_move_insn (destmem, srcmem);
27210 emit_label (label);
27211 LABEL_NUSES (label) = 1;
27212 emit_jump_insn (gen_jump (*done_label));
27213 emit_barrier ();
27215 else
27216 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
27217 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
27219 /* Start memcpy for COUNT >= SIZE. */
27220 if (loop_label)
27222 emit_label (loop_label);
27223 LABEL_NUSES (loop_label) = 1;
27226 /* Copy first desired_align bytes. */
27227 if (!issetmem)
27228 srcmem = change_address (srcmem, mode, *srcptr);
27229 destmem = change_address (destmem, mode, *destptr);
27230 modesize = GEN_INT (GET_MODE_SIZE (mode));
27231 for (n = 0; prolog_size < desired_align - align; n++)
27233 if (issetmem)
27234 emit_move_insn (destmem, mode_value);
27235 else
27237 emit_move_insn (destmem, srcmem);
27238 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27240 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27241 prolog_size += GET_MODE_SIZE (mode);
27245 /* Copy last SIZE bytes. */
27246 destmem = offset_address (destmem, *count, 1);
27247 destmem = offset_address (destmem,
27248 GEN_INT (-size - prolog_size),
27250 if (issetmem)
27251 emit_move_insn (destmem, mode_value);
27252 else
27254 srcmem = offset_address (srcmem, *count, 1);
27255 srcmem = offset_address (srcmem,
27256 GEN_INT (-size - prolog_size),
27258 emit_move_insn (destmem, srcmem);
27260 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
27262 destmem = offset_address (destmem, modesize, 1);
27263 if (issetmem)
27264 emit_move_insn (destmem, mode_value);
27265 else
27267 srcmem = offset_address (srcmem, modesize, 1);
27268 emit_move_insn (destmem, srcmem);
27272 /* Align destination. */
27273 if (desired_align > 1 && desired_align > align)
27275 rtx saveddest = *destptr;
27277 gcc_assert (desired_align <= size);
27278 /* Align destptr up, place it to new register. */
27279 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
27280 GEN_INT (prolog_size),
27281 NULL_RTX, 1, OPTAB_DIRECT);
27282 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
27283 REG_POINTER (*destptr) = 1;
27284 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
27285 GEN_INT (-desired_align),
27286 *destptr, 1, OPTAB_DIRECT);
27287 /* See how many bytes we skipped. */
27288 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
27289 *destptr,
27290 saveddest, 1, OPTAB_DIRECT);
27291 /* Adjust srcptr and count. */
27292 if (!issetmem)
27293 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
27294 saveddest, *srcptr, 1, OPTAB_DIRECT);
27295 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
27296 saveddest, *count, 1, OPTAB_DIRECT);
27297 /* We copied at most size + prolog_size. */
27298 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
27299 *min_size
27300 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
27301 else
27302 *min_size = 0;
27304 /* Our loops always round down the block size, but for dispatch to the
27305 library call we need the precise value. */
27306 if (dynamic_check)
27307 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
27308 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
27310 else
27312 gcc_assert (prolog_size == 0);
27313 /* Decrease count, so we won't end up copying last word twice. */
27314 if (!CONST_INT_P (*count))
27315 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
27316 constm1_rtx, *count, 1, OPTAB_DIRECT);
27317 else
27318 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
27319 (unsigned HOST_WIDE_INT)size));
27320 if (*min_size)
27321 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
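/* Editor's illustrative sketch (plain C, hypothetical names): the pointer
   and count bookkeeping performed above when rounding the destination up
   to DESIRED_ALIGN.  Whatever distance the destination skips forward, the
   source pointer and the remaining count must be adjusted by the same
   amount so that the remaining bytes still line up.  */
#include <stddef.h>
#include <stdint.h>

static void
align_dst_bookkeeping (char **dst, const char **src, size_t *count,
                       size_t prolog_size, size_t desired_align)
{
  uintptr_t old_dst = (uintptr_t) *dst;
  uintptr_t new_dst = (old_dst + prolog_size) & -(uintptr_t) desired_align;
  size_t skipped = new_dst - old_dst;

  *dst = (char *) new_dst;
  *src += skipped;
  *count -= skipped;
}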
27326 /* This function is like the previous one, except here we know how many bytes
27327 need to be copied. That allows us to update alignment not only of DST, which
27328 is returned, but also of SRC, which is passed as a pointer for that
27329 reason. */
27330 static rtx
27331 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
27332 rtx srcreg, rtx value, rtx vec_value,
27333 int desired_align, int align_bytes,
27334 bool issetmem)
27336 rtx src = NULL;
27337 rtx orig_dst = dst;
27338 rtx orig_src = NULL;
27339 int piece_size = 1;
27340 int copied_bytes = 0;
27342 if (!issetmem)
27344 gcc_assert (srcp != NULL);
27345 src = *srcp;
27346 orig_src = src;
27349 for (piece_size = 1;
27350 piece_size <= desired_align && copied_bytes < align_bytes;
27351 piece_size <<= 1)
27353 if (align_bytes & piece_size)
27355 if (issetmem)
27357 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
27358 dst = emit_memset (dst, destreg, vec_value, piece_size);
27359 else
27360 dst = emit_memset (dst, destreg, value, piece_size);
27362 else
27363 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
27364 copied_bytes += piece_size;
27367 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
27368 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27369 if (MEM_SIZE_KNOWN_P (orig_dst))
27370 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
27372 if (!issetmem)
27374 int src_align_bytes = get_mem_align_offset (src, desired_align
27375 * BITS_PER_UNIT);
27376 if (src_align_bytes >= 0)
27377 src_align_bytes = desired_align - src_align_bytes;
27378 if (src_align_bytes >= 0)
27380 unsigned int src_align;
27381 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
27383 if ((src_align_bytes & (src_align - 1))
27384 == (align_bytes & (src_align - 1)))
27385 break;
27387 if (src_align > (unsigned int) desired_align)
27388 src_align = desired_align;
27389 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
27390 set_mem_align (src, src_align * BITS_PER_UNIT);
27392 if (MEM_SIZE_KNOWN_P (orig_src))
27393 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
27394 *srcp = src;
27397 return dst;
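/* Editor's illustrative sketch (plain C): how the loop above decomposes
   ALIGN_BYTES into power-of-two pieces.  For example align_bytes == 7 is
   handled as a 1-byte, then a 2-byte, then a 4-byte move, one move per set
   bit, mirroring the piece_size loop in
   expand_set_or_movmem_constant_prologue.  */
#include <stddef.h>
#include <string.h>

static void
copy_align_bytes (char *dst, const char *src, size_t align_bytes,
                  size_t desired_align)
{
  for (size_t piece = 1; piece <= desired_align; piece <<= 1)
    if (align_bytes & piece)
      {
        memcpy (dst, src, piece);
        dst += piece;
        src += piece;
      }
}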
27400 /* Return true if ALG can be used in current context.
27401 Assume we expand memset if MEMSET is true. */
27402 static bool
27403 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
27405 if (alg == no_stringop)
27406 return false;
27407 if (alg == vector_loop)
27408 return TARGET_SSE || TARGET_AVX;
27409 /* Algorithms using the rep prefix want at least edi and ecx;
27410 additionally, memset wants eax and memcpy wants esi. Don't
27411 consider such algorithms if the user has appropriated those
27412 registers for their own purposes, or if we have a non-default
27413 address space, since some string insns cannot override the segment. */
27414 if (alg == rep_prefix_1_byte
27415 || alg == rep_prefix_4_byte
27416 || alg == rep_prefix_8_byte)
27418 if (have_as)
27419 return false;
27420 if (fixed_regs[CX_REG]
27421 || fixed_regs[DI_REG]
27422 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
27423 return false;
27425 return true;
27428 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
27429 static enum stringop_alg
27430 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
27431 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
27432 bool memset, bool zero_memset, bool have_as,
27433 int *dynamic_check, bool *noalign, bool recur)
27435 const struct stringop_algs *algs;
27436 bool optimize_for_speed;
27437 int max = 0;
27438 const struct processor_costs *cost;
27439 int i;
27440 bool any_alg_usable_p = false;
27442 *noalign = false;
27443 *dynamic_check = -1;
27445 /* Even if the string operation call is cold, we still might spend a lot
27446 of time processing large blocks. */
27447 if (optimize_function_for_size_p (cfun)
27448 || (optimize_insn_for_size_p ()
27449 && (max_size < 256
27450 || (expected_size != -1 && expected_size < 256))))
27451 optimize_for_speed = false;
27452 else
27453 optimize_for_speed = true;
27455 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
27456 if (memset)
27457 algs = &cost->memset[TARGET_64BIT != 0];
27458 else
27459 algs = &cost->memcpy[TARGET_64BIT != 0];
27461 /* Find the maximal size covered by a user-defined algorithm. */
27462 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27464 enum stringop_alg candidate = algs->size[i].alg;
27465 bool usable = alg_usable_p (candidate, memset, have_as);
27466 any_alg_usable_p |= usable;
27468 if (candidate != libcall && candidate && usable)
27469 max = algs->size[i].max;
27472 /* If the expected size is not known but the max size is small enough
27473 that the inline version is a win, set the expected size into
27474 the range. */
27475 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
27476 && expected_size == -1)
27477 expected_size = min_size / 2 + max_size / 2;
27479 /* If user specified the algorithm, honor it if possible. */
27480 if (ix86_stringop_alg != no_stringop
27481 && alg_usable_p (ix86_stringop_alg, memset, have_as))
27482 return ix86_stringop_alg;
27483 /* rep; movq or rep; movl is the smallest variant. */
27484 else if (!optimize_for_speed)
27486 *noalign = true;
27487 if (!count || (count & 3) || (memset && !zero_memset))
27488 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
27489 ? rep_prefix_1_byte : loop_1_byte;
27490 else
27491 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
27492 ? rep_prefix_4_byte : loop;
27494 /* Very tiny blocks are best handled via the loop; REP is expensive to
27495 set up. */
27496 else if (expected_size != -1 && expected_size < 4)
27497 return loop_1_byte;
27498 else if (expected_size != -1)
27500 enum stringop_alg alg = libcall;
27501 bool alg_noalign = false;
27502 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27504 /* We get here if the algorithms that were not libcall-based
27505 were rep-prefix based and we are unable to use rep prefixes
27506 based on global register usage. Break out of the loop and
27507 use the heuristic below. */
27508 if (algs->size[i].max == 0)
27509 break;
27510 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
27512 enum stringop_alg candidate = algs->size[i].alg;
27514 if (candidate != libcall
27515 && alg_usable_p (candidate, memset, have_as))
27517 alg = candidate;
27518 alg_noalign = algs->size[i].noalign;
27520 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
27521 last non-libcall inline algorithm. */
27522 if (TARGET_INLINE_ALL_STRINGOPS)
27524 /* When the current size is best copied by a libcall,
27525 but we are still forced to inline, run the heuristic below
27526 that will pick code for medium-sized blocks. */
27527 if (alg != libcall)
27529 *noalign = alg_noalign;
27530 return alg;
27532 else if (!any_alg_usable_p)
27533 break;
27535 else if (alg_usable_p (candidate, memset, have_as))
27537 *noalign = algs->size[i].noalign;
27538 return candidate;
27543 /* When asked to inline the call anyway, try to pick a meaningful choice.
27544 We look for the maximal size of block that is faster to copy by hand and
27545 take blocks of at most that size, guessing that the average size will
27546 be roughly half of the maximum.
27548 If this turns out to be bad, we might simply specify the preferred
27549 choice in ix86_costs. */
27550 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27551 && (algs->unknown_size == libcall
27552 || !alg_usable_p (algs->unknown_size, memset, have_as)))
27554 enum stringop_alg alg;
27555 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
27557 /* If there aren't any usable algorithms or if we are already recursing,
27558 then recursing on smaller or equal sizes isn't going to
27559 find anything. Just return the simple byte-at-a-time copy loop. */
27560 if (!any_alg_usable_p || recur)
27562 /* Pick something reasonable. */
27563 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
27564 *dynamic_check = 128;
27565 return loop_1_byte;
27567 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
27568 zero_memset, have_as, dynamic_check, noalign, true);
27569 gcc_assert (*dynamic_check == -1);
27570 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27571 *dynamic_check = max;
27572 else
27573 gcc_assert (alg != libcall);
27574 return alg;
27576 return (alg_usable_p (algs->unknown_size, memset, have_as)
27577 ? algs->unknown_size : libcall);
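/* Editor's illustrative sketch (hypothetical simplified types, not GCC's
   stringop_algs): the size-table walk used by decide_alg above.  Each entry
   says "use this algorithm for blocks up to MAX bytes"; the first entry
   whose max covers the expected size wins, and max == -1 means "no upper
   bound".  */
struct alg_entry
{
  int max;   /* largest block size this entry covers, -1 = unbounded */
  int alg;   /* which algorithm to use for such blocks */
};

static int
pick_alg (const struct alg_entry *table, int n, int expected_size,
          int fallback)
{
  for (int i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;
  return fallback;
}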
27580 /* Decide on alignment. We know that the operand is already aligned to ALIGN
27581 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
27582 static int
27583 decide_alignment (int align,
27584 enum stringop_alg alg,
27585 int expected_size,
27586 machine_mode move_mode)
27588 int desired_align = 0;
27590 gcc_assert (alg != no_stringop);
27592 if (alg == libcall)
27593 return 0;
27594 if (move_mode == VOIDmode)
27595 return 0;
27597 desired_align = GET_MODE_SIZE (move_mode);
27598 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
27599 copying a whole cache line at once. */
27600 if (TARGET_PENTIUMPRO
27601 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
27602 desired_align = 8;
27604 if (optimize_size)
27605 desired_align = 1;
27606 if (desired_align < align)
27607 desired_align = align;
27608 if (expected_size != -1 && expected_size < 4)
27609 desired_align = align;
27611 return desired_align;
27615 /* Helper function for memset. For QImode value 0xXY produce
27616 0xXYXYXYXY of the width specified by MODE. This is essentially
27617 a * 0x01010101, but we can do slightly better than
27618 synth_mult by unwinding the sequence by hand on CPUs with
27619 slow multiply. */
27620 static rtx
27621 promote_duplicated_reg (machine_mode mode, rtx val)
27623 machine_mode valmode = GET_MODE (val);
27624 rtx tmp;
27625 int nops = mode == DImode ? 3 : 2;
27627 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
27628 if (val == const0_rtx)
27629 return copy_to_mode_reg (mode, CONST0_RTX (mode));
27630 if (CONST_INT_P (val))
27632 HOST_WIDE_INT v = INTVAL (val) & 255;
27634 v |= v << 8;
27635 v |= v << 16;
27636 if (mode == DImode)
27637 v |= (v << 16) << 16;
27638 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
27641 if (valmode == VOIDmode)
27642 valmode = QImode;
27643 if (valmode != QImode)
27644 val = gen_lowpart (QImode, val);
27645 if (mode == QImode)
27646 return val;
27647 if (!TARGET_PARTIAL_REG_STALL)
27648 nops--;
27649 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
27650 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
27651 <= (ix86_cost->shift_const + ix86_cost->add) * nops
27652 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
27654 rtx reg = convert_modes (mode, QImode, val, true);
27655 tmp = promote_duplicated_reg (mode, const1_rtx);
27656 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
27657 OPTAB_DIRECT);
27659 else
27661 rtx reg = convert_modes (mode, QImode, val, true);
27663 if (!TARGET_PARTIAL_REG_STALL)
27664 if (mode == SImode)
27665 emit_insn (gen_insvsi_1 (reg, reg));
27666 else
27667 emit_insn (gen_insvdi_1 (reg, reg));
27668 else
27670 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
27671 NULL, 1, OPTAB_DIRECT);
27672 reg =
27673 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27675 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
27676 NULL, 1, OPTAB_DIRECT);
27677 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27678 if (mode == SImode)
27679 return reg;
27680 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
27681 NULL, 1, OPTAB_DIRECT);
27682 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27683 return reg;
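/* Editor's illustrative sketch (plain C): the 0xXY -> 0xXYXYXYXY promotion
   that promote_duplicated_reg implements with shifts/inserts above.  Each
   step doubles the number of copies of the byte; multiplying by 0x01010101
   (the path taken above when multiply is cheap) produces the same result.  */
static unsigned long long
broadcast_byte (unsigned char b, int sixty_four_bit)
{
  unsigned long long v = b;     /* 0x00000000000000XY */
  v |= v << 8;                  /* 0x000000000000XYXY */
  v |= v << 16;                 /* 0x00000000XYXYXYXY */
  if (sixty_four_bit)
    v |= v << 32;               /* 0xXYXYXYXYXYXYXYXY */
  return v;
}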
27687 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
27688 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
27689 alignment from ALIGN to DESIRED_ALIGN. */
27690 static rtx
27691 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
27692 int align)
27694 rtx promoted_val;
27696 if (TARGET_64BIT
27697 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
27698 promoted_val = promote_duplicated_reg (DImode, val);
27699 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
27700 promoted_val = promote_duplicated_reg (SImode, val);
27701 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
27702 promoted_val = promote_duplicated_reg (HImode, val);
27703 else
27704 promoted_val = val;
27706 return promoted_val;
27709 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
27710 operations when profitable. The code depends upon architecture, block size
27711 and alignment, but always has one of the following overall structures:
27713 Aligned move sequence:
27715 1) Prologue guard: Conditional that jumps up to epilogues for small
27716 blocks that can be handled by epilogue alone. This is faster
27717 but also needed for correctness, since the prologue assumes the block
27718 is larger than the desired alignment.
27720 Optional dynamic check for size and libcall for large
27721 blocks is emitted here too, with -minline-stringops-dynamically.
27723 2) Prologue: copy first few bytes in order to get destination
27724 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
27725 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
27726 copied. We emit either a jump tree on power of two sized
27727 blocks, or a byte loop.
27729 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27730 with specified algorithm.
27732 4) Epilogue: code copying tail of the block that is too small to be
27733 handled by main body (or up to size guarded by prologue guard).
27735 Misaligned move sequence
27737 1) misaligned move prologue/epilogue containing:
27738 a) Prologue handling small memory blocks and jumping to done_label
27739 (skipped if blocks are known to be large enough)
27740 b) A single, possibly misaligned move copying the first
27741 DESIRED_ALIGN-ALIGN bytes when alignment is needed
27742 (skipped if alignment is not needed)
27743 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
27745 2) Zero size guard dispatching to done_label, if needed
27747 3) dispatch to a library call, if needed,
27749 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27750 with the specified algorithm. */
27751 bool
27752 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27753 rtx align_exp, rtx expected_align_exp,
27754 rtx expected_size_exp, rtx min_size_exp,
27755 rtx max_size_exp, rtx probable_max_size_exp,
27756 bool issetmem)
27758 rtx destreg;
27759 rtx srcreg = NULL;
27760 rtx_code_label *label = NULL;
27761 rtx tmp;
27762 rtx_code_label *jump_around_label = NULL;
27763 HOST_WIDE_INT align = 1;
27764 unsigned HOST_WIDE_INT count = 0;
27765 HOST_WIDE_INT expected_size = -1;
27766 int size_needed = 0, epilogue_size_needed;
27767 int desired_align = 0, align_bytes = 0;
27768 enum stringop_alg alg;
27769 rtx promoted_val = NULL;
27770 rtx vec_promoted_val = NULL;
27771 bool force_loopy_epilogue = false;
27772 int dynamic_check;
27773 bool need_zero_guard = false;
27774 bool noalign;
27775 machine_mode move_mode = VOIDmode;
27776 machine_mode wider_mode;
27777 int unroll_factor = 1;
27778 /* TODO: Once value ranges are available, fill in proper data. */
27779 unsigned HOST_WIDE_INT min_size = 0;
27780 unsigned HOST_WIDE_INT max_size = -1;
27781 unsigned HOST_WIDE_INT probable_max_size = -1;
27782 bool misaligned_prologue_used = false;
27783 bool have_as;
27785 if (CONST_INT_P (align_exp))
27786 align = INTVAL (align_exp);
27787 /* i386 can do misaligned access at a reasonably increased cost. */
27788 if (CONST_INT_P (expected_align_exp)
27789 && INTVAL (expected_align_exp) > align)
27790 align = INTVAL (expected_align_exp);
27791 /* ALIGN is the minimum of destination and source alignment, but we care here
27792 just about destination alignment. */
27793 else if (!issetmem
27794 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27795 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27797 if (CONST_INT_P (count_exp))
27799 min_size = max_size = probable_max_size = count = expected_size
27800 = INTVAL (count_exp);
27801 /* When COUNT is 0, there is nothing to do. */
27802 if (!count)
27803 return true;
27805 else
27807 if (min_size_exp)
27808 min_size = INTVAL (min_size_exp);
27809 if (max_size_exp)
27810 max_size = INTVAL (max_size_exp);
27811 if (probable_max_size_exp)
27812 probable_max_size = INTVAL (probable_max_size_exp);
27813 if (CONST_INT_P (expected_size_exp))
27814 expected_size = INTVAL (expected_size_exp);
27817 /* Make sure we don't need to care about overflow later on. */
27818 if (count > (HOST_WIDE_INT_1U << 30))
27819 return false;
27821 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27822 if (!issetmem)
27823 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27825 /* Step 0: Decide on preferred algorithm, desired alignment and
27826 size of chunks to be copied by main loop. */
27827 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27828 issetmem,
27829 issetmem && val_exp == const0_rtx, have_as,
27830 &dynamic_check, &noalign, false);
27831 if (alg == libcall)
27832 return false;
27833 gcc_assert (alg != no_stringop);
27835 /* For now the vector version of memset is generated only for memory zeroing, as
27836 creating the promoted vector value is very cheap in this case. */
27837 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27838 alg = unrolled_loop;
27840 if (!count)
27841 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27842 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27843 if (!issetmem)
27844 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27846 unroll_factor = 1;
27847 move_mode = word_mode;
27848 switch (alg)
27850 case libcall:
27851 case no_stringop:
27852 case last_alg:
27853 gcc_unreachable ();
27854 case loop_1_byte:
27855 need_zero_guard = true;
27856 move_mode = QImode;
27857 break;
27858 case loop:
27859 need_zero_guard = true;
27860 break;
27861 case unrolled_loop:
27862 need_zero_guard = true;
27863 unroll_factor = (TARGET_64BIT ? 4 : 2);
27864 break;
27865 case vector_loop:
27866 need_zero_guard = true;
27867 unroll_factor = 4;
27868 /* Find the widest supported mode. */
27869 move_mode = word_mode;
27870 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27871 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27872 move_mode = wider_mode;
27874 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27875 move_mode = TImode;
27877 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27878 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27879 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27881 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27882 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27883 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27884 move_mode = word_mode;
27886 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27887 break;
27888 case rep_prefix_8_byte:
27889 move_mode = DImode;
27890 break;
27891 case rep_prefix_4_byte:
27892 move_mode = SImode;
27893 break;
27894 case rep_prefix_1_byte:
27895 move_mode = QImode;
27896 break;
27898 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27899 epilogue_size_needed = size_needed;
27901 /* If we are going to emit any library calls conditionally, make sure any
27902 pending stack adjustments happen before the first conditional branch,
27903 otherwise they will be emitted only before the library call and won't
27904 happen on the other branches. */
27905 if (dynamic_check != -1)
27906 do_pending_stack_adjust ();
27908 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27909 if (!TARGET_ALIGN_STRINGOPS || noalign)
27910 align = desired_align;
27912 /* Step 1: Prologue guard. */
27914 /* Alignment code needs count to be in register. */
27915 if (CONST_INT_P (count_exp) && desired_align > align)
27917 if (INTVAL (count_exp) > desired_align
27918 && INTVAL (count_exp) > size_needed)
27920 align_bytes
27921 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27922 if (align_bytes <= 0)
27923 align_bytes = 0;
27924 else
27925 align_bytes = desired_align - align_bytes;
27927 if (align_bytes == 0)
27928 count_exp = force_reg (counter_mode (count_exp), count_exp);
27930 gcc_assert (desired_align >= 1 && align >= 1);
27932 /* Misaligned move sequences handle both prologue and epilogue at once.
27933 Default code generation results in smaller code for large alignments
27934 and also avoids redundant work when sizes are known precisely. */
27935 misaligned_prologue_used
27936 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27937 && MAX (desired_align, epilogue_size_needed) <= 32
27938 && desired_align <= epilogue_size_needed
27939 && ((desired_align > align && !align_bytes)
27940 || (!count && epilogue_size_needed > 1)));
27942 /* Do the cheap promotion to allow better CSE across the
27943 main loop and epilogue (i.e. one load of the big constant in
27944 front of all code).
27945 For now the misaligned move sequences do not have a fast path
27946 without broadcasting. */
27947 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27949 if (alg == vector_loop)
27951 gcc_assert (val_exp == const0_rtx);
27952 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27953 promoted_val = promote_duplicated_reg_to_size (val_exp,
27954 GET_MODE_SIZE (word_mode),
27955 desired_align, align);
27957 else
27959 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27960 desired_align, align);
27963 /* Misaligned move sequences handle both prologues and epilogues at once.
27964 Default code generation results in smaller code for large alignments and
27965 also avoids redundant work when sizes are known precisely. */
27966 if (misaligned_prologue_used)
27968 /* Misaligned move prologue handled small blocks by itself. */
27969 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27970 (dst, src, &destreg, &srcreg,
27971 move_mode, promoted_val, vec_promoted_val,
27972 &count_exp,
27973 &jump_around_label,
27974 desired_align < align
27975 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27976 desired_align, align, &min_size, dynamic_check, issetmem);
27977 if (!issetmem)
27978 src = change_address (src, BLKmode, srcreg);
27979 dst = change_address (dst, BLKmode, destreg);
27980 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27981 epilogue_size_needed = 0;
27982 if (need_zero_guard
27983 && min_size < (unsigned HOST_WIDE_INT) size_needed)
27985 /* It is possible that we copied enough so the main loop will not
27986 execute. */
27987 gcc_assert (size_needed > 1);
27988 if (jump_around_label == NULL_RTX)
27989 jump_around_label = gen_label_rtx ();
27990 emit_cmp_and_jump_insns (count_exp,
27991 GEN_INT (size_needed),
27992 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27993 if (expected_size == -1
27994 || expected_size < (desired_align - align) / 2 + size_needed)
27995 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27996 else
27997 predict_jump (REG_BR_PROB_BASE * 60 / 100);
28000 /* Ensure that alignment prologue won't copy past end of block. */
28001 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
28003 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
28004 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
28005 Make sure it is power of 2. */
28006 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
28008 /* To improve performance of small blocks, we jump around the VAL
28009 promoting code. This means that if the promoted VAL is not constant,
28010 we might not use it in the epilogue and have to use the byte
28011 loop variant. */
28012 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
28013 force_loopy_epilogue = true;
28014 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
28015 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
28017 /* If main algorithm works on QImode, no epilogue is needed.
28018 For small sizes just don't align anything. */
28019 if (size_needed == 1)
28020 desired_align = align;
28021 else
28022 goto epilogue;
28024 else if (!count
28025 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
28027 label = gen_label_rtx ();
28028 emit_cmp_and_jump_insns (count_exp,
28029 GEN_INT (epilogue_size_needed),
28030 LTU, 0, counter_mode (count_exp), 1, label);
28031 if (expected_size == -1 || expected_size < epilogue_size_needed)
28032 predict_jump (REG_BR_PROB_BASE * 60 / 100);
28033 else
28034 predict_jump (REG_BR_PROB_BASE * 20 / 100);
28038 /* Emit code to decide at runtime whether a library call or the inline code
28039 should be used. */
28040 if (dynamic_check != -1)
28042 if (!issetmem && CONST_INT_P (count_exp))
28044 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
28046 emit_block_copy_via_libcall (dst, src, count_exp);
28047 count_exp = const0_rtx;
28048 goto epilogue;
28051 else
28053 rtx_code_label *hot_label = gen_label_rtx ();
28054 if (jump_around_label == NULL_RTX)
28055 jump_around_label = gen_label_rtx ();
28056 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
28057 LEU, 0, counter_mode (count_exp),
28058 1, hot_label);
28059 predict_jump (REG_BR_PROB_BASE * 90 / 100);
28060 if (issetmem)
28061 set_storage_via_libcall (dst, count_exp, val_exp);
28062 else
28063 emit_block_copy_via_libcall (dst, src, count_exp);
28064 emit_jump (jump_around_label);
28065 emit_label (hot_label);
28069 /* Step 2: Alignment prologue. */
28070 /* Do the expensive promotion once we branched off the small blocks. */
28071 if (issetmem && !promoted_val)
28072 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
28073 desired_align, align);
28075 if (desired_align > align && !misaligned_prologue_used)
28077 if (align_bytes == 0)
28079 /* Except for the first move in the prologue, we no longer know
28080 the constant offset in the aliasing info. It doesn't seem worth
28081 the pain to maintain it for the first move, so throw away
28082 the info early. */
28083 dst = change_address (dst, BLKmode, destreg);
28084 if (!issetmem)
28085 src = change_address (src, BLKmode, srcreg);
28086 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
28087 promoted_val, vec_promoted_val,
28088 count_exp, align, desired_align,
28089 issetmem);
28090 /* At most desired_align - align bytes are copied. */
28091 if (min_size < (unsigned)(desired_align - align))
28092 min_size = 0;
28093 else
28094 min_size -= desired_align - align;
28096 else
28098 /* If we know how many bytes need to be stored before dst is
28099 sufficiently aligned, maintain aliasing info accurately. */
28100 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
28101 srcreg,
28102 promoted_val,
28103 vec_promoted_val,
28104 desired_align,
28105 align_bytes,
28106 issetmem);
28108 count_exp = plus_constant (counter_mode (count_exp),
28109 count_exp, -align_bytes);
28110 count -= align_bytes;
28111 min_size -= align_bytes;
28112 max_size -= align_bytes;
28114 if (need_zero_guard
28115 && min_size < (unsigned HOST_WIDE_INT) size_needed
28116 && (count < (unsigned HOST_WIDE_INT) size_needed
28117 || (align_bytes == 0
28118 && count < ((unsigned HOST_WIDE_INT) size_needed
28119 + desired_align - align))))
28121 /* It is possible that we copied enough so the main loop will not
28122 execute. */
28123 gcc_assert (size_needed > 1);
28124 if (label == NULL_RTX)
28125 label = gen_label_rtx ();
28126 emit_cmp_and_jump_insns (count_exp,
28127 GEN_INT (size_needed),
28128 LTU, 0, counter_mode (count_exp), 1, label);
28129 if (expected_size == -1
28130 || expected_size < (desired_align - align) / 2 + size_needed)
28131 predict_jump (REG_BR_PROB_BASE * 20 / 100);
28132 else
28133 predict_jump (REG_BR_PROB_BASE * 60 / 100);
28136 if (label && size_needed == 1)
28138 emit_label (label);
28139 LABEL_NUSES (label) = 1;
28140 label = NULL;
28141 epilogue_size_needed = 1;
28142 if (issetmem)
28143 promoted_val = val_exp;
28145 else if (label == NULL_RTX && !misaligned_prologue_used)
28146 epilogue_size_needed = size_needed;
28148 /* Step 3: Main loop. */
28150 switch (alg)
28152 case libcall:
28153 case no_stringop:
28154 case last_alg:
28155 gcc_unreachable ();
28156 case loop_1_byte:
28157 case loop:
28158 case unrolled_loop:
28159 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
28160 count_exp, move_mode, unroll_factor,
28161 expected_size, issetmem);
28162 break;
28163 case vector_loop:
28164 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
28165 vec_promoted_val, count_exp, move_mode,
28166 unroll_factor, expected_size, issetmem);
28167 break;
28168 case rep_prefix_8_byte:
28169 case rep_prefix_4_byte:
28170 case rep_prefix_1_byte:
28171 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
28172 val_exp, count_exp, move_mode, issetmem);
28173 break;
28175 /* Properly adjust the offset of the src and dest memory for aliasing. */
28176 if (CONST_INT_P (count_exp))
28178 if (!issetmem)
28179 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
28180 (count / size_needed) * size_needed);
28181 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
28182 (count / size_needed) * size_needed);
28184 else
28186 if (!issetmem)
28187 src = change_address (src, BLKmode, srcreg);
28188 dst = change_address (dst, BLKmode, destreg);
28191 /* Step 4: Epilogue to copy the remaining bytes. */
28192 epilogue:
28193 if (label)
28195 /* When the main loop is done, COUNT_EXP might hold the original count,
28196 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
28197 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
28198 bytes. Compensate if needed. */
28200 if (size_needed < epilogue_size_needed)
28202 tmp =
28203 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
28204 GEN_INT (size_needed - 1), count_exp, 1,
28205 OPTAB_DIRECT);
28206 if (tmp != count_exp)
28207 emit_move_insn (count_exp, tmp);
28209 emit_label (label);
28210 LABEL_NUSES (label) = 1;
28213 if (count_exp != const0_rtx && epilogue_size_needed > 1)
28215 if (force_loopy_epilogue)
28216 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
28217 epilogue_size_needed);
28218 else
28220 if (issetmem)
28221 expand_setmem_epilogue (dst, destreg, promoted_val,
28222 vec_promoted_val, count_exp,
28223 epilogue_size_needed);
28224 else
28225 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
28226 epilogue_size_needed);
28229 if (jump_around_label)
28230 emit_label (jump_around_label);
28231 return true;
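/* Editor's illustrative sketch (plain C, not generated code): the "aligned
   move sequence" described in the comment before ix86_expand_set_or_movmem,
   here for a word-at-a-time memcpy.  The numbered steps match the comment:
   1) prologue guard for small blocks, 2) alignment prologue, 3) main loop,
   4) epilogue for the tail bytes.  */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
memcpy_skeleton (char *dst, const char *src, size_t n)
{
  const size_t chunk = sizeof (unsigned long);

  if (n >= chunk)                               /* 1) prologue guard */
    {
      while (((uintptr_t) dst % chunk) != 0)    /* 2) alignment prologue */
        {
          *dst++ = *src++;
          n--;
        }
      for (; n >= chunk; n -= chunk, dst += chunk, src += chunk)
        memcpy (dst, src, chunk);               /* 3) main loop */
    }
  while (n--)                                   /* 4) epilogue */
    *dst++ = *src++;
}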
28235 /* Expand the appropriate insns for doing strlen if not just doing
28236 repnz; scasb
28238 out = result, initialized with the start address
28239 align_rtx = alignment of the address.
28240 scratch = scratch register, initialized with the start address when
28241 not aligned, otherwise undefined
28243 This is just the body. It needs the initializations mentioned above and
28244 some address computing at the end. These things are done in i386.md. */
28246 static void
28247 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
28249 int align;
28250 rtx tmp;
28251 rtx_code_label *align_2_label = NULL;
28252 rtx_code_label *align_3_label = NULL;
28253 rtx_code_label *align_4_label = gen_label_rtx ();
28254 rtx_code_label *end_0_label = gen_label_rtx ();
28255 rtx mem;
28256 rtx tmpreg = gen_reg_rtx (SImode);
28257 rtx scratch = gen_reg_rtx (SImode);
28258 rtx cmp;
28260 align = 0;
28261 if (CONST_INT_P (align_rtx))
28262 align = INTVAL (align_rtx);
28264 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
28266 /* Is there a known alignment and is it less than 4? */
28267 if (align < 4)
28269 rtx scratch1 = gen_reg_rtx (Pmode);
28270 emit_move_insn (scratch1, out);
28271 /* Is there a known alignment and is it not 2? */
28272 if (align != 2)
28274 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
28275 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
28277 /* Leave just the 3 lower bits. */
28278 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
28279 NULL_RTX, 0, OPTAB_WIDEN);
28281 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
28282 Pmode, 1, align_4_label);
28283 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
28284 Pmode, 1, align_2_label);
28285 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
28286 Pmode, 1, align_3_label);
28288 else
28290 /* Since the alignment is 2, we have to check 2 or 0 bytes;
28291 check whether the pointer is already aligned to 4 bytes. */
28293 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
28294 NULL_RTX, 0, OPTAB_WIDEN);
28296 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
28297 Pmode, 1, align_4_label);
28300 mem = change_address (src, QImode, out);
28302 /* Now compare the bytes. */
28304 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
28305 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
28306 QImode, 1, end_0_label);
28308 /* Increment the address. */
28309 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28311 /* Not needed with an alignment of 2 */
28312 if (align != 2)
28314 emit_label (align_2_label);
28316 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28317 end_0_label);
28319 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28321 emit_label (align_3_label);
28324 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28325 end_0_label);
28327 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28330 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
28331 align this loop: it only makes the program huge and does not
28332 help to speed it up. */
28333 emit_label (align_4_label);
28335 mem = change_address (src, SImode, out);
28336 emit_move_insn (scratch, mem);
28337 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
28339 /* This formula yields a nonzero result iff one of the bytes is zero.
28340 This saves three branches inside the loop and many cycles. */
28342 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
28343 emit_insn (gen_one_cmplsi2 (scratch, scratch));
28344 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
28345 emit_insn (gen_andsi3 (tmpreg, tmpreg,
28346 gen_int_mode (0x80808080, SImode)));
28347 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
28348 align_4_label);
28350 if (TARGET_CMOVE)
28352 rtx reg = gen_reg_rtx (SImode);
28353 rtx reg2 = gen_reg_rtx (Pmode);
28354 emit_move_insn (reg, tmpreg);
28355 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
28357 /* If zero is not in the first two bytes, move two bytes forward. */
28358 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28359 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28360 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28361 emit_insn (gen_rtx_SET (tmpreg,
28362 gen_rtx_IF_THEN_ELSE (SImode, tmp,
28363 reg,
28364 tmpreg)));
28365 /* Emit lea manually to avoid clobbering of flags. */
28366 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
28368 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28369 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28370 emit_insn (gen_rtx_SET (out,
28371 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
28372 reg2,
28373 out)));
28375 else
28377 rtx_code_label *end_2_label = gen_label_rtx ();
28378 /* Is zero in the first two bytes? */
28380 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28381 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28382 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
28383 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28384 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
28385 pc_rtx);
28386 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
28387 JUMP_LABEL (tmp) = end_2_label;
28389 /* Not in the first two. Move two bytes forward. */
28390 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
28391 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
28393 emit_label (end_2_label);
28397 /* Avoid a branch when fixing up the byte position. */
28398 tmpreg = gen_lowpart (QImode, tmpreg);
28399 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
28400 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
28401 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
28402 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
28404 emit_label (end_0_label);
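/* Editor's illustrative sketch (plain C): the zero-byte test built above
   with add/not/and.  The expression is nonzero exactly when some byte of W
   is zero, which is what lets the loop test four bytes per iteration with a
   single branch.  */
static int
has_zero_byte (unsigned int w)
{
  return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}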
28407 /* Expand strlen. */
28409 bool
28410 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
28412 rtx addr, scratch1, scratch2, scratch3, scratch4;
28414 /* The generic case of the strlen expander is long. Avoid expanding it
28415 unless TARGET_INLINE_ALL_STRINGOPS. */
28417 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28418 && !TARGET_INLINE_ALL_STRINGOPS
28419 && !optimize_insn_for_size_p ()
28420 && (!CONST_INT_P (align) || INTVAL (align) < 4))
28421 return false;
28423 addr = force_reg (Pmode, XEXP (src, 0));
28424 scratch1 = gen_reg_rtx (Pmode);
28426 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28427 && !optimize_insn_for_size_p ())
28429 /* Well it seems that some optimizer does not combine a call like
28430 foo(strlen(bar), strlen(bar));
28431 when the move and the subtraction are done here. It does calculate
28432 the length just once when these instructions are done inside
28433 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
28434 often used and I use one fewer register for the lifetime of
28435 output_strlen_unroll() this is better. */
28437 emit_move_insn (out, addr);
28439 ix86_expand_strlensi_unroll_1 (out, src, align);
28441 /* strlensi_unroll_1 returns the address of the zero at the end of
28442 the string, like memchr(), so compute the length by subtracting
28443 the start address. */
28444 emit_insn (ix86_gen_sub3 (out, out, addr));
28446 else
28448 rtx unspec;
28450 /* Can't use this if the user has appropriated eax, ecx, or edi. */
28451 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
28452 return false;
28453 /* Can't use this for non-default address spaces. */
28454 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
28455 return false;
28457 scratch2 = gen_reg_rtx (Pmode);
28458 scratch3 = gen_reg_rtx (Pmode);
28459 scratch4 = force_reg (Pmode, constm1_rtx);
28461 emit_move_insn (scratch3, addr);
28462 eoschar = force_reg (QImode, eoschar);
28464 src = replace_equiv_address_nv (src, scratch3);
28466 /* If .md starts supporting :P, this can be done in .md. */
28467 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
28468 scratch4), UNSPEC_SCAS);
28469 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
28470 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
28471 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
28473 return true;
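/* Editor's illustrative sketch (plain C): the length arithmetic behind the
   repnz scasb path above.  With the counter register started at -1,
   `repnz scasb' decrements it once per byte scanned, including the
   terminating zero, leaving -(len + 2); the length is therefore
   ~counter - 1, which is exactly the one's complement followed by the
   add of -1 emitted above.  */
static unsigned long
strlen_from_scas_counter (long counter_after_scas)
{
  return (unsigned long) (~counter_after_scas - 1);
}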
28476 /* For a given symbol (function) construct code to compute the address of its
28477 PLT entry in the large x86-64 PIC model. */
28478 static rtx
28479 construct_plt_address (rtx symbol)
28481 rtx tmp, unspec;
28483 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
28484 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
28485 gcc_assert (Pmode == DImode);
28487 tmp = gen_reg_rtx (Pmode);
28488 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
28490 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
28491 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
28492 return tmp;
28496 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
28497 rtx callarg2,
28498 rtx pop, bool sibcall)
28500 rtx vec[3];
28501 rtx use = NULL, call;
28502 unsigned int vec_len = 0;
28503 tree fndecl;
28505 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28507 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
28508 if (fndecl
28509 && (lookup_attribute ("interrupt",
28510 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
28511 error ("interrupt service routine can't be called directly");
28513 else
28514 fndecl = NULL_TREE;
28516 if (pop == const0_rtx)
28517 pop = NULL;
28518 gcc_assert (!TARGET_64BIT || !pop);
28520 if (TARGET_MACHO && !TARGET_64BIT)
28522 #if TARGET_MACHO
28523 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28524 fnaddr = machopic_indirect_call_target (fnaddr);
28525 #endif
28527 else
28529 /* Static functions and indirect calls don't need the pic register. Also,
28530 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
28531 it an indirect call. */
28532 rtx addr = XEXP (fnaddr, 0);
28533 if (flag_pic
28534 && GET_CODE (addr) == SYMBOL_REF
28535 && !SYMBOL_REF_LOCAL_P (addr))
28537 if (flag_plt
28538 && (SYMBOL_REF_DECL (addr) == NULL_TREE
28539 || !lookup_attribute ("noplt",
28540 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
28542 if (!TARGET_64BIT
28543 || (ix86_cmodel == CM_LARGE_PIC
28544 && DEFAULT_ABI != MS_ABI))
28546 use_reg (&use, gen_rtx_REG (Pmode,
28547 REAL_PIC_OFFSET_TABLE_REGNUM));
28548 if (ix86_use_pseudo_pic_reg ())
28549 emit_move_insn (gen_rtx_REG (Pmode,
28550 REAL_PIC_OFFSET_TABLE_REGNUM),
28551 pic_offset_table_rtx);
28554 else if (!TARGET_PECOFF && !TARGET_MACHO)
28556 if (TARGET_64BIT)
28558 fnaddr = gen_rtx_UNSPEC (Pmode,
28559 gen_rtvec (1, addr),
28560 UNSPEC_GOTPCREL);
28561 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28563 else
28565 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
28566 UNSPEC_GOT);
28567 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28568 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
28569 fnaddr);
28571 fnaddr = gen_const_mem (Pmode, fnaddr);
28572 /* Pmode may not be the same as word_mode for x32, which
28573 doesn't support indirect branch via 32-bit memory slot.
28574 Since x32 GOT slot is 64 bit with zero upper 32 bits,
28575 indirect branch via x32 GOT slot is OK. */
28576 if (GET_MODE (fnaddr) != word_mode)
28577 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
28578 fnaddr = gen_rtx_MEM (QImode, fnaddr);
28583 /* Skip setting up RAX register for -mskip-rax-setup when there are no
28584 parameters passed in vector registers. */
28585 if (TARGET_64BIT
28586 && (INTVAL (callarg2) > 0
28587 || (INTVAL (callarg2) == 0
28588 && (TARGET_SSE || !flag_skip_rax_setup))))
28590 rtx al = gen_rtx_REG (QImode, AX_REG);
28591 emit_move_insn (al, callarg2);
28592 use_reg (&use, al);
28595 if (ix86_cmodel == CM_LARGE_PIC
28596 && !TARGET_PECOFF
28597 && MEM_P (fnaddr)
28598 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
28599 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
28600 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
28601 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
28602 branch via x32 GOT slot is OK. */
28603 else if (!(TARGET_X32
28604 && MEM_P (fnaddr)
28605 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
28606 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
28607 && (sibcall
28608 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
28609 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
28611 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
28612 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
28615 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
28617 if (retval)
28619 /* We should add bounds registers as destinations in case
28620 a pointer with bounds may be returned. */
28621 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
28623 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
28624 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
28625 if (GET_CODE (retval) == PARALLEL)
28627 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
28628 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
28629 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
28630 retval = chkp_join_splitted_slot (retval, par);
28632 else
28634 retval = gen_rtx_PARALLEL (VOIDmode,
28635 gen_rtvec (3, retval, b0, b1));
28636 chkp_put_regs_to_expr_list (retval);
28640 call = gen_rtx_SET (retval, call);
28642 vec[vec_len++] = call;
28644 if (pop)
28646 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
28647 pop = gen_rtx_SET (stack_pointer_rtx, pop);
28648 vec[vec_len++] = pop;
28651 if (cfun->machine->no_caller_saved_registers
28652 && (!fndecl
28653 || (!TREE_THIS_VOLATILE (fndecl)
28654 && !lookup_attribute ("no_caller_saved_registers",
28655 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
28657 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
28658 bool is_64bit_ms_abi = (TARGET_64BIT
28659 && ix86_function_abi (fndecl) == MS_ABI);
28660 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
28662 /* If there are no caller-saved registers, add all registers
28663 that are clobbered by the call which returns. */
28664 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28665 if (!fixed_regs[i]
28666 && (ix86_call_used_regs[i] == 1
28667 || (ix86_call_used_regs[i] & c_mask))
28668 && !STACK_REGNO_P (i)
28669 && !MMX_REGNO_P (i))
28670 clobber_reg (&use,
28671 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
28673 else if (TARGET_64BIT_MS_ABI
28674 && (!callarg2 || INTVAL (callarg2) != -2))
28676 unsigned i;
28678 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
28680 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
28681 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
28683 clobber_reg (&use, gen_rtx_REG (mode, regno));
28686 /* Set here, but it may get cleared later. */
28687 if (TARGET_CALL_MS2SYSV_XLOGUES)
28689 if (!TARGET_SSE)
28692 /* Don't break hot-patched functions. */
28693 else if (ix86_function_ms_hook_prologue (current_function_decl))
28696 /* TODO: Cases not yet examined. */
28697 else if (flag_split_stack)
28698 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
28700 else
28702 gcc_assert (!reload_completed);
28703 cfun->machine->call_ms2sysv = true;
28708 if (vec_len > 1)
28709 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
28710 call = emit_call_insn (call);
28711 if (use)
28712 CALL_INSN_FUNCTION_USAGE (call) = use;
28714 return call;
28717 /* Return true if the function being called was marked with attribute
28718 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
28719 to handle the non-PIC case in the backend because there is no easy
28720 interface for the front-end to force non-PLT calls to use the GOT.
28721 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
28722 to call the function marked "noplt" indirectly. */
28724 static bool
28725 ix86_nopic_noplt_attribute_p (rtx call_op)
28727 if (flag_pic || ix86_cmodel == CM_LARGE
28728 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
28729 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
28730 || SYMBOL_REF_LOCAL_P (call_op))
28731 return false;
28733 tree symbol_decl = SYMBOL_REF_DECL (call_op);
28735 if (!flag_plt
28736 || (symbol_decl != NULL_TREE
28737 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
28738 return true;
28740 return false;
28743 /* Output indirect branch via a call and return thunk. CALL_OP is a
28744 register which contains the branch target. Branch is a tail call
28745 if SIBCALL_P is true.
28746 A normal call is converted to:
28748 call __x86_indirect_thunk_reg
28750 and a tail call is converted to:
28752 jmp __x86_indirect_thunk_reg
28755 static void
28756 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
28758 char thunk_name_buf[32];
28759 char *thunk_name;
28760 enum indirect_thunk_prefix need_prefix
28761 = indirect_thunk_need_prefix (current_output_insn);
28762 int regno = REGNO (call_op);
28764 if (cfun->machine->indirect_branch_type
28765 != indirect_branch_thunk_inline)
28767 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28769 int i = regno;
28770 if (i >= FIRST_REX_INT_REG)
28771 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
28772 if (need_prefix == indirect_thunk_prefix_bnd)
28773 indirect_thunks_bnd_used |= 1 << i;
28774 else
28775 indirect_thunks_used |= 1 << i;
28777 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28778 thunk_name = thunk_name_buf;
28780 else
28781 thunk_name = NULL;
28783 if (sibcall_p)
28785 if (thunk_name != NULL)
28787 if (need_prefix == indirect_thunk_prefix_bnd)
28788 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28789 else
28790 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28792 else
28793 output_indirect_thunk (need_prefix, regno);
28795 else
28797 if (thunk_name != NULL)
28799 if (need_prefix == indirect_thunk_prefix_bnd)
28800 fprintf (asm_out_file, "\tbnd call\t%s\n", thunk_name);
28801 else
28802 fprintf (asm_out_file, "\tcall\t%s\n", thunk_name);
28803 return;
28806 char indirectlabel1[32];
28807 char indirectlabel2[32];
28809 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28810 INDIRECT_LABEL,
28811 indirectlabelno++);
28812 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28813 INDIRECT_LABEL,
28814 indirectlabelno++);
28816 /* Jump. */
28817 if (need_prefix == indirect_thunk_prefix_bnd)
28818 fputs ("\tbnd jmp\t", asm_out_file);
28819 else
28820 fputs ("\tjmp\t", asm_out_file);
28821 assemble_name_raw (asm_out_file, indirectlabel2);
28822 fputc ('\n', asm_out_file);
28824 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28826 if (thunk_name != NULL)
28828 if (need_prefix == indirect_thunk_prefix_bnd)
28829 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28830 else
28831 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28833 else
28834 output_indirect_thunk (need_prefix, regno);
28836 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28838 /* Call. */
28839 if (need_prefix == indirect_thunk_prefix_bnd)
28840 fputs ("\tbnd call\t", asm_out_file);
28841 else
28842 fputs ("\tcall\t", asm_out_file);
28843 assemble_name_raw (asm_out_file, indirectlabel1);
28844 fputc ('\n', asm_out_file);
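/* Editor's note (derived from the code above, label names illustrative):
   when the thunk body is emitted inline for a normal (non-sibcall)
   indirect call, the sequence is

	jmp	.LIND2
   .LIND1:
	<indirect thunk jumping through CALL_OP's register>
   .LIND2:
	call	.LIND1

   so the original indirect call becomes a direct call to a local stub that
   performs the protected indirect jump, while the return address pushed by
   the call still points just past the call site.  The actual labels come
   from ASM_GENERATE_INTERNAL_LABEL.  */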
28848 /* Output indirect branch via a call and return thunk. CALL_OP is
28849 the branch target. XASM is the assembly template for CALL_OP.
28850 Branch is a tail call if SIBCALL_P is true. A normal call is
28851 converted to:
28853 	jmp L2
28854 L1:
28855 	push CALL_OP
28856 	jmp __x86_indirect_thunk
28857 L2:
28858 	call L1
28860 and a tail call is converted to:
28862 push CALL_OP
28863 jmp __x86_indirect_thunk
28866 static void
28867 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
28868 bool sibcall_p)
28870 char thunk_name_buf[32];
28871 char *thunk_name;
28872 char push_buf[64];
28873 enum indirect_thunk_prefix need_prefix
28874 = indirect_thunk_need_prefix (current_output_insn);
28875 int regno = -1;
28877 if (cfun->machine->indirect_branch_type
28878 != indirect_branch_thunk_inline)
28880 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28882 if (need_prefix == indirect_thunk_prefix_bnd)
28883 indirect_thunk_bnd_needed = true;
28884 else
28885 indirect_thunk_needed = true;
28887 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28888 thunk_name = thunk_name_buf;
28890 else
28891 thunk_name = NULL;
28893 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
28894 TARGET_64BIT ? 'q' : 'l', xasm);
28896 if (sibcall_p)
28898 output_asm_insn (push_buf, &call_op);
28899 if (thunk_name != NULL)
28901 if (need_prefix == indirect_thunk_prefix_bnd)
28902 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28903 else
28904 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28906 else
28907 output_indirect_thunk (need_prefix, regno);
28909 else
28911 char indirectlabel1[32];
28912 char indirectlabel2[32];
28914 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28915 INDIRECT_LABEL,
28916 indirectlabelno++);
28917 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28918 INDIRECT_LABEL,
28919 indirectlabelno++);
28921 /* Jump. */
28922 if (need_prefix == indirect_thunk_prefix_bnd)
28923 fputs ("\tbnd jmp\t", asm_out_file);
28924 else
28925 fputs ("\tjmp\t", asm_out_file);
28926 assemble_name_raw (asm_out_file, indirectlabel2);
28927 fputc ('\n', asm_out_file);
28929 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28931 /* An external function may be called via GOT, instead of PLT. */
28932 if (MEM_P (call_op))
28934 struct ix86_address parts;
28935 rtx addr = XEXP (call_op, 0);
28936 if (ix86_decompose_address (addr, &parts)
28937 && parts.base == stack_pointer_rtx)
28939 /* Since call will adjust stack by -UNITS_PER_WORD,
28940 we must convert "disp(stack, index, scale)" to
28941 "disp+UNITS_PER_WORD(stack, index, scale)". */
28942 if (parts.index)
28944 addr = gen_rtx_MULT (Pmode, parts.index,
28945 GEN_INT (parts.scale));
28946 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28947 addr);
28949 else
28950 addr = stack_pointer_rtx;
28952 rtx disp;
28953 if (parts.disp != NULL_RTX)
28954 disp = plus_constant (Pmode, parts.disp,
28955 UNITS_PER_WORD);
28956 else
28957 disp = GEN_INT (UNITS_PER_WORD);
28959 addr = gen_rtx_PLUS (Pmode, addr, disp);
28960 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
28964 output_asm_insn (push_buf, &call_op);
28966 if (thunk_name != NULL)
28968 if (need_prefix == indirect_thunk_prefix_bnd)
28969 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28970 else
28971 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28973 else
28974 output_indirect_thunk (need_prefix, regno);
28976 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28978 /* Call. */
28979 if (need_prefix == indirect_thunk_prefix_bnd)
28980 fputs ("\tbnd call\t", asm_out_file);
28981 else
28982 fputs ("\tcall\t", asm_out_file);
28983 assemble_name_raw (asm_out_file, indirectlabel1);
28984 fputc ('\n', asm_out_file);
28988 /* Output indirect branch via a call and return thunk. CALL_OP is
28989 the branch target. XASM is the assembly template for CALL_OP.
28990 Branch is a tail call if SIBCALL_P is true. */
28992 static void
28993 ix86_output_indirect_branch (rtx call_op, const char *xasm,
28994 bool sibcall_p)
28996 if (REG_P (call_op))
28997 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
28998 else
28999 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
29002 /* Output indirect jump.  CALL_OP is the jump target.  */
29005 const char *
29006 ix86_output_indirect_jmp (rtx call_op)
29008 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
29010 /* We can't have a red zone, since the "call" in the indirect thunk
29011 pushes the return address onto the stack, destroying the red zone.  */
29012 if (ix86_red_zone_size != 0)
29013 gcc_unreachable ();
29015 ix86_output_indirect_branch (call_op, "%0", true);
29016 return "";
29018 else
29019 return "%!jmp\t%A0";
29022 /* Output function return.  Add a REP prefix to RET if LONG_P is true
29023 and the function return is kept.  */
29025 const char *
29026 ix86_output_function_return (bool long_p)
29028 if (cfun->machine->function_return_type != indirect_branch_keep)
29030 char thunk_name[32];
29031 enum indirect_thunk_prefix need_prefix
29032 = indirect_thunk_need_prefix (current_output_insn);
29034 if (cfun->machine->function_return_type
29035 != indirect_branch_thunk_inline)
29037 bool need_thunk = (cfun->machine->function_return_type
29038 == indirect_branch_thunk);
29039 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
29040 true);
29041 if (need_prefix == indirect_thunk_prefix_bnd)
29043 indirect_thunk_bnd_needed |= need_thunk;
29044 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
29046 else
29048 indirect_thunk_needed |= need_thunk;
29049 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
29052 else
29053 output_indirect_thunk (need_prefix, INVALID_REGNUM);
29055 return "";
29058 if (!long_p || ix86_bnd_prefixed_insn_p (current_output_insn))
29059 return "%!ret";
29061 return "rep%; ret";
29064 /* Output indirect function return. RET_OP is the function return
29065 target. */
29067 const char *
29068 ix86_output_indirect_function_return (rtx ret_op)
29070 if (cfun->machine->function_return_type != indirect_branch_keep)
29072 char thunk_name[32];
29073 enum indirect_thunk_prefix need_prefix
29074 = indirect_thunk_need_prefix (current_output_insn);
29075 unsigned int regno = REGNO (ret_op);
29076 gcc_assert (regno == CX_REG);
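      /* The return-with-pop splitter (ix86_split_simple_return_pop_internal
	 below) always leaves the return address in %ecx, so only the ECX
	 variant of the return thunk can be needed here.  */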
29078 if (cfun->machine->function_return_type
29079 != indirect_branch_thunk_inline)
29081 bool need_thunk = (cfun->machine->function_return_type
29082 == indirect_branch_thunk);
29083 indirect_thunk_name (thunk_name, regno, need_prefix, true);
29084 if (need_prefix == indirect_thunk_prefix_bnd)
29086 if (need_thunk)
29088 indirect_return_via_cx_bnd = true;
29089 indirect_thunks_bnd_used |= 1 << CX_REG;
29091 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
29093 else
29095 if (need_thunk)
29097 indirect_return_via_cx = true;
29098 indirect_thunks_used |= 1 << CX_REG;
29100 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
29103 else
29104 output_indirect_thunk (need_prefix, regno);
29106 return "";
29108 else
29109 return "%!jmp\t%A0";
29112 /* Split a simple return that pops POPC bytes off the stack into an
29113 indirect branch with an explicit stack adjustment.  */
29115 void
29116 ix86_split_simple_return_pop_internal (rtx popc)
29118 struct machine_function *m = cfun->machine;
29119 rtx ecx = gen_rtx_REG (SImode, CX_REG);
29120 rtx_insn *insn;
29122 /* There is no "pascal" calling convention in any 64bit ABI. */
29123 gcc_assert (!TARGET_64BIT);
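  /* A "ret N" is split into the equivalent of

	popl	%ecx
	addl	$N, %esp
	jmp	*%ecx

     where the final indirect jump is itself emitted through the return
     thunk when -mfunction-return= requests one.  */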
29125 insn = emit_insn (gen_pop (ecx));
29126 m->fs.cfa_offset -= UNITS_PER_WORD;
29127 m->fs.sp_offset -= UNITS_PER_WORD;
29129 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
29130 x = gen_rtx_SET (stack_pointer_rtx, x);
29131 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
29132 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
29133 RTX_FRAME_RELATED_P (insn) = 1;
29135 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc);
29136 x = gen_rtx_SET (stack_pointer_rtx, x);
29137 insn = emit_insn (x);
29138 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
29139 RTX_FRAME_RELATED_P (insn) = 1;
29141 /* Now return address is in ECX. */
29142 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
29145 /* Output the assembly for a call instruction. */
29147 const char *
29148 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
29150 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
29151 bool output_indirect_p
29152 = (!TARGET_SEH
29153 && cfun->machine->indirect_branch_type != indirect_branch_keep);
29154 bool seh_nop_p = false;
29155 const char *xasm;
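  /* When the branch is redirected through an indirect-branch thunk,
     XASM holds only the operand template (e.g. "%0");
     ix86_output_indirect_branch supplies the surrounding push, jmp or
     call itself.  */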
29157 if (SIBLING_CALL_P (insn))
29159 if (direct_p)
29161 if (ix86_nopic_noplt_attribute_p (call_op))
29163 direct_p = false;
29164 if (TARGET_64BIT)
29166 if (output_indirect_p)
29167 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29168 else
29169 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29171 else
29173 if (output_indirect_p)
29174 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
29175 else
29176 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29179 else
29180 xasm = "%!jmp\t%P0";
29182 /* SEH epilogue detection requires the indirect branch case
29183 to include REX.W. */
29184 else if (TARGET_SEH)
29185 xasm = "%!rex.W jmp\t%A0";
29186 else
29188 if (output_indirect_p)
29189 xasm = "%0";
29190 else
29191 xasm = "%!jmp\t%A0";
29194 if (output_indirect_p && !direct_p)
29195 ix86_output_indirect_branch (call_op, xasm, true);
29196 else
29197 output_asm_insn (xasm, &call_op);
29198 return "";
29201 /* SEH unwinding can require an extra nop to be emitted in several
29202 circumstances. Determine if we have one of those. */
29203 if (TARGET_SEH)
29205 rtx_insn *i;
29207 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
29209 /* If we get to another real insn, we don't need the nop. */
29210 if (INSN_P (i))
29211 break;
29213 /* If we get to the epilogue note, prevent a catch region from
29214 being adjacent to the standard epilogue sequence. If non-
29215 call-exceptions, we'll have done this during epilogue emission. */
29216 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
29217 && !flag_non_call_exceptions
29218 && !can_throw_internal (insn))
29220 seh_nop_p = true;
29221 break;
29225 /* If we didn't find a real insn following the call, prevent the
29226 unwinder from looking into the next function. */
29227 if (i == NULL)
29228 seh_nop_p = true;
29231 if (direct_p)
29233 if (ix86_nopic_noplt_attribute_p (call_op))
29235 direct_p = false;
29236 if (TARGET_64BIT)
29238 if (output_indirect_p)
29239 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29240 else
29241 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29243 else
29245 if (output_indirect_p)
29246 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
29247 else
29248 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29251 else
29252 xasm = "%!call\t%P0";
29254 else
29256 if (output_indirect_p)
29257 xasm = "%0";
29258 else
29259 xasm = "%!call\t%A0";
29262 if (output_indirect_p && !direct_p)
29263 ix86_output_indirect_branch (call_op, xasm, false);
29264 else
29265 output_asm_insn (xasm, &call_op);
29267 if (seh_nop_p)
29268 return "nop";
29270 return "";
29273 /* Clear stack slot assignments remembered from previous functions.
29274 This is called from INIT_EXPANDERS once before RTL is emitted for each
29275 function. */
29277 static struct machine_function *
29278 ix86_init_machine_status (void)
29280 struct machine_function *f;
29282 f = ggc_cleared_alloc<machine_function> ();
29283 f->call_abi = ix86_abi;
29285 return f;
29288 /* Return a MEM corresponding to a stack slot with mode MODE.
29289 Allocate a new slot if necessary.
29291 The RTL for a function can have several slots available: N is
29292 which slot to use. */
29294 rtx
29295 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
29297 struct stack_local_entry *s;
29299 gcc_assert (n < MAX_386_STACK_LOCALS);
29301 for (s = ix86_stack_locals; s; s = s->next)
29302 if (s->mode == mode && s->n == n)
29303 return validize_mem (copy_rtx (s->rtl));
29305 s = ggc_alloc<stack_local_entry> ();
29306 s->n = n;
29307 s->mode = mode;
29308 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
29310 s->next = ix86_stack_locals;
29311 ix86_stack_locals = s;
29312 return validize_mem (copy_rtx (s->rtl));
29315 static void
29316 ix86_instantiate_decls (void)
29318 struct stack_local_entry *s;
29320 for (s = ix86_stack_locals; s; s = s->next)
29321 if (s->rtl != NULL_RTX)
29322 instantiate_decl_rtl (s->rtl);
29325 /* Return the number used for encoding REG, in the range 0..7. */
29327 static int
29328 reg_encoded_number (rtx reg)
29330 unsigned regno = REGNO (reg);
29331 switch (regno)
29333 case AX_REG:
29334 return 0;
29335 case CX_REG:
29336 return 1;
29337 case DX_REG:
29338 return 2;
29339 case BX_REG:
29340 return 3;
29341 case SP_REG:
29342 return 4;
29343 case BP_REG:
29344 return 5;
29345 case SI_REG:
29346 return 6;
29347 case DI_REG:
29348 return 7;
29349 default:
29350 break;
29352 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
29353 return regno - FIRST_STACK_REG;
29354 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
29355 return regno - FIRST_SSE_REG;
29356 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
29357 return regno - FIRST_MMX_REG;
29358 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
29359 return regno - FIRST_REX_SSE_REG;
29360 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
29361 return regno - FIRST_REX_INT_REG;
29362 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
29363 return regno - FIRST_MASK_REG;
29364 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
29365 return regno - FIRST_BND_REG;
29366 return -1;
29369 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
29370 in its encoding if it could be relevant for ROP mitigation, otherwise
29371 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
29372 used for calculating it into them. */
29374 static int
29375 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
29376 int *popno0 = 0, int *popno1 = 0)
29378 if (asm_noperands (PATTERN (insn)) >= 0)
29379 return -1;
29380 int has_modrm = get_attr_modrm (insn);
29381 if (!has_modrm)
29382 return -1;
29383 enum attr_modrm_class cls = get_attr_modrm_class (insn);
29384 rtx op0, op1;
29385 switch (cls)
29387 case MODRM_CLASS_OP02:
29388 gcc_assert (noperands >= 3);
29389 if (popno0)
29391 *popno0 = 0;
29392 *popno1 = 2;
29394 op0 = operands[0];
29395 op1 = operands[2];
29396 break;
29397 case MODRM_CLASS_OP01:
29398 gcc_assert (noperands >= 2);
29399 if (popno0)
29401 *popno0 = 0;
29402 *popno1 = 1;
29404 op0 = operands[0];
29405 op1 = operands[1];
29406 break;
29407 default:
29408 return -1;
29410 if (REG_P (op0) && REG_P (op1))
29412 int enc0 = reg_encoded_number (op0);
29413 int enc1 = reg_encoded_number (op1);
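      /* Build the ModRM byte: mod = 11 (register direct), reg field
	 from OP1, r/m field from OP0.  */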
29414 return 0xc0 + (enc1 << 3) + enc0;
29416 return -1;
29419 /* Check whether x86 address PARTS is a pc-relative address. */
29421 bool
29422 ix86_rip_relative_addr_p (struct ix86_address *parts)
29424 rtx base, index, disp;
29426 base = parts->base;
29427 index = parts->index;
29428 disp = parts->disp;
29430 if (disp && !base && !index)
29432 if (TARGET_64BIT)
29434 rtx symbol = disp;
29436 if (GET_CODE (disp) == CONST)
29437 symbol = XEXP (disp, 0);
29438 if (GET_CODE (symbol) == PLUS
29439 && CONST_INT_P (XEXP (symbol, 1)))
29440 symbol = XEXP (symbol, 0);
29442 if (GET_CODE (symbol) == LABEL_REF
29443 || (GET_CODE (symbol) == SYMBOL_REF
29444 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
29445 || (GET_CODE (symbol) == UNSPEC
29446 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
29447 || XINT (symbol, 1) == UNSPEC_PCREL
29448 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
29449 return true;
29452 return false;
29455 /* Calculate the length of the memory address in the instruction encoding.
29456 Includes the addr32 prefix; does not include the one-byte modrm, opcode,
29457 or other prefixes.  We never generate the addr32 prefix for LEA insns.  */
29459 int
29460 memory_address_length (rtx addr, bool lea)
29462 struct ix86_address parts;
29463 rtx base, index, disp;
29464 int len;
29465 int ok;
29467 if (GET_CODE (addr) == PRE_DEC
29468 || GET_CODE (addr) == POST_INC
29469 || GET_CODE (addr) == PRE_MODIFY
29470 || GET_CODE (addr) == POST_MODIFY)
29471 return 0;
29473 ok = ix86_decompose_address (addr, &parts);
29474 gcc_assert (ok);
29476 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
29478 /* If this is not LEA instruction, add the length of addr32 prefix. */
29479 if (TARGET_64BIT && !lea
29480 && (SImode_address_operand (addr, VOIDmode)
29481 || (parts.base && GET_MODE (parts.base) == SImode)
29482 || (parts.index && GET_MODE (parts.index) == SImode)))
29483 len++;
29485 base = parts.base;
29486 index = parts.index;
29487 disp = parts.disp;
29489 if (base && SUBREG_P (base))
29490 base = SUBREG_REG (base);
29491 if (index && SUBREG_P (index))
29492 index = SUBREG_REG (index);
29494 gcc_assert (base == NULL_RTX || REG_P (base));
29495 gcc_assert (index == NULL_RTX || REG_P (index));
29497 /* Rule of thumb:
29498 - esp as the base always wants an index,
29499 - ebp as the base always wants a displacement,
29500 - r12 as the base always wants an index,
29501 - r13 as the base always wants a displacement. */
29503 /* Register Indirect. */
29504 if (base && !index && !disp)
29506 /* esp (for its index) and ebp (for its displacement) need
29507 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
29508 code. */
29509 if (base == arg_pointer_rtx
29510 || base == frame_pointer_rtx
29511 || REGNO (base) == SP_REG
29512 || REGNO (base) == BP_REG
29513 || REGNO (base) == R12_REG
29514 || REGNO (base) == R13_REG)
29515 len++;
29518 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
29519 is not disp32, but disp32(%rip), so for disp32
29520 a SIB byte is needed, unless print_operand_address
29521 optimizes it into disp32(%rip) or (%rip) is implied
29522 by UNSPEC. */
29523 else if (disp && !base && !index)
29525 len += 4;
29526 if (!ix86_rip_relative_addr_p (&parts))
29527 len++;
29529 else
29531 /* Find the length of the displacement constant. */
29532 if (disp)
29534 if (base && satisfies_constraint_K (disp))
29535 len += 1;
29536 else
29537 len += 4;
29539 /* ebp always wants a displacement. Similarly r13. */
29540 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
29541 len++;
29543 /* An index requires the two-byte modrm form.... */
29544 if (index
29545 /* ...like esp (or r12), which always wants an index. */
29546 || base == arg_pointer_rtx
29547 || base == frame_pointer_rtx
29548 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
29549 len++;
29552 return len;
29555 /* Compute default value for "length_immediate" attribute. When SHORTFORM
29556 is set, expect that the insn has an 8-bit immediate alternative.  */
29557 int
29558 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
29560 int len = 0;
29561 int i;
29562 extract_insn_cached (insn);
29563 for (i = recog_data.n_operands - 1; i >= 0; --i)
29564 if (CONSTANT_P (recog_data.operand[i]))
29566 enum attr_mode mode = get_attr_mode (insn);
29568 gcc_assert (!len);
29569 if (shortform && CONST_INT_P (recog_data.operand[i]))
29571 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
29572 switch (mode)
29574 case MODE_QI:
29575 len = 1;
29576 continue;
29577 case MODE_HI:
29578 ival = trunc_int_for_mode (ival, HImode);
29579 break;
29580 case MODE_SI:
29581 ival = trunc_int_for_mode (ival, SImode);
29582 break;
29583 default:
29584 break;
29586 if (IN_RANGE (ival, -128, 127))
29588 len = 1;
29589 continue;
29592 switch (mode)
29594 case MODE_QI:
29595 len = 1;
29596 break;
29597 case MODE_HI:
29598 len = 2;
29599 break;
29600 case MODE_SI:
29601 len = 4;
29602 break;
29603 /* Immediates for DImode instructions are encoded
29604 as 32bit sign extended values. */
29605 case MODE_DI:
29606 len = 4;
29607 break;
29608 default:
29609 fatal_insn ("unknown insn mode", insn);
29612 return len;
29615 /* Compute default value for "length_address" attribute. */
29616 int
29617 ix86_attr_length_address_default (rtx_insn *insn)
29619 int i;
29621 if (get_attr_type (insn) == TYPE_LEA)
29623 rtx set = PATTERN (insn), addr;
29625 if (GET_CODE (set) == PARALLEL)
29626 set = XVECEXP (set, 0, 0);
29628 gcc_assert (GET_CODE (set) == SET);
29630 addr = SET_SRC (set);
29632 return memory_address_length (addr, true);
29635 extract_insn_cached (insn);
29636 for (i = recog_data.n_operands - 1; i >= 0; --i)
29638 rtx op = recog_data.operand[i];
29639 if (MEM_P (op))
29641 constrain_operands_cached (insn, reload_completed);
29642 if (which_alternative != -1)
29644 const char *constraints = recog_data.constraints[i];
29645 int alt = which_alternative;
29647 while (*constraints == '=' || *constraints == '+')
29648 constraints++;
29649 while (alt-- > 0)
29650 while (*constraints++ != ',')
29652 /* Skip ignored operands. */
29653 if (*constraints == 'X')
29654 continue;
29657 int len = memory_address_length (XEXP (op, 0), false);
29659 /* Account for segment prefix for non-default addr spaces. */
29660 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
29661 len++;
29663 return len;
29666 return 0;
29669 /* Compute default value for "length_vex" attribute. It includes
29670 the 2- or 3-byte VEX prefix and 1 opcode byte.  */
29672 int
29673 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
29674 bool has_vex_w)
29676 int i;
29678 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit
29679 requires the 3-byte VEX prefix.  */
29680 if (!has_0f_opcode || has_vex_w)
29681 return 3 + 1;
29683 /* We can always use 2 byte VEX prefix in 32bit. */
29684 if (!TARGET_64BIT)
29685 return 2 + 1;
29687 extract_insn_cached (insn);
29689 for (i = recog_data.n_operands - 1; i >= 0; --i)
29690 if (REG_P (recog_data.operand[i]))
29692 /* REX.W bit uses 3 byte VEX prefix. */
29693 if (GET_MODE (recog_data.operand[i]) == DImode
29694 && GENERAL_REG_P (recog_data.operand[i]))
29695 return 3 + 1;
29697 else
29699 /* REX.X or REX.B bits use 3 byte VEX prefix. */
29700 if (MEM_P (recog_data.operand[i])
29701 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
29702 return 3 + 1;
29705 return 2 + 1;
29709 static bool
29710 ix86_class_likely_spilled_p (reg_class_t);
29712 /* Return true if the lhs of INSN is a HW function argument register; set
29713 *IS_SPILLED to true if it is a likely-spilled HW register.  */
29714 static bool
29715 insn_is_function_arg (rtx insn, bool* is_spilled)
29717 rtx dst;
29719 if (!NONDEBUG_INSN_P (insn))
29720 return false;
29721 /* Call instructions are not movable; ignore them.  */
29722 if (CALL_P (insn))
29723 return false;
29724 insn = PATTERN (insn);
29725 if (GET_CODE (insn) == PARALLEL)
29726 insn = XVECEXP (insn, 0, 0);
29727 if (GET_CODE (insn) != SET)
29728 return false;
29729 dst = SET_DEST (insn);
29730 if (REG_P (dst) && HARD_REGISTER_P (dst)
29731 && ix86_function_arg_regno_p (REGNO (dst)))
29733 /* Is it likely spilled HW register? */
29734 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
29735 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
29736 *is_spilled = true;
29737 return true;
29739 return false;
29742 /* Add output dependencies for a chain of adjacent function arguments, but
29743 only if there is a move to a likely-spilled HW register.  Return the first
29744 argument if at least one dependence was added, or NULL otherwise.  */
29745 static rtx_insn *
29746 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
29748 rtx_insn *insn;
29749 rtx_insn *last = call;
29750 rtx_insn *first_arg = NULL;
29751 bool is_spilled = false;
29753 head = PREV_INSN (head);
29755 /* Find nearest to call argument passing instruction. */
29756 while (true)
29758 last = PREV_INSN (last);
29759 if (last == head)
29760 return NULL;
29761 if (!NONDEBUG_INSN_P (last))
29762 continue;
29763 if (insn_is_function_arg (last, &is_spilled))
29764 break;
29765 return NULL;
29768 first_arg = last;
29769 while (true)
29771 insn = PREV_INSN (last);
29772 if (!INSN_P (insn))
29773 break;
29774 if (insn == head)
29775 break;
29776 if (!NONDEBUG_INSN_P (insn))
29778 last = insn;
29779 continue;
29781 if (insn_is_function_arg (insn, &is_spilled))
29783 /* Add an output dependence between two function arguments if the chain
29784 of output arguments contains likely-spilled HW registers.  */
29785 if (is_spilled)
29786 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29787 first_arg = last = insn;
29789 else
29790 break;
29792 if (!is_spilled)
29793 return NULL;
29794 return first_arg;
29797 /* Add output or anti dependency from insn to first_arg to restrict its code
29798 motion. */
29799 static void
29800 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
29802 rtx set;
29803 rtx tmp;
29805 /* Add anti dependencies for bounds stores. */
29806 if (INSN_P (insn)
29807 && GET_CODE (PATTERN (insn)) == PARALLEL
29808 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
29809 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
29811 add_dependence (first_arg, insn, REG_DEP_ANTI);
29812 return;
29815 set = single_set (insn);
29816 if (!set)
29817 return;
29818 tmp = SET_DEST (set);
29819 if (REG_P (tmp))
29821 /* Add output dependency to the first function argument. */
29822 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29823 return;
29825 /* Add anti dependency. */
29826 add_dependence (first_arg, insn, REG_DEP_ANTI);
29829 /* Avoid cross-block motion of a function argument by adding a dependency
29830 from the first non-jump instruction in BB.  */
29831 static void
29832 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
29834 rtx_insn *insn = BB_END (bb);
29836 while (insn)
29838 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
29840 rtx set = single_set (insn);
29841 if (set)
29843 avoid_func_arg_motion (arg, insn);
29844 return;
29847 if (insn == BB_HEAD (bb))
29848 return;
29849 insn = PREV_INSN (insn);
29853 /* Hook for pre-reload schedule - avoid motion of function arguments
29854 passed in likely spilled HW registers. */
29855 static void
29856 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
29858 rtx_insn *insn;
29859 rtx_insn *first_arg = NULL;
29860 if (reload_completed)
29861 return;
29862 while (head != tail && DEBUG_INSN_P (head))
29863 head = NEXT_INSN (head);
29864 for (insn = tail; insn != head; insn = PREV_INSN (insn))
29865 if (INSN_P (insn) && CALL_P (insn))
29867 first_arg = add_parameter_dependencies (insn, head);
29868 if (first_arg)
29870 /* Add a dependee for the first argument to predecessors, but only
29871 if the region contains more than one block.  */
29872 basic_block bb = BLOCK_FOR_INSN (insn);
29873 int rgn = CONTAINING_RGN (bb->index);
29874 int nr_blks = RGN_NR_BLOCKS (rgn);
29875 /* Skip trivial regions and region head blocks that can have
29876 predecessors outside of region. */
29877 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
29879 edge e;
29880 edge_iterator ei;
29882 /* Regions are SCCs with the exception of selective
29883 scheduling with pipelining of outer blocks enabled.
29884 So also check that immediate predecessors of a non-head
29885 block are in the same region. */
29886 FOR_EACH_EDGE (e, ei, bb->preds)
29888 /* Avoid creating loop-carried dependencies by using the
29889 topological ordering of the region.  */
29890 if (rgn == CONTAINING_RGN (e->src->index)
29891 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
29892 add_dependee_for_func_arg (first_arg, e->src);
29895 insn = first_arg;
29896 if (insn == head)
29897 break;
29900 else if (first_arg)
29901 avoid_func_arg_motion (first_arg, insn);
29904 /* Hook for pre-reload schedule - set priority of moves from likely spilled
29905 HW registers to the maximum, to schedule them as soon as possible.  These are
29906 moves from function argument registers at the top of the function entry
29907 and moves from function return value registers after call. */
29908 static int
29909 ix86_adjust_priority (rtx_insn *insn, int priority)
29911 rtx set;
29913 if (reload_completed)
29914 return priority;
29916 if (!NONDEBUG_INSN_P (insn))
29917 return priority;
29919 set = single_set (insn);
29920 if (set)
29922 rtx tmp = SET_SRC (set);
29923 if (REG_P (tmp)
29924 && HARD_REGISTER_P (tmp)
29925 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
29926 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
29927 return current_sched_info->sched_max_insns_priority;
29930 return priority;
29933 /* Prepare for scheduling pass. */
29934 static void
29935 ix86_sched_init_global (FILE *, int, int)
29937 /* Install scheduling hooks for current CPU. Some of these hooks are used
29938 in time-critical parts of the scheduler, so we only set them up when
29939 they are actually used. */
29940 switch (ix86_tune)
29942 case PROCESSOR_CORE2:
29943 case PROCESSOR_NEHALEM:
29944 case PROCESSOR_SANDYBRIDGE:
29945 case PROCESSOR_HASWELL:
29946 case PROCESSOR_GENERIC:
29947 /* Do not perform multipass scheduling for pre-reload schedule
29948 to save compile time. */
29949 if (reload_completed)
29951 ix86_core2i7_init_hooks ();
29952 break;
29954 /* Fall through. */
29955 default:
29956 targetm.sched.dfa_post_advance_cycle = NULL;
29957 targetm.sched.first_cycle_multipass_init = NULL;
29958 targetm.sched.first_cycle_multipass_begin = NULL;
29959 targetm.sched.first_cycle_multipass_issue = NULL;
29960 targetm.sched.first_cycle_multipass_backtrack = NULL;
29961 targetm.sched.first_cycle_multipass_end = NULL;
29962 targetm.sched.first_cycle_multipass_fini = NULL;
29963 break;
29968 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
29970 static HOST_WIDE_INT
29971 ix86_static_rtx_alignment (machine_mode mode)
29973 if (mode == DFmode)
29974 return 64;
29975 if (ALIGN_MODE_128 (mode))
29976 return MAX (128, GET_MODE_ALIGNMENT (mode));
29977 return GET_MODE_ALIGNMENT (mode);
29980 /* Implement TARGET_CONSTANT_ALIGNMENT. */
29982 static HOST_WIDE_INT
29983 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
29985 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29986 || TREE_CODE (exp) == INTEGER_CST)
29988 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
29989 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
29990 return MAX (mode_align, align);
29992 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29993 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29994 return BITS_PER_WORD;
29996 return align;
29999 /* Implement TARGET_EMPTY_RECORD_P. */
30001 static bool
30002 ix86_is_empty_record (const_tree type)
30004 if (!TARGET_64BIT)
30005 return false;
30006 return default_is_empty_record (type);
30009 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
30011 static void
30012 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
30014 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
30016 if (!cum->warn_empty)
30017 return;
30019 if (!TYPE_EMPTY_P (type))
30020 return;
30022 const_tree ctx = get_ultimate_context (cum->decl);
30023 if (ctx != NULL_TREE
30024 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
30025 return;
30027 /* If the actual size of the type is zero, then there is no change
30028 in how objects of this size are passed. */
30029 if (int_size_in_bytes (type) == 0)
30030 return;
30032 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
30033 "changes in -fabi-version=12 (GCC 8)", type);
30035 /* Only warn once. */
30036 cum->warn_empty = false;
30039 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
30040 the data type, and ALIGN is the alignment that the object would
30041 ordinarily have. */
30043 static int
30044 iamcu_alignment (tree type, int align)
30046 machine_mode mode;
30048 if (align < 32 || TYPE_USER_ALIGN (type))
30049 return align;
30051 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes
30052 are aligned to 4 bytes.  */
30053 mode = TYPE_MODE (strip_array_types (type));
30054 switch (GET_MODE_CLASS (mode))
30056 case MODE_INT:
30057 case MODE_COMPLEX_INT:
30058 case MODE_COMPLEX_FLOAT:
30059 case MODE_FLOAT:
30060 case MODE_DECIMAL_FLOAT:
30061 return 32;
30062 default:
30063 return align;
30067 /* Compute the alignment for a static variable.
30068 TYPE is the data type, and ALIGN is the alignment that
30069 the object would ordinarily have. The value of this function is used
30070 instead of that alignment to align the object. */
30072 int
30073 ix86_data_alignment (tree type, int align, bool opt)
30075 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
30076 for symbols from other compilation units or symbols that don't need
30077 to bind locally. In order to preserve some ABI compatibility with
30078 those compilers, ensure we don't decrease alignment from what we
30079 used to assume. */
30081 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
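  /* I.e. keep giving at least 256-bit (32-byte) alignment to
     aggregates of 32 bytes or more, matching the historical behaviour
     described above.  */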
30083 /* A data structure equal to or greater than the size of a cache line
30084 (64 bytes on the Pentium 4 and other recent Intel processors, including
30085 processors based on the Intel Core microarchitecture) should be aligned
30086 so that its base address is a multiple of the cache line size.  */
30088 int max_align
30089 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
30091 if (max_align < BITS_PER_WORD)
30092 max_align = BITS_PER_WORD;
30094 switch (ix86_align_data_type)
30096 case ix86_align_data_type_abi: opt = false; break;
30097 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
30098 case ix86_align_data_type_cacheline: break;
30101 if (TARGET_IAMCU)
30102 align = iamcu_alignment (type, align);
30104 if (opt
30105 && AGGREGATE_TYPE_P (type)
30106 && TYPE_SIZE (type)
30107 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
30109 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
30110 && align < max_align_compat)
30111 align = max_align_compat;
30112 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
30113 && align < max_align)
30114 align = max_align;
30117 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
30118 to a 16-byte boundary.  */
30119 if (TARGET_64BIT)
30121 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
30122 && TYPE_SIZE (type)
30123 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30124 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
30125 && align < 128)
30126 return 128;
30129 if (!opt)
30130 return align;
30132 if (TREE_CODE (type) == ARRAY_TYPE)
30134 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30135 return 64;
30136 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30137 return 128;
30139 else if (TREE_CODE (type) == COMPLEX_TYPE)
30142 if (TYPE_MODE (type) == DCmode && align < 64)
30143 return 64;
30144 if ((TYPE_MODE (type) == XCmode
30145 || TYPE_MODE (type) == TCmode) && align < 128)
30146 return 128;
30148 else if ((TREE_CODE (type) == RECORD_TYPE
30149 || TREE_CODE (type) == UNION_TYPE
30150 || TREE_CODE (type) == QUAL_UNION_TYPE)
30151 && TYPE_FIELDS (type))
30153 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30154 return 64;
30155 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30156 return 128;
30158 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30159 || TREE_CODE (type) == INTEGER_TYPE)
30161 if (TYPE_MODE (type) == DFmode && align < 64)
30162 return 64;
30163 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30164 return 128;
30167 return align;
30170 /* Compute the alignment for a local variable or a stack slot. EXP is
30171 the data type or decl itself, MODE is the widest mode available and
30172 ALIGN is the alignment that the object would ordinarily have. The
30173 value of this macro is used instead of that alignment to align the
30174 object. */
30176 unsigned int
30177 ix86_local_alignment (tree exp, machine_mode mode,
30178 unsigned int align)
30180 tree type, decl;
30182 if (exp && DECL_P (exp))
30184 type = TREE_TYPE (exp);
30185 decl = exp;
30187 else
30189 type = exp;
30190 decl = NULL;
30193 /* Don't do dynamic stack realignment for long long objects with
30194 -mpreferred-stack-boundary=2. */
30195 if (!TARGET_64BIT
30196 && align == 64
30197 && ix86_preferred_stack_boundary < 64
30198 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
30199 && (!type || !TYPE_USER_ALIGN (type))
30200 && (!decl || !DECL_USER_ALIGN (decl)))
30201 align = 32;
30203 /* If TYPE is NULL, we are allocating a stack slot for caller-save
30204 register in MODE. We will return the largest alignment of XF
30205 and DF. */
30206 if (!type)
30208 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
30209 align = GET_MODE_ALIGNMENT (DFmode);
30210 return align;
30213 /* Don't increase alignment for Intel MCU psABI. */
30214 if (TARGET_IAMCU)
30215 return align;
30217 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
30218 to a 16-byte boundary.  The exact wording is:
30220 An array uses the same alignment as its elements, except that a local or
30221 global array variable of length at least 16 bytes or
30222 a C99 variable-length array variable always has alignment of at least 16 bytes.
30224 This was added to allow the use of aligned SSE instructions on arrays.  The
30225 rule is meant for static storage (where the compiler cannot do the analysis
30226 by itself).  We follow it for automatic variables only when convenient:
30227 we fully control everything in the function being compiled, and functions
30228 from other units cannot rely on the alignment.
30230 Exclude the va_list type.  It is the common case of a local array where
30231 we cannot benefit from the alignment.
30233 TODO: Probably one should optimize for size only when var is not escaping. */
30234 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
30235 && TARGET_SSE)
30237 if (AGGREGATE_TYPE_P (type)
30238 && (va_list_type_node == NULL_TREE
30239 || (TYPE_MAIN_VARIANT (type)
30240 != TYPE_MAIN_VARIANT (va_list_type_node)))
30241 && TYPE_SIZE (type)
30242 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30243 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
30244 && align < 128)
30245 return 128;
30247 if (TREE_CODE (type) == ARRAY_TYPE)
30249 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30250 return 64;
30251 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30252 return 128;
30254 else if (TREE_CODE (type) == COMPLEX_TYPE)
30256 if (TYPE_MODE (type) == DCmode && align < 64)
30257 return 64;
30258 if ((TYPE_MODE (type) == XCmode
30259 || TYPE_MODE (type) == TCmode) && align < 128)
30260 return 128;
30262 else if ((TREE_CODE (type) == RECORD_TYPE
30263 || TREE_CODE (type) == UNION_TYPE
30264 || TREE_CODE (type) == QUAL_UNION_TYPE)
30265 && TYPE_FIELDS (type))
30267 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30268 return 64;
30269 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30270 return 128;
30272 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30273 || TREE_CODE (type) == INTEGER_TYPE)
30276 if (TYPE_MODE (type) == DFmode && align < 64)
30277 return 64;
30278 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30279 return 128;
30281 return align;
30284 /* Compute the minimum required alignment for dynamic stack realignment
30285 purposes for a local variable, parameter or a stack slot. EXP is
30286 the data type or decl itself, MODE is its mode and ALIGN is the
30287 alignment that the object would ordinarily have. */
30289 unsigned int
30290 ix86_minimum_alignment (tree exp, machine_mode mode,
30291 unsigned int align)
30293 tree type, decl;
30295 if (exp && DECL_P (exp))
30297 type = TREE_TYPE (exp);
30298 decl = exp;
30300 else
30302 type = exp;
30303 decl = NULL;
30306 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
30307 return align;
30309 /* Don't do dynamic stack realignment for long long objects with
30310 -mpreferred-stack-boundary=2. */
30311 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
30312 && (!type || !TYPE_USER_ALIGN (type))
30313 && (!decl || !DECL_USER_ALIGN (decl)))
30315 gcc_checking_assert (!TARGET_STV);
30316 return 32;
30319 return align;
30322 /* Find a location for the static chain incoming to a nested function.
30323 This is a register, unless all free registers are used by arguments. */
30325 static rtx
30326 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
30328 unsigned regno;
30330 if (TARGET_64BIT)
30332 /* We always use R10 in 64-bit mode. */
30333 regno = R10_REG;
30335 else
30337 const_tree fntype, fndecl;
30338 unsigned int ccvt;
30340 /* By default in 32-bit mode we use ECX to pass the static chain. */
30341 regno = CX_REG;
30343 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
30345 fntype = TREE_TYPE (fndecl_or_type);
30346 fndecl = fndecl_or_type;
30348 else
30350 fntype = fndecl_or_type;
30351 fndecl = NULL;
30354 ccvt = ix86_get_callcvt (fntype);
30355 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30357 /* Fastcall functions use ecx/edx for arguments, which leaves
30358 us with EAX for the static chain.
30359 Thiscall functions use ecx for arguments, which also
30360 leaves us with EAX for the static chain. */
30361 regno = AX_REG;
30363 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30365 /* Thiscall functions use ecx for arguments, which leaves
30366 us with EAX and EDX for the static chain.
30367 For ABI compatibility we use EAX.  */
30368 regno = AX_REG;
30370 else if (ix86_function_regparm (fntype, fndecl) == 3)
30372 /* For regparm 3, we have no free call-clobbered registers in
30373 which to store the static chain. In order to implement this,
30374 we have the trampoline push the static chain to the stack.
30375 However, we can't push a value below the return address when
30376 we call the nested function directly, so we have to use an
30377 alternate entry point. For this we use ESI, and have the
30378 alternate entry point push ESI, so that things appear the
30379 same once we're executing the nested function. */
30380 if (incoming_p)
30382 if (fndecl == current_function_decl
30383 && !ix86_static_chain_on_stack)
30385 gcc_assert (!reload_completed);
30386 ix86_static_chain_on_stack = true;
30388 return gen_frame_mem (SImode,
30389 plus_constant (Pmode,
30390 arg_pointer_rtx, -8));
30392 regno = SI_REG;
30396 return gen_rtx_REG (Pmode, regno);
30399 /* Emit RTL insns to initialize the variable parts of a trampoline.
30400 FNDECL is the decl of the target address; M_TRAMP is a MEM for
30401 the trampoline, and CHAIN_VALUE is an RTX for the static chain
30402 to be passed to the target function. */
30404 static void
30405 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
30407 rtx mem, fnaddr;
30408 int opcode;
30409 int offset = 0;
30411 fnaddr = XEXP (DECL_RTL (fndecl), 0);
30413 if (TARGET_64BIT)
30415 int size;
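      /* The trampoline emitted below is, roughly,

		mov{l|absq}	$FNADDR, %r11
		mov{l|absq}	$CHAIN_VALUE, %r10
		rex.W jmp	*%r11
		nop

	 using the short movl forms whenever the value fits in 32 bits.  */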
30417 /* Load the function address into r11.  Try to load the address using
30418 the shorter movl instead of movabs.  We may want to support
30419 movq for kernel mode, but the kernel does not use trampolines at
30420 the moment.  FNADDR is a 32-bit address and may not be in
30421 DImode when ptr_mode == SImode. Always use movl in this
30422 case. */
30423 if (ptr_mode == SImode
30424 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
30426 fnaddr = copy_addr_to_reg (fnaddr);
30428 mem = adjust_address (m_tramp, HImode, offset);
30429 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
30431 mem = adjust_address (m_tramp, SImode, offset + 2);
30432 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
30433 offset += 6;
30435 else
30437 mem = adjust_address (m_tramp, HImode, offset);
30438 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
30440 mem = adjust_address (m_tramp, DImode, offset + 2);
30441 emit_move_insn (mem, fnaddr);
30442 offset += 10;
30445 /* Load static chain using movabs to r10. Use the shorter movl
30446 instead of movabs when ptr_mode == SImode. */
30447 if (ptr_mode == SImode)
30449 opcode = 0xba41;
30450 size = 6;
30452 else
30454 opcode = 0xba49;
30455 size = 10;
30458 mem = adjust_address (m_tramp, HImode, offset);
30459 emit_move_insn (mem, gen_int_mode (opcode, HImode));
30461 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
30462 emit_move_insn (mem, chain_value);
30463 offset += size;
30465 /* Jump to r11; the last (unused) byte is a nop, only there to
30466 pad the write out to a single 32-bit store. */
30467 mem = adjust_address (m_tramp, SImode, offset);
30468 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
30469 offset += 4;
30471 else
30473 rtx disp, chain;
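      /* The 32-bit trampoline emitted below is

		movl	$CHAIN_VALUE, %eax|%ecx   (or "pushl $CHAIN_VALUE")
		jmp	FNADDR

	 depending on where ix86_static_chain places the static chain;
	 the jmp is a rel32 jump to FNADDR.  */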
30475 /* Depending on the static chain location, either load a register
30476 with a constant, or push the constant to the stack. All of the
30477 instructions are the same size. */
30478 chain = ix86_static_chain (fndecl, true);
30479 if (REG_P (chain))
30481 switch (REGNO (chain))
30483 case AX_REG:
30484 opcode = 0xb8; break;
30485 case CX_REG:
30486 opcode = 0xb9; break;
30487 default:
30488 gcc_unreachable ();
30491 else
30492 opcode = 0x68;
30494 mem = adjust_address (m_tramp, QImode, offset);
30495 emit_move_insn (mem, gen_int_mode (opcode, QImode));
30497 mem = adjust_address (m_tramp, SImode, offset + 1);
30498 emit_move_insn (mem, chain_value);
30499 offset += 5;
30501 mem = adjust_address (m_tramp, QImode, offset);
30502 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
30504 mem = adjust_address (m_tramp, SImode, offset + 1);
30506 /* Compute offset from the end of the jmp to the target function.
30507 In the case in which the trampoline stores the static chain on
30508 the stack, we need to skip the first insn which pushes the
30509 (call-saved) register static chain; this push is 1 byte. */
30510 offset += 5;
30511 disp = expand_binop (SImode, sub_optab, fnaddr,
30512 plus_constant (Pmode, XEXP (m_tramp, 0),
30513 offset - (MEM_P (chain) ? 1 : 0)),
30514 NULL_RTX, 1, OPTAB_DIRECT);
30515 emit_move_insn (mem, disp);
30518 gcc_assert (offset <= TRAMPOLINE_SIZE);
30520 #ifdef HAVE_ENABLE_EXECUTE_STACK
30521 #ifdef CHECK_EXECUTE_STACK_ENABLED
30522 if (CHECK_EXECUTE_STACK_ENABLED)
30523 #endif
30524 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
30525 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
30526 #endif
30529 static bool
30530 ix86_allocate_stack_slots_for_args (void)
30532 /* Naked functions should not allocate stack slots for arguments. */
30533 return !ix86_function_naked (current_function_decl);
30536 static bool
30537 ix86_warn_func_return (tree decl)
30539 /* Naked functions are implemented entirely in assembly, including the
30540 return sequence, so suppress warnings about this. */
30541 return !ix86_function_naked (decl);
30544 /* The following file contains several enumerations and data structures
30545 built from the definitions in i386-builtin-types.def. */
30547 #include "i386-builtin-types.inc"
30549 /* Table for the ix86 builtin non-function types. */
30550 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
30552 /* Retrieve an element from the above table, building some of
30553 the types lazily. */
30555 static tree
30556 ix86_get_builtin_type (enum ix86_builtin_type tcode)
30558 unsigned int index;
30559 tree type, itype;
30561 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
30563 type = ix86_builtin_type_tab[(int) tcode];
30564 if (type != NULL)
30565 return type;
30567 gcc_assert (tcode > IX86_BT_LAST_PRIM);
30568 if (tcode <= IX86_BT_LAST_VECT)
30570 machine_mode mode;
30572 index = tcode - IX86_BT_LAST_PRIM - 1;
30573 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
30574 mode = ix86_builtin_type_vect_mode[index];
30576 type = build_vector_type_for_mode (itype, mode);
30578 else
30580 int quals;
30582 index = tcode - IX86_BT_LAST_VECT - 1;
30583 if (tcode <= IX86_BT_LAST_PTR)
30584 quals = TYPE_UNQUALIFIED;
30585 else
30586 quals = TYPE_QUAL_CONST;
30588 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
30589 if (quals != TYPE_UNQUALIFIED)
30590 itype = build_qualified_type (itype, quals);
30592 type = build_pointer_type (itype);
30595 ix86_builtin_type_tab[(int) tcode] = type;
30596 return type;
30599 /* Table for the ix86 builtin function types. */
30600 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
30602 /* Retrieve an element from the above table, building some of
30603 the types lazily. */
30605 static tree
30606 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
30608 tree type;
30610 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
30612 type = ix86_builtin_func_type_tab[(int) tcode];
30613 if (type != NULL)
30614 return type;
30616 if (tcode <= IX86_BT_LAST_FUNC)
30618 unsigned start = ix86_builtin_func_start[(int) tcode];
30619 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
30620 tree rtype, atype, args = void_list_node;
30621 unsigned i;
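      /* Entry START of ix86_builtin_func_args is the return type; the
	 following entries are the argument types.  Build the argument
	 list back to front so it ends up in declaration order.  */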
30623 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
30624 for (i = after - 1; i > start; --i)
30626 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
30627 args = tree_cons (NULL, atype, args);
30630 type = build_function_type (rtype, args);
30632 else
30634 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
30635 enum ix86_builtin_func_type icode;
30637 icode = ix86_builtin_func_alias_base[index];
30638 type = ix86_get_builtin_func_type (icode);
30641 ix86_builtin_func_type_tab[(int) tcode] = type;
30642 return type;
30646 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
30647 bdesc_* arrays below should come first, then builtins for each bdesc_*
30648 array in ascending order, so that we can use direct array accesses. */
30649 enum ix86_builtins
30651 IX86_BUILTIN_MASKMOVQ,
30652 IX86_BUILTIN_LDMXCSR,
30653 IX86_BUILTIN_STMXCSR,
30654 IX86_BUILTIN_MASKMOVDQU,
30655 IX86_BUILTIN_PSLLDQ128,
30656 IX86_BUILTIN_CLFLUSH,
30657 IX86_BUILTIN_MONITOR,
30658 IX86_BUILTIN_MWAIT,
30659 IX86_BUILTIN_CLZERO,
30660 IX86_BUILTIN_VEC_INIT_V2SI,
30661 IX86_BUILTIN_VEC_INIT_V4HI,
30662 IX86_BUILTIN_VEC_INIT_V8QI,
30663 IX86_BUILTIN_VEC_EXT_V2DF,
30664 IX86_BUILTIN_VEC_EXT_V2DI,
30665 IX86_BUILTIN_VEC_EXT_V4SF,
30666 IX86_BUILTIN_VEC_EXT_V4SI,
30667 IX86_BUILTIN_VEC_EXT_V8HI,
30668 IX86_BUILTIN_VEC_EXT_V2SI,
30669 IX86_BUILTIN_VEC_EXT_V4HI,
30670 IX86_BUILTIN_VEC_EXT_V16QI,
30671 IX86_BUILTIN_VEC_SET_V2DI,
30672 IX86_BUILTIN_VEC_SET_V4SF,
30673 IX86_BUILTIN_VEC_SET_V4SI,
30674 IX86_BUILTIN_VEC_SET_V8HI,
30675 IX86_BUILTIN_VEC_SET_V4HI,
30676 IX86_BUILTIN_VEC_SET_V16QI,
30677 IX86_BUILTIN_GATHERSIV2DF,
30678 IX86_BUILTIN_GATHERSIV4DF,
30679 IX86_BUILTIN_GATHERDIV2DF,
30680 IX86_BUILTIN_GATHERDIV4DF,
30681 IX86_BUILTIN_GATHERSIV4SF,
30682 IX86_BUILTIN_GATHERSIV8SF,
30683 IX86_BUILTIN_GATHERDIV4SF,
30684 IX86_BUILTIN_GATHERDIV8SF,
30685 IX86_BUILTIN_GATHERSIV2DI,
30686 IX86_BUILTIN_GATHERSIV4DI,
30687 IX86_BUILTIN_GATHERDIV2DI,
30688 IX86_BUILTIN_GATHERDIV4DI,
30689 IX86_BUILTIN_GATHERSIV4SI,
30690 IX86_BUILTIN_GATHERSIV8SI,
30691 IX86_BUILTIN_GATHERDIV4SI,
30692 IX86_BUILTIN_GATHERDIV8SI,
30693 IX86_BUILTIN_VFMSUBSD3_MASK3,
30694 IX86_BUILTIN_VFMSUBSS3_MASK3,
30695 IX86_BUILTIN_GATHER3SIV8SF,
30696 IX86_BUILTIN_GATHER3SIV4SF,
30697 IX86_BUILTIN_GATHER3SIV4DF,
30698 IX86_BUILTIN_GATHER3SIV2DF,
30699 IX86_BUILTIN_GATHER3DIV8SF,
30700 IX86_BUILTIN_GATHER3DIV4SF,
30701 IX86_BUILTIN_GATHER3DIV4DF,
30702 IX86_BUILTIN_GATHER3DIV2DF,
30703 IX86_BUILTIN_GATHER3SIV8SI,
30704 IX86_BUILTIN_GATHER3SIV4SI,
30705 IX86_BUILTIN_GATHER3SIV4DI,
30706 IX86_BUILTIN_GATHER3SIV2DI,
30707 IX86_BUILTIN_GATHER3DIV8SI,
30708 IX86_BUILTIN_GATHER3DIV4SI,
30709 IX86_BUILTIN_GATHER3DIV4DI,
30710 IX86_BUILTIN_GATHER3DIV2DI,
30711 IX86_BUILTIN_SCATTERSIV8SF,
30712 IX86_BUILTIN_SCATTERSIV4SF,
30713 IX86_BUILTIN_SCATTERSIV4DF,
30714 IX86_BUILTIN_SCATTERSIV2DF,
30715 IX86_BUILTIN_SCATTERDIV8SF,
30716 IX86_BUILTIN_SCATTERDIV4SF,
30717 IX86_BUILTIN_SCATTERDIV4DF,
30718 IX86_BUILTIN_SCATTERDIV2DF,
30719 IX86_BUILTIN_SCATTERSIV8SI,
30720 IX86_BUILTIN_SCATTERSIV4SI,
30721 IX86_BUILTIN_SCATTERSIV4DI,
30722 IX86_BUILTIN_SCATTERSIV2DI,
30723 IX86_BUILTIN_SCATTERDIV8SI,
30724 IX86_BUILTIN_SCATTERDIV4SI,
30725 IX86_BUILTIN_SCATTERDIV4DI,
30726 IX86_BUILTIN_SCATTERDIV2DI,
30727 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30728 where all operands are 32-byte or 64-byte wide respectively. */
30729 IX86_BUILTIN_GATHERALTSIV4DF,
30730 IX86_BUILTIN_GATHERALTDIV8SF,
30731 IX86_BUILTIN_GATHERALTSIV4DI,
30732 IX86_BUILTIN_GATHERALTDIV8SI,
30733 IX86_BUILTIN_GATHER3ALTDIV16SF,
30734 IX86_BUILTIN_GATHER3ALTDIV16SI,
30735 IX86_BUILTIN_GATHER3ALTSIV4DF,
30736 IX86_BUILTIN_GATHER3ALTDIV8SF,
30737 IX86_BUILTIN_GATHER3ALTSIV4DI,
30738 IX86_BUILTIN_GATHER3ALTDIV8SI,
30739 IX86_BUILTIN_GATHER3ALTSIV8DF,
30740 IX86_BUILTIN_GATHER3ALTSIV8DI,
30741 IX86_BUILTIN_GATHER3DIV16SF,
30742 IX86_BUILTIN_GATHER3DIV16SI,
30743 IX86_BUILTIN_GATHER3DIV8DF,
30744 IX86_BUILTIN_GATHER3DIV8DI,
30745 IX86_BUILTIN_GATHER3SIV16SF,
30746 IX86_BUILTIN_GATHER3SIV16SI,
30747 IX86_BUILTIN_GATHER3SIV8DF,
30748 IX86_BUILTIN_GATHER3SIV8DI,
30749 IX86_BUILTIN_SCATTERALTSIV8DF,
30750 IX86_BUILTIN_SCATTERALTDIV16SF,
30751 IX86_BUILTIN_SCATTERALTSIV8DI,
30752 IX86_BUILTIN_SCATTERALTDIV16SI,
30753 IX86_BUILTIN_SCATTERDIV16SF,
30754 IX86_BUILTIN_SCATTERDIV16SI,
30755 IX86_BUILTIN_SCATTERDIV8DF,
30756 IX86_BUILTIN_SCATTERDIV8DI,
30757 IX86_BUILTIN_SCATTERSIV16SF,
30758 IX86_BUILTIN_SCATTERSIV16SI,
30759 IX86_BUILTIN_SCATTERSIV8DF,
30760 IX86_BUILTIN_SCATTERSIV8DI,
30761 IX86_BUILTIN_GATHERPFQPD,
30762 IX86_BUILTIN_GATHERPFDPS,
30763 IX86_BUILTIN_GATHERPFDPD,
30764 IX86_BUILTIN_GATHERPFQPS,
30765 IX86_BUILTIN_SCATTERPFDPD,
30766 IX86_BUILTIN_SCATTERPFDPS,
30767 IX86_BUILTIN_SCATTERPFQPD,
30768 IX86_BUILTIN_SCATTERPFQPS,
30769 IX86_BUILTIN_CLWB,
30770 IX86_BUILTIN_CLFLUSHOPT,
30771 IX86_BUILTIN_INFQ,
30772 IX86_BUILTIN_HUGE_VALQ,
30773 IX86_BUILTIN_NANQ,
30774 IX86_BUILTIN_NANSQ,
30775 IX86_BUILTIN_XABORT,
30776 IX86_BUILTIN_ADDCARRYX32,
30777 IX86_BUILTIN_ADDCARRYX64,
30778 IX86_BUILTIN_SBB32,
30779 IX86_BUILTIN_SBB64,
30780 IX86_BUILTIN_RDRAND16_STEP,
30781 IX86_BUILTIN_RDRAND32_STEP,
30782 IX86_BUILTIN_RDRAND64_STEP,
30783 IX86_BUILTIN_RDSEED16_STEP,
30784 IX86_BUILTIN_RDSEED32_STEP,
30785 IX86_BUILTIN_RDSEED64_STEP,
30786 IX86_BUILTIN_MONITORX,
30787 IX86_BUILTIN_MWAITX,
30788 IX86_BUILTIN_CFSTRING,
30789 IX86_BUILTIN_CPU_INIT,
30790 IX86_BUILTIN_CPU_IS,
30791 IX86_BUILTIN_CPU_SUPPORTS,
30792 IX86_BUILTIN_READ_FLAGS,
30793 IX86_BUILTIN_WRITE_FLAGS,
30795 /* All the remaining builtins are tracked in bdesc_* arrays in
30796 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
30797 this point. */
30798 #define BDESC(mask, icode, name, code, comparison, flag) \
30799 code,
30800 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
30801 code, \
30802 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
30803 #define BDESC_END(kind, next_kind)
30805 #include "i386-builtin.def"
30807 #undef BDESC
30808 #undef BDESC_FIRST
30809 #undef BDESC_END
30811 IX86_BUILTIN_MAX,
30813 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
30815 /* Now just the aliases for bdesc_* start/end. */
30816 #define BDESC(mask, icode, name, code, comparison, flag)
30817 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
30818 #define BDESC_END(kind, next_kind) \
30819 IX86_BUILTIN__BDESC_##kind##_LAST \
30820 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
30822 #include "i386-builtin.def"
30824 #undef BDESC
30825 #undef BDESC_FIRST
30826 #undef BDESC_END
30828 /* Just to make sure there is no comma after the last enumerator. */
30829 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
30832 /* Table for the ix86 builtin decls. */
30833 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30835 /* Table of all of the builtin functions that are possible with different ISAs
30836 but are waiting to be built until a function is declared to use that
30837 ISA. */
30838 struct builtin_isa {
30839 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30840 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
30841 const char *name; /* function name */
30842 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30843 unsigned char const_p:1; /* true if the declaration is constant */
30844 unsigned char pure_p:1; /* true if the declaration has pure attribute */
30845 bool leaf_p; /* true if the declaration has leaf attribute */
30846 bool nothrow_p; /* true if the declaration has nothrow attribute */
30847 bool set_and_not_built_p;
30850 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30852 /* Bits that can still enable any inclusion of a builtin. */
30853 static HOST_WIDE_INT deferred_isa_values = 0;
30854 static HOST_WIDE_INT deferred_isa_values2 = 0;
30856 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30857 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30858 function decl in the ix86_builtins array. Returns the function decl or
30859 NULL_TREE, if the builtin was not added.
30861 If the front end has a special hook for builtin functions, delay adding
30862 builtin functions that aren't in the current ISA until the ISA is changed
30863    with function specific optimization.  Doing so can save about 300K for the
30864 default compiler. When the builtin is expanded, check at that time whether
30865 it is valid.
30867    If the front end doesn't have a special hook, record all builtins, even
30868    those whose instruction set is not in the current ISA, in case the user uses
30869    function specific options for a different ISA, so that we don't get scope
30870    errors if a builtin is added in the middle of a function scope.  */
30872 static inline tree
30873 def_builtin (HOST_WIDE_INT mask, const char *name,
30874 enum ix86_builtin_func_type tcode,
30875 enum ix86_builtins code)
30877 tree decl = NULL_TREE;
30879 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30881 ix86_builtins_isa[(int) code].isa = mask;
30883 mask &= ~OPTION_MASK_ISA_64BIT;
30885       /* Filter out the masks most often ORed together with others.  */
30886 if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30887 && mask != OPTION_MASK_ISA_AVX512VL)
30888 mask &= ~OPTION_MASK_ISA_AVX512VL;
30889 if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW)
30890 && mask != OPTION_MASK_ISA_AVX512BW)
30891 mask &= ~OPTION_MASK_ISA_AVX512BW;
30893 if (mask == 0
30894 || (mask & ix86_isa_flags) != 0
30895 || (lang_hooks.builtin_function
30896 == lang_hooks.builtin_function_ext_scope))
30898 tree type = ix86_get_builtin_func_type (tcode);
30899 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30900 NULL, NULL_TREE);
30901 ix86_builtins[(int) code] = decl;
30902 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30904 else
30906       /* Only a MASK whose builtin has set_and_not_built_p == true can
30907 	 potentially enable inclusion of that builtin later.  */
30908 deferred_isa_values |= mask;
30909 ix86_builtins[(int) code] = NULL_TREE;
30910 ix86_builtins_isa[(int) code].tcode = tcode;
30911 ix86_builtins_isa[(int) code].name = name;
30912 ix86_builtins_isa[(int) code].leaf_p = false;
30913 ix86_builtins_isa[(int) code].nothrow_p = false;
30914 ix86_builtins_isa[(int) code].const_p = false;
30915 ix86_builtins_isa[(int) code].pure_p = false;
30916 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30920 return decl;
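/* A minimal standalone sketch, offered purely as illustration and not part
   of i386.c, of the defer-or-define decision def_builtin makes above.  All
   names below are invented stand-ins.  */
#if 0 /* illustrative only */
#include <stdbool.h>
#include <stdint.h>

static uint64_t current_isa;	/* stands in for ix86_isa_flags */
static uint64_t deferred_masks;	/* stands in for deferred_isa_values */

/* Define the builtin now if any of its ISA bits are enabled (or it needs
   none); otherwise remember its mask so it can be materialized later, once
   a target attribute or pragma switches the ISA.  */
static bool
maybe_define (uint64_t mask)
{
  if (mask == 0 || (mask & current_isa) != 0)
    return true;		/* add_builtin_function would run here */
  deferred_masks |= mask;	/* deferred; see ix86_add_new_builtins */
  return false;
}
#endif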
30923 /* Like def_builtin, but also marks the function decl "const". */
30925 static inline tree
30926 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30927 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30929 tree decl = def_builtin (mask, name, tcode, code);
30930 if (decl)
30931 TREE_READONLY (decl) = 1;
30932 else
30933 ix86_builtins_isa[(int) code].const_p = true;
30935 return decl;
30938 /* Like def_builtin, but also marks the function decl "pure". */
30940 static inline tree
30941 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
30942 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30944 tree decl = def_builtin (mask, name, tcode, code);
30945 if (decl)
30946 DECL_PURE_P (decl) = 1;
30947 else
30948 ix86_builtins_isa[(int) code].pure_p = true;
30950 return decl;
30953 /* Like def_builtin, but for additional isa2 flags. */
30955 static inline tree
30956 def_builtin2 (HOST_WIDE_INT mask, const char *name,
30957 enum ix86_builtin_func_type tcode,
30958 enum ix86_builtins code)
30960 tree decl = NULL_TREE;
30962 ix86_builtins_isa[(int) code].isa2 = mask;
30964 if (mask == 0
30965 || (mask & ix86_isa_flags2) != 0
30966 || (lang_hooks.builtin_function
30967 == lang_hooks.builtin_function_ext_scope))
30970 tree type = ix86_get_builtin_func_type (tcode);
30971 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30972 NULL, NULL_TREE);
30973 ix86_builtins[(int) code] = decl;
30974 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30976 else
30978       /* Only a MASK whose builtin has set_and_not_built_p == true can
30979 	 potentially enable inclusion of that builtin later.  */
30980 deferred_isa_values2 |= mask;
30981 ix86_builtins[(int) code] = NULL_TREE;
30982 ix86_builtins_isa[(int) code].tcode = tcode;
30983 ix86_builtins_isa[(int) code].name = name;
30984 ix86_builtins_isa[(int) code].leaf_p = false;
30985 ix86_builtins_isa[(int) code].nothrow_p = false;
30986 ix86_builtins_isa[(int) code].const_p = false;
30987 ix86_builtins_isa[(int) code].pure_p = false;
30988 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30991 return decl;
30994 /* Like def_builtin, but also marks the function decl "const". */
30996 static inline tree
30997 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
30998 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
31000 tree decl = def_builtin2 (mask, name, tcode, code);
31001 if (decl)
31002 TREE_READONLY (decl) = 1;
31003 else
31004 ix86_builtins_isa[(int) code].const_p = true;
31006 return decl;
31009 /* Like def_builtin, but also marks the function decl "pure". */
31011 static inline tree
31012 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
31013 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
31015 tree decl = def_builtin2 (mask, name, tcode, code);
31016 if (decl)
31017 DECL_PURE_P (decl) = 1;
31018 else
31019 ix86_builtins_isa[(int) code].pure_p = true;
31021 return decl;
31024 /* Add any new builtin functions for a given ISA that may not have been
31025 declared. This saves a bit of space compared to adding all of the
31026 declarations to the tree, even if we didn't use them. */
31028 static void
31029 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
31031 isa &= ~OPTION_MASK_ISA_64BIT;
31033 if ((isa & deferred_isa_values) == 0
31034 && (isa2 & deferred_isa_values2) == 0)
31035 return;
31037 /* Bits in ISA value can be removed from potential isa values. */
31038 deferred_isa_values &= ~isa;
31039 deferred_isa_values2 &= ~isa2;
31041 int i;
31042 tree saved_current_target_pragma = current_target_pragma;
31043 current_target_pragma = NULL_TREE;
31045 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
31047 if (((ix86_builtins_isa[i].isa & isa) != 0
31048 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
31049 && ix86_builtins_isa[i].set_and_not_built_p)
31051 tree decl, type;
31053 /* Don't define the builtin again. */
31054 ix86_builtins_isa[i].set_and_not_built_p = false;
31056 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
31057 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
31058 type, i, BUILT_IN_MD, NULL,
31059 NULL_TREE);
31061 ix86_builtins[i] = decl;
31062 if (ix86_builtins_isa[i].const_p)
31063 TREE_READONLY (decl) = 1;
31064 if (ix86_builtins_isa[i].pure_p)
31065 DECL_PURE_P (decl) = 1;
31066 if (ix86_builtins_isa[i].leaf_p)
31067 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
31068 NULL_TREE);
31069 if (ix86_builtins_isa[i].nothrow_p)
31070 TREE_NOTHROW (decl) = 1;
31074 current_target_pragma = saved_current_target_pragma;
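/* A usage sketch, illustrative only and not part of i386.c: compiling the
   function below without -mavx2 can still work because the AVX2 builtin the
   intrinsic expands to is deferred by def_builtin and materialized later,
   through ix86_add_new_builtins, once the target attribute enables AVX2.  */
#if 0 /* illustrative only */
#include <immintrin.h>

__attribute__ ((target ("avx2")))
__m256i
add_epi32_256 (__m256i a, __m256i b)
{
  return _mm256_add_epi32 (a, b);	/* expands to an AVX2 builtin */
}
#endif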
31077 /* Bits for builtin_description.flag. */
31079 /* Set when we don't support the comparison natively, and should
31080 swap_comparison in order to support it. */
31081 #define BUILTIN_DESC_SWAP_OPERANDS 1
31083 struct builtin_description
31085 const HOST_WIDE_INT mask;
31086 const enum insn_code icode;
31087 const char *const name;
31088 const enum ix86_builtins code;
31089 const enum rtx_code comparison;
31090 const int flag;
31093 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
31094 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
31095 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
31096 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
31097 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
31098 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
31099 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
31100 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
31101 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
31102 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
31103 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
31104 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
31105 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
31106 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
31107 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
31108 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
31109 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
31110 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
31111 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
31112 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
31113 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
31114 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
31115 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
31116 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
31117 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
31118 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
31119 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
31120 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
31121 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
31122 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
31123 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
31124 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
31125 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
31126 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
31127 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
31128 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
31129 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
31130 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
31131 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
31132 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
31133 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
31134 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
31135 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
31136 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
31137 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
31138 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
31139 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
31140 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
31141 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
31142 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
31143 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
31144 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
31146 #define BDESC(mask, icode, name, code, comparison, flag) \
31147 { mask, icode, name, code, comparison, flag },
31148 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
31149 static const struct builtin_description bdesc_##kind[] = \
31151 BDESC (mask, icode, name, code, comparison, flag)
31152 #define BDESC_END(kind, next_kind) \
31155 #include "i386-builtin.def"
31157 #undef BDESC
31158 #undef BDESC_FIRST
31159 #undef BDESC_END
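/* Roughly what the preprocessor produces from the BDESC macros above for a
   hypothetical i386-builtin.def entry (the entry itself is invented):

     BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_foo",
	    IX86_BUILTIN_FOO, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI)

   becomes one initializer in the table opened by BDESC_FIRST:

     static const struct builtin_description bdesc_args[] =
     {
       { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_foo",
	 IX86_BUILTIN_FOO, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
     };

   while the earlier pass over i386-builtin.def generated the matching
   IX86_BUILTIN_* enumerators, keeping table index and enumerator in sync.  */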
31161 /* TM vector builtins. */
31163 /* Reuse the existing x86-specific `struct builtin_description' because
31164    we're lazy.  Add casts to make them fit.  */
31165 static const struct builtin_description bdesc_tm[] =
31167 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31168 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31169 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31170 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31171 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31172 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31173 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31175 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31176 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31177 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31178 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31179 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31180 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31181 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31183 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31184 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31185 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31186 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31187 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31188 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31189 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31191 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
31192 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
31193 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
31196 /* Initialize the transactional memory vector load/store builtins. */
31198 static void
31199 ix86_init_tm_builtins (void)
31201 enum ix86_builtin_func_type ftype;
31202 const struct builtin_description *d;
31203 size_t i;
31204 tree decl;
31205 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
31206 tree attrs_log, attrs_type_log;
31208 if (!flag_tm)
31209 return;
31211 /* If there are no builtins defined, we must be compiling in a
31212 language without trans-mem support. */
31213 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
31214 return;
31216 /* Use whatever attributes a normal TM load has. */
31217 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
31218 attrs_load = DECL_ATTRIBUTES (decl);
31219 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31220 /* Use whatever attributes a normal TM store has. */
31221 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
31222 attrs_store = DECL_ATTRIBUTES (decl);
31223 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31224 /* Use whatever attributes a normal TM log has. */
31225 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
31226 attrs_log = DECL_ATTRIBUTES (decl);
31227 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31229 for (i = 0, d = bdesc_tm;
31230 i < ARRAY_SIZE (bdesc_tm);
31231 i++, d++)
31233 if ((d->mask & ix86_isa_flags) != 0
31234 || (lang_hooks.builtin_function
31235 == lang_hooks.builtin_function_ext_scope))
31237 tree type, attrs, attrs_type;
31238 enum built_in_function code = (enum built_in_function) d->code;
31240 ftype = (enum ix86_builtin_func_type) d->flag;
31241 type = ix86_get_builtin_func_type (ftype);
31243 if (BUILTIN_TM_LOAD_P (code))
31245 attrs = attrs_load;
31246 attrs_type = attrs_type_load;
31248 else if (BUILTIN_TM_STORE_P (code))
31250 attrs = attrs_store;
31251 attrs_type = attrs_type_store;
31253 else
31255 attrs = attrs_log;
31256 attrs_type = attrs_type_log;
31258 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
31259 /* The builtin without the prefix for
31260 calling it directly. */
31261 d->name + strlen ("__builtin_"),
31262 attrs);
31263 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
31264 set the TYPE_ATTRIBUTES. */
31265 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
31267 set_builtin_decl (code, decl, false);
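/* A usage sketch, illustrative only and not part of i386.c: with -fgnu-tm
   and -msse2, a vector store performed inside a transaction may be lowered
   to one of the _ITM_* vector entry points registered above (here, likely
   _ITM_WM128).  */
#if 0 /* illustrative only */
#include <emmintrin.h>

static __m128d shared;

void
store_in_transaction (__m128d v)
{
  __transaction_atomic { shared = v; }
}
#endif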
31272 /* Macros for verification of enum ix86_builtins order. */
31273 #define BDESC_VERIFY(x, y, z) \
31274 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
31275 #define BDESC_VERIFYS(x, y, z) \
31276 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
31278 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
31279 IX86_BUILTIN__BDESC_COMI_LAST, 1);
31280 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
31281 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
31282 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
31283 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
31284 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
31285 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
31286 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
31287 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
31288 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
31289 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
31290 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
31291 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
31292 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
31293 IX86_BUILTIN__BDESC_MPX_LAST, 1);
31294 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
31295 IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
31296 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
31297 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
31298 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
31299 IX86_BUILTIN__BDESC_CET_LAST, 1);
31300 BDESC_VERIFYS (IX86_BUILTIN_MAX,
31301 IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
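/* The BDESC_VERIFYS checks above only assert that the sub-ranges generated
   from i386-builtin.def are contiguous.  A standalone C11 analogue of the
   same idea, with invented names:

     enum seq { A_FIRST, A_1, A_LAST, B_FIRST, B_1, B_LAST, SEQ_MAX };
     _Static_assert (B_FIRST == A_LAST + 1, "sub-ranges must be contiguous");
     _Static_assert (SEQ_MAX == B_LAST + 1, "nothing may follow the last range");
*/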
31303 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
31304    in the current target ISA, so that the user can compile particular modules
31305    with target specific options that differ from the command line
31306    options.  */
31307 static void
31308 ix86_init_mmx_sse_builtins (void)
31310 const struct builtin_description * d;
31311 enum ix86_builtin_func_type ftype;
31312 size_t i;
31314 /* Add all special builtins with variable number of operands. */
31315 for (i = 0, d = bdesc_special_args;
31316 i < ARRAY_SIZE (bdesc_special_args);
31317 i++, d++)
31319 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
31320 if (d->name == 0)
31321 continue;
31323 ftype = (enum ix86_builtin_func_type) d->flag;
31324 def_builtin (d->mask, d->name, ftype, d->code);
31326 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
31327 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
31328 ARRAY_SIZE (bdesc_special_args) - 1);
31330 /* Add all builtins with variable number of operands. */
31331 for (i = 0, d = bdesc_args;
31332 i < ARRAY_SIZE (bdesc_args);
31333 i++, d++)
31335 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
31336 if (d->name == 0)
31337 continue;
31339 ftype = (enum ix86_builtin_func_type) d->flag;
31340 def_builtin_const (d->mask, d->name, ftype, d->code);
31342 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
31343 IX86_BUILTIN__BDESC_ARGS_FIRST,
31344 ARRAY_SIZE (bdesc_args) - 1);
31346   /* Add all builtins with variable number of operands that use the second ISA flag word (isa2).  */
31347 for (i = 0, d = bdesc_args2;
31348 i < ARRAY_SIZE (bdesc_args2);
31349 i++, d++)
31351 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS2_FIRST, i);
31352 if (d->name == 0)
31353 continue;
31355 ftype = (enum ix86_builtin_func_type) d->flag;
31356 def_builtin_const2 (d->mask, d->name, ftype, d->code);
31358 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_LAST,
31359 IX86_BUILTIN__BDESC_ARGS2_FIRST,
31360 ARRAY_SIZE (bdesc_args2) - 1);
31362 /* Add all builtins with rounding. */
31363 for (i = 0, d = bdesc_round_args;
31364 i < ARRAY_SIZE (bdesc_round_args);
31365 i++, d++)
31367 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
31368 if (d->name == 0)
31369 continue;
31371 ftype = (enum ix86_builtin_func_type) d->flag;
31372 def_builtin_const (d->mask, d->name, ftype, d->code);
31374 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
31375 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
31376 ARRAY_SIZE (bdesc_round_args) - 1);
31378 /* pcmpestr[im] insns. */
31379 for (i = 0, d = bdesc_pcmpestr;
31380 i < ARRAY_SIZE (bdesc_pcmpestr);
31381 i++, d++)
31383 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
31384 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31385 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31386 else
31387 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31388 def_builtin_const (d->mask, d->name, ftype, d->code);
31390 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
31391 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
31392 ARRAY_SIZE (bdesc_pcmpestr) - 1);
31394 /* pcmpistr[im] insns. */
31395 for (i = 0, d = bdesc_pcmpistr;
31396 i < ARRAY_SIZE (bdesc_pcmpistr);
31397 i++, d++)
31399 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
31400 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31401 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31402 else
31403 ftype = INT_FTYPE_V16QI_V16QI_INT;
31404 def_builtin_const (d->mask, d->name, ftype, d->code);
31406 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
31407 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
31408 ARRAY_SIZE (bdesc_pcmpistr) - 1);
31410 /* comi/ucomi insns. */
31411 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31413 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
31414 if (d->mask == OPTION_MASK_ISA_SSE2)
31415 ftype = INT_FTYPE_V2DF_V2DF;
31416 else
31417 ftype = INT_FTYPE_V4SF_V4SF;
31418 def_builtin_const (d->mask, d->name, ftype, d->code);
31420 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
31421 IX86_BUILTIN__BDESC_COMI_FIRST,
31422 ARRAY_SIZE (bdesc_comi) - 1);
31424 /* SSE */
31425 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31426 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31427 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31428 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31430 /* SSE or 3DNow!A */
31431 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31432 /* As it uses V4HImode, we have to require -mmmx too. */
31433 | OPTION_MASK_ISA_MMX,
31434 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31435 IX86_BUILTIN_MASKMOVQ);
31437 /* SSE2 */
31438 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31439 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31441 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31442 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31443 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31444 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31446 /* SSE3. */
31447 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31448 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31449 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31450 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31452 /* AES */
31453 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31454 "__builtin_ia32_aesenc128",
31455 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31456 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31457 "__builtin_ia32_aesenclast128",
31458 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31459 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31460 "__builtin_ia32_aesdec128",
31461 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31462 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31463 "__builtin_ia32_aesdeclast128",
31464 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31465 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31466 "__builtin_ia32_aesimc128",
31467 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31468 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31469 "__builtin_ia32_aeskeygenassist128",
31470 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31472 /* PCLMUL */
31473 def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2,
31474 "__builtin_ia32_pclmulqdq128",
31475 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31477 /* RDRND */
31478 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31479 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31480 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31481 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31482 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31483 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31484 IX86_BUILTIN_RDRAND64_STEP);
31486 /* AVX2 */
31487 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31488 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31489 IX86_BUILTIN_GATHERSIV2DF);
31491 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31492 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31493 IX86_BUILTIN_GATHERSIV4DF);
31495 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31496 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31497 IX86_BUILTIN_GATHERDIV2DF);
31499 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31500 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31501 IX86_BUILTIN_GATHERDIV4DF);
31503 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31504 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31505 IX86_BUILTIN_GATHERSIV4SF);
31507 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31508 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31509 IX86_BUILTIN_GATHERSIV8SF);
31511 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31512 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31513 IX86_BUILTIN_GATHERDIV4SF);
31515 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31516 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31517 IX86_BUILTIN_GATHERDIV8SF);
31519 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31520 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31521 IX86_BUILTIN_GATHERSIV2DI);
31523 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31524 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31525 IX86_BUILTIN_GATHERSIV4DI);
31527 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31528 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31529 IX86_BUILTIN_GATHERDIV2DI);
31531 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31532 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31533 IX86_BUILTIN_GATHERDIV4DI);
31535 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31536 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31537 IX86_BUILTIN_GATHERSIV4SI);
31539 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31540 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31541 IX86_BUILTIN_GATHERSIV8SI);
31543 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31544 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31545 IX86_BUILTIN_GATHERDIV4SI);
31547 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31548 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31549 IX86_BUILTIN_GATHERDIV8SI);
31551 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31552 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31553 IX86_BUILTIN_GATHERALTSIV4DF);
31555 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31556 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31557 IX86_BUILTIN_GATHERALTDIV8SF);
31559 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31560 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31561 IX86_BUILTIN_GATHERALTSIV4DI);
31563 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31564 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31565 IX86_BUILTIN_GATHERALTDIV8SI);
31567 /* AVX512F */
31568 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31569 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
31570 IX86_BUILTIN_GATHER3SIV16SF);
31572 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31573 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
31574 IX86_BUILTIN_GATHER3SIV8DF);
31576 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31577 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
31578 IX86_BUILTIN_GATHER3DIV16SF);
31580 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31581 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
31582 IX86_BUILTIN_GATHER3DIV8DF);
31584 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31585 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
31586 IX86_BUILTIN_GATHER3SIV16SI);
31588 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31589 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
31590 IX86_BUILTIN_GATHER3SIV8DI);
31592 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31593 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
31594 IX86_BUILTIN_GATHER3DIV16SI);
31596 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31597 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
31598 IX86_BUILTIN_GATHER3DIV8DI);
31600 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31601 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31602 IX86_BUILTIN_GATHER3ALTSIV8DF);
31604 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31605 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31606 IX86_BUILTIN_GATHER3ALTDIV16SF);
31608 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31609 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31610 IX86_BUILTIN_GATHER3ALTSIV8DI);
31612 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31613 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31614 IX86_BUILTIN_GATHER3ALTDIV16SI);
31616 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31617 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
31618 IX86_BUILTIN_SCATTERSIV16SF);
31620 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31621 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
31622 IX86_BUILTIN_SCATTERSIV8DF);
31624 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31625 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
31626 IX86_BUILTIN_SCATTERDIV16SF);
31628 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31629 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
31630 IX86_BUILTIN_SCATTERDIV8DF);
31632 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31633 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
31634 IX86_BUILTIN_SCATTERSIV16SI);
31636 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31637 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
31638 IX86_BUILTIN_SCATTERSIV8DI);
31640 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31641 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
31642 IX86_BUILTIN_SCATTERDIV16SI);
31644 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31645 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
31646 IX86_BUILTIN_SCATTERDIV8DI);
31648 /* AVX512VL */
31649 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
31650 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
31651 IX86_BUILTIN_GATHER3SIV2DF);
31653 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
31654 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
31655 IX86_BUILTIN_GATHER3SIV4DF);
31657 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
31658 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
31659 IX86_BUILTIN_GATHER3DIV2DF);
31661 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
31662 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
31663 IX86_BUILTIN_GATHER3DIV4DF);
31665 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
31666 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
31667 IX86_BUILTIN_GATHER3SIV4SF);
31669 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
31670 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
31671 IX86_BUILTIN_GATHER3SIV8SF);
31673 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
31674 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
31675 IX86_BUILTIN_GATHER3DIV4SF);
31677 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
31678 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
31679 IX86_BUILTIN_GATHER3DIV8SF);
31681 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
31682 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
31683 IX86_BUILTIN_GATHER3SIV2DI);
31685 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
31686 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
31687 IX86_BUILTIN_GATHER3SIV4DI);
31689 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
31690 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
31691 IX86_BUILTIN_GATHER3DIV2DI);
31693 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
31694 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
31695 IX86_BUILTIN_GATHER3DIV4DI);
31697 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
31698 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
31699 IX86_BUILTIN_GATHER3SIV4SI);
31701 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
31702 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
31703 IX86_BUILTIN_GATHER3SIV8SI);
31705 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
31706 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
31707 IX86_BUILTIN_GATHER3DIV4SI);
31709 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
31710 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
31711 IX86_BUILTIN_GATHER3DIV8SI);
31713 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
31714 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
31715 IX86_BUILTIN_GATHER3ALTSIV4DF);
31717 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
31718 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
31719 IX86_BUILTIN_GATHER3ALTDIV8SF);
31721 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
31722 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
31723 IX86_BUILTIN_GATHER3ALTSIV4DI);
31725 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
31726 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
31727 IX86_BUILTIN_GATHER3ALTDIV8SI);
31729 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
31730 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
31731 IX86_BUILTIN_SCATTERSIV8SF);
31733 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
31734 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
31735 IX86_BUILTIN_SCATTERSIV4SF);
31737 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
31738 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
31739 IX86_BUILTIN_SCATTERSIV4DF);
31741 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
31742 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
31743 IX86_BUILTIN_SCATTERSIV2DF);
31745 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
31746 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
31747 IX86_BUILTIN_SCATTERDIV8SF);
31749 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
31750 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
31751 IX86_BUILTIN_SCATTERDIV4SF);
31753 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
31754 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
31755 IX86_BUILTIN_SCATTERDIV4DF);
31757 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
31758 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
31759 IX86_BUILTIN_SCATTERDIV2DF);
31761 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
31762 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
31763 IX86_BUILTIN_SCATTERSIV8SI);
31765 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
31766 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
31767 IX86_BUILTIN_SCATTERSIV4SI);
31769 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
31770 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
31771 IX86_BUILTIN_SCATTERSIV4DI);
31773 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
31774 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
31775 IX86_BUILTIN_SCATTERSIV2DI);
31777 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
31778 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
31779 IX86_BUILTIN_SCATTERDIV8SI);
31781 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
31782 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
31783 IX86_BUILTIN_SCATTERDIV4SI);
31785 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
31786 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
31787 IX86_BUILTIN_SCATTERDIV4DI);
31789 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
31790 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
31791 IX86_BUILTIN_SCATTERDIV2DI);
31792 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
31793 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
31794 IX86_BUILTIN_SCATTERALTSIV8DF);
31796 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
31797 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
31798 IX86_BUILTIN_SCATTERALTDIV16SF);
31800 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
31801 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
31802 IX86_BUILTIN_SCATTERALTSIV8DI);
31804 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
31805 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
31806 IX86_BUILTIN_SCATTERALTDIV16SI);
31808 /* AVX512PF */
31809 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31810 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31811 IX86_BUILTIN_GATHERPFDPD);
31812 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31813 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31814 IX86_BUILTIN_GATHERPFDPS);
31815 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31816 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31817 IX86_BUILTIN_GATHERPFQPD);
31818 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31819 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31820 IX86_BUILTIN_GATHERPFQPS);
31821 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31822 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31823 IX86_BUILTIN_SCATTERPFDPD);
31824 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31825 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31826 IX86_BUILTIN_SCATTERPFDPS);
31827 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31828 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31829 IX86_BUILTIN_SCATTERPFQPD);
31830 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31831 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31832 IX86_BUILTIN_SCATTERPFQPS);
31834 /* SHA */
31835 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31836 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31837 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31838 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31839 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31840 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31841 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31842 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31843 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31844 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31845 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31846 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31847 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31848 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31850 /* RTM. */
31851 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31852 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31854 /* MMX access to the vec_init patterns. */
31855 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31856 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31858 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31859 V4HI_FTYPE_HI_HI_HI_HI,
31860 IX86_BUILTIN_VEC_INIT_V4HI);
31862 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31863 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31864 IX86_BUILTIN_VEC_INIT_V8QI);
31866 /* Access to the vec_extract patterns. */
31867 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31868 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31869 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31870 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31871 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31872 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31873 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31874 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31875 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31876 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31878 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31879 /* As it uses V4HImode, we have to require -mmmx too. */
31880 | OPTION_MASK_ISA_MMX,
31881 "__builtin_ia32_vec_ext_v4hi",
31882 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31884 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31885 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31887 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31888 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31890 /* Access to the vec_set patterns. */
31891 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31892 "__builtin_ia32_vec_set_v2di",
31893 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31895 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31896 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31898 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31899 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31901 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31902 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31904 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31905 /* As it uses V4HImode, we have to require -mmmx too. */
31906 | OPTION_MASK_ISA_MMX,
31907 "__builtin_ia32_vec_set_v4hi",
31908 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31910 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31911 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31913 /* RDSEED */
31914 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31915 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31916 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31917 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31918 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31919 "__builtin_ia32_rdseed_di_step",
31920 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31922 /* ADCX */
31923 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31924 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31925 def_builtin (OPTION_MASK_ISA_64BIT,
31926 "__builtin_ia32_addcarryx_u64",
31927 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31928 IX86_BUILTIN_ADDCARRYX64);
31930 /* SBB */
31931 def_builtin (0, "__builtin_ia32_sbb_u32",
31932 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31933 def_builtin (OPTION_MASK_ISA_64BIT,
31934 "__builtin_ia32_sbb_u64",
31935 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31936 IX86_BUILTIN_SBB64);
31938 /* Read/write FLAGS. */
31939 def_builtin (0, "__builtin_ia32_readeflags_u32",
31940 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31941 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31942 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31943 def_builtin (0, "__builtin_ia32_writeeflags_u32",
31944 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31945 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31946 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31948 /* CLFLUSHOPT. */
31949 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31950 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31952 /* CLWB. */
31953 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
31954 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
31956 /* MONITORX and MWAITX. */
31957 def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
31958 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
31959 def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
31960 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
31962 /* CLZERO. */
31963 def_builtin2 (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
31964 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
31966   /* Add FMA4 multi-arg instructions.  */
31967 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31969 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
31970 if (d->name == 0)
31971 continue;
31973 ftype = (enum ix86_builtin_func_type) d->flag;
31974 def_builtin_const (d->mask, d->name, ftype, d->code);
31976 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
31977 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
31978 ARRAY_SIZE (bdesc_multi_arg) - 1);
31980   /* Add CET intrinsics.  */
31981 for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
31983 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
31984 if (d->name == 0)
31985 continue;
31987 ftype = (enum ix86_builtin_func_type) d->flag;
31988 def_builtin (d->mask, d->name, ftype, d->code);
31990 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
31991 IX86_BUILTIN__BDESC_CET_FIRST,
31992 ARRAY_SIZE (bdesc_cet) - 1);
31994 for (i = 0, d = bdesc_cet_rdssp;
31995 i < ARRAY_SIZE (bdesc_cet_rdssp);
31996 i++, d++)
31998 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
31999 if (d->name == 0)
32000 continue;
32002 ftype = (enum ix86_builtin_func_type) d->flag;
32003 def_builtin (d->mask, d->name, ftype, d->code);
32005 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
32006 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
32007 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
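/* A usage sketch, illustrative only and not part of i386.c: the RDRND step
   builtin defined above returns nonzero on success and stores the random
   value through its pointer argument (compile with -mrdrnd).  */
#if 0 /* illustrative only */
unsigned int
get_random (void)
{
  unsigned int r = 0;
  while (!__builtin_ia32_rdrand32_step (&r))
    ;	/* the hardware can transiently fail; simply retry */
  return r;
}
#endif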
32010 static void
32011 ix86_init_mpx_builtins ()
32013 const struct builtin_description * d;
32014 enum ix86_builtin_func_type ftype;
32015 tree decl;
32016 size_t i;
32018 for (i = 0, d = bdesc_mpx;
32019 i < ARRAY_SIZE (bdesc_mpx);
32020 i++, d++)
32022 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
32023 if (d->name == 0)
32024 continue;
32026 ftype = (enum ix86_builtin_func_type) d->flag;
32027 decl = def_builtin2 (d->mask, d->name, ftype, d->code);
32029       /* Without leaf and nothrow flags on MPX builtins,
32030 	 abnormal edges may follow their calls when setjmp
32031 	 is present in the function.  Since there may be many
32032 	 MPX builtin calls, this causes lots of useless
32033 	 edges and enormous PHI nodes.  To avoid this we mark
32034 	 MPX builtins as leaf and nothrow.  */
32035 if (decl)
32037 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32038 NULL_TREE);
32039 TREE_NOTHROW (decl) = 1;
32041 else
32043 ix86_builtins_isa[(int)d->code].leaf_p = true;
32044 ix86_builtins_isa[(int)d->code].nothrow_p = true;
32047 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
32048 IX86_BUILTIN__BDESC_MPX_FIRST,
32049 ARRAY_SIZE (bdesc_mpx) - 1);
32051 for (i = 0, d = bdesc_mpx_const;
32052 i < ARRAY_SIZE (bdesc_mpx_const);
32053 i++, d++)
32055 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
32056 if (d->name == 0)
32057 continue;
32059 ftype = (enum ix86_builtin_func_type) d->flag;
32060 decl = def_builtin_const2 (d->mask, d->name, ftype, d->code);
32062 if (decl)
32064 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32065 NULL_TREE);
32066 TREE_NOTHROW (decl) = 1;
32068 else
32070 ix86_builtins_isa[(int)d->code].leaf_p = true;
32071 ix86_builtins_isa[(int)d->code].nothrow_p = true;
32074 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
32075 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
32076 ARRAY_SIZE (bdesc_mpx_const) - 1);
32078 #undef BDESC_VERIFY
32079 #undef BDESC_VERIFYS
32081 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
32082 to return a pointer to VERSION_DECL if the outcome of the expression
32083 formed by PREDICATE_CHAIN is true. This function will be called during
32084 version dispatch to decide which function version to execute. It returns
32085 the basic block at the end, to which more conditions can be added. */
32087 static basic_block
32088 add_condition_to_bb (tree function_decl, tree version_decl,
32089 tree predicate_chain, basic_block new_bb)
32091 gimple *return_stmt;
32092 tree convert_expr, result_var;
32093 gimple *convert_stmt;
32094 gimple *call_cond_stmt;
32095 gimple *if_else_stmt;
32097 basic_block bb1, bb2, bb3;
32098 edge e12, e23;
32100 tree cond_var, and_expr_var = NULL_TREE;
32101 gimple_seq gseq;
32103 tree predicate_decl, predicate_arg;
32105 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
32107 gcc_assert (new_bb != NULL);
32108 gseq = bb_seq (new_bb);
32111 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
32112 build_fold_addr_expr (version_decl));
32113 result_var = create_tmp_var (ptr_type_node);
32114 convert_stmt = gimple_build_assign (result_var, convert_expr);
32115 return_stmt = gimple_build_return (result_var);
32117 if (predicate_chain == NULL_TREE)
32119 gimple_seq_add_stmt (&gseq, convert_stmt);
32120 gimple_seq_add_stmt (&gseq, return_stmt);
32121 set_bb_seq (new_bb, gseq);
32122 gimple_set_bb (convert_stmt, new_bb);
32123 gimple_set_bb (return_stmt, new_bb);
32124 pop_cfun ();
32125 return new_bb;
32128 while (predicate_chain != NULL)
32130 cond_var = create_tmp_var (integer_type_node);
32131 predicate_decl = TREE_PURPOSE (predicate_chain);
32132 predicate_arg = TREE_VALUE (predicate_chain);
32133 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
32134 gimple_call_set_lhs (call_cond_stmt, cond_var);
32136 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
32137 gimple_set_bb (call_cond_stmt, new_bb);
32138 gimple_seq_add_stmt (&gseq, call_cond_stmt);
32140 predicate_chain = TREE_CHAIN (predicate_chain);
32142 if (and_expr_var == NULL)
32143 and_expr_var = cond_var;
32144 else
32146 gimple *assign_stmt;
32147 	  /* Use MIN_EXPR to check whether any integer is zero:
32148 	     and_expr_var = min_expr <cond_var, and_expr_var>.  */
32149 assign_stmt = gimple_build_assign (and_expr_var,
32150 build2 (MIN_EXPR, integer_type_node,
32151 cond_var, and_expr_var));
32153 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
32154 gimple_set_bb (assign_stmt, new_bb);
32155 gimple_seq_add_stmt (&gseq, assign_stmt);
32159 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
32160 integer_zero_node,
32161 NULL_TREE, NULL_TREE);
32162 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
32163 gimple_set_bb (if_else_stmt, new_bb);
32164 gimple_seq_add_stmt (&gseq, if_else_stmt);
32166 gimple_seq_add_stmt (&gseq, convert_stmt);
32167 gimple_seq_add_stmt (&gseq, return_stmt);
32168 set_bb_seq (new_bb, gseq);
32170 bb1 = new_bb;
32171 e12 = split_block (bb1, if_else_stmt);
32172 bb2 = e12->dest;
32173 e12->flags &= ~EDGE_FALLTHRU;
32174 e12->flags |= EDGE_TRUE_VALUE;
32176 e23 = split_block (bb2, return_stmt);
32178 gimple_set_bb (convert_stmt, bb2);
32179 gimple_set_bb (return_stmt, bb2);
32181 bb3 = e23->dest;
32182 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
32184 remove_edge (e23);
32185 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
32187 pop_cfun ();
32189 return bb3;
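/* Roughly the C equivalent of the dispatcher body that add_condition_to_bb
   builds up one condition at a time; foo_avx2 and foo_default are invented
   version names, and further conditions are appended to the basic block the
   function returns:

     void *
     foo_resolver (void)
     {
       if (__builtin_cpu_is ("haswell") && __builtin_cpu_supports ("avx2"))
	 return (void *) &foo_avx2;
       return (void *) &foo_default;
     }
*/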
32192 /* This parses the attribute arguments to target in DECL and determines
32193 the right builtin to use to match the platform specification.
32194 It returns the priority value for this version decl. If PREDICATE_LIST
32195 is not NULL, it stores the list of cpu features that need to be checked
32196 before dispatching this function. */
32198 static unsigned int
32199 get_builtin_code_for_version (tree decl, tree *predicate_list)
32201 tree attrs;
32202 struct cl_target_option cur_target;
32203 tree target_node;
32204 struct cl_target_option *new_target;
32205 const char *arg_str = NULL;
32206 const char *attrs_str = NULL;
32207 char *tok_str = NULL;
32208 char *token;
32210 /* Priority of i386 features, greater value is higher priority. This is
32211 used to decide the order in which function dispatch must happen. For
32212 instance, a version specialized for SSE4.2 should be checked for dispatch
32213 before a version for SSE3, as SSE4.2 implies SSE3. */
32214 enum feature_priority
32216 P_ZERO = 0,
32217 P_MMX,
32218 P_SSE,
32219 P_SSE2,
32220 P_SSE3,
32221 P_SSSE3,
32222 P_PROC_SSSE3,
32223 P_SSE4_A,
32224 P_PROC_SSE4_A,
32225 P_SSE4_1,
32226 P_SSE4_2,
32227 P_PROC_SSE4_2,
32228 P_POPCNT,
32229 P_AES,
32230 P_PCLMUL,
32231 P_AVX,
32232 P_PROC_AVX,
32233 P_BMI,
32234 P_PROC_BMI,
32235 P_FMA4,
32236 P_XOP,
32237 P_PROC_XOP,
32238 P_FMA,
32239 P_PROC_FMA,
32240 P_BMI2,
32241 P_AVX2,
32242 P_PROC_AVX2,
32243 P_AVX512F,
32244 P_PROC_AVX512F
32247 enum feature_priority priority = P_ZERO;
32249 /* These are the target attribute strings for which a dispatcher is
32250 available, from fold_builtin_cpu. */
32252 static struct _feature_list
32254 const char *const name;
32255 const enum feature_priority priority;
32257 const feature_list[] =
32259 {"mmx", P_MMX},
32260 {"sse", P_SSE},
32261 {"sse2", P_SSE2},
32262 {"sse3", P_SSE3},
32263 {"sse4a", P_SSE4_A},
32264 {"ssse3", P_SSSE3},
32265 {"sse4.1", P_SSE4_1},
32266 {"sse4.2", P_SSE4_2},
32267 {"popcnt", P_POPCNT},
32268 {"aes", P_AES},
32269 {"pclmul", P_PCLMUL},
32270 {"avx", P_AVX},
32271 {"bmi", P_BMI},
32272 {"fma4", P_FMA4},
32273 {"xop", P_XOP},
32274 {"fma", P_FMA},
32275 {"bmi2", P_BMI2},
32276 {"avx2", P_AVX2},
32277 {"avx512f", P_AVX512F}
32281 static unsigned int NUM_FEATURES
32282 = sizeof (feature_list) / sizeof (struct _feature_list);
32284 unsigned int i;
32286 tree predicate_chain = NULL_TREE;
32287 tree predicate_decl, predicate_arg;
32289 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32290 gcc_assert (attrs != NULL);
32292 attrs = TREE_VALUE (TREE_VALUE (attrs));
32294 gcc_assert (TREE_CODE (attrs) == STRING_CST);
32295 attrs_str = TREE_STRING_POINTER (attrs);
32297 /* Return priority zero for default function. */
32298 if (strcmp (attrs_str, "default") == 0)
32299 return 0;
32301 /* Handle arch= if specified. For priority, set it to be 1 more than
32302 the best instruction set the processor can handle. For instance, if
32303 there is a version for atom and a version for ssse3 (the highest ISA
32304 priority for atom), the atom version must be checked for dispatch
32305 before the ssse3 version. */
32306 if (strstr (attrs_str, "arch=") != NULL)
32308 cl_target_option_save (&cur_target, &global_options);
32309 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
32310 &global_options_set);
32312 gcc_assert (target_node);
32313 new_target = TREE_TARGET_OPTION (target_node);
32314 gcc_assert (new_target);
32316 if (new_target->arch_specified && new_target->arch > 0)
32318 switch (new_target->arch)
32320 case PROCESSOR_CORE2:
32321 arg_str = "core2";
32322 priority = P_PROC_SSSE3;
32323 break;
32324 case PROCESSOR_NEHALEM:
32325 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
32327 arg_str = "westmere";
32328 priority = P_AES;
32330 else
32332 /* We translate "arch=corei7" and "arch=nehalem" to
32333 "corei7" so that it will be mapped to M_INTEL_COREI7
32334 as cpu type to cover all M_INTEL_COREI7_XXXs. */
32335 arg_str = "corei7";
32336 priority = P_PROC_SSE4_2;
32338 break;
32339 case PROCESSOR_SANDYBRIDGE:
32340 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
32341 arg_str = "ivybridge";
32342 else
32343 arg_str = "sandybridge";
32344 priority = P_PROC_AVX;
32345 break;
32346 case PROCESSOR_HASWELL:
32347 case PROCESSOR_SKYLAKE_AVX512:
32348 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_GFNI)
32349 arg_str = "icelake";
32350 else if (new_target->x_ix86_isa_flags
32351 & OPTION_MASK_ISA_AVX512VBMI)
32352 arg_str = "cannonlake";
32353 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
32354 arg_str = "skylake-avx512";
32355 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
32356 arg_str = "skylake";
32357 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
32358 arg_str = "broadwell";
32359 else
32360 arg_str = "haswell";
32361 priority = P_PROC_AVX2;
32362 break;
32363 case PROCESSOR_BONNELL:
32364 arg_str = "bonnell";
32365 priority = P_PROC_SSSE3;
32366 break;
32367 case PROCESSOR_KNL:
32368 arg_str = "knl";
32369 priority = P_PROC_AVX512F;
32370 break;
32371 case PROCESSOR_KNM:
32372 arg_str = "knm";
32373 priority = P_PROC_AVX512F;
32374 break;
32375 case PROCESSOR_SILVERMONT:
32376 arg_str = "silvermont";
32377 priority = P_PROC_SSE4_2;
32378 break;
32379 case PROCESSOR_AMDFAM10:
32380 arg_str = "amdfam10h";
32381 priority = P_PROC_SSE4_A;
32382 break;
32383 case PROCESSOR_BTVER1:
32384 arg_str = "btver1";
32385 priority = P_PROC_SSE4_A;
32386 break;
32387 case PROCESSOR_BTVER2:
32388 arg_str = "btver2";
32389 priority = P_PROC_BMI;
32390 break;
32391 case PROCESSOR_BDVER1:
32392 arg_str = "bdver1";
32393 priority = P_PROC_XOP;
32394 break;
32395 case PROCESSOR_BDVER2:
32396 arg_str = "bdver2";
32397 priority = P_PROC_FMA;
32398 break;
32399 case PROCESSOR_BDVER3:
32400 arg_str = "bdver3";
32401 priority = P_PROC_FMA;
32402 break;
32403 case PROCESSOR_BDVER4:
32404 arg_str = "bdver4";
32405 priority = P_PROC_AVX2;
32406 break;
32407 case PROCESSOR_ZNVER1:
32408 arg_str = "znver1";
32409 priority = P_PROC_AVX2;
32410 break;
32414 cl_target_option_restore (&global_options, &cur_target);
32416 if (predicate_list && arg_str == NULL)
32418 error_at (DECL_SOURCE_LOCATION (decl),
32419 "No dispatcher found for the versioning attributes");
32420 return 0;
32423 if (predicate_list)
32425 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
32426 /* For a C string literal the length includes the trailing NULL. */
32427 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
32428 predicate_chain = tree_cons (predicate_decl, predicate_arg,
32429 predicate_chain);
32433 /* Process feature name. */
32434 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
32435 strcpy (tok_str, attrs_str);
32436 token = strtok (tok_str, ",");
32437 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
32439 while (token != NULL)
32441 /* Do not process "arch=" */
32442 if (strncmp (token, "arch=", 5) == 0)
32444 token = strtok (NULL, ",");
32445 continue;
32447 for (i = 0; i < NUM_FEATURES; ++i)
32449 if (strcmp (token, feature_list[i].name) == 0)
32451 if (predicate_list)
32453 predicate_arg = build_string_literal (
32454 strlen (feature_list[i].name) + 1,
32455 feature_list[i].name);
32456 predicate_chain = tree_cons (predicate_decl, predicate_arg,
32457 predicate_chain);
32459 /* Find the maximum priority feature. */
32460 if (feature_list[i].priority > priority)
32461 priority = feature_list[i].priority;
32463 break;
32466 if (predicate_list && i == NUM_FEATURES)
32468 error_at (DECL_SOURCE_LOCATION (decl),
32469 "No dispatcher found for %s", token);
32470 return 0;
32472 token = strtok (NULL, ",");
32474 free (tok_str);
32476 if (predicate_list && predicate_chain == NULL_TREE)
32478 error_at (DECL_SOURCE_LOCATION (decl),
32479 "No dispatcher found for the versioning attributes : %s",
32480 attrs_str);
32481 return 0;
32483 else if (predicate_list)
32485 predicate_chain = nreverse (predicate_chain);
32486 *predicate_list = predicate_chain;
32489 return priority;
32492 /* This compares the priority of target features in function DECL1
32493 and DECL2. It returns positive value if DECL1 is higher priority,
32494 negative value if DECL2 is higher priority and 0 if they are the
32495 same. */
32497 static int
32498 ix86_compare_version_priority (tree decl1, tree decl2)
32500 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
32501 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
32503 return (int)priority1 - (int)priority2;
32506 /* V1 and V2 point to function versions with different priorities
32507 based on the target ISA. This function compares their priorities. */
32509 static int
32510 feature_compare (const void *v1, const void *v2)
32512 typedef struct _function_version_info
32514 tree version_decl;
32515 tree predicate_chain;
32516 unsigned int dispatch_priority;
32517 } function_version_info;
32519 const function_version_info c1 = *(const function_version_info *)v1;
32520 const function_version_info c2 = *(const function_version_info *)v2;
32521 return (c2.dispatch_priority - c1.dispatch_priority);
32524 /* This function generates the dispatch function for
32525 multi-versioned functions. DISPATCH_DECL is the function which will
32526 contain the dispatch logic. FNDECLS are the function choices for
32527 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
32528 in DISPATCH_DECL in which the dispatch code is generated. */
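/* Ignoring GIMPLE details, the body built here behaves like the
   following hand-written dispatcher (a sketch only; foo_avx2 and
   foo_default are hypothetical version names):

     __builtin_cpu_init ();
     if (<predicate chain for foo_avx2 holds>)
       return foo_avx2 (args);
     ...
     return foo_default (args);  */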
32530 static int
32531 dispatch_function_versions (tree dispatch_decl,
32532 void *fndecls_p,
32533 basic_block *empty_bb)
32535 tree default_decl;
32536 gimple *ifunc_cpu_init_stmt;
32537 gimple_seq gseq;
32538 int ix;
32539 tree ele;
32540 vec<tree> *fndecls;
32541 unsigned int num_versions = 0;
32542 unsigned int actual_versions = 0;
32543 unsigned int i;
32545 struct _function_version_info
32547 tree version_decl;
32548 tree predicate_chain;
32549 unsigned int dispatch_priority;
32550 } *function_version_info;
32552 gcc_assert (dispatch_decl != NULL
32553 && fndecls_p != NULL
32554 && empty_bb != NULL);
32556 /* fndecls_p is actually a vector. */
32557 fndecls = static_cast<vec<tree> *> (fndecls_p);
32559 /* At least one more version other than the default. */
32560 num_versions = fndecls->length ();
32561 gcc_assert (num_versions >= 2);
32563 function_version_info = (struct _function_version_info *)
32564 XNEWVEC (struct _function_version_info, (num_versions - 1));
32566 /* The first version in the vector is the default decl. */
32567 default_decl = (*fndecls)[0];
32569 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
32571 gseq = bb_seq (*empty_bb);
32572 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
32573 constructors, so explicitly call __builtin_cpu_init here. */
32574 ifunc_cpu_init_stmt = gimple_build_call_vec (
32575 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
32576 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
32577 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
32578 set_bb_seq (*empty_bb, gseq);
32580 pop_cfun ();
32583 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32585 tree version_decl = ele;
32586 tree predicate_chain = NULL_TREE;
32587 unsigned int priority;
32588 /* Get attribute string, parse it and find the right predicate decl.
32589 The predicate function could be a lengthy combination of many
32590 features, like arch-type and various isa-variants. */
32591 priority = get_builtin_code_for_version (version_decl,
32592 &predicate_chain);
32594 if (predicate_chain == NULL_TREE)
32595 continue;
32597 function_version_info [actual_versions].version_decl = version_decl;
32598 function_version_info [actual_versions].predicate_chain
32599 = predicate_chain;
32600 function_version_info [actual_versions].dispatch_priority = priority;
32601 actual_versions++;
32604 /* Sort the versions according to descending order of dispatch priority. The
32605 priority is based on the ISA. This is not a perfect solution. There
32606 could still be ambiguity. If more than one function version is suitable
32607 to execute, which one should be dispatched? In the future, allow the user
32608 to specify a dispatch priority next to the version. */
32609 qsort (function_version_info, actual_versions,
32610 sizeof (struct _function_version_info), feature_compare);
32612 for (i = 0; i < actual_versions; ++i)
32613 *empty_bb = add_condition_to_bb (dispatch_decl,
32614 function_version_info[i].version_decl,
32615 function_version_info[i].predicate_chain,
32616 *empty_bb);
32618 /* Dispatch the default version at the end. */
32619 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
32620 NULL, *empty_bb);
32622 free (function_version_info);
32623 return 0;
32626 /* This function changes the assembler name for functions that are
32627 versions. If DECL is a function version and has a "target"
32628 attribute, it appends the attribute string to its assembler name. */
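/* For example (illustrative): for a version declared with
   __attribute__ ((target ("avx2,popcnt"))), the suffix ".avx2_popcnt"
   is appended to the existing assembler name (the exact string comes
   from sorted_attr_string), while the "default" version keeps its
   original name.  */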
32630 static tree
32631 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32633 tree version_attr;
32634 const char *orig_name, *version_string;
32635 char *attr_str, *assembler_name;
32637 if (DECL_DECLARED_INLINE_P (decl)
32638 && lookup_attribute ("gnu_inline",
32639 DECL_ATTRIBUTES (decl)))
32640 error_at (DECL_SOURCE_LOCATION (decl),
32641 "Function versions cannot be marked as gnu_inline,"
32642 " bodies have to be generated");
32644 if (DECL_VIRTUAL_P (decl)
32645 || DECL_VINDEX (decl))
32646 sorry ("Virtual function multiversioning not supported");
32648 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32650 /* target attribute string cannot be NULL. */
32651 gcc_assert (version_attr != NULL_TREE);
32653 orig_name = IDENTIFIER_POINTER (id);
32654 version_string
32655 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32657 if (strcmp (version_string, "default") == 0)
32658 return id;
32660 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32661 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32663 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32665 /* Allow assembler name to be modified if already set. */
32666 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32667 SET_DECL_RTL (decl, NULL);
32669 tree ret = get_identifier (assembler_name);
32670 XDELETEVEC (attr_str);
32671 XDELETEVEC (assembler_name);
32672 return ret;
32676 static tree
32677 ix86_mangle_decl_assembler_name (tree decl, tree id)
32679 /* For function version, add the target suffix to the assembler name. */
32680 if (TREE_CODE (decl) == FUNCTION_DECL
32681 && DECL_FUNCTION_VERSIONED (decl))
32682 id = ix86_mangle_function_version_assembler_name (decl, id);
32683 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32684 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32685 #endif
32687 return id;
32690 /* Make a dispatcher declaration for the multi-versioned function DECL.
32691 Calls to DECL function will be replaced with calls to the dispatcher
32692 by the front-end. Returns the decl of the dispatcher function. */
32694 static tree
32695 ix86_get_function_versions_dispatcher (void *decl)
32697 tree fn = (tree) decl;
32698 struct cgraph_node *node = NULL;
32699 struct cgraph_node *default_node = NULL;
32700 struct cgraph_function_version_info *node_v = NULL;
32701 struct cgraph_function_version_info *first_v = NULL;
32703 tree dispatch_decl = NULL;
32705 struct cgraph_function_version_info *default_version_info = NULL;
32707 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32709 node = cgraph_node::get (fn);
32710 gcc_assert (node != NULL);
32712 node_v = node->function_version ();
32713 gcc_assert (node_v != NULL);
32715 if (node_v->dispatcher_resolver != NULL)
32716 return node_v->dispatcher_resolver;
32718 /* Find the default version and make it the first node. */
32719 first_v = node_v;
32720 /* Go to the beginning of the chain. */
32721 while (first_v->prev != NULL)
32722 first_v = first_v->prev;
32723 default_version_info = first_v;
32724 while (default_version_info != NULL)
32726 if (is_function_default_version
32727 (default_version_info->this_node->decl))
32728 break;
32729 default_version_info = default_version_info->next;
32732 /* If there is no default node, just return NULL. */
32733 if (default_version_info == NULL)
32734 return NULL;
32736 /* Make default info the first node. */
32737 if (first_v != default_version_info)
32739 default_version_info->prev->next = default_version_info->next;
32740 if (default_version_info->next)
32741 default_version_info->next->prev = default_version_info->prev;
32742 first_v->prev = default_version_info;
32743 default_version_info->next = first_v;
32744 default_version_info->prev = NULL;
32747 default_node = default_version_info->this_node;
32749 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32750 if (targetm.has_ifunc_p ())
32752 struct cgraph_function_version_info *it_v = NULL;
32753 struct cgraph_node *dispatcher_node = NULL;
32754 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32756 /* Right now, the dispatching is done via ifunc. */
32757 dispatch_decl = make_dispatcher_decl (default_node->decl);
32759 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32760 gcc_assert (dispatcher_node != NULL);
32761 dispatcher_node->dispatcher_function = 1;
32762 dispatcher_version_info
32763 = dispatcher_node->insert_new_function_version ();
32764 dispatcher_version_info->next = default_version_info;
32765 dispatcher_node->definition = 1;
32767 /* Set the dispatcher for all the versions. */
32768 it_v = default_version_info;
32769 while (it_v != NULL)
32771 it_v->dispatcher_resolver = dispatch_decl;
32772 it_v = it_v->next;
32775 else
32776 #endif
32778 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32779 "multiversioning needs ifunc which is not supported "
32780 "on this target");
32783 return dispatch_decl;
32786 /* Make the resolver function decl to dispatch the versions of
32787 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
32788 ifunc alias that will point to the created resolver. Create an
32789 empty basic block in the resolver and store the pointer in
32790 EMPTY_BB. Return the decl of the resolver function. */
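/* In rough terms, the decls built here correspond to hand-written code
   such as (a sketch; foo_resolver and the version names are
   hypothetical):

     static void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
	 return (void *) foo_avx2;
       return (void *) foo_default;
     }
     int foo (void) __attribute__ ((ifunc ("foo_resolver")));

   except that the resolver body itself is filled in later by
   dispatch_function_versions.  */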
32792 static tree
32793 make_resolver_func (const tree default_decl,
32794 const tree ifunc_alias_decl,
32795 basic_block *empty_bb)
32797 char *resolver_name;
32798 tree decl, type, decl_name, t;
32800 /* IFUNCs have to be globally visible. So, if the default_decl is
32801 not, then the name of the IFUNC should be made unique. */
32802 if (TREE_PUBLIC (default_decl) == 0)
32804 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
32805 symtab->change_decl_assembler_name (ifunc_alias_decl,
32806 get_identifier (ifunc_name));
32807 XDELETEVEC (ifunc_name);
32810 resolver_name = make_unique_name (default_decl, "resolver", false);
32812 /* The resolver function should return a (void *). */
32813 type = build_function_type_list (ptr_type_node, NULL_TREE);
32815 decl = build_fn_decl (resolver_name, type);
32816 decl_name = get_identifier (resolver_name);
32817 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32819 DECL_NAME (decl) = decl_name;
32820 TREE_USED (decl) = 1;
32821 DECL_ARTIFICIAL (decl) = 1;
32822 DECL_IGNORED_P (decl) = 1;
32823 TREE_PUBLIC (decl) = 0;
32824 DECL_UNINLINABLE (decl) = 1;
32826 /* Resolver is not external, body is generated. */
32827 DECL_EXTERNAL (decl) = 0;
32828 DECL_EXTERNAL (ifunc_alias_decl) = 0;
32830 DECL_CONTEXT (decl) = NULL_TREE;
32831 DECL_INITIAL (decl) = make_node (BLOCK);
32832 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32834 if (DECL_COMDAT_GROUP (default_decl)
32835 || TREE_PUBLIC (default_decl))
32837 /* In this case, each translation unit with a call to this
32838 versioned function will put out a resolver. Ensure it
32839 is comdat to keep just one copy. */
32840 DECL_COMDAT (decl) = 1;
32841 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32843 /* Build result decl and add to function_decl. */
32844 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32845 DECL_ARTIFICIAL (t) = 1;
32846 DECL_IGNORED_P (t) = 1;
32847 DECL_RESULT (decl) = t;
32849 gimplify_function_tree (decl);
32850 push_cfun (DECL_STRUCT_FUNCTION (decl));
32851 *empty_bb = init_lowered_empty_function (decl, false,
32852 profile_count::uninitialized ());
32854 cgraph_node::add_new_function (decl, true);
32855 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32857 pop_cfun ();
32859 gcc_assert (ifunc_alias_decl != NULL);
32860 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
32861 DECL_ATTRIBUTES (ifunc_alias_decl)
32862 = make_attribute ("ifunc", resolver_name,
32863 DECL_ATTRIBUTES (ifunc_alias_decl));
32865 /* Create the alias for dispatch to resolver here. */
32866 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
32867 XDELETEVEC (resolver_name);
32868 return decl;
32871 /* Generate the dispatching code body to dispatch multi-versioned function
32872 DECL. The target hook is called to process the "target" attributes and
32873 provide the code to dispatch the right function at run-time. NODE points
32874 to the dispatcher decl whose body will be created. */
32876 static tree
32877 ix86_generate_version_dispatcher_body (void *node_p)
32879 tree resolver_decl;
32880 basic_block empty_bb;
32881 tree default_ver_decl;
32882 struct cgraph_node *versn;
32883 struct cgraph_node *node;
32885 struct cgraph_function_version_info *node_version_info = NULL;
32886 struct cgraph_function_version_info *versn_info = NULL;
32888 node = (cgraph_node *)node_p;
32890 node_version_info = node->function_version ();
32891 gcc_assert (node->dispatcher_function
32892 && node_version_info != NULL);
32894 if (node_version_info->dispatcher_resolver)
32895 return node_version_info->dispatcher_resolver;
32897 /* The first version in the chain corresponds to the default version. */
32898 default_ver_decl = node_version_info->next->this_node->decl;
32900 /* node is going to be an alias, so remove the finalized bit. */
32901 node->definition = false;
32903 resolver_decl = make_resolver_func (default_ver_decl,
32904 node->decl, &empty_bb);
32906 node_version_info->dispatcher_resolver = resolver_decl;
32908 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32910 auto_vec<tree, 2> fn_ver_vec;
32912 for (versn_info = node_version_info->next; versn_info;
32913 versn_info = versn_info->next)
32915 versn = versn_info->this_node;
32916 /* Check for virtual functions here again, as by this time it should
32917 have been determined if this function needs a vtable index or
32918 not. This happens for methods in derived classes that override
32919 virtual methods in base classes but are not explicitly marked as
32920 virtual. */
32921 if (DECL_VINDEX (versn->decl))
32922 sorry ("Virtual function multiversioning not supported");
32924 fn_ver_vec.safe_push (versn->decl);
32927 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32928 cgraph_edge::rebuild_edges ();
32929 pop_cfun ();
32930 return resolver_decl;
32932 /* This builds the processor_model struct type defined in
32933 libgcc/config/i386/cpuinfo.c */
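/* The record built below is laid out to match the libgcc definition,
   roughly (a sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */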
32935 static tree
32936 build_processor_model_struct (void)
32938 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32939 "__cpu_features"};
32940 tree field = NULL_TREE, field_chain = NULL_TREE;
32941 int i;
32942 tree type = make_node (RECORD_TYPE);
32944 /* The first 3 fields are unsigned int. */
32945 for (i = 0; i < 3; ++i)
32947 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32948 get_identifier (field_name[i]), unsigned_type_node);
32949 if (field_chain != NULL_TREE)
32950 DECL_CHAIN (field) = field_chain;
32951 field_chain = field;
32954 /* The last field is an array of unsigned integers of size one. */
32955 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32956 get_identifier (field_name[3]),
32957 build_array_type (unsigned_type_node,
32958 build_index_type (size_one_node)));
32959 if (field_chain != NULL_TREE)
32960 DECL_CHAIN (field) = field_chain;
32961 field_chain = field;
32963 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32964 return type;
32967 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32969 static tree
32970 make_var_decl (tree type, const char *name)
32972 tree new_decl;
32974 new_decl = build_decl (UNKNOWN_LOCATION,
32975 VAR_DECL,
32976 get_identifier(name),
32977 type);
32979 DECL_EXTERNAL (new_decl) = 1;
32980 TREE_STATIC (new_decl) = 1;
32981 TREE_PUBLIC (new_decl) = 1;
32982 DECL_INITIAL (new_decl) = 0;
32983 DECL_ARTIFICIAL (new_decl) = 0;
32984 DECL_PRESERVE_P (new_decl) = 1;
32986 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32987 assemble_variable (new_decl, 0, 0, 0);
32989 return new_decl;
32992 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32993 into an integer defined in libgcc/config/i386/cpuinfo.c */
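/* For example (illustrative), __builtin_cpu_is ("amd") is folded into a
   test against the global model data, roughly

     (int) (__cpu_model.__cpu_vendor == M_AMD)

   and __builtin_cpu_supports ("avx2") into a feature-bit test, roughly

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */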
32995 static tree
32996 fold_builtin_cpu (tree fndecl, tree *args)
32998 unsigned int i;
32999 enum ix86_builtins fn_code = (enum ix86_builtins)
33000 DECL_FUNCTION_CODE (fndecl);
33001 tree param_string_cst = NULL;
33003 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
33004 enum processor_features
33006 F_CMOV = 0,
33007 F_MMX,
33008 F_POPCNT,
33009 F_SSE,
33010 F_SSE2,
33011 F_SSE3,
33012 F_SSSE3,
33013 F_SSE4_1,
33014 F_SSE4_2,
33015 F_AVX,
33016 F_AVX2,
33017 F_SSE4_A,
33018 F_FMA4,
33019 F_XOP,
33020 F_FMA,
33021 F_AVX512F,
33022 F_BMI,
33023 F_BMI2,
33024 F_AES,
33025 F_PCLMUL,
33026 F_AVX512VL,
33027 F_AVX512BW,
33028 F_AVX512DQ,
33029 F_AVX512CD,
33030 F_AVX512ER,
33031 F_AVX512PF,
33032 F_AVX512VBMI,
33033 F_AVX512IFMA,
33034 F_AVX5124VNNIW,
33035 F_AVX5124FMAPS,
33036 F_AVX512VPOPCNTDQ,
33037 F_MAX
33040 /* These are the values for vendor types and cpu types and subtypes
33041 in cpuinfo.c. Cpu type and subtype values are biased by the
33042 corresponding start value, which must be subtracted before use. */
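  /* E.g. (sketch): for __builtin_cpu_is ("nehalem"), field_val is
     M_INTEL_COREI7_NEHALEM, which lies beyond M_CPU_SUBTYPE_START, so
     the code below selects the __cpu_subtype field and compares it
     against M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START.  */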
33043 enum processor_model
33045 M_INTEL = 1,
33046 M_AMD,
33047 M_CPU_TYPE_START,
33048 M_INTEL_BONNELL,
33049 M_INTEL_CORE2,
33050 M_INTEL_COREI7,
33051 M_AMDFAM10H,
33052 M_AMDFAM15H,
33053 M_INTEL_SILVERMONT,
33054 M_INTEL_KNL,
33055 M_AMD_BTVER1,
33056 M_AMD_BTVER2,
33057 M_AMDFAM17H,
33058 M_INTEL_KNM,
33059 M_CPU_SUBTYPE_START,
33060 M_INTEL_COREI7_NEHALEM,
33061 M_INTEL_COREI7_WESTMERE,
33062 M_INTEL_COREI7_SANDYBRIDGE,
33063 M_AMDFAM10H_BARCELONA,
33064 M_AMDFAM10H_SHANGHAI,
33065 M_AMDFAM10H_ISTANBUL,
33066 M_AMDFAM15H_BDVER1,
33067 M_AMDFAM15H_BDVER2,
33068 M_AMDFAM15H_BDVER3,
33069 M_AMDFAM15H_BDVER4,
33070 M_AMDFAM17H_ZNVER1,
33071 M_INTEL_COREI7_IVYBRIDGE,
33072 M_INTEL_COREI7_HASWELL,
33073 M_INTEL_COREI7_BROADWELL,
33074 M_INTEL_COREI7_SKYLAKE,
33075 M_INTEL_COREI7_SKYLAKE_AVX512,
33076 M_INTEL_COREI7_CANNONLAKE,
33077 M_INTEL_COREI7_ICELAKE
33080 static struct _arch_names_table
33082 const char *const name;
33083 const enum processor_model model;
33085 const arch_names_table[] =
33087 {"amd", M_AMD},
33088 {"intel", M_INTEL},
33089 {"atom", M_INTEL_BONNELL},
33090 {"slm", M_INTEL_SILVERMONT},
33091 {"core2", M_INTEL_CORE2},
33092 {"corei7", M_INTEL_COREI7},
33093 {"nehalem", M_INTEL_COREI7_NEHALEM},
33094 {"westmere", M_INTEL_COREI7_WESTMERE},
33095 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
33096 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
33097 {"haswell", M_INTEL_COREI7_HASWELL},
33098 {"broadwell", M_INTEL_COREI7_BROADWELL},
33099 {"skylake", M_INTEL_COREI7_SKYLAKE},
33100 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
33101 {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
33102 {"icelake", M_INTEL_COREI7_ICELAKE},
33103 {"bonnell", M_INTEL_BONNELL},
33104 {"silvermont", M_INTEL_SILVERMONT},
33105 {"knl", M_INTEL_KNL},
33106 {"knm", M_INTEL_KNM},
33107 {"amdfam10h", M_AMDFAM10H},
33108 {"barcelona", M_AMDFAM10H_BARCELONA},
33109 {"shanghai", M_AMDFAM10H_SHANGHAI},
33110 {"istanbul", M_AMDFAM10H_ISTANBUL},
33111 {"btver1", M_AMD_BTVER1},
33112 {"amdfam15h", M_AMDFAM15H},
33113 {"bdver1", M_AMDFAM15H_BDVER1},
33114 {"bdver2", M_AMDFAM15H_BDVER2},
33115 {"bdver3", M_AMDFAM15H_BDVER3},
33116 {"bdver4", M_AMDFAM15H_BDVER4},
33117 {"btver2", M_AMD_BTVER2},
33118 {"amdfam17h", M_AMDFAM17H},
33119 {"znver1", M_AMDFAM17H_ZNVER1},
33122 static struct _isa_names_table
33124 const char *const name;
33125 const enum processor_features feature;
33127 const isa_names_table[] =
33129 {"cmov", F_CMOV},
33130 {"mmx", F_MMX},
33131 {"popcnt", F_POPCNT},
33132 {"sse", F_SSE},
33133 {"sse2", F_SSE2},
33134 {"sse3", F_SSE3},
33135 {"ssse3", F_SSSE3},
33136 {"sse4a", F_SSE4_A},
33137 {"sse4.1", F_SSE4_1},
33138 {"sse4.2", F_SSE4_2},
33139 {"avx", F_AVX},
33140 {"fma4", F_FMA4},
33141 {"xop", F_XOP},
33142 {"fma", F_FMA},
33143 {"avx2", F_AVX2},
33144 {"avx512f", F_AVX512F},
33145 {"bmi", F_BMI},
33146 {"bmi2", F_BMI2},
33147 {"aes", F_AES},
33148 {"pclmul", F_PCLMUL},
33149 {"avx512vl",F_AVX512VL},
33150 {"avx512bw",F_AVX512BW},
33151 {"avx512dq",F_AVX512DQ},
33152 {"avx512cd",F_AVX512CD},
33153 {"avx512er",F_AVX512ER},
33154 {"avx512pf",F_AVX512PF},
33155 {"avx512vbmi",F_AVX512VBMI},
33156 {"avx512ifma",F_AVX512IFMA},
33157 {"avx5124vnniw",F_AVX5124VNNIW},
33158 {"avx5124fmaps",F_AVX5124FMAPS},
33159 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
33162 tree __processor_model_type = build_processor_model_struct ();
33163 tree __cpu_model_var = make_var_decl (__processor_model_type,
33164 "__cpu_model");
33167 varpool_node::add (__cpu_model_var);
33169 gcc_assert ((args != NULL) && (*args != NULL));
33171 param_string_cst = *args;
33172 while (param_string_cst
33173 && TREE_CODE (param_string_cst) != STRING_CST)
33175 /* *args must be an expr that can contain other EXPRs leading to a
33176 STRING_CST. */
33177 if (!EXPR_P (param_string_cst))
33179 error ("Parameter to builtin must be a string constant or literal");
33180 return integer_zero_node;
33182 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
33185 gcc_assert (param_string_cst);
33187 if (fn_code == IX86_BUILTIN_CPU_IS)
33189 tree ref;
33190 tree field;
33191 tree final;
33193 unsigned int field_val = 0;
33194 unsigned int NUM_ARCH_NAMES
33195 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
33197 for (i = 0; i < NUM_ARCH_NAMES; i++)
33198 if (strcmp (arch_names_table[i].name,
33199 TREE_STRING_POINTER (param_string_cst)) == 0)
33200 break;
33202 if (i == NUM_ARCH_NAMES)
33204 error ("Parameter to builtin not valid: %s",
33205 TREE_STRING_POINTER (param_string_cst));
33206 return integer_zero_node;
33209 field = TYPE_FIELDS (__processor_model_type);
33210 field_val = arch_names_table[i].model;
33212 /* CPU types are stored in the next field. */
33213 if (field_val > M_CPU_TYPE_START
33214 && field_val < M_CPU_SUBTYPE_START)
33216 field = DECL_CHAIN (field);
33217 field_val -= M_CPU_TYPE_START;
33220 /* CPU subtypes are stored in the next field. */
33221 if (field_val > M_CPU_SUBTYPE_START)
33223 field = DECL_CHAIN (DECL_CHAIN (field));
33224 field_val -= M_CPU_SUBTYPE_START;
33227 /* Get the appropriate field in __cpu_model. */
33228 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
33229 field, NULL_TREE);
33231 /* Check the value. */
33232 final = build2 (EQ_EXPR, unsigned_type_node, ref,
33233 build_int_cstu (unsigned_type_node, field_val));
33234 return build1 (CONVERT_EXPR, integer_type_node, final);
33236 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
33238 tree ref;
33239 tree array_elt;
33240 tree field;
33241 tree final;
33243 unsigned int field_val = 0;
33244 unsigned int NUM_ISA_NAMES
33245 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
33247 for (i = 0; i < NUM_ISA_NAMES; i++)
33248 if (strcmp (isa_names_table[i].name,
33249 TREE_STRING_POINTER (param_string_cst)) == 0)
33250 break;
33252 if (i == NUM_ISA_NAMES)
33254 error ("Parameter to builtin not valid: %s",
33255 TREE_STRING_POINTER (param_string_cst));
33256 return integer_zero_node;
33259 field = TYPE_FIELDS (__processor_model_type);
33260 /* Get the last field, which is __cpu_features. */
33261 while (DECL_CHAIN (field))
33262 field = DECL_CHAIN (field);
33264 /* Get the appropriate field: __cpu_model.__cpu_features */
33265 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
33266 field, NULL_TREE);
33268 /* Access the 0th element of __cpu_features array. */
33269 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
33270 integer_zero_node, NULL_TREE, NULL_TREE);
33272 field_val = (1 << isa_names_table[i].feature);
33273 /* Return __cpu_model.__cpu_features[0] & field_val */
33274 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
33275 build_int_cstu (unsigned_type_node, field_val));
33276 return build1 (CONVERT_EXPR, integer_type_node, final);
33278 gcc_unreachable ();
33281 static tree
33282 ix86_fold_builtin (tree fndecl, int n_args,
33283 tree *args, bool ignore ATTRIBUTE_UNUSED)
33285 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
33287 enum ix86_builtins fn_code = (enum ix86_builtins)
33288 DECL_FUNCTION_CODE (fndecl);
33289 switch (fn_code)
33291 case IX86_BUILTIN_CPU_IS:
33292 case IX86_BUILTIN_CPU_SUPPORTS:
33293 gcc_assert (n_args == 1);
33294 return fold_builtin_cpu (fndecl, args);
33296 case IX86_BUILTIN_NANQ:
33297 case IX86_BUILTIN_NANSQ:
33299 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33300 const char *str = c_getstr (*args);
33301 int quiet = fn_code == IX86_BUILTIN_NANQ;
33302 REAL_VALUE_TYPE real;
33304 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
33305 return build_real (type, real);
33306 return NULL_TREE;
33309 case IX86_BUILTIN_INFQ:
33310 case IX86_BUILTIN_HUGE_VALQ:
33312 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33313 REAL_VALUE_TYPE inf;
33314 real_inf (&inf);
33315 return build_real (type, inf);
33318 case IX86_BUILTIN_TZCNT16:
33319 case IX86_BUILTIN_CTZS:
33320 case IX86_BUILTIN_TZCNT32:
33321 case IX86_BUILTIN_TZCNT64:
33322 gcc_assert (n_args == 1);
33323 if (TREE_CODE (args[0]) == INTEGER_CST)
33325 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33326 tree arg = args[0];
33327 if (fn_code == IX86_BUILTIN_TZCNT16
33328 || fn_code == IX86_BUILTIN_CTZS)
33329 arg = fold_convert (short_unsigned_type_node, arg);
33330 if (integer_zerop (arg))
33331 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33332 else
33333 return fold_const_call (CFN_CTZ, type, arg);
33335 break;
33337 case IX86_BUILTIN_LZCNT16:
33338 case IX86_BUILTIN_CLZS:
33339 case IX86_BUILTIN_LZCNT32:
33340 case IX86_BUILTIN_LZCNT64:
33341 gcc_assert (n_args == 1);
33342 if (TREE_CODE (args[0]) == INTEGER_CST)
33344 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33345 tree arg = args[0];
33346 if (fn_code == IX86_BUILTIN_LZCNT16
33347 || fn_code == IX86_BUILTIN_CLZS)
33348 arg = fold_convert (short_unsigned_type_node, arg);
33349 if (integer_zerop (arg))
33350 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33351 else
33352 return fold_const_call (CFN_CLZ, type, arg);
33354 break;
33356 case IX86_BUILTIN_BEXTR32:
33357 case IX86_BUILTIN_BEXTR64:
33358 case IX86_BUILTIN_BEXTRI32:
33359 case IX86_BUILTIN_BEXTRI64:
33360 gcc_assert (n_args == 2);
33361 if (tree_fits_uhwi_p (args[1]))
33363 unsigned HOST_WIDE_INT res = 0;
33364 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
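	    /* The second operand encodes the bit field: bits 7:0 give the
	       start position and bits 15:8 the length, as BEXTR defines.  */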
33365 unsigned int start = tree_to_uhwi (args[1]);
33366 unsigned int len = (start & 0xff00) >> 8;
33367 start &= 0xff;
33368 if (start >= prec || len == 0)
33369 res = 0;
33370 else if (!tree_fits_uhwi_p (args[0]))
33371 break;
33372 else
33373 res = tree_to_uhwi (args[0]) >> start;
33374 if (len > prec)
33375 len = prec;
33376 if (len < HOST_BITS_PER_WIDE_INT)
33377 res &= (HOST_WIDE_INT_1U << len) - 1;
33378 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33380 break;
33382 case IX86_BUILTIN_BZHI32:
33383 case IX86_BUILTIN_BZHI64:
33384 gcc_assert (n_args == 2);
33385 if (tree_fits_uhwi_p (args[1]))
33387 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
33388 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
33389 return args[0];
33390 if (!tree_fits_uhwi_p (args[0]))
33391 break;
33392 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
33393 res &= ~(HOST_WIDE_INT_M1U << idx);
33394 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33396 break;
33398 case IX86_BUILTIN_PDEP32:
33399 case IX86_BUILTIN_PDEP64:
33400 gcc_assert (n_args == 2);
33401 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33403 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33404 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33405 unsigned HOST_WIDE_INT res = 0;
33406 unsigned HOST_WIDE_INT m, k = 1;
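	    /* Software emulation of PDEP: walk the mask and deposit the
	       low-order bits of SRC, in order, at its set-bit positions.  */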
33407 for (m = 1; m; m <<= 1)
33408 if ((mask & m) != 0)
33410 if ((src & k) != 0)
33411 res |= m;
33412 k <<= 1;
33414 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33416 break;
33418 case IX86_BUILTIN_PEXT32:
33419 case IX86_BUILTIN_PEXT64:
33420 gcc_assert (n_args == 2);
33421 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33423 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33424 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33425 unsigned HOST_WIDE_INT res = 0;
33426 unsigned HOST_WIDE_INT m, k = 1;
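	    /* Software emulation of PEXT: gather the bits of SRC selected
	       by MASK and pack them into the low-order bits of RES.  */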
33427 for (m = 1; m; m <<= 1)
33428 if ((mask & m) != 0)
33430 if ((src & m) != 0)
33431 res |= k;
33432 k <<= 1;
33434 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33436 break;
33438 default:
33439 break;
33443 #ifdef SUBTARGET_FOLD_BUILTIN
33444 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
33445 #endif
33447 return NULL_TREE;
33450 /* Fold a MD builtin (use ix86_fold_builtin for folding into
33451 constant) in GIMPLE. */
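/* For example (a sketch): given source such as

     if (x)
       n = __builtin_ia32_tzcnt_u32 (x);

   value-range information proves X is non-zero at the call, so the call
   can be replaced by the generic __builtin_ctz (x), which later passes
   optimize better.  */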
33453 bool
33454 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
33456 gimple *stmt = gsi_stmt (*gsi);
33457 tree fndecl = gimple_call_fndecl (stmt);
33458 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
33459 int n_args = gimple_call_num_args (stmt);
33460 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
33461 tree decl = NULL_TREE;
33462 tree arg0, arg1;
33464 switch (fn_code)
33466 case IX86_BUILTIN_TZCNT32:
33467 decl = builtin_decl_implicit (BUILT_IN_CTZ);
33468 goto fold_tzcnt_lzcnt;
33470 case IX86_BUILTIN_TZCNT64:
33471 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
33472 goto fold_tzcnt_lzcnt;
33474 case IX86_BUILTIN_LZCNT32:
33475 decl = builtin_decl_implicit (BUILT_IN_CLZ);
33476 goto fold_tzcnt_lzcnt;
33478 case IX86_BUILTIN_LZCNT64:
33479 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
33480 goto fold_tzcnt_lzcnt;
33482 fold_tzcnt_lzcnt:
33483 gcc_assert (n_args == 1);
33484 arg0 = gimple_call_arg (stmt, 0);
33485 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
33487 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
33488 /* If arg0 is provably non-zero, optimize into the generic
33489 __builtin_c[tl]z{,ll} functions, which the middle-end
33490 handles better. */
33491 if (!expr_not_equal_to (arg0, wi::zero (prec)))
33492 return false;
33494 location_t loc = gimple_location (stmt);
33495 gimple *g = gimple_build_call (decl, 1, arg0);
33496 gimple_set_location (g, loc);
33497 tree lhs = make_ssa_name (integer_type_node);
33498 gimple_call_set_lhs (g, lhs);
33499 gsi_insert_before (gsi, g, GSI_SAME_STMT);
33500 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
33501 gimple_set_location (g, loc);
33502 gsi_replace (gsi, g, false);
33503 return true;
33505 break;
33507 case IX86_BUILTIN_BZHI32:
33508 case IX86_BUILTIN_BZHI64:
33509 gcc_assert (n_args == 2);
33510 arg1 = gimple_call_arg (stmt, 1);
33511 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
33513 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
33514 arg0 = gimple_call_arg (stmt, 0);
33515 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
33516 break;
33517 location_t loc = gimple_location (stmt);
33518 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33519 gimple_set_location (g, loc);
33520 gsi_replace (gsi, g, false);
33521 return true;
33523 break;
33525 case IX86_BUILTIN_PDEP32:
33526 case IX86_BUILTIN_PDEP64:
33527 case IX86_BUILTIN_PEXT32:
33528 case IX86_BUILTIN_PEXT64:
33529 gcc_assert (n_args == 2);
33530 arg1 = gimple_call_arg (stmt, 1);
33531 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
33533 location_t loc = gimple_location (stmt);
33534 arg0 = gimple_call_arg (stmt, 0);
33535 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33536 gimple_set_location (g, loc);
33537 gsi_replace (gsi, g, false);
33538 return true;
33540 break;
33542 default:
33543 break;
33546 return false;
33549 /* Make builtins to detect cpu type and features supported. NAME is
33550 the builtin name, CODE is the builtin code, and FTYPE is the function
33551 type of the builtin. */
33553 static void
33554 make_cpu_type_builtin (const char* name, int code,
33555 enum ix86_builtin_func_type ftype, bool is_const)
33557 tree decl;
33558 tree type;
33560 type = ix86_get_builtin_func_type (ftype);
33561 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
33562 NULL, NULL_TREE);
33563 gcc_assert (decl != NULL_TREE);
33564 ix86_builtins[(int) code] = decl;
33565 TREE_READONLY (decl) = is_const;
33568 /* Make builtins to get CPU type and features supported. The created
33569 builtins are:
33571 __builtin_cpu_init (), to detect cpu type and features,
33572 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
33573 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
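/* Typical use in user code (illustrative; popcount_avx2 and
   popcount_generic are hypothetical helpers):

     if (__builtin_cpu_supports ("avx2"))
       n = popcount_avx2 (p, len);
     else
       n = popcount_generic (p, len);

   __builtin_cpu_init () only needs to be called explicitly from code
   that runs before constructors (e.g. an IFUNC resolver); otherwise the
   libgcc constructor initializes the CPU model data automatically.  */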
33576 static void
33577 ix86_init_platform_type_builtins (void)
33579 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
33580 INT_FTYPE_VOID, false);
33581 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
33582 INT_FTYPE_PCCHAR, true);
33583 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
33584 INT_FTYPE_PCCHAR, true);
33587 /* Internal method for ix86_init_builtins. */
33589 static void
33590 ix86_init_builtins_va_builtins_abi (void)
33592 tree ms_va_ref, sysv_va_ref;
33593 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
33594 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
33595 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
33596 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
33598 if (!TARGET_64BIT)
33599 return;
33600 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
33601 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
33602 ms_va_ref = build_reference_type (ms_va_list_type_node);
33603 sysv_va_ref =
33604 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
33606 fnvoid_va_end_ms =
33607 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33608 fnvoid_va_start_ms =
33609 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33610 fnvoid_va_end_sysv =
33611 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
33612 fnvoid_va_start_sysv =
33613 build_varargs_function_type_list (void_type_node, sysv_va_ref,
33614 NULL_TREE);
33615 fnvoid_va_copy_ms =
33616 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
33617 NULL_TREE);
33618 fnvoid_va_copy_sysv =
33619 build_function_type_list (void_type_node, sysv_va_ref,
33620 sysv_va_ref, NULL_TREE);
33622 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
33623 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
33624 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
33625 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
33626 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
33627 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
33628 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
33629 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33630 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
33631 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33632 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
33633 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33636 static void
33637 ix86_init_builtin_types (void)
33639 tree float80_type_node, const_string_type_node;
33641 /* The __float80 type. */
33642 float80_type_node = long_double_type_node;
33643 if (TYPE_MODE (float80_type_node) != XFmode)
33645 if (float64x_type_node != NULL_TREE
33646 && TYPE_MODE (float64x_type_node) == XFmode)
33647 float80_type_node = float64x_type_node;
33648 else
33650 /* The __float80 type. */
33651 float80_type_node = make_node (REAL_TYPE);
33653 TYPE_PRECISION (float80_type_node) = 80;
33654 layout_type (float80_type_node);
33657 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
33659 /* The __float128 type. The node has already been created as
33660 _Float128, so we only need to register the __float128 name for
33661 it. */
33662 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
33664 const_string_type_node
33665 = build_pointer_type (build_qualified_type
33666 (char_type_node, TYPE_QUAL_CONST));
33668 /* This macro is built by i386-builtin-types.awk. */
33669 DEFINE_BUILTIN_PRIMITIVE_TYPES;
33672 static void
33673 ix86_init_builtins (void)
33675 tree ftype, decl;
33677 ix86_init_builtin_types ();
33679 /* Builtins to get CPU type and features. */
33680 ix86_init_platform_type_builtins ();
33682 /* TFmode support builtins. */
33683 def_builtin_const (0, "__builtin_infq",
33684 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
33685 def_builtin_const (0, "__builtin_huge_valq",
33686 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
33688 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
33689 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
33690 BUILT_IN_MD, "nanq", NULL_TREE);
33691 TREE_READONLY (decl) = 1;
33692 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
33694 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
33695 BUILT_IN_MD, "nansq", NULL_TREE);
33696 TREE_READONLY (decl) = 1;
33697 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
33699 /* We will expand them to normal call if SSE isn't available since
33700 they are used by libgcc. */
33701 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
33702 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
33703 BUILT_IN_MD, "__fabstf2", NULL_TREE);
33704 TREE_READONLY (decl) = 1;
33705 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
33707 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
33708 decl = add_builtin_function ("__builtin_copysignq", ftype,
33709 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
33710 "__copysigntf3", NULL_TREE);
33711 TREE_READONLY (decl) = 1;
33712 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
33714 ix86_init_tm_builtins ();
33715 ix86_init_mmx_sse_builtins ();
33716 ix86_init_mpx_builtins ();
33718 if (TARGET_LP64)
33719 ix86_init_builtins_va_builtins_abi ();
33721 #ifdef SUBTARGET_INIT_BUILTINS
33722 SUBTARGET_INIT_BUILTINS;
33723 #endif
33726 /* Return the ix86 builtin for CODE. */
33728 static tree
33729 ix86_builtin_decl (unsigned code, bool)
33731 if (code >= IX86_BUILTIN_MAX)
33732 return error_mark_node;
33734 return ix86_builtins[code];
33737 /* Errors in the source file can cause expand_expr to return const0_rtx
33738 where we expect a vector. To avoid crashing, use one of the vector
33739 clear instructions. */
33740 static rtx
33741 safe_vector_operand (rtx x, machine_mode mode)
33743 if (x == const0_rtx)
33744 x = CONST0_RTX (mode);
33745 return x;
33748 /* Fix up modeless constants to fit the required mode. */
33749 static rtx
33750 fixup_modeless_constant (rtx x, machine_mode mode)
33752 if (GET_MODE (x) == VOIDmode)
33753 x = convert_to_mode (mode, x, 1);
33754 return x;
33757 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
33759 static rtx
33760 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
33762 rtx pat;
33763 tree arg0 = CALL_EXPR_ARG (exp, 0);
33764 tree arg1 = CALL_EXPR_ARG (exp, 1);
33765 rtx op0 = expand_normal (arg0);
33766 rtx op1 = expand_normal (arg1);
33767 machine_mode tmode = insn_data[icode].operand[0].mode;
33768 machine_mode mode0 = insn_data[icode].operand[1].mode;
33769 machine_mode mode1 = insn_data[icode].operand[2].mode;
33771 if (VECTOR_MODE_P (mode0))
33772 op0 = safe_vector_operand (op0, mode0);
33773 if (VECTOR_MODE_P (mode1))
33774 op1 = safe_vector_operand (op1, mode1);
33776 if (optimize || !target
33777 || GET_MODE (target) != tmode
33778 || !insn_data[icode].operand[0].predicate (target, tmode))
33779 target = gen_reg_rtx (tmode);
33781 if (GET_MODE (op1) == SImode && mode1 == TImode)
33783 rtx x = gen_reg_rtx (V4SImode);
33784 emit_insn (gen_sse2_loadd (x, op1));
33785 op1 = gen_lowpart (TImode, x);
33788 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33789 op0 = copy_to_mode_reg (mode0, op0);
33790 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33791 op1 = copy_to_mode_reg (mode1, op1);
33793 pat = GEN_FCN (icode) (target, op0, op1);
33794 if (! pat)
33795 return 0;
33797 emit_insn (pat);
33799 return target;
33802 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33804 static rtx
33805 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33806 enum ix86_builtin_func_type m_type,
33807 enum rtx_code sub_code)
33809 rtx pat;
33810 int i;
33811 int nargs;
33812 bool comparison_p = false;
33813 bool tf_p = false;
33814 bool last_arg_constant = false;
33815 int num_memory = 0;
33816 struct {
33817 rtx op;
33818 machine_mode mode;
33819 } args[4];
33821 machine_mode tmode = insn_data[icode].operand[0].mode;
33823 switch (m_type)
33825 case MULTI_ARG_4_DF2_DI_I:
33826 case MULTI_ARG_4_DF2_DI_I1:
33827 case MULTI_ARG_4_SF2_SI_I:
33828 case MULTI_ARG_4_SF2_SI_I1:
33829 nargs = 4;
33830 last_arg_constant = true;
33831 break;
33833 case MULTI_ARG_3_SF:
33834 case MULTI_ARG_3_DF:
33835 case MULTI_ARG_3_SF2:
33836 case MULTI_ARG_3_DF2:
33837 case MULTI_ARG_3_DI:
33838 case MULTI_ARG_3_SI:
33839 case MULTI_ARG_3_SI_DI:
33840 case MULTI_ARG_3_HI:
33841 case MULTI_ARG_3_HI_SI:
33842 case MULTI_ARG_3_QI:
33843 case MULTI_ARG_3_DI2:
33844 case MULTI_ARG_3_SI2:
33845 case MULTI_ARG_3_HI2:
33846 case MULTI_ARG_3_QI2:
33847 nargs = 3;
33848 break;
33850 case MULTI_ARG_2_SF:
33851 case MULTI_ARG_2_DF:
33852 case MULTI_ARG_2_DI:
33853 case MULTI_ARG_2_SI:
33854 case MULTI_ARG_2_HI:
33855 case MULTI_ARG_2_QI:
33856 nargs = 2;
33857 break;
33859 case MULTI_ARG_2_DI_IMM:
33860 case MULTI_ARG_2_SI_IMM:
33861 case MULTI_ARG_2_HI_IMM:
33862 case MULTI_ARG_2_QI_IMM:
33863 nargs = 2;
33864 last_arg_constant = true;
33865 break;
33867 case MULTI_ARG_1_SF:
33868 case MULTI_ARG_1_DF:
33869 case MULTI_ARG_1_SF2:
33870 case MULTI_ARG_1_DF2:
33871 case MULTI_ARG_1_DI:
33872 case MULTI_ARG_1_SI:
33873 case MULTI_ARG_1_HI:
33874 case MULTI_ARG_1_QI:
33875 case MULTI_ARG_1_SI_DI:
33876 case MULTI_ARG_1_HI_DI:
33877 case MULTI_ARG_1_HI_SI:
33878 case MULTI_ARG_1_QI_DI:
33879 case MULTI_ARG_1_QI_SI:
33880 case MULTI_ARG_1_QI_HI:
33881 nargs = 1;
33882 break;
33884 case MULTI_ARG_2_DI_CMP:
33885 case MULTI_ARG_2_SI_CMP:
33886 case MULTI_ARG_2_HI_CMP:
33887 case MULTI_ARG_2_QI_CMP:
33888 nargs = 2;
33889 comparison_p = true;
33890 break;
33892 case MULTI_ARG_2_SF_TF:
33893 case MULTI_ARG_2_DF_TF:
33894 case MULTI_ARG_2_DI_TF:
33895 case MULTI_ARG_2_SI_TF:
33896 case MULTI_ARG_2_HI_TF:
33897 case MULTI_ARG_2_QI_TF:
33898 nargs = 2;
33899 tf_p = true;
33900 break;
33902 default:
33903 gcc_unreachable ();
33906 if (optimize || !target
33907 || GET_MODE (target) != tmode
33908 || !insn_data[icode].operand[0].predicate (target, tmode))
33909 target = gen_reg_rtx (tmode);
33910 else if (memory_operand (target, tmode))
33911 num_memory++;
33913 gcc_assert (nargs <= 4);
33915 for (i = 0; i < nargs; i++)
33917 tree arg = CALL_EXPR_ARG (exp, i);
33918 rtx op = expand_normal (arg);
33919 int adjust = (comparison_p) ? 1 : 0;
33920 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33922 if (last_arg_constant && i == nargs - 1)
33924 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33926 enum insn_code new_icode = icode;
33927 switch (icode)
33929 case CODE_FOR_xop_vpermil2v2df3:
33930 case CODE_FOR_xop_vpermil2v4sf3:
33931 case CODE_FOR_xop_vpermil2v4df3:
33932 case CODE_FOR_xop_vpermil2v8sf3:
33933 error ("the last argument must be a 2-bit immediate");
33934 return gen_reg_rtx (tmode);
33935 case CODE_FOR_xop_rotlv2di3:
33936 new_icode = CODE_FOR_rotlv2di3;
33937 goto xop_rotl;
33938 case CODE_FOR_xop_rotlv4si3:
33939 new_icode = CODE_FOR_rotlv4si3;
33940 goto xop_rotl;
33941 case CODE_FOR_xop_rotlv8hi3:
33942 new_icode = CODE_FOR_rotlv8hi3;
33943 goto xop_rotl;
33944 case CODE_FOR_xop_rotlv16qi3:
33945 new_icode = CODE_FOR_rotlv16qi3;
33946 xop_rotl:
33947 if (CONST_INT_P (op))
33949 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
33950 op = GEN_INT (INTVAL (op) & mask);
33951 gcc_checking_assert
33952 (insn_data[icode].operand[i + 1].predicate (op, mode));
33954 else
33956 gcc_checking_assert
33957 (nargs == 2
33958 && insn_data[new_icode].operand[0].mode == tmode
33959 && insn_data[new_icode].operand[1].mode == tmode
33960 && insn_data[new_icode].operand[2].mode == mode
33961 && insn_data[new_icode].operand[0].predicate
33962 == insn_data[icode].operand[0].predicate
33963 && insn_data[new_icode].operand[1].predicate
33964 == insn_data[icode].operand[1].predicate);
33965 icode = new_icode;
33966 goto non_constant;
33968 break;
33969 default:
33970 gcc_unreachable ();
33974 else
33976 non_constant:
33977 if (VECTOR_MODE_P (mode))
33978 op = safe_vector_operand (op, mode);
33980 /* If we aren't optimizing, only allow one memory operand to be
33981 generated. */
33982 if (memory_operand (op, mode))
33983 num_memory++;
33985 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33987 if (optimize
33988 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33989 || num_memory > 1)
33990 op = force_reg (mode, op);
33993 args[i].op = op;
33994 args[i].mode = mode;
33997 switch (nargs)
33999 case 1:
34000 pat = GEN_FCN (icode) (target, args[0].op);
34001 break;
34003 case 2:
34004 if (tf_p)
34005 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34006 GEN_INT ((int)sub_code));
34007 else if (! comparison_p)
34008 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34009 else
34011 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
34012 args[0].op,
34013 args[1].op);
34015 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
34017 break;
34019 case 3:
34020 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34021 break;
34023 case 4:
34024 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
34025 break;
34027 default:
34028 gcc_unreachable ();
34031 if (! pat)
34032 return 0;
34034 emit_insn (pat);
34035 return target;
34038 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
34039 insns with vec_merge. */
34041 static rtx
34042 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
34043 rtx target)
34045 rtx pat;
34046 tree arg0 = CALL_EXPR_ARG (exp, 0);
34047 rtx op1, op0 = expand_normal (arg0);
34048 machine_mode tmode = insn_data[icode].operand[0].mode;
34049 machine_mode mode0 = insn_data[icode].operand[1].mode;
34051 if (optimize || !target
34052 || GET_MODE (target) != tmode
34053 || !insn_data[icode].operand[0].predicate (target, tmode))
34054 target = gen_reg_rtx (tmode);
34056 if (VECTOR_MODE_P (mode0))
34057 op0 = safe_vector_operand (op0, mode0);
34059 if ((optimize && !register_operand (op0, mode0))
34060 || !insn_data[icode].operand[1].predicate (op0, mode0))
34061 op0 = copy_to_mode_reg (mode0, op0);
34063 op1 = op0;
34064 if (!insn_data[icode].operand[2].predicate (op1, mode0))
34065 op1 = copy_to_mode_reg (mode0, op1);
34067 pat = GEN_FCN (icode) (target, op0, op1);
34068 if (! pat)
34069 return 0;
34070 emit_insn (pat);
34071 return target;
34074 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
34076 static rtx
34077 ix86_expand_sse_compare (const struct builtin_description *d,
34078 tree exp, rtx target, bool swap)
34080 rtx pat;
34081 tree arg0 = CALL_EXPR_ARG (exp, 0);
34082 tree arg1 = CALL_EXPR_ARG (exp, 1);
34083 rtx op0 = expand_normal (arg0);
34084 rtx op1 = expand_normal (arg1);
34085 rtx op2;
34086 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34087 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34088 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34089 enum rtx_code comparison = d->comparison;
34091 if (VECTOR_MODE_P (mode0))
34092 op0 = safe_vector_operand (op0, mode0);
34093 if (VECTOR_MODE_P (mode1))
34094 op1 = safe_vector_operand (op1, mode1);
34096 /* Swap operands if we have a comparison that isn't available in
34097 hardware. */
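  /* E.g. (sketch): SSE provides a "compare less-than" pattern but no
     "compare greater-than", so a GT builtin is emitted as LT with the
     two operands exchanged.  */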
34098 if (swap)
34099 std::swap (op0, op1);
34101 if (optimize || !target
34102 || GET_MODE (target) != tmode
34103 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34104 target = gen_reg_rtx (tmode);
34106 if ((optimize && !register_operand (op0, mode0))
34107 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
34108 op0 = copy_to_mode_reg (mode0, op0);
34109 if ((optimize && !register_operand (op1, mode1))
34110 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
34111 op1 = copy_to_mode_reg (mode1, op1);
34113 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
34114 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34115 if (! pat)
34116 return 0;
34117 emit_insn (pat);
34118 return target;
34121 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
34123 static rtx
34124 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
34125 rtx target)
34127 rtx pat;
34128 tree arg0 = CALL_EXPR_ARG (exp, 0);
34129 tree arg1 = CALL_EXPR_ARG (exp, 1);
34130 rtx op0 = expand_normal (arg0);
34131 rtx op1 = expand_normal (arg1);
34132 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34133 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34134 enum rtx_code comparison = d->comparison;
34136 if (VECTOR_MODE_P (mode0))
34137 op0 = safe_vector_operand (op0, mode0);
34138 if (VECTOR_MODE_P (mode1))
34139 op1 = safe_vector_operand (op1, mode1);
34141 /* Swap operands if we have a comparison that isn't available in
34142 hardware. */
34143 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
34144 std::swap (op0, op1);
34146 target = gen_reg_rtx (SImode);
34147 emit_move_insn (target, const0_rtx);
34148 target = gen_rtx_SUBREG (QImode, target, 0);
34150 if ((optimize && !register_operand (op0, mode0))
34151 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34152 op0 = copy_to_mode_reg (mode0, op0);
34153 if ((optimize && !register_operand (op1, mode1))
34154 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34155 op1 = copy_to_mode_reg (mode1, op1);
34157 pat = GEN_FCN (d->icode) (op0, op1);
34158 if (! pat)
34159 return 0;
34160 emit_insn (pat);
34161 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34162 gen_rtx_fmt_ee (comparison, QImode,
34163 SET_DEST (pat),
34164 const0_rtx)));
34166 return SUBREG_REG (target);
34169 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
34171 static rtx
34172 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
34173 rtx target)
34175 rtx pat;
34176 tree arg0 = CALL_EXPR_ARG (exp, 0);
34177 rtx op1, op0 = expand_normal (arg0);
34178 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34179 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34181 if (optimize || target == 0
34182 || GET_MODE (target) != tmode
34183 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34184 target = gen_reg_rtx (tmode);
34186 if (VECTOR_MODE_P (mode0))
34187 op0 = safe_vector_operand (op0, mode0);
34189 if ((optimize && !register_operand (op0, mode0))
34190 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34191 op0 = copy_to_mode_reg (mode0, op0);
34193 op1 = GEN_INT (d->comparison);
34195 pat = GEN_FCN (d->icode) (target, op0, op1);
34196 if (! pat)
34197 return 0;
34198 emit_insn (pat);
34199 return target;
34202 static rtx
34203 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
34204 tree exp, rtx target)
34206 rtx pat;
34207 tree arg0 = CALL_EXPR_ARG (exp, 0);
34208 tree arg1 = CALL_EXPR_ARG (exp, 1);
34209 rtx op0 = expand_normal (arg0);
34210 rtx op1 = expand_normal (arg1);
34211 rtx op2;
34212 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34213 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34214 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34216 if (optimize || target == 0
34217 || GET_MODE (target) != tmode
34218 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34219 target = gen_reg_rtx (tmode);
34221 op0 = safe_vector_operand (op0, mode0);
34222 op1 = safe_vector_operand (op1, mode1);
34224 if ((optimize && !register_operand (op0, mode0))
34225 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34226 op0 = copy_to_mode_reg (mode0, op0);
34227 if ((optimize && !register_operand (op1, mode1))
34228 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34229 op1 = copy_to_mode_reg (mode1, op1);
34231 op2 = GEN_INT (d->comparison);
34233 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34234 if (! pat)
34235 return 0;
34236 emit_insn (pat);
34237 return target;
34240 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
34242 static rtx
34243 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
34244 rtx target)
34246 rtx pat;
34247 tree arg0 = CALL_EXPR_ARG (exp, 0);
34248 tree arg1 = CALL_EXPR_ARG (exp, 1);
34249 rtx op0 = expand_normal (arg0);
34250 rtx op1 = expand_normal (arg1);
34251 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34252 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34253 enum rtx_code comparison = d->comparison;
34255 if (VECTOR_MODE_P (mode0))
34256 op0 = safe_vector_operand (op0, mode0);
34257 if (VECTOR_MODE_P (mode1))
34258 op1 = safe_vector_operand (op1, mode1);
34260 target = gen_reg_rtx (SImode);
34261 emit_move_insn (target, const0_rtx);
34262 target = gen_rtx_SUBREG (QImode, target, 0);
34264 if ((optimize && !register_operand (op0, mode0))
34265 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34266 op0 = copy_to_mode_reg (mode0, op0);
34267 if ((optimize && !register_operand (op1, mode1))
34268 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34269 op1 = copy_to_mode_reg (mode1, op1);
34271 pat = GEN_FCN (d->icode) (op0, op1);
34272 if (! pat)
34273 return 0;
34274 emit_insn (pat);
34275 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34276 gen_rtx_fmt_ee (comparison, QImode,
34277 SET_DEST (pat),
34278 const0_rtx)));
34280 return SUBREG_REG (target);
34283 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
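/* For illustration: the explicit-length string compares take five arguments
   (vector, length, vector, length, imm8 control).  Depending on d->code the
   interesting result is the index (operand 0), the mask (operand 1), or one
   of the flags; a hypothetical _mm_cmpestri (a, la, b, lb, mode) call is
   assumed to take the PCMPESTRI128 branch, while the flag-only variants
   fall through to the d->flag path at the end.  */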
34285 static rtx
34286 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
34287 tree exp, rtx target)
34289 rtx pat;
34290 tree arg0 = CALL_EXPR_ARG (exp, 0);
34291 tree arg1 = CALL_EXPR_ARG (exp, 1);
34292 tree arg2 = CALL_EXPR_ARG (exp, 2);
34293 tree arg3 = CALL_EXPR_ARG (exp, 3);
34294 tree arg4 = CALL_EXPR_ARG (exp, 4);
34295 rtx scratch0, scratch1;
34296 rtx op0 = expand_normal (arg0);
34297 rtx op1 = expand_normal (arg1);
34298 rtx op2 = expand_normal (arg2);
34299 rtx op3 = expand_normal (arg3);
34300 rtx op4 = expand_normal (arg4);
34301 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
34303 tmode0 = insn_data[d->icode].operand[0].mode;
34304 tmode1 = insn_data[d->icode].operand[1].mode;
34305 modev2 = insn_data[d->icode].operand[2].mode;
34306 modei3 = insn_data[d->icode].operand[3].mode;
34307 modev4 = insn_data[d->icode].operand[4].mode;
34308 modei5 = insn_data[d->icode].operand[5].mode;
34309 modeimm = insn_data[d->icode].operand[6].mode;
34311 if (VECTOR_MODE_P (modev2))
34312 op0 = safe_vector_operand (op0, modev2);
34313 if (VECTOR_MODE_P (modev4))
34314 op2 = safe_vector_operand (op2, modev4);
34316 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34317 op0 = copy_to_mode_reg (modev2, op0);
34318 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
34319 op1 = copy_to_mode_reg (modei3, op1);
34320 if ((optimize && !register_operand (op2, modev4))
34321 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
34322 op2 = copy_to_mode_reg (modev4, op2);
34323 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
34324 op3 = copy_to_mode_reg (modei5, op3);
34326 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
34328 error ("the fifth argument must be an 8-bit immediate");
34329 return const0_rtx;
34332 if (d->code == IX86_BUILTIN_PCMPESTRI128)
34334 if (optimize || !target
34335 || GET_MODE (target) != tmode0
34336 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34337 target = gen_reg_rtx (tmode0);
34339 scratch1 = gen_reg_rtx (tmode1);
34341 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
34343 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
34345 if (optimize || !target
34346 || GET_MODE (target) != tmode1
34347 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34348 target = gen_reg_rtx (tmode1);
34350 scratch0 = gen_reg_rtx (tmode0);
34352 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
34354 else
34356 gcc_assert (d->flag);
34358 scratch0 = gen_reg_rtx (tmode0);
34359 scratch1 = gen_reg_rtx (tmode1);
34361 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
34364 if (! pat)
34365 return 0;
34367 emit_insn (pat);
34369 if (d->flag)
34371 target = gen_reg_rtx (SImode);
34372 emit_move_insn (target, const0_rtx);
34373 target = gen_rtx_SUBREG (QImode, target, 0);
34375 emit_insn
34376 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34377 gen_rtx_fmt_ee (EQ, QImode,
34378 gen_rtx_REG ((machine_mode) d->flag,
34379 FLAGS_REG),
34380 const0_rtx)));
34381 return SUBREG_REG (target);
34383 else
34384 return target;
34388 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
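/* For illustration: the implicit-length variant drops the two length
   arguments, so only (vector, vector, imm8 control) are expected; result
   selection (index, mask, or a flag read from FLAGS_REG) otherwise mirrors
   the pcmpestr expander above.  */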
34390 static rtx
34391 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
34392 tree exp, rtx target)
34394 rtx pat;
34395 tree arg0 = CALL_EXPR_ARG (exp, 0);
34396 tree arg1 = CALL_EXPR_ARG (exp, 1);
34397 tree arg2 = CALL_EXPR_ARG (exp, 2);
34398 rtx scratch0, scratch1;
34399 rtx op0 = expand_normal (arg0);
34400 rtx op1 = expand_normal (arg1);
34401 rtx op2 = expand_normal (arg2);
34402 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
34404 tmode0 = insn_data[d->icode].operand[0].mode;
34405 tmode1 = insn_data[d->icode].operand[1].mode;
34406 modev2 = insn_data[d->icode].operand[2].mode;
34407 modev3 = insn_data[d->icode].operand[3].mode;
34408 modeimm = insn_data[d->icode].operand[4].mode;
34410 if (VECTOR_MODE_P (modev2))
34411 op0 = safe_vector_operand (op0, modev2);
34412 if (VECTOR_MODE_P (modev3))
34413 op1 = safe_vector_operand (op1, modev3);
34415 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34416 op0 = copy_to_mode_reg (modev2, op0);
34417 if ((optimize && !register_operand (op1, modev3))
34418 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
34419 op1 = copy_to_mode_reg (modev3, op1);
34421 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
34423 error ("the third argument must be an 8-bit immediate");
34424 return const0_rtx;
34427 if (d->code == IX86_BUILTIN_PCMPISTRI128)
34429 if (optimize || !target
34430 || GET_MODE (target) != tmode0
34431 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34432 target = gen_reg_rtx (tmode0);
34434 scratch1 = gen_reg_rtx (tmode1);
34436 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
34438 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
34440 if (optimize || !target
34441 || GET_MODE (target) != tmode1
34442 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34443 target = gen_reg_rtx (tmode1);
34445 scratch0 = gen_reg_rtx (tmode0);
34447 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
34449 else
34451 gcc_assert (d->flag);
34453 scratch0 = gen_reg_rtx (tmode0);
34454 scratch1 = gen_reg_rtx (tmode1);
34456 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
34459 if (! pat)
34460 return 0;
34462 emit_insn (pat);
34464 if (d->flag)
34466 target = gen_reg_rtx (SImode);
34467 emit_move_insn (target, const0_rtx);
34468 target = gen_rtx_SUBREG (QImode, target, 0);
34470 emit_insn
34471 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34472 gen_rtx_fmt_ee (EQ, QImode,
34473 gen_rtx_REG ((machine_mode) d->flag,
34474 FLAGS_REG),
34475 const0_rtx)));
34476 return SUBREG_REG (target);
34478 else
34479 return target;
34482 /* Subroutine of ix86_expand_builtin to take care of insns with
34483 a variable number of operands. */
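/* For illustration, the switch below only classifies each function type;
   the generic operand loop afterwards does the expansion.  E.g. a
   hypothetical byte-shift builtin classified as
   V2DI_FTYPE_V2DI_INT_CONVERT ends up with nargs = 2 (one vector operand
   plus the shift amount), rmode = V1TImode (the insn pattern really works
   on V1TImode), and nargs_constant = 1 (the trailing operand must be an
   immediate) before falling through to the common code.  */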
34485 static rtx
34486 ix86_expand_args_builtin (const struct builtin_description *d,
34487 tree exp, rtx target)
34489 rtx pat, real_target;
34490 unsigned int i, nargs;
34491 unsigned int nargs_constant = 0;
34492 unsigned int mask_pos = 0;
34493 int num_memory = 0;
34494 struct
34496 rtx op;
34497 machine_mode mode;
34498 } args[6];
34499 bool second_arg_count = false;
34500 enum insn_code icode = d->icode;
34501 const struct insn_data_d *insn_p = &insn_data[icode];
34502 machine_mode tmode = insn_p->operand[0].mode;
34503 machine_mode rmode = VOIDmode;
34504 bool swap = false;
34505 enum rtx_code comparison = d->comparison;
34507 switch ((enum ix86_builtin_func_type) d->flag)
34509 case V2DF_FTYPE_V2DF_ROUND:
34510 case V4DF_FTYPE_V4DF_ROUND:
34511 case V8DF_FTYPE_V8DF_ROUND:
34512 case V4SF_FTYPE_V4SF_ROUND:
34513 case V8SF_FTYPE_V8SF_ROUND:
34514 case V16SF_FTYPE_V16SF_ROUND:
34515 case V4SI_FTYPE_V4SF_ROUND:
34516 case V8SI_FTYPE_V8SF_ROUND:
34517 case V16SI_FTYPE_V16SF_ROUND:
34518 return ix86_expand_sse_round (d, exp, target);
34519 case V4SI_FTYPE_V2DF_V2DF_ROUND:
34520 case V8SI_FTYPE_V4DF_V4DF_ROUND:
34521 case V16SI_FTYPE_V8DF_V8DF_ROUND:
34522 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
34523 case INT_FTYPE_V8SF_V8SF_PTEST:
34524 case INT_FTYPE_V4DI_V4DI_PTEST:
34525 case INT_FTYPE_V4DF_V4DF_PTEST:
34526 case INT_FTYPE_V4SF_V4SF_PTEST:
34527 case INT_FTYPE_V2DI_V2DI_PTEST:
34528 case INT_FTYPE_V2DF_V2DF_PTEST:
34529 return ix86_expand_sse_ptest (d, exp, target);
34530 case FLOAT128_FTYPE_FLOAT128:
34531 case FLOAT_FTYPE_FLOAT:
34532 case INT_FTYPE_INT:
34533 case UINT_FTYPE_UINT:
34534 case UINT16_FTYPE_UINT16:
34535 case UINT64_FTYPE_INT:
34536 case UINT64_FTYPE_UINT64:
34537 case INT64_FTYPE_INT64:
34538 case INT64_FTYPE_V4SF:
34539 case INT64_FTYPE_V2DF:
34540 case INT_FTYPE_V16QI:
34541 case INT_FTYPE_V8QI:
34542 case INT_FTYPE_V8SF:
34543 case INT_FTYPE_V4DF:
34544 case INT_FTYPE_V4SF:
34545 case INT_FTYPE_V2DF:
34546 case INT_FTYPE_V32QI:
34547 case V16QI_FTYPE_V16QI:
34548 case V8SI_FTYPE_V8SF:
34549 case V8SI_FTYPE_V4SI:
34550 case V8HI_FTYPE_V8HI:
34551 case V8HI_FTYPE_V16QI:
34552 case V8QI_FTYPE_V8QI:
34553 case V8SF_FTYPE_V8SF:
34554 case V8SF_FTYPE_V8SI:
34555 case V8SF_FTYPE_V4SF:
34556 case V8SF_FTYPE_V8HI:
34557 case V4SI_FTYPE_V4SI:
34558 case V4SI_FTYPE_V16QI:
34559 case V4SI_FTYPE_V4SF:
34560 case V4SI_FTYPE_V8SI:
34561 case V4SI_FTYPE_V8HI:
34562 case V4SI_FTYPE_V4DF:
34563 case V4SI_FTYPE_V2DF:
34564 case V4HI_FTYPE_V4HI:
34565 case V4DF_FTYPE_V4DF:
34566 case V4DF_FTYPE_V4SI:
34567 case V4DF_FTYPE_V4SF:
34568 case V4DF_FTYPE_V2DF:
34569 case V4SF_FTYPE_V4SF:
34570 case V4SF_FTYPE_V4SI:
34571 case V4SF_FTYPE_V8SF:
34572 case V4SF_FTYPE_V4DF:
34573 case V4SF_FTYPE_V8HI:
34574 case V4SF_FTYPE_V2DF:
34575 case V2DI_FTYPE_V2DI:
34576 case V2DI_FTYPE_V16QI:
34577 case V2DI_FTYPE_V8HI:
34578 case V2DI_FTYPE_V4SI:
34579 case V2DF_FTYPE_V2DF:
34580 case V2DF_FTYPE_V4SI:
34581 case V2DF_FTYPE_V4DF:
34582 case V2DF_FTYPE_V4SF:
34583 case V2DF_FTYPE_V2SI:
34584 case V2SI_FTYPE_V2SI:
34585 case V2SI_FTYPE_V4SF:
34586 case V2SI_FTYPE_V2SF:
34587 case V2SI_FTYPE_V2DF:
34588 case V2SF_FTYPE_V2SF:
34589 case V2SF_FTYPE_V2SI:
34590 case V32QI_FTYPE_V32QI:
34591 case V32QI_FTYPE_V16QI:
34592 case V16HI_FTYPE_V16HI:
34593 case V16HI_FTYPE_V8HI:
34594 case V8SI_FTYPE_V8SI:
34595 case V16HI_FTYPE_V16QI:
34596 case V8SI_FTYPE_V16QI:
34597 case V4DI_FTYPE_V16QI:
34598 case V8SI_FTYPE_V8HI:
34599 case V4DI_FTYPE_V8HI:
34600 case V4DI_FTYPE_V4SI:
34601 case V4DI_FTYPE_V2DI:
34602 case UQI_FTYPE_UQI:
34603 case UHI_FTYPE_UHI:
34604 case USI_FTYPE_USI:
34605 case USI_FTYPE_UQI:
34606 case USI_FTYPE_UHI:
34607 case UDI_FTYPE_UDI:
34608 case UHI_FTYPE_V16QI:
34609 case USI_FTYPE_V32QI:
34610 case UDI_FTYPE_V64QI:
34611 case V16QI_FTYPE_UHI:
34612 case V32QI_FTYPE_USI:
34613 case V64QI_FTYPE_UDI:
34614 case V8HI_FTYPE_UQI:
34615 case V16HI_FTYPE_UHI:
34616 case V32HI_FTYPE_USI:
34617 case V4SI_FTYPE_UQI:
34618 case V8SI_FTYPE_UQI:
34619 case V4SI_FTYPE_UHI:
34620 case V8SI_FTYPE_UHI:
34621 case UQI_FTYPE_V8HI:
34622 case UHI_FTYPE_V16HI:
34623 case USI_FTYPE_V32HI:
34624 case UQI_FTYPE_V4SI:
34625 case UQI_FTYPE_V8SI:
34626 case UHI_FTYPE_V16SI:
34627 case UQI_FTYPE_V2DI:
34628 case UQI_FTYPE_V4DI:
34629 case UQI_FTYPE_V8DI:
34630 case V16SI_FTYPE_UHI:
34631 case V2DI_FTYPE_UQI:
34632 case V4DI_FTYPE_UQI:
34633 case V16SI_FTYPE_INT:
34634 case V16SF_FTYPE_V8SF:
34635 case V16SI_FTYPE_V8SI:
34636 case V16SF_FTYPE_V4SF:
34637 case V16SI_FTYPE_V4SI:
34638 case V16SI_FTYPE_V16SF:
34639 case V16SI_FTYPE_V16SI:
34640 case V64QI_FTYPE_V64QI:
34641 case V32HI_FTYPE_V32HI:
34642 case V16SF_FTYPE_V16SF:
34643 case V8DI_FTYPE_UQI:
34644 case V8DI_FTYPE_V8DI:
34645 case V8DF_FTYPE_V4DF:
34646 case V8DF_FTYPE_V2DF:
34647 case V8DF_FTYPE_V8DF:
34648 case V4DI_FTYPE_V4DI:
34649 nargs = 1;
34650 break;
34651 case V4SF_FTYPE_V4SF_VEC_MERGE:
34652 case V2DF_FTYPE_V2DF_VEC_MERGE:
34653 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
34654 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
34655 case V16QI_FTYPE_V16QI_V16QI:
34656 case V16QI_FTYPE_V8HI_V8HI:
34657 case V16SF_FTYPE_V16SF_V16SF:
34658 case V8QI_FTYPE_V8QI_V8QI:
34659 case V8QI_FTYPE_V4HI_V4HI:
34660 case V8HI_FTYPE_V8HI_V8HI:
34661 case V8HI_FTYPE_V16QI_V16QI:
34662 case V8HI_FTYPE_V4SI_V4SI:
34663 case V8SF_FTYPE_V8SF_V8SF:
34664 case V8SF_FTYPE_V8SF_V8SI:
34665 case V8DF_FTYPE_V8DF_V8DF:
34666 case V4SI_FTYPE_V4SI_V4SI:
34667 case V4SI_FTYPE_V8HI_V8HI:
34668 case V4SI_FTYPE_V2DF_V2DF:
34669 case V4HI_FTYPE_V4HI_V4HI:
34670 case V4HI_FTYPE_V8QI_V8QI:
34671 case V4HI_FTYPE_V2SI_V2SI:
34672 case V4DF_FTYPE_V4DF_V4DF:
34673 case V4DF_FTYPE_V4DF_V4DI:
34674 case V4SF_FTYPE_V4SF_V4SF:
34675 case V4SF_FTYPE_V4SF_V4SI:
34676 case V4SF_FTYPE_V4SF_V2SI:
34677 case V4SF_FTYPE_V4SF_V2DF:
34678 case V4SF_FTYPE_V4SF_UINT:
34679 case V4SF_FTYPE_V4SF_DI:
34680 case V4SF_FTYPE_V4SF_SI:
34681 case V2DI_FTYPE_V2DI_V2DI:
34682 case V2DI_FTYPE_V16QI_V16QI:
34683 case V2DI_FTYPE_V4SI_V4SI:
34684 case V2DI_FTYPE_V2DI_V16QI:
34685 case V2SI_FTYPE_V2SI_V2SI:
34686 case V2SI_FTYPE_V4HI_V4HI:
34687 case V2SI_FTYPE_V2SF_V2SF:
34688 case V2DF_FTYPE_V2DF_V2DF:
34689 case V2DF_FTYPE_V2DF_V4SF:
34690 case V2DF_FTYPE_V2DF_V2DI:
34691 case V2DF_FTYPE_V2DF_DI:
34692 case V2DF_FTYPE_V2DF_SI:
34693 case V2DF_FTYPE_V2DF_UINT:
34694 case V2SF_FTYPE_V2SF_V2SF:
34695 case V1DI_FTYPE_V1DI_V1DI:
34696 case V1DI_FTYPE_V8QI_V8QI:
34697 case V1DI_FTYPE_V2SI_V2SI:
34698 case V32QI_FTYPE_V16HI_V16HI:
34699 case V16HI_FTYPE_V8SI_V8SI:
34700 case V64QI_FTYPE_V64QI_V64QI:
34701 case V32QI_FTYPE_V32QI_V32QI:
34702 case V16HI_FTYPE_V32QI_V32QI:
34703 case V16HI_FTYPE_V16HI_V16HI:
34704 case V8SI_FTYPE_V4DF_V4DF:
34705 case V8SI_FTYPE_V8SI_V8SI:
34706 case V8SI_FTYPE_V16HI_V16HI:
34707 case V4DI_FTYPE_V4DI_V4DI:
34708 case V4DI_FTYPE_V8SI_V8SI:
34709 case V8DI_FTYPE_V64QI_V64QI:
34710 if (comparison == UNKNOWN)
34711 return ix86_expand_binop_builtin (icode, exp, target);
34712 nargs = 2;
34713 break;
34714 case V4SF_FTYPE_V4SF_V4SF_SWAP:
34715 case V2DF_FTYPE_V2DF_V2DF_SWAP:
34716 gcc_assert (comparison != UNKNOWN);
34717 nargs = 2;
34718 swap = true;
34719 break;
34720 case V16HI_FTYPE_V16HI_V8HI_COUNT:
34721 case V16HI_FTYPE_V16HI_SI_COUNT:
34722 case V8SI_FTYPE_V8SI_V4SI_COUNT:
34723 case V8SI_FTYPE_V8SI_SI_COUNT:
34724 case V4DI_FTYPE_V4DI_V2DI_COUNT:
34725 case V4DI_FTYPE_V4DI_INT_COUNT:
34726 case V8HI_FTYPE_V8HI_V8HI_COUNT:
34727 case V8HI_FTYPE_V8HI_SI_COUNT:
34728 case V4SI_FTYPE_V4SI_V4SI_COUNT:
34729 case V4SI_FTYPE_V4SI_SI_COUNT:
34730 case V4HI_FTYPE_V4HI_V4HI_COUNT:
34731 case V4HI_FTYPE_V4HI_SI_COUNT:
34732 case V2DI_FTYPE_V2DI_V2DI_COUNT:
34733 case V2DI_FTYPE_V2DI_SI_COUNT:
34734 case V2SI_FTYPE_V2SI_V2SI_COUNT:
34735 case V2SI_FTYPE_V2SI_SI_COUNT:
34736 case V1DI_FTYPE_V1DI_V1DI_COUNT:
34737 case V1DI_FTYPE_V1DI_SI_COUNT:
34738 nargs = 2;
34739 second_arg_count = true;
34740 break;
34741 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
34742 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
34743 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
34744 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
34745 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
34746 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
34747 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
34748 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
34749 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
34750 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
34751 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
34752 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
34753 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
34754 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
34755 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
34756 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
34757 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
34758 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
34759 nargs = 4;
34760 second_arg_count = true;
34761 break;
34762 case UINT64_FTYPE_UINT64_UINT64:
34763 case UINT_FTYPE_UINT_UINT:
34764 case UINT_FTYPE_UINT_USHORT:
34765 case UINT_FTYPE_UINT_UCHAR:
34766 case UINT16_FTYPE_UINT16_INT:
34767 case UINT8_FTYPE_UINT8_INT:
34768 case UQI_FTYPE_UQI_UQI:
34769 case UHI_FTYPE_UHI_UHI:
34770 case USI_FTYPE_USI_USI:
34771 case UDI_FTYPE_UDI_UDI:
34772 case V16SI_FTYPE_V8DF_V8DF:
34773 nargs = 2;
34774 break;
34775 case V2DI_FTYPE_V2DI_INT_CONVERT:
34776 nargs = 2;
34777 rmode = V1TImode;
34778 nargs_constant = 1;
34779 break;
34780 case V4DI_FTYPE_V4DI_INT_CONVERT:
34781 nargs = 2;
34782 rmode = V2TImode;
34783 nargs_constant = 1;
34784 break;
34785 case V8DI_FTYPE_V8DI_INT_CONVERT:
34786 nargs = 2;
34787 rmode = V4TImode;
34788 nargs_constant = 1;
34789 break;
34790 case V8HI_FTYPE_V8HI_INT:
34791 case V8HI_FTYPE_V8SF_INT:
34792 case V16HI_FTYPE_V16SF_INT:
34793 case V8HI_FTYPE_V4SF_INT:
34794 case V8SF_FTYPE_V8SF_INT:
34795 case V4SF_FTYPE_V16SF_INT:
34796 case V16SF_FTYPE_V16SF_INT:
34797 case V4SI_FTYPE_V4SI_INT:
34798 case V4SI_FTYPE_V8SI_INT:
34799 case V4HI_FTYPE_V4HI_INT:
34800 case V4DF_FTYPE_V4DF_INT:
34801 case V4DF_FTYPE_V8DF_INT:
34802 case V4SF_FTYPE_V4SF_INT:
34803 case V4SF_FTYPE_V8SF_INT:
34804 case V2DI_FTYPE_V2DI_INT:
34805 case V2DF_FTYPE_V2DF_INT:
34806 case V2DF_FTYPE_V4DF_INT:
34807 case V16HI_FTYPE_V16HI_INT:
34808 case V8SI_FTYPE_V8SI_INT:
34809 case V16SI_FTYPE_V16SI_INT:
34810 case V4SI_FTYPE_V16SI_INT:
34811 case V4DI_FTYPE_V4DI_INT:
34812 case V2DI_FTYPE_V4DI_INT:
34813 case V4DI_FTYPE_V8DI_INT:
34814 case QI_FTYPE_V4SF_INT:
34815 case QI_FTYPE_V2DF_INT:
34816 case UQI_FTYPE_UQI_UQI_CONST:
34817 case UHI_FTYPE_UHI_UQI:
34818 case USI_FTYPE_USI_UQI:
34819 case UDI_FTYPE_UDI_UQI:
34820 nargs = 2;
34821 nargs_constant = 1;
34822 break;
34823 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34824 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34825 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34826 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34827 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34828 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34829 case UHI_FTYPE_V16SI_V16SI_UHI:
34830 case UQI_FTYPE_V8DI_V8DI_UQI:
34831 case V16HI_FTYPE_V16SI_V16HI_UHI:
34832 case V16QI_FTYPE_V16SI_V16QI_UHI:
34833 case V16QI_FTYPE_V8DI_V16QI_UQI:
34834 case V16SF_FTYPE_V16SF_V16SF_UHI:
34835 case V16SF_FTYPE_V4SF_V16SF_UHI:
34836 case V16SI_FTYPE_SI_V16SI_UHI:
34837 case V16SI_FTYPE_V16HI_V16SI_UHI:
34838 case V16SI_FTYPE_V16QI_V16SI_UHI:
34839 case V8SF_FTYPE_V4SF_V8SF_UQI:
34840 case V4DF_FTYPE_V2DF_V4DF_UQI:
34841 case V8SI_FTYPE_V4SI_V8SI_UQI:
34842 case V8SI_FTYPE_SI_V8SI_UQI:
34843 case V4SI_FTYPE_V4SI_V4SI_UQI:
34844 case V4SI_FTYPE_SI_V4SI_UQI:
34845 case V4DI_FTYPE_V2DI_V4DI_UQI:
34846 case V4DI_FTYPE_DI_V4DI_UQI:
34847 case V2DI_FTYPE_V2DI_V2DI_UQI:
34848 case V2DI_FTYPE_DI_V2DI_UQI:
34849 case V64QI_FTYPE_V64QI_V64QI_UDI:
34850 case V64QI_FTYPE_V16QI_V64QI_UDI:
34851 case V64QI_FTYPE_QI_V64QI_UDI:
34852 case V32QI_FTYPE_V32QI_V32QI_USI:
34853 case V32QI_FTYPE_V16QI_V32QI_USI:
34854 case V32QI_FTYPE_QI_V32QI_USI:
34855 case V16QI_FTYPE_V16QI_V16QI_UHI:
34856 case V16QI_FTYPE_QI_V16QI_UHI:
34857 case V32HI_FTYPE_V8HI_V32HI_USI:
34858 case V32HI_FTYPE_HI_V32HI_USI:
34859 case V16HI_FTYPE_V8HI_V16HI_UHI:
34860 case V16HI_FTYPE_HI_V16HI_UHI:
34861 case V8HI_FTYPE_V8HI_V8HI_UQI:
34862 case V8HI_FTYPE_HI_V8HI_UQI:
34863 case V8SF_FTYPE_V8HI_V8SF_UQI:
34864 case V4SF_FTYPE_V8HI_V4SF_UQI:
34865 case V8SI_FTYPE_V8SF_V8SI_UQI:
34866 case V4SI_FTYPE_V4SF_V4SI_UQI:
34867 case V4DI_FTYPE_V4SF_V4DI_UQI:
34868 case V2DI_FTYPE_V4SF_V2DI_UQI:
34869 case V4SF_FTYPE_V4DI_V4SF_UQI:
34870 case V4SF_FTYPE_V2DI_V4SF_UQI:
34871 case V4DF_FTYPE_V4DI_V4DF_UQI:
34872 case V2DF_FTYPE_V2DI_V2DF_UQI:
34873 case V16QI_FTYPE_V8HI_V16QI_UQI:
34874 case V16QI_FTYPE_V16HI_V16QI_UHI:
34875 case V16QI_FTYPE_V4SI_V16QI_UQI:
34876 case V16QI_FTYPE_V8SI_V16QI_UQI:
34877 case V8HI_FTYPE_V4SI_V8HI_UQI:
34878 case V8HI_FTYPE_V8SI_V8HI_UQI:
34879 case V16QI_FTYPE_V2DI_V16QI_UQI:
34880 case V16QI_FTYPE_V4DI_V16QI_UQI:
34881 case V8HI_FTYPE_V2DI_V8HI_UQI:
34882 case V8HI_FTYPE_V4DI_V8HI_UQI:
34883 case V4SI_FTYPE_V2DI_V4SI_UQI:
34884 case V4SI_FTYPE_V4DI_V4SI_UQI:
34885 case V32QI_FTYPE_V32HI_V32QI_USI:
34886 case UHI_FTYPE_V16QI_V16QI_UHI:
34887 case USI_FTYPE_V32QI_V32QI_USI:
34888 case UDI_FTYPE_V64QI_V64QI_UDI:
34889 case UQI_FTYPE_V8HI_V8HI_UQI:
34890 case UHI_FTYPE_V16HI_V16HI_UHI:
34891 case USI_FTYPE_V32HI_V32HI_USI:
34892 case UQI_FTYPE_V4SI_V4SI_UQI:
34893 case UQI_FTYPE_V8SI_V8SI_UQI:
34894 case UQI_FTYPE_V2DI_V2DI_UQI:
34895 case UQI_FTYPE_V4DI_V4DI_UQI:
34896 case V4SF_FTYPE_V2DF_V4SF_UQI:
34897 case V4SF_FTYPE_V4DF_V4SF_UQI:
34898 case V16SI_FTYPE_V16SI_V16SI_UHI:
34899 case V16SI_FTYPE_V4SI_V16SI_UHI:
34900 case V2DI_FTYPE_V4SI_V2DI_UQI:
34901 case V2DI_FTYPE_V8HI_V2DI_UQI:
34902 case V2DI_FTYPE_V16QI_V2DI_UQI:
34903 case V4DI_FTYPE_V4DI_V4DI_UQI:
34904 case V4DI_FTYPE_V4SI_V4DI_UQI:
34905 case V4DI_FTYPE_V8HI_V4DI_UQI:
34906 case V4DI_FTYPE_V16QI_V4DI_UQI:
34907 case V4DI_FTYPE_V4DF_V4DI_UQI:
34908 case V2DI_FTYPE_V2DF_V2DI_UQI:
34909 case V4SI_FTYPE_V4DF_V4SI_UQI:
34910 case V4SI_FTYPE_V2DF_V4SI_UQI:
34911 case V4SI_FTYPE_V8HI_V4SI_UQI:
34912 case V4SI_FTYPE_V16QI_V4SI_UQI:
34913 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34914 case V8DF_FTYPE_V2DF_V8DF_UQI:
34915 case V8DF_FTYPE_V4DF_V8DF_UQI:
34916 case V8DF_FTYPE_V8DF_V8DF_UQI:
34917 case V8SF_FTYPE_V8SF_V8SF_UQI:
34918 case V8SF_FTYPE_V8SI_V8SF_UQI:
34919 case V4DF_FTYPE_V4DF_V4DF_UQI:
34920 case V4SF_FTYPE_V4SF_V4SF_UQI:
34921 case V2DF_FTYPE_V2DF_V2DF_UQI:
34922 case V2DF_FTYPE_V4SF_V2DF_UQI:
34923 case V2DF_FTYPE_V4SI_V2DF_UQI:
34924 case V4SF_FTYPE_V4SI_V4SF_UQI:
34925 case V4DF_FTYPE_V4SF_V4DF_UQI:
34926 case V4DF_FTYPE_V4SI_V4DF_UQI:
34927 case V8SI_FTYPE_V8SI_V8SI_UQI:
34928 case V8SI_FTYPE_V8HI_V8SI_UQI:
34929 case V8SI_FTYPE_V16QI_V8SI_UQI:
34930 case V8DF_FTYPE_V8SI_V8DF_UQI:
34931 case V8DI_FTYPE_DI_V8DI_UQI:
34932 case V16SF_FTYPE_V8SF_V16SF_UHI:
34933 case V16SI_FTYPE_V8SI_V16SI_UHI:
34934 case V16HI_FTYPE_V16HI_V16HI_UHI:
34935 case V8HI_FTYPE_V16QI_V8HI_UQI:
34936 case V16HI_FTYPE_V16QI_V16HI_UHI:
34937 case V32HI_FTYPE_V32HI_V32HI_USI:
34938 case V32HI_FTYPE_V32QI_V32HI_USI:
34939 case V8DI_FTYPE_V16QI_V8DI_UQI:
34940 case V8DI_FTYPE_V2DI_V8DI_UQI:
34941 case V8DI_FTYPE_V4DI_V8DI_UQI:
34942 case V8DI_FTYPE_V8DI_V8DI_UQI:
34943 case V8DI_FTYPE_V8HI_V8DI_UQI:
34944 case V8DI_FTYPE_V8SI_V8DI_UQI:
34945 case V8HI_FTYPE_V8DI_V8HI_UQI:
34946 case V8SI_FTYPE_V8DI_V8SI_UQI:
34947 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34948 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34949 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34950 case V32HI_FTYPE_V32HI_V32HI_V32HI:
34951 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34952 case V16HI_FTYPE_V16HI_V16HI_V16HI:
34953 case V8SI_FTYPE_V8SI_V8SI_V8SI:
34954 case V8HI_FTYPE_V8HI_V8HI_V8HI:
34955 nargs = 3;
34956 break;
34957 case V32QI_FTYPE_V32QI_V32QI_INT:
34958 case V16HI_FTYPE_V16HI_V16HI_INT:
34959 case V16QI_FTYPE_V16QI_V16QI_INT:
34960 case V4DI_FTYPE_V4DI_V4DI_INT:
34961 case V8HI_FTYPE_V8HI_V8HI_INT:
34962 case V8SI_FTYPE_V8SI_V8SI_INT:
34963 case V8SI_FTYPE_V8SI_V4SI_INT:
34964 case V8SF_FTYPE_V8SF_V8SF_INT:
34965 case V8SF_FTYPE_V8SF_V4SF_INT:
34966 case V4SI_FTYPE_V4SI_V4SI_INT:
34967 case V4DF_FTYPE_V4DF_V4DF_INT:
34968 case V16SF_FTYPE_V16SF_V16SF_INT:
34969 case V16SF_FTYPE_V16SF_V4SF_INT:
34970 case V16SI_FTYPE_V16SI_V4SI_INT:
34971 case V4DF_FTYPE_V4DF_V2DF_INT:
34972 case V4SF_FTYPE_V4SF_V4SF_INT:
34973 case V2DI_FTYPE_V2DI_V2DI_INT:
34974 case V4DI_FTYPE_V4DI_V2DI_INT:
34975 case V2DF_FTYPE_V2DF_V2DF_INT:
34976 case UQI_FTYPE_V8DI_V8UDI_INT:
34977 case UQI_FTYPE_V8DF_V8DF_INT:
34978 case UQI_FTYPE_V2DF_V2DF_INT:
34979 case UQI_FTYPE_V4SF_V4SF_INT:
34980 case UHI_FTYPE_V16SI_V16SI_INT:
34981 case UHI_FTYPE_V16SF_V16SF_INT:
34982 case V64QI_FTYPE_V64QI_V64QI_INT:
34983 case V32HI_FTYPE_V32HI_V32HI_INT:
34984 case V16SI_FTYPE_V16SI_V16SI_INT:
34985 case V8DI_FTYPE_V8DI_V8DI_INT:
34986 nargs = 3;
34987 nargs_constant = 1;
34988 break;
34989 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
34990 nargs = 3;
34991 rmode = V4DImode;
34992 nargs_constant = 1;
34993 break;
34994 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
34995 nargs = 3;
34996 rmode = V2DImode;
34997 nargs_constant = 1;
34998 break;
34999 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
35000 nargs = 3;
35001 rmode = DImode;
35002 nargs_constant = 1;
35003 break;
35004 case V2DI_FTYPE_V2DI_UINT_UINT:
35005 nargs = 3;
35006 nargs_constant = 2;
35007 break;
35008 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
35009 nargs = 3;
35010 rmode = V8DImode;
35011 nargs_constant = 1;
35012 break;
35013 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
35014 nargs = 5;
35015 rmode = V8DImode;
35016 mask_pos = 2;
35017 nargs_constant = 1;
35018 break;
35019 case QI_FTYPE_V8DF_INT_UQI:
35020 case QI_FTYPE_V4DF_INT_UQI:
35021 case QI_FTYPE_V2DF_INT_UQI:
35022 case HI_FTYPE_V16SF_INT_UHI:
35023 case QI_FTYPE_V8SF_INT_UQI:
35024 case QI_FTYPE_V4SF_INT_UQI:
35025 case V4SI_FTYPE_V4SI_V4SI_UHI:
35026 case V8SI_FTYPE_V8SI_V8SI_UHI:
35027 nargs = 3;
35028 mask_pos = 1;
35029 nargs_constant = 1;
35030 break;
35031 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
35032 nargs = 5;
35033 rmode = V4DImode;
35034 mask_pos = 2;
35035 nargs_constant = 1;
35036 break;
35037 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
35038 nargs = 5;
35039 rmode = V2DImode;
35040 mask_pos = 2;
35041 nargs_constant = 1;
35042 break;
35043 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
35044 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
35045 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
35046 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
35047 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
35048 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
35049 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
35050 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
35051 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
35052 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
35053 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
35054 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
35055 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
35056 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
35057 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
35058 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
35059 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
35060 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
35061 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
35062 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
35063 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
35064 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
35065 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
35066 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
35067 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
35068 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
35069 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
35070 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
35071 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
35072 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
35073 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
35074 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
35075 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
35076 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
35077 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
35078 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
35079 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
35080 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
35081 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
35082 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
35083 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
35084 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
35085 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
35086 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
35087 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
35088 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
35089 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
35090 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
35091 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
35092 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
35093 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
35094 nargs = 4;
35095 break;
35096 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
35097 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
35098 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
35099 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
35100 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
35101 nargs = 4;
35102 nargs_constant = 1;
35103 break;
35104 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
35105 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
35106 case QI_FTYPE_V4DF_V4DF_INT_UQI:
35107 case QI_FTYPE_V8SF_V8SF_INT_UQI:
35108 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
35109 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
35110 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
35111 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
35112 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
35113 case USI_FTYPE_V32QI_V32QI_INT_USI:
35114 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
35115 case USI_FTYPE_V32HI_V32HI_INT_USI:
35116 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
35117 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
35118 case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
35119 case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
35120 case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
35121 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
35122 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
35123 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
35124 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
35125 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
35126 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
35127 nargs = 4;
35128 mask_pos = 1;
35129 nargs_constant = 1;
35130 break;
35131 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
35132 nargs = 4;
35133 nargs_constant = 2;
35134 break;
35135 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
35136 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
35137 nargs = 4;
35138 break;
35139 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
35140 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
35141 mask_pos = 1;
35142 nargs = 4;
35143 nargs_constant = 1;
35144 break;
35145 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
35146 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
35147 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
35148 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
35149 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
35150 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
35151 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
35152 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
35153 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
35154 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
35155 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
35156 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
35157 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
35158 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
35159 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
35160 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
35161 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
35162 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
35163 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
35164 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
35165 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
35166 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
35167 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
35168 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
35169 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
35170 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
35171 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
35172 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
35173 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
35174 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
35175 nargs = 4;
35176 mask_pos = 2;
35177 nargs_constant = 1;
35178 break;
35179 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
35180 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
35181 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
35182 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
35183 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
35184 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
35185 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
35186 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
35187 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
35188 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
35189 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
35190 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
35191 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
35192 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
35193 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
35194 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
35195 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
35196 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
35197 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
35198 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
35199 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
35200 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
35201 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
35202 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
35203 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
35204 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
35205 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
35206 nargs = 5;
35207 mask_pos = 2;
35208 nargs_constant = 1;
35209 break;
35210 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
35211 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
35212 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
35213 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
35214 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
35215 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
35216 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
35217 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
35218 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
35219 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
35220 nargs = 5;
35221 mask_pos = 1;
35222 nargs_constant = 1;
35223 break;
35224 case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
35225 case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
35226 case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
35227 case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
35228 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
35229 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
35230 case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
35231 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
35232 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
35233 case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
35234 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
35235 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
35236 nargs = 5;
35237 mask_pos = 1;
35238 nargs_constant = 2;
35239 break;
35241 default:
35242 gcc_unreachable ();
35245 gcc_assert (nargs <= ARRAY_SIZE (args));
35247 if (comparison != UNKNOWN)
35249 gcc_assert (nargs == 2);
35250 return ix86_expand_sse_compare (d, exp, target, swap);
35253 if (rmode == VOIDmode || rmode == tmode)
35255 if (optimize
35256 || target == 0
35257 || GET_MODE (target) != tmode
35258 || !insn_p->operand[0].predicate (target, tmode))
35259 target = gen_reg_rtx (tmode);
35260 else if (memory_operand (target, tmode))
35261 num_memory++;
35262 real_target = target;
35264 else
35266 real_target = gen_reg_rtx (tmode);
35267 target = lowpart_subreg (rmode, real_target, tmode);
35270 for (i = 0; i < nargs; i++)
35272 tree arg = CALL_EXPR_ARG (exp, i);
35273 rtx op = expand_normal (arg);
35274 machine_mode mode = insn_p->operand[i + 1].mode;
35275 bool match = insn_p->operand[i + 1].predicate (op, mode);
35277 if (second_arg_count && i == 1)
35279 /* SIMD shift insns take either an 8-bit immediate or a
35280 register as the count, but the builtin functions take an
35281 int count.  If the count doesn't match, put it in a register.
35282 The instructions use a 64-bit count; if op is only
35283 32-bit, zero-extend it, since negative shift counts
35284 are undefined behavior and zero-extension is more
35285 efficient. */
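/* A hypothetical source-level example of the mismatch described above:
   an _mm_slli_epi32 (v, n) style builtin declares n as a plain int,
   while the insn's count operand wants an immediate or a wider count
   register, hence the zero-extension fix-up that follows.  */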
35286 if (!match)
35288 if (SCALAR_INT_MODE_P (GET_MODE (op)))
35289 op = convert_modes (mode, GET_MODE (op), op, 1);
35290 else
35291 op = lowpart_subreg (mode, op, GET_MODE (op));
35292 if (!insn_p->operand[i + 1].predicate (op, mode))
35293 op = copy_to_reg (op);
35296 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35297 || (!mask_pos && (nargs - i) <= nargs_constant))
35299 if (!match)
35300 switch (icode)
35302 case CODE_FOR_avx_vinsertf128v4di:
35303 case CODE_FOR_avx_vextractf128v4di:
35304 error ("the last argument must be an 1-bit immediate");
35305 return const0_rtx;
35307 case CODE_FOR_avx512f_cmpv8di3_mask:
35308 case CODE_FOR_avx512f_cmpv16si3_mask:
35309 case CODE_FOR_avx512f_ucmpv8di3_mask:
35310 case CODE_FOR_avx512f_ucmpv16si3_mask:
35311 case CODE_FOR_avx512vl_cmpv4di3_mask:
35312 case CODE_FOR_avx512vl_cmpv8si3_mask:
35313 case CODE_FOR_avx512vl_ucmpv4di3_mask:
35314 case CODE_FOR_avx512vl_ucmpv8si3_mask:
35315 case CODE_FOR_avx512vl_cmpv2di3_mask:
35316 case CODE_FOR_avx512vl_cmpv4si3_mask:
35317 case CODE_FOR_avx512vl_ucmpv2di3_mask:
35318 case CODE_FOR_avx512vl_ucmpv4si3_mask:
35319 error ("the last argument must be a 3-bit immediate");
35320 return const0_rtx;
35322 case CODE_FOR_sse4_1_roundsd:
35323 case CODE_FOR_sse4_1_roundss:
35325 case CODE_FOR_sse4_1_roundpd:
35326 case CODE_FOR_sse4_1_roundps:
35327 case CODE_FOR_avx_roundpd256:
35328 case CODE_FOR_avx_roundps256:
35330 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
35331 case CODE_FOR_sse4_1_roundps_sfix:
35332 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
35333 case CODE_FOR_avx_roundps_sfix256:
35335 case CODE_FOR_sse4_1_blendps:
35336 case CODE_FOR_avx_blendpd256:
35337 case CODE_FOR_avx_vpermilv4df:
35338 case CODE_FOR_avx_vpermilv4df_mask:
35339 case CODE_FOR_avx512f_getmantv8df_mask:
35340 case CODE_FOR_avx512f_getmantv16sf_mask:
35341 case CODE_FOR_avx512vl_getmantv8sf_mask:
35342 case CODE_FOR_avx512vl_getmantv4df_mask:
35343 case CODE_FOR_avx512vl_getmantv4sf_mask:
35344 case CODE_FOR_avx512vl_getmantv2df_mask:
35345 case CODE_FOR_avx512dq_rangepv8df_mask_round:
35346 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
35347 case CODE_FOR_avx512dq_rangepv4df_mask:
35348 case CODE_FOR_avx512dq_rangepv8sf_mask:
35349 case CODE_FOR_avx512dq_rangepv2df_mask:
35350 case CODE_FOR_avx512dq_rangepv4sf_mask:
35351 case CODE_FOR_avx_shufpd256_mask:
35352 error ("the last argument must be a 4-bit immediate");
35353 return const0_rtx;
35355 case CODE_FOR_sha1rnds4:
35356 case CODE_FOR_sse4_1_blendpd:
35357 case CODE_FOR_avx_vpermilv2df:
35358 case CODE_FOR_avx_vpermilv2df_mask:
35359 case CODE_FOR_xop_vpermil2v2df3:
35360 case CODE_FOR_xop_vpermil2v4sf3:
35361 case CODE_FOR_xop_vpermil2v4df3:
35362 case CODE_FOR_xop_vpermil2v8sf3:
35363 case CODE_FOR_avx512f_vinsertf32x4_mask:
35364 case CODE_FOR_avx512f_vinserti32x4_mask:
35365 case CODE_FOR_avx512f_vextractf32x4_mask:
35366 case CODE_FOR_avx512f_vextracti32x4_mask:
35367 case CODE_FOR_sse2_shufpd:
35368 case CODE_FOR_sse2_shufpd_mask:
35369 case CODE_FOR_avx512dq_shuf_f64x2_mask:
35370 case CODE_FOR_avx512dq_shuf_i64x2_mask:
35371 case CODE_FOR_avx512vl_shuf_i32x4_mask:
35372 case CODE_FOR_avx512vl_shuf_f32x4_mask:
35373 error ("the last argument must be a 2-bit immediate");
35374 return const0_rtx;
35376 case CODE_FOR_avx_vextractf128v4df:
35377 case CODE_FOR_avx_vextractf128v8sf:
35378 case CODE_FOR_avx_vextractf128v8si:
35379 case CODE_FOR_avx_vinsertf128v4df:
35380 case CODE_FOR_avx_vinsertf128v8sf:
35381 case CODE_FOR_avx_vinsertf128v8si:
35382 case CODE_FOR_avx512f_vinsertf64x4_mask:
35383 case CODE_FOR_avx512f_vinserti64x4_mask:
35384 case CODE_FOR_avx512f_vextractf64x4_mask:
35385 case CODE_FOR_avx512f_vextracti64x4_mask:
35386 case CODE_FOR_avx512dq_vinsertf32x8_mask:
35387 case CODE_FOR_avx512dq_vinserti32x8_mask:
35388 case CODE_FOR_avx512vl_vinsertv4df:
35389 case CODE_FOR_avx512vl_vinsertv4di:
35390 case CODE_FOR_avx512vl_vinsertv8sf:
35391 case CODE_FOR_avx512vl_vinsertv8si:
35392 error ("the last argument must be a 1-bit immediate");
35393 return const0_rtx;
35395 case CODE_FOR_avx_vmcmpv2df3:
35396 case CODE_FOR_avx_vmcmpv4sf3:
35397 case CODE_FOR_avx_cmpv2df3:
35398 case CODE_FOR_avx_cmpv4sf3:
35399 case CODE_FOR_avx_cmpv4df3:
35400 case CODE_FOR_avx_cmpv8sf3:
35401 case CODE_FOR_avx512f_cmpv8df3_mask:
35402 case CODE_FOR_avx512f_cmpv16sf3_mask:
35403 case CODE_FOR_avx512f_vmcmpv2df3_mask:
35404 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
35405 error ("the last argument must be a 5-bit immediate");
35406 return const0_rtx;
35408 default:
35409 switch (nargs_constant)
35411 case 2:
35412 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35413 || (!mask_pos && (nargs - i) == nargs_constant))
35415 error ("the next to last argument must be an 8-bit immediate");
35416 break;
35418 /* FALLTHRU */
35419 case 1:
35420 error ("the last argument must be an 8-bit immediate");
35421 break;
35422 default:
35423 gcc_unreachable ();
35425 return const0_rtx;
35428 else
35430 if (VECTOR_MODE_P (mode))
35431 op = safe_vector_operand (op, mode);
35433 /* If we aren't optimizing, only allow one memory operand to
35434 be generated. */
35435 if (memory_operand (op, mode))
35436 num_memory++;
35438 op = fixup_modeless_constant (op, mode);
35440 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35442 if (optimize || !match || num_memory > 1)
35443 op = copy_to_mode_reg (mode, op);
35445 else
35447 op = copy_to_reg (op);
35448 op = lowpart_subreg (mode, op, GET_MODE (op));
35452 args[i].op = op;
35453 args[i].mode = mode;
35456 switch (nargs)
35458 case 1:
35459 pat = GEN_FCN (icode) (real_target, args[0].op);
35460 break;
35461 case 2:
35462 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
35463 break;
35464 case 3:
35465 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35466 args[2].op);
35467 break;
35468 case 4:
35469 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35470 args[2].op, args[3].op);
35471 break;
35472 case 5:
35473 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35474 args[2].op, args[3].op, args[4].op);
35475 break;
35476 case 6:
35477 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35478 args[2].op, args[3].op, args[4].op,
35479 args[5].op);
35480 break;
35481 default:
35482 gcc_unreachable ();
35485 if (! pat)
35486 return 0;
35488 emit_insn (pat);
35489 return target;
35492 /* Transform a pattern of the following layout:
35493 (set A
35494 (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
35496 into:
35497 (set A B) */
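/* For illustration (operand shapes only sketched, not the exact patterns):
   with NO_ROUND the embedded-rounding wrapper carries no extra information,
   so a source pattern conceptually like
     (set (reg flags) (unspec [(compare ...) (rounding operand)]
                              UNSPEC_EMBEDDED_ROUNDING))
   is rewritten to the plain (set (reg flags) (compare ...)) form.  */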
35499 static rtx
35500 ix86_erase_embedded_rounding (rtx pat)
35502 if (GET_CODE (pat) == INSN)
35503 pat = PATTERN (pat);
35505 gcc_assert (GET_CODE (pat) == SET);
35506 rtx src = SET_SRC (pat);
35507 gcc_assert (XVECLEN (src, 0) == 2);
35508 rtx p0 = XVECEXP (src, 0, 0);
35509 gcc_assert (GET_CODE (src) == UNSPEC
35510 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
35511 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
35512 return res;
35515 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
35516 with rounding. */
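/* For illustration: these builtins take (x, y, predicate, sae), where the
   predicate is one of the 32 _CMP_* values; the tables below translate it
   into an RTL comparison code plus a comi-vs-ucomi choice.  A hypothetical
   _mm_comi_round_sd (a, b, _CMP_GE_OQ, sae) call would therefore end up as
   a GE test on the (u)comi result, with the SAE operand erased when it is
   NO_ROUND.  */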
35517 static rtx
35518 ix86_expand_sse_comi_round (const struct builtin_description *d,
35519 tree exp, rtx target)
35521 rtx pat, set_dst;
35522 tree arg0 = CALL_EXPR_ARG (exp, 0);
35523 tree arg1 = CALL_EXPR_ARG (exp, 1);
35524 tree arg2 = CALL_EXPR_ARG (exp, 2);
35525 tree arg3 = CALL_EXPR_ARG (exp, 3);
35526 rtx op0 = expand_normal (arg0);
35527 rtx op1 = expand_normal (arg1);
35528 rtx op2 = expand_normal (arg2);
35529 rtx op3 = expand_normal (arg3);
35530 enum insn_code icode = d->icode;
35531 const struct insn_data_d *insn_p = &insn_data[icode];
35532 machine_mode mode0 = insn_p->operand[0].mode;
35533 machine_mode mode1 = insn_p->operand[1].mode;
35534 enum rtx_code comparison = UNEQ;
35535 bool need_ucomi = false;
35537 /* See avxintrin.h for values. */
35538 enum rtx_code comi_comparisons[32] =
35540 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
35541 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
35542 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
35544 bool need_ucomi_values[32] =
35546 true, false, false, true, true, false, false, true,
35547 true, false, false, true, true, false, false, true,
35548 false, true, true, false, false, true, true, false,
35549 false, true, true, false, false, true, true, false
35552 if (!CONST_INT_P (op2))
35554 error ("the third argument must be a comparison constant");
35555 return const0_rtx;
35557 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
35559 error ("incorrect comparison mode");
35560 return const0_rtx;
35563 if (!insn_p->operand[2].predicate (op3, SImode))
35565 error ("incorrect rounding operand");
35566 return const0_rtx;
35569 comparison = comi_comparisons[INTVAL (op2)];
35570 need_ucomi = need_ucomi_values[INTVAL (op2)];
35572 if (VECTOR_MODE_P (mode0))
35573 op0 = safe_vector_operand (op0, mode0);
35574 if (VECTOR_MODE_P (mode1))
35575 op1 = safe_vector_operand (op1, mode1);
35577 target = gen_reg_rtx (SImode);
35578 emit_move_insn (target, const0_rtx);
35579 target = gen_rtx_SUBREG (QImode, target, 0);
35581 if ((optimize && !register_operand (op0, mode0))
35582 || !insn_p->operand[0].predicate (op0, mode0))
35583 op0 = copy_to_mode_reg (mode0, op0);
35584 if ((optimize && !register_operand (op1, mode1))
35585 || !insn_p->operand[1].predicate (op1, mode1))
35586 op1 = copy_to_mode_reg (mode1, op1);
35588 if (need_ucomi)
35589 icode = icode == CODE_FOR_sse_comi_round
35590 ? CODE_FOR_sse_ucomi_round
35591 : CODE_FOR_sse2_ucomi_round;
35593 pat = GEN_FCN (icode) (op0, op1, op3);
35594 if (! pat)
35595 return 0;
35597 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
35598 if (INTVAL (op3) == NO_ROUND)
35600 pat = ix86_erase_embedded_rounding (pat);
35601 if (! pat)
35602 return 0;
35604 set_dst = SET_DEST (pat);
35606 else
35608 gcc_assert (GET_CODE (pat) == SET);
35609 set_dst = SET_DEST (pat);
35612 emit_insn (pat);
35613 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35614 gen_rtx_fmt_ee (comparison, QImode,
35615 set_dst,
35616 const0_rtx)));
35618 return SUBREG_REG (target);
35621 static rtx
35622 ix86_expand_round_builtin (const struct builtin_description *d,
35623 tree exp, rtx target)
35625 rtx pat;
35626 unsigned int i, nargs;
35627 struct
35629 rtx op;
35630 machine_mode mode;
35631 } args[6];
35632 enum insn_code icode = d->icode;
35633 const struct insn_data_d *insn_p = &insn_data[icode];
35634 machine_mode tmode = insn_p->operand[0].mode;
35635 unsigned int nargs_constant = 0;
35636 unsigned int redundant_embed_rnd = 0;
35638 switch ((enum ix86_builtin_func_type) d->flag)
35640 case UINT64_FTYPE_V2DF_INT:
35641 case UINT64_FTYPE_V4SF_INT:
35642 case UINT_FTYPE_V2DF_INT:
35643 case UINT_FTYPE_V4SF_INT:
35644 case INT64_FTYPE_V2DF_INT:
35645 case INT64_FTYPE_V4SF_INT:
35646 case INT_FTYPE_V2DF_INT:
35647 case INT_FTYPE_V4SF_INT:
35648 nargs = 2;
35649 break;
35650 case V4SF_FTYPE_V4SF_UINT_INT:
35651 case V4SF_FTYPE_V4SF_UINT64_INT:
35652 case V2DF_FTYPE_V2DF_UINT64_INT:
35653 case V4SF_FTYPE_V4SF_INT_INT:
35654 case V4SF_FTYPE_V4SF_INT64_INT:
35655 case V2DF_FTYPE_V2DF_INT64_INT:
35656 case V4SF_FTYPE_V4SF_V4SF_INT:
35657 case V2DF_FTYPE_V2DF_V2DF_INT:
35658 case V4SF_FTYPE_V4SF_V2DF_INT:
35659 case V2DF_FTYPE_V2DF_V4SF_INT:
35660 nargs = 3;
35661 break;
35662 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
35663 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
35664 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
35665 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
35666 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
35667 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
35668 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
35669 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
35670 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
35671 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
35672 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
35673 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
35674 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
35675 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
35676 nargs = 4;
35677 break;
35678 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
35679 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
35680 nargs_constant = 2;
35681 nargs = 4;
35682 break;
35683 case INT_FTYPE_V4SF_V4SF_INT_INT:
35684 case INT_FTYPE_V2DF_V2DF_INT_INT:
35685 return ix86_expand_sse_comi_round (d, exp, target);
35686 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
35687 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
35688 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
35689 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
35690 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
35691 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
35692 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
35693 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
35694 nargs = 5;
35695 break;
35696 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
35697 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
35698 nargs_constant = 4;
35699 nargs = 5;
35700 break;
35701 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
35702 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
35703 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
35704 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
35705 nargs_constant = 3;
35706 nargs = 5;
35707 break;
35708 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
35709 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
35710 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
35711 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
35712 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
35713 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
35714 nargs = 6;
35715 nargs_constant = 4;
35716 break;
35717 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
35718 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
35719 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
35720 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
35721 nargs = 6;
35722 nargs_constant = 3;
35723 break;
35724 default:
35725 gcc_unreachable ();
35727 gcc_assert (nargs <= ARRAY_SIZE (args));
35729 if (optimize
35730 || target == 0
35731 || GET_MODE (target) != tmode
35732 || !insn_p->operand[0].predicate (target, tmode))
35733 target = gen_reg_rtx (tmode);
35735 for (i = 0; i < nargs; i++)
35737 tree arg = CALL_EXPR_ARG (exp, i);
35738 rtx op = expand_normal (arg);
35739 machine_mode mode = insn_p->operand[i + 1].mode;
35740 bool match = insn_p->operand[i + 1].predicate (op, mode);
35742 if (i == nargs - nargs_constant)
35744 if (!match)
35746 switch (icode)
35748 case CODE_FOR_avx512f_getmantv8df_mask_round:
35749 case CODE_FOR_avx512f_getmantv16sf_mask_round:
35750 case CODE_FOR_avx512f_vgetmantv2df_round:
35751 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
35752 case CODE_FOR_avx512f_vgetmantv4sf_round:
35753 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
35754 error ("the immediate argument must be a 4-bit immediate");
35755 return const0_rtx;
35756 case CODE_FOR_avx512f_cmpv8df3_mask_round:
35757 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
35758 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
35759 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
35760 error ("the immediate argument must be a 5-bit immediate");
35761 return const0_rtx;
35762 default:
35763 error ("the immediate argument must be an 8-bit immediate");
35764 return const0_rtx;
35768 else if (i == nargs - 1)
35770 if (!insn_p->operand[nargs].predicate (op, SImode))
35772 error ("incorrect rounding operand");
35773 return const0_rtx;
35776 /* If there is no rounding, use the normal version of the pattern. */
35777 if (INTVAL (op) == NO_ROUND)
35778 redundant_embed_rnd = 1;
35780 else
35782 if (VECTOR_MODE_P (mode))
35783 op = safe_vector_operand (op, mode);
35785 op = fixup_modeless_constant (op, mode);
35787 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35789 if (optimize || !match)
35790 op = copy_to_mode_reg (mode, op);
35792 else
35794 op = copy_to_reg (op);
35795 op = lowpart_subreg (mode, op, GET_MODE (op));
35799 args[i].op = op;
35800 args[i].mode = mode;
35803 switch (nargs)
35805 case 1:
35806 pat = GEN_FCN (icode) (target, args[0].op);
35807 break;
35808 case 2:
35809 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35810 break;
35811 case 3:
35812 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35813 args[2].op);
35814 break;
35815 case 4:
35816 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35817 args[2].op, args[3].op);
35818 break;
35819 case 5:
35820 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35821 args[2].op, args[3].op, args[4].op);
35822 break;
35823 case 6:
35824 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35825 args[2].op, args[3].op, args[4].op,
35826 args[5].op);
35827 break;
35828 default:
35829 gcc_unreachable ();
35832 if (!pat)
35833 return 0;
35835 if (redundant_embed_rnd)
35836 pat = ix86_erase_embedded_rounding (pat);
35838 emit_insn (pat);
35839 return target;
35842 /* Subroutine of ix86_expand_builtin to take care of special insns
35843 with a variable number of operands. */
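/* For illustration: "special" builtins have at least one operand that is
   not a plain vector register.  Each case below classifies the builtin as a
   load or a store, records which operand is the memory reference, and flags
   the non-temporal moves and the full-width masked loads/stores as
   requiring properly aligned memory; e.g. a movnt-style store builtin is
   assumed to take the aligned_mem path, while an unaligned masked load
   keeps the default handling.  */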
35845 static rtx
35846 ix86_expand_special_args_builtin (const struct builtin_description *d,
35847 tree exp, rtx target)
35849 tree arg;
35850 rtx pat, op;
35851 unsigned int i, nargs, arg_adjust, memory;
35852 bool aligned_mem = false;
35853 struct
35855 rtx op;
35856 machine_mode mode;
35857 } args[3];
35858 enum insn_code icode = d->icode;
35859 bool last_arg_constant = false;
35860 const struct insn_data_d *insn_p = &insn_data[icode];
35861 machine_mode tmode = insn_p->operand[0].mode;
35862 enum { load, store } klass;
35864 switch ((enum ix86_builtin_func_type) d->flag)
35866 case VOID_FTYPE_VOID:
35867 emit_insn (GEN_FCN (icode) (target));
35868 return 0;
35869 case VOID_FTYPE_UINT64:
35870 case VOID_FTYPE_UNSIGNED:
35871 nargs = 0;
35872 klass = store;
35873 memory = 0;
35874 break;
35876 case INT_FTYPE_VOID:
35877 case USHORT_FTYPE_VOID:
35878 case UINT64_FTYPE_VOID:
35879 case UINT_FTYPE_VOID:
35880 case UNSIGNED_FTYPE_VOID:
35881 nargs = 0;
35882 klass = load;
35883 memory = 0;
35884 break;
35885 case UINT64_FTYPE_PUNSIGNED:
35886 case V2DI_FTYPE_PV2DI:
35887 case V4DI_FTYPE_PV4DI:
35888 case V32QI_FTYPE_PCCHAR:
35889 case V16QI_FTYPE_PCCHAR:
35890 case V8SF_FTYPE_PCV4SF:
35891 case V8SF_FTYPE_PCFLOAT:
35892 case V4SF_FTYPE_PCFLOAT:
35893 case V4DF_FTYPE_PCV2DF:
35894 case V4DF_FTYPE_PCDOUBLE:
35895 case V2DF_FTYPE_PCDOUBLE:
35896 case VOID_FTYPE_PVOID:
35897 case V8DI_FTYPE_PV8DI:
35898 nargs = 1;
35899 klass = load;
35900 memory = 0;
35901 switch (icode)
35903 case CODE_FOR_sse4_1_movntdqa:
35904 case CODE_FOR_avx2_movntdqa:
35905 case CODE_FOR_avx512f_movntdqa:
35906 aligned_mem = true;
35907 break;
35908 default:
35909 break;
35911 break;
35912 case VOID_FTYPE_PV2SF_V4SF:
35913 case VOID_FTYPE_PV8DI_V8DI:
35914 case VOID_FTYPE_PV4DI_V4DI:
35915 case VOID_FTYPE_PV2DI_V2DI:
35916 case VOID_FTYPE_PCHAR_V32QI:
35917 case VOID_FTYPE_PCHAR_V16QI:
35918 case VOID_FTYPE_PFLOAT_V16SF:
35919 case VOID_FTYPE_PFLOAT_V8SF:
35920 case VOID_FTYPE_PFLOAT_V4SF:
35921 case VOID_FTYPE_PDOUBLE_V8DF:
35922 case VOID_FTYPE_PDOUBLE_V4DF:
35923 case VOID_FTYPE_PDOUBLE_V2DF:
35924 case VOID_FTYPE_PLONGLONG_LONGLONG:
35925 case VOID_FTYPE_PULONGLONG_ULONGLONG:
35926 case VOID_FTYPE_PINT_INT:
35927 nargs = 1;
35928 klass = store;
35929 /* Reserve memory operand for target. */
35930 memory = ARRAY_SIZE (args);
35931 switch (icode)
35933 /* These builtins and instructions require the memory
35934 to be properly aligned. */
35935 case CODE_FOR_avx_movntv4di:
35936 case CODE_FOR_sse2_movntv2di:
35937 case CODE_FOR_avx_movntv8sf:
35938 case CODE_FOR_sse_movntv4sf:
35939 case CODE_FOR_sse4a_vmmovntv4sf:
35940 case CODE_FOR_avx_movntv4df:
35941 case CODE_FOR_sse2_movntv2df:
35942 case CODE_FOR_sse4a_vmmovntv2df:
35943 case CODE_FOR_sse2_movntidi:
35944 case CODE_FOR_sse_movntq:
35945 case CODE_FOR_sse2_movntisi:
35946 case CODE_FOR_avx512f_movntv16sf:
35947 case CODE_FOR_avx512f_movntv8df:
35948 case CODE_FOR_avx512f_movntv8di:
35949 aligned_mem = true;
35950 break;
35951 default:
35952 break;
35954 break;
35955 case V4SF_FTYPE_V4SF_PCV2SF:
35956 case V2DF_FTYPE_V2DF_PCDOUBLE:
35957 nargs = 2;
35958 klass = load;
35959 memory = 1;
35960 break;
35961 case V8SF_FTYPE_PCV8SF_V8SI:
35962 case V4DF_FTYPE_PCV4DF_V4DI:
35963 case V4SF_FTYPE_PCV4SF_V4SI:
35964 case V2DF_FTYPE_PCV2DF_V2DI:
35965 case V8SI_FTYPE_PCV8SI_V8SI:
35966 case V4DI_FTYPE_PCV4DI_V4DI:
35967 case V4SI_FTYPE_PCV4SI_V4SI:
35968 case V2DI_FTYPE_PCV2DI_V2DI:
35969 case VOID_FTYPE_INT_INT64:
35970 nargs = 2;
35971 klass = load;
35972 memory = 0;
35973 break;
35974 case VOID_FTYPE_PV8DF_V8DF_UQI:
35975 case VOID_FTYPE_PV4DF_V4DF_UQI:
35976 case VOID_FTYPE_PV2DF_V2DF_UQI:
35977 case VOID_FTYPE_PV16SF_V16SF_UHI:
35978 case VOID_FTYPE_PV8SF_V8SF_UQI:
35979 case VOID_FTYPE_PV4SF_V4SF_UQI:
35980 case VOID_FTYPE_PV8DI_V8DI_UQI:
35981 case VOID_FTYPE_PV4DI_V4DI_UQI:
35982 case VOID_FTYPE_PV2DI_V2DI_UQI:
35983 case VOID_FTYPE_PV16SI_V16SI_UHI:
35984 case VOID_FTYPE_PV8SI_V8SI_UQI:
35985 case VOID_FTYPE_PV4SI_V4SI_UQI:
35986 case VOID_FTYPE_PV64QI_V64QI_UDI:
35987 case VOID_FTYPE_PV32HI_V32HI_USI:
35988 case VOID_FTYPE_PV32QI_V32QI_USI:
35989 case VOID_FTYPE_PV16QI_V16QI_UHI:
35990 case VOID_FTYPE_PV16HI_V16HI_UHI:
35991 case VOID_FTYPE_PV8HI_V8HI_UQI:
35992 switch (icode)
35994 /* These builtins and instructions require the memory
35995 to be properly aligned. */
35996 case CODE_FOR_avx512f_storev16sf_mask:
35997 case CODE_FOR_avx512f_storev16si_mask:
35998 case CODE_FOR_avx512f_storev8df_mask:
35999 case CODE_FOR_avx512f_storev8di_mask:
36000 case CODE_FOR_avx512vl_storev8sf_mask:
36001 case CODE_FOR_avx512vl_storev8si_mask:
36002 case CODE_FOR_avx512vl_storev4df_mask:
36003 case CODE_FOR_avx512vl_storev4di_mask:
36004 case CODE_FOR_avx512vl_storev4sf_mask:
36005 case CODE_FOR_avx512vl_storev4si_mask:
36006 case CODE_FOR_avx512vl_storev2df_mask:
36007 case CODE_FOR_avx512vl_storev2di_mask:
36008 aligned_mem = true;
36009 break;
36010 default:
36011 break;
36013 /* FALLTHRU */
36014 case VOID_FTYPE_PV8SF_V8SI_V8SF:
36015 case VOID_FTYPE_PV4DF_V4DI_V4DF:
36016 case VOID_FTYPE_PV4SF_V4SI_V4SF:
36017 case VOID_FTYPE_PV2DF_V2DI_V2DF:
36018 case VOID_FTYPE_PV8SI_V8SI_V8SI:
36019 case VOID_FTYPE_PV4DI_V4DI_V4DI:
36020 case VOID_FTYPE_PV4SI_V4SI_V4SI:
36021 case VOID_FTYPE_PV2DI_V2DI_V2DI:
36022 case VOID_FTYPE_PV8SI_V8DI_UQI:
36023 case VOID_FTYPE_PV8HI_V8DI_UQI:
36024 case VOID_FTYPE_PV16HI_V16SI_UHI:
36025 case VOID_FTYPE_PV16QI_V8DI_UQI:
36026 case VOID_FTYPE_PV16QI_V16SI_UHI:
36027 case VOID_FTYPE_PV4SI_V4DI_UQI:
36028 case VOID_FTYPE_PV4SI_V2DI_UQI:
36029 case VOID_FTYPE_PV8HI_V4DI_UQI:
36030 case VOID_FTYPE_PV8HI_V2DI_UQI:
36031 case VOID_FTYPE_PV8HI_V8SI_UQI:
36032 case VOID_FTYPE_PV8HI_V4SI_UQI:
36033 case VOID_FTYPE_PV16QI_V4DI_UQI:
36034 case VOID_FTYPE_PV16QI_V2DI_UQI:
36035 case VOID_FTYPE_PV16QI_V8SI_UQI:
36036 case VOID_FTYPE_PV16QI_V4SI_UQI:
36037 case VOID_FTYPE_PCHAR_V64QI_UDI:
36038 case VOID_FTYPE_PCHAR_V32QI_USI:
36039 case VOID_FTYPE_PCHAR_V16QI_UHI:
36040 case VOID_FTYPE_PSHORT_V32HI_USI:
36041 case VOID_FTYPE_PSHORT_V16HI_UHI:
36042 case VOID_FTYPE_PSHORT_V8HI_UQI:
36043 case VOID_FTYPE_PINT_V16SI_UHI:
36044 case VOID_FTYPE_PINT_V8SI_UQI:
36045 case VOID_FTYPE_PINT_V4SI_UQI:
36046 case VOID_FTYPE_PINT64_V8DI_UQI:
36047 case VOID_FTYPE_PINT64_V4DI_UQI:
36048 case VOID_FTYPE_PINT64_V2DI_UQI:
36049 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
36050 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
36051 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
36052 case VOID_FTYPE_PFLOAT_V16SF_UHI:
36053 case VOID_FTYPE_PFLOAT_V8SF_UQI:
36054 case VOID_FTYPE_PFLOAT_V4SF_UQI:
36055 case VOID_FTYPE_PV32QI_V32HI_USI:
36056 case VOID_FTYPE_PV16QI_V16HI_UHI:
36057 case VOID_FTYPE_PV8QI_V8HI_UQI:
36058 nargs = 2;
36059 klass = store;
36060 /* Reserve memory operand for target. */
36061 memory = ARRAY_SIZE (args);
36062 break;
36063 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
36064 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
36065 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
36066 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
36067 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
36068 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
36069 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
36070 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
36071 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
36072 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
36073 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
36074 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
36075 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
36076 case V32HI_FTYPE_PCV32HI_V32HI_USI:
36077 case V32QI_FTYPE_PCV32QI_V32QI_USI:
36078 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
36079 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
36080 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
36081 switch (icode)
36083 /* These builtins and instructions require the memory
36084 to be properly aligned. */
36085 case CODE_FOR_avx512f_loadv16sf_mask:
36086 case CODE_FOR_avx512f_loadv16si_mask:
36087 case CODE_FOR_avx512f_loadv8df_mask:
36088 case CODE_FOR_avx512f_loadv8di_mask:
36089 case CODE_FOR_avx512vl_loadv8sf_mask:
36090 case CODE_FOR_avx512vl_loadv8si_mask:
36091 case CODE_FOR_avx512vl_loadv4df_mask:
36092 case CODE_FOR_avx512vl_loadv4di_mask:
36093 case CODE_FOR_avx512vl_loadv4sf_mask:
36094 case CODE_FOR_avx512vl_loadv4si_mask:
36095 case CODE_FOR_avx512vl_loadv2df_mask:
36096 case CODE_FOR_avx512vl_loadv2di_mask:
36097 case CODE_FOR_avx512bw_loadv64qi_mask:
36098 case CODE_FOR_avx512vl_loadv32qi_mask:
36099 case CODE_FOR_avx512vl_loadv16qi_mask:
36100 case CODE_FOR_avx512bw_loadv32hi_mask:
36101 case CODE_FOR_avx512vl_loadv16hi_mask:
36102 case CODE_FOR_avx512vl_loadv8hi_mask:
36103 aligned_mem = true;
36104 break;
36105 default:
36106 break;
36108 /* FALLTHRU */
36109 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
36110 case V32QI_FTYPE_PCCHAR_V32QI_USI:
36111 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
36112 case V32HI_FTYPE_PCSHORT_V32HI_USI:
36113 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
36114 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
36115 case V16SI_FTYPE_PCINT_V16SI_UHI:
36116 case V8SI_FTYPE_PCINT_V8SI_UQI:
36117 case V4SI_FTYPE_PCINT_V4SI_UQI:
36118 case V8DI_FTYPE_PCINT64_V8DI_UQI:
36119 case V4DI_FTYPE_PCINT64_V4DI_UQI:
36120 case V2DI_FTYPE_PCINT64_V2DI_UQI:
36121 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
36122 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
36123 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
36124 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
36125 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
36126 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
36127 nargs = 3;
36128 klass = load;
36129 memory = 0;
36130 break;
36131 case VOID_FTYPE_UINT_UINT_UINT:
36132 case VOID_FTYPE_UINT64_UINT_UINT:
36133 case UCHAR_FTYPE_UINT_UINT_UINT:
36134 case UCHAR_FTYPE_UINT64_UINT_UINT:
36135 nargs = 3;
36136 klass = load;
36137 memory = ARRAY_SIZE (args);
36138 last_arg_constant = true;
36139 break;
36140 default:
36141 gcc_unreachable ();
36144 gcc_assert (nargs <= ARRAY_SIZE (args));
36146 if (klass == store)
36148 arg = CALL_EXPR_ARG (exp, 0);
36149 op = expand_normal (arg);
36150 gcc_assert (target == 0);
36151 if (memory)
36153 op = ix86_zero_extend_to_Pmode (op);
36154 target = gen_rtx_MEM (tmode, op);
36155 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
36156 on it. Try to improve it using get_pointer_alignment,
36157 and if the special builtin is one that requires strict
36158 mode alignment, also from its GET_MODE_ALIGNMENT.
36159 Failure to do so could lead to ix86_legitimate_combined_insn
36160 rejecting all changes to such insns. */
36161 unsigned int align = get_pointer_alignment (arg);
36162 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
36163 align = GET_MODE_ALIGNMENT (tmode);
36164 if (MEM_ALIGN (target) < align)
36165 set_mem_align (target, align);
36167 else
36168 target = force_reg (tmode, op);
36169 arg_adjust = 1;
36171 else
36173 arg_adjust = 0;
36174 if (optimize
36175 || target == 0
36176 || !register_operand (target, tmode)
36177 || GET_MODE (target) != tmode)
36178 target = gen_reg_rtx (tmode);
36181 for (i = 0; i < nargs; i++)
36183 machine_mode mode = insn_p->operand[i + 1].mode;
36184 bool match;
36186 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
36187 op = expand_normal (arg);
36188 match = insn_p->operand[i + 1].predicate (op, mode);
36190 if (last_arg_constant && (i + 1) == nargs)
36192 if (!match)
36194 if (icode == CODE_FOR_lwp_lwpvalsi3
36195 || icode == CODE_FOR_lwp_lwpinssi3
36196 || icode == CODE_FOR_lwp_lwpvaldi3
36197 || icode == CODE_FOR_lwp_lwpinsdi3)
36198 error ("the last argument must be a 32-bit immediate");
36199 else
36200 error ("the last argument must be an 8-bit immediate");
36201 return const0_rtx;
36204 else
36206 if (i == memory)
36208 /* This must be the memory operand. */
36209 op = ix86_zero_extend_to_Pmode (op);
36210 op = gen_rtx_MEM (mode, op);
36211 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
36212 on it. Try to improve it using get_pointer_alignment,
36213 and if the special builtin is one that requires strict
36214 mode alignment, also from its GET_MODE_ALIGNMENT.
36215 Failure to do so could lead to ix86_legitimate_combined_insn
36216 rejecting all changes to such insns. */
36217 unsigned int align = get_pointer_alignment (arg);
36218 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
36219 align = GET_MODE_ALIGNMENT (mode);
36220 if (MEM_ALIGN (op) < align)
36221 set_mem_align (op, align);
36223 else
36225 /* This must be a register. */
36226 if (VECTOR_MODE_P (mode))
36227 op = safe_vector_operand (op, mode);
36229 op = fixup_modeless_constant (op, mode);
36231 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
36232 op = copy_to_mode_reg (mode, op);
36233 else
36235 op = copy_to_reg (op);
36236 op = lowpart_subreg (mode, op, GET_MODE (op));
36241 args[i].op = op;
36242 args[i].mode = mode;
36245 switch (nargs)
36247 case 0:
36248 pat = GEN_FCN (icode) (target);
36249 break;
36250 case 1:
36251 pat = GEN_FCN (icode) (target, args[0].op);
36252 break;
36253 case 2:
36254 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36255 break;
36256 case 3:
36257 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36258 break;
36259 default:
36260 gcc_unreachable ();
36263 if (! pat)
36264 return 0;
36265 emit_insn (pat);
36266 return klass == store ? 0 : target;
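/* Store-class special builtins are expanded purely for their side
   effects, so nothing is returned for them; load-class builtins hand
   back the register holding the loaded value.  */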
36269 /* Return the integer constant in ARG. Constrain it to be in the range
36270 of the subparts of VEC_TYPE; issue an error if not. */
36272 static int
36273 get_element_number (tree vec_type, tree arg)
36275 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
36277 if (!tree_fits_uhwi_p (arg)
36278 || (elt = tree_to_uhwi (arg), elt > max))
36280 error ("selector must be an integer constant in the range 0..%wi", max);
36281 return 0;
36284 return elt;
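/* Note that when the selector is out of range we report the error and
   return element 0 rather than failing, so that expansion can continue
   and still produce valid RTL.  */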
36287 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36288 ix86_expand_vector_init. We DO have language-level syntax for this, in
36289 the form of (type){ init-list }. Except that since we can't place emms
36290 instructions from inside the compiler, we can't allow the use of MMX
36291 registers unless the user explicitly asks for it. So we do *not* define
36292 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
36293 we have builtins invoked by mmintrin.h that gives us license to emit
36294 these sorts of instructions. */
36296 static rtx
36297 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
36299 machine_mode tmode = TYPE_MODE (type);
36300 machine_mode inner_mode = GET_MODE_INNER (tmode);
36301 int i, n_elt = GET_MODE_NUNITS (tmode);
36302 rtvec v = rtvec_alloc (n_elt);
36304 gcc_assert (VECTOR_MODE_P (tmode));
36305 gcc_assert (call_expr_nargs (exp) == n_elt);
36307 for (i = 0; i < n_elt; ++i)
36309 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
36310 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
36313 if (!target || !register_operand (target, tmode))
36314 target = gen_reg_rtx (tmode);
36316 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
36317 return target;
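/* Illustrative use (the exact user-level builtin name is an assumption):
   something like __builtin_ia32_vec_init_v2si (1, 2) expands here into a
   V2SImode vector initializer, with the result forced into a fresh
   register when the incoming target is not a suitable register operand.  */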
36320 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36321 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
36322 had a language-level syntax for referencing vector elements. */
36324 static rtx
36325 ix86_expand_vec_ext_builtin (tree exp, rtx target)
36327 machine_mode tmode, mode0;
36328 tree arg0, arg1;
36329 int elt;
36330 rtx op0;
36332 arg0 = CALL_EXPR_ARG (exp, 0);
36333 arg1 = CALL_EXPR_ARG (exp, 1);
36335 op0 = expand_normal (arg0);
36336 elt = get_element_number (TREE_TYPE (arg0), arg1);
36338 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36339 mode0 = TYPE_MODE (TREE_TYPE (arg0));
36340 gcc_assert (VECTOR_MODE_P (mode0));
36342 op0 = force_reg (mode0, op0);
36344 if (optimize || !target || !register_operand (target, tmode))
36345 target = gen_reg_rtx (tmode);
36347 ix86_expand_vector_extract (true, target, op0, elt);
36349 return target;
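/* Illustrative use (builtin name is an assumption): with a V4SFmode
   argument, something like __builtin_ia32_vec_ext_v4sf (v, 2) extracts
   element 2; the selector must be a constant in the range checked by
   get_element_number above.  */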
36352 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36353 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
36354 a language-level syntax for referencing vector elements. */
36356 static rtx
36357 ix86_expand_vec_set_builtin (tree exp)
36359 machine_mode tmode, mode1;
36360 tree arg0, arg1, arg2;
36361 int elt;
36362 rtx op0, op1, target;
36364 arg0 = CALL_EXPR_ARG (exp, 0);
36365 arg1 = CALL_EXPR_ARG (exp, 1);
36366 arg2 = CALL_EXPR_ARG (exp, 2);
36368 tmode = TYPE_MODE (TREE_TYPE (arg0));
36369 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36370 gcc_assert (VECTOR_MODE_P (tmode));
36372 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
36373 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
36374 elt = get_element_number (TREE_TYPE (arg0), arg2);
36376 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
36377 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
36379 op0 = force_reg (tmode, op0);
36380 op1 = force_reg (mode1, op1);
36382 /* OP0 is the source of these builtin functions and shouldn't be
36383 modified. Create a copy, use it and return it as target. */
36384 target = gen_reg_rtx (tmode);
36385 emit_move_insn (target, op0);
36386 ix86_expand_vector_set (true, target, op1, elt);
36388 return target;
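/* Note the copy-then-set semantics: the result is a copy of ARG0 with
   element ELT replaced by ARG1, while ARG0 itself is left unmodified.  */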
36391 /* Emit conditional move of SRC to DST with condition
36392 OP1 CODE OP2. */
36393 static void
36394 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
36396 rtx t;
36398 if (TARGET_CMOVE)
36400 t = ix86_expand_compare (code, op1, op2);
36401 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
36402 src, dst)));
36404 else
36406 rtx_code_label *nomove = gen_label_rtx ();
36407 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
36408 const0_rtx, GET_MODE (op1), 1, nomove);
36409 emit_move_insn (dst, src);
36410 emit_label (nomove);
36414 /* Choose max of DST and SRC and put it to DST. */
36415 static void
36416 ix86_emit_move_max (rtx dst, rtx src)
36418 ix86_emit_cmove (dst, src, LTU, dst, src);
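/* For MPX bounds the lower bound is stored directly while the upper
   bound is stored in one's complement form, so an unsigned max of the
   lower bounds narrows from below, and an unsigned max of the
   complemented upper bounds corresponds to a min of the real upper
   bounds, narrowing from above.  This is why the narrow/intersect
   expanders below use ix86_emit_move_max for both halves.  */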
36421 /* Expand an expression EXP that calls a built-in function,
36422 with result going to TARGET if that's convenient
36423 (and in mode MODE if that's convenient).
36424 SUBTARGET may be used as the target for computing one of EXP's operands.
36425 IGNORE is nonzero if the value is to be ignored. */
36427 static rtx
36428 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
36429 machine_mode mode, int ignore)
36431 size_t i;
36432 enum insn_code icode, icode2;
36433 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
36434 tree arg0, arg1, arg2, arg3, arg4;
36435 rtx op0, op1, op2, op3, op4, pat, pat2, insn;
36436 machine_mode mode0, mode1, mode2, mode3, mode4;
36437 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
36439 /* For CPU builtins that can be folded, fold first and expand the fold. */
36440 switch (fcode)
36442 case IX86_BUILTIN_CPU_INIT:
36444 /* Make it call __cpu_indicator_init in libgcc. */
36445 tree call_expr, fndecl, type;
36446 type = build_function_type_list (integer_type_node, NULL_TREE);
36447 fndecl = build_fn_decl ("__cpu_indicator_init", type);
36448 call_expr = build_call_expr (fndecl, 0);
36449 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
36451 case IX86_BUILTIN_CPU_IS:
36452 case IX86_BUILTIN_CPU_SUPPORTS:
36454 tree arg0 = CALL_EXPR_ARG (exp, 0);
36455 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
36456 gcc_assert (fold_expr != NULL_TREE);
36457 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
36461 HOST_WIDE_INT isa = ix86_isa_flags;
36462 HOST_WIDE_INT isa2 = ix86_isa_flags2;
36463 HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
36464 HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2;
36465 /* The general case is we require all the ISAs specified in bisa{,2}
36466 to be enabled.
36467 The exceptions are:
36468 OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
36469 OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
36470 OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
36471 where for each such pair it is sufficient if either of the ISAs is
36472 enabled, plus, if it is ORed with other options, those others as well. */
36473 if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36474 == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36475 && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
36476 isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A);
36477 if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36478 == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36479 && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0)
36480 isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32);
36481 if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36482 == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36483 && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
36484 isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
36485 if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
36487 char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
36488 (enum fpmath_unit) 0, false);
36489 if (!opts)
36490 error ("%qE needs unknown isa option", fndecl);
36491 else
36493 gcc_assert (opts != NULL);
36494 error ("%qE needs isa option %s", fndecl, opts);
36495 free (opts);
36497 return expand_call (exp, target, ignore);
36500 switch (fcode)
36502 case IX86_BUILTIN_BNDMK:
36503 if (!target
36504 || GET_MODE (target) != BNDmode
36505 || !register_operand (target, BNDmode))
36506 target = gen_reg_rtx (BNDmode);
36508 arg0 = CALL_EXPR_ARG (exp, 0);
36509 arg1 = CALL_EXPR_ARG (exp, 1);
36511 op0 = expand_normal (arg0);
36512 op1 = expand_normal (arg1);
36514 if (!register_operand (op0, Pmode))
36515 op0 = ix86_zero_extend_to_Pmode (op0);
36516 if (!register_operand (op1, Pmode))
36517 op1 = ix86_zero_extend_to_Pmode (op1);
36519 /* Builtin arg1 is the size of the block, but instruction op1 should
36520 be (size - 1). */
36521 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
36522 NULL_RTX, 1, OPTAB_DIRECT);
36524 emit_insn (BNDmode == BND64mode
36525 ? gen_bnd64_mk (target, op0, op1)
36526 : gen_bnd32_mk (target, op0, op1));
36527 return target;
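/* The resulting bounds cover [op0, op0 + size - 1]; the PLUS with
   constm1_rtx above supplies the (size - 1) form that the bndmk
   instruction expects.  */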
36529 case IX86_BUILTIN_BNDSTX:
36530 arg0 = CALL_EXPR_ARG (exp, 0);
36531 arg1 = CALL_EXPR_ARG (exp, 1);
36532 arg2 = CALL_EXPR_ARG (exp, 2);
36534 op0 = expand_normal (arg0);
36535 op1 = expand_normal (arg1);
36536 op2 = expand_normal (arg2);
36538 if (!register_operand (op0, Pmode))
36539 op0 = ix86_zero_extend_to_Pmode (op0);
36540 if (!register_operand (op1, BNDmode))
36541 op1 = copy_to_mode_reg (BNDmode, op1);
36542 if (!register_operand (op2, Pmode))
36543 op2 = ix86_zero_extend_to_Pmode (op2);
36545 emit_insn (BNDmode == BND64mode
36546 ? gen_bnd64_stx (op2, op0, op1)
36547 : gen_bnd32_stx (op2, op0, op1));
36548 return 0;
36550 case IX86_BUILTIN_BNDLDX:
36551 if (!target
36552 || GET_MODE (target) != BNDmode
36553 || !register_operand (target, BNDmode))
36554 target = gen_reg_rtx (BNDmode);
36556 arg0 = CALL_EXPR_ARG (exp, 0);
36557 arg1 = CALL_EXPR_ARG (exp, 1);
36559 op0 = expand_normal (arg0);
36560 op1 = expand_normal (arg1);
36562 if (!register_operand (op0, Pmode))
36563 op0 = ix86_zero_extend_to_Pmode (op0);
36564 if (!register_operand (op1, Pmode))
36565 op1 = ix86_zero_extend_to_Pmode (op1);
36567 emit_insn (BNDmode == BND64mode
36568 ? gen_bnd64_ldx (target, op0, op1)
36569 : gen_bnd32_ldx (target, op0, op1));
36570 return target;
36572 case IX86_BUILTIN_BNDCL:
36573 arg0 = CALL_EXPR_ARG (exp, 0);
36574 arg1 = CALL_EXPR_ARG (exp, 1);
36576 op0 = expand_normal (arg0);
36577 op1 = expand_normal (arg1);
36579 if (!register_operand (op0, Pmode))
36580 op0 = ix86_zero_extend_to_Pmode (op0);
36581 if (!register_operand (op1, BNDmode))
36582 op1 = copy_to_mode_reg (BNDmode, op1);
36584 emit_insn (BNDmode == BND64mode
36585 ? gen_bnd64_cl (op1, op0)
36586 : gen_bnd32_cl (op1, op0));
36587 return 0;
36589 case IX86_BUILTIN_BNDCU:
36590 arg0 = CALL_EXPR_ARG (exp, 0);
36591 arg1 = CALL_EXPR_ARG (exp, 1);
36593 op0 = expand_normal (arg0);
36594 op1 = expand_normal (arg1);
36596 if (!register_operand (op0, Pmode))
36597 op0 = ix86_zero_extend_to_Pmode (op0);
36598 if (!register_operand (op1, BNDmode))
36599 op1 = copy_to_mode_reg (BNDmode, op1);
36601 emit_insn (BNDmode == BND64mode
36602 ? gen_bnd64_cu (op1, op0)
36603 : gen_bnd32_cu (op1, op0));
36604 return 0;
36606 case IX86_BUILTIN_BNDRET:
36607 arg0 = CALL_EXPR_ARG (exp, 0);
36608 target = chkp_get_rtl_bounds (arg0);
36610 /* If no bounds were specified for the returned value,
36611 then use INIT bounds. This usually happens when
36612 some built-in function is expanded. */
36613 if (!target)
36615 rtx t1 = gen_reg_rtx (Pmode);
36616 rtx t2 = gen_reg_rtx (Pmode);
36617 target = gen_reg_rtx (BNDmode);
36618 emit_move_insn (t1, const0_rtx);
36619 emit_move_insn (t2, constm1_rtx);
36620 emit_insn (BNDmode == BND64mode
36621 ? gen_bnd64_mk (target, t1, t2)
36622 : gen_bnd32_mk (target, t1, t2));
36625 gcc_assert (target && REG_P (target));
36626 return target;
36628 case IX86_BUILTIN_BNDNARROW:
36630 rtx m1, m1h1, m1h2, lb, ub, t1;
36632 /* Return value and lb. */
36633 arg0 = CALL_EXPR_ARG (exp, 0);
36634 /* Bounds. */
36635 arg1 = CALL_EXPR_ARG (exp, 1);
36636 /* Size. */
36637 arg2 = CALL_EXPR_ARG (exp, 2);
36639 lb = expand_normal (arg0);
36640 op1 = expand_normal (arg1);
36641 op2 = expand_normal (arg2);
36643 /* Size was passed but we need to use (size - 1) as for bndmk. */
36644 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
36645 NULL_RTX, 1, OPTAB_DIRECT);
36647 /* Add LB to size and invert to get UB. */
36648 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
36649 op2, 1, OPTAB_DIRECT);
36650 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
36652 if (!register_operand (lb, Pmode))
36653 lb = ix86_zero_extend_to_Pmode (lb);
36654 if (!register_operand (ub, Pmode))
36655 ub = ix86_zero_extend_to_Pmode (ub);
36657 /* We need to move bounds to memory before any computations. */
36658 if (MEM_P (op1))
36659 m1 = op1;
36660 else
36662 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
36663 emit_move_insn (m1, op1);
36666 /* Generate mem expression to be used for access to LB and UB. */
36667 m1h1 = adjust_address (m1, Pmode, 0);
36668 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
36670 t1 = gen_reg_rtx (Pmode);
36672 /* Compute LB. */
36673 emit_move_insn (t1, m1h1);
36674 ix86_emit_move_max (t1, lb);
36675 emit_move_insn (m1h1, t1);
36677 /* Compute UB. UB is stored in 1's complement form. Therefore
36678 we also use max here. */
36679 emit_move_insn (t1, m1h2);
36680 ix86_emit_move_max (t1, ub);
36681 emit_move_insn (m1h2, t1);
36683 op2 = gen_reg_rtx (BNDmode);
36684 emit_move_insn (op2, m1);
36686 return chkp_join_splitted_slot (lb, op2);
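/* The narrowed bounds are effectively the intersection of the incoming
   bounds with [lb, lb + size - 1]: the lower-bound halves are combined
   with an unsigned max, and the one's-complement upper-bound halves are
   combined the same way, which amounts to taking the smaller real upper
   bound.  */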
36689 case IX86_BUILTIN_BNDINT:
36691 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
36693 if (!target
36694 || GET_MODE (target) != BNDmode
36695 || !register_operand (target, BNDmode))
36696 target = gen_reg_rtx (BNDmode);
36698 arg0 = CALL_EXPR_ARG (exp, 0);
36699 arg1 = CALL_EXPR_ARG (exp, 1);
36701 op0 = expand_normal (arg0);
36702 op1 = expand_normal (arg1);
36704 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
36705 rh1 = adjust_address (res, Pmode, 0);
36706 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
36708 /* Put the first bounds into temporaries. */
36709 lb1 = gen_reg_rtx (Pmode);
36710 ub1 = gen_reg_rtx (Pmode);
36711 if (MEM_P (op0))
36713 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
36714 emit_move_insn (ub1, adjust_address (op0, Pmode,
36715 GET_MODE_SIZE (Pmode)));
36717 else
36719 emit_move_insn (res, op0);
36720 emit_move_insn (lb1, rh1);
36721 emit_move_insn (ub1, rh2);
36724 /* Put the second bounds into temporaries. */
36725 lb2 = gen_reg_rtx (Pmode);
36726 ub2 = gen_reg_rtx (Pmode);
36727 if (MEM_P (op1))
36729 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
36730 emit_move_insn (ub2, adjust_address (op1, Pmode,
36731 GET_MODE_SIZE (Pmode)));
36733 else
36735 emit_move_insn (res, op1);
36736 emit_move_insn (lb2, rh1);
36737 emit_move_insn (ub2, rh2);
36740 /* Compute LB. */
36741 ix86_emit_move_max (lb1, lb2);
36742 emit_move_insn (rh1, lb1);
36744 /* Compute UB. UB is stored in 1's complement form. Therefore
36745 we also use max here. */
36746 ix86_emit_move_max (ub1, ub2);
36747 emit_move_insn (rh2, ub1);
36749 emit_move_insn (target, res);
36751 return target;
36754 case IX86_BUILTIN_SIZEOF:
36756 tree name;
36757 rtx symbol;
36759 if (!target
36760 || GET_MODE (target) != Pmode
36761 || !register_operand (target, Pmode))
36762 target = gen_reg_rtx (Pmode);
36764 arg0 = CALL_EXPR_ARG (exp, 0);
36765 gcc_assert (VAR_P (arg0));
36767 name = DECL_ASSEMBLER_NAME (arg0);
36768 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
36770 emit_insn (Pmode == SImode
36771 ? gen_move_size_reloc_si (target, symbol)
36772 : gen_move_size_reloc_di (target, symbol));
36774 return target;
36777 case IX86_BUILTIN_BNDLOWER:
36779 rtx mem, hmem;
36781 if (!target
36782 || GET_MODE (target) != Pmode
36783 || !register_operand (target, Pmode))
36784 target = gen_reg_rtx (Pmode);
36786 arg0 = CALL_EXPR_ARG (exp, 0);
36787 op0 = expand_normal (arg0);
36789 /* We need to move bounds to memory first. */
36790 if (MEM_P (op0))
36791 mem = op0;
36792 else
36794 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
36795 emit_move_insn (mem, op0);
36798 /* Generate mem expression to access LB and load it. */
36799 hmem = adjust_address (mem, Pmode, 0);
36800 emit_move_insn (target, hmem);
36802 return target;
36805 case IX86_BUILTIN_BNDUPPER:
36807 rtx mem, hmem, res;
36809 if (!target
36810 || GET_MODE (target) != Pmode
36811 || !register_operand (target, Pmode))
36812 target = gen_reg_rtx (Pmode);
36814 arg0 = CALL_EXPR_ARG (exp, 0);
36815 op0 = expand_normal (arg0);
36817 /* We need to move bounds to memory first. */
36818 if (MEM_P (op0))
36819 mem = op0;
36820 else
36822 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
36823 emit_move_insn (mem, op0);
36826 /* Generate mem expression to access UB. */
36827 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
36829 /* We need to invert all bits of UB. */
36830 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
36832 if (res != target)
36833 emit_move_insn (target, res);
36835 return target;
36838 case IX86_BUILTIN_MASKMOVQ:
36839 case IX86_BUILTIN_MASKMOVDQU:
36840 icode = (fcode == IX86_BUILTIN_MASKMOVQ
36841 ? CODE_FOR_mmx_maskmovq
36842 : CODE_FOR_sse2_maskmovdqu);
36843 /* Note the arg order is different from the operand order. */
36844 arg1 = CALL_EXPR_ARG (exp, 0);
36845 arg2 = CALL_EXPR_ARG (exp, 1);
36846 arg0 = CALL_EXPR_ARG (exp, 2);
36847 op0 = expand_normal (arg0);
36848 op1 = expand_normal (arg1);
36849 op2 = expand_normal (arg2);
36850 mode0 = insn_data[icode].operand[0].mode;
36851 mode1 = insn_data[icode].operand[1].mode;
36852 mode2 = insn_data[icode].operand[2].mode;
36854 op0 = ix86_zero_extend_to_Pmode (op0);
36855 op0 = gen_rtx_MEM (mode1, op0);
36857 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36858 op0 = copy_to_mode_reg (mode0, op0);
36859 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36860 op1 = copy_to_mode_reg (mode1, op1);
36861 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36862 op2 = copy_to_mode_reg (mode2, op2);
36863 pat = GEN_FCN (icode) (op0, op1, op2);
36864 if (! pat)
36865 return 0;
36866 emit_insn (pat);
36867 return 0;
36869 case IX86_BUILTIN_LDMXCSR:
36870 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
36871 target = assign_386_stack_local (SImode, SLOT_TEMP);
36872 emit_move_insn (target, op0);
36873 emit_insn (gen_sse_ldmxcsr (target));
36874 return 0;
36876 case IX86_BUILTIN_STMXCSR:
36877 target = assign_386_stack_local (SImode, SLOT_TEMP);
36878 emit_insn (gen_sse_stmxcsr (target));
36879 return copy_to_mode_reg (SImode, target);
36881 case IX86_BUILTIN_CLFLUSH:
36882 arg0 = CALL_EXPR_ARG (exp, 0);
36883 op0 = expand_normal (arg0);
36884 icode = CODE_FOR_sse2_clflush;
36885 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36886 op0 = ix86_zero_extend_to_Pmode (op0);
36888 emit_insn (gen_sse2_clflush (op0));
36889 return 0;
36891 case IX86_BUILTIN_CLWB:
36892 arg0 = CALL_EXPR_ARG (exp, 0);
36893 op0 = expand_normal (arg0);
36894 icode = CODE_FOR_clwb;
36895 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36896 op0 = ix86_zero_extend_to_Pmode (op0);
36898 emit_insn (gen_clwb (op0));
36899 return 0;
36901 case IX86_BUILTIN_CLFLUSHOPT:
36902 arg0 = CALL_EXPR_ARG (exp, 0);
36903 op0 = expand_normal (arg0);
36904 icode = CODE_FOR_clflushopt;
36905 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36906 op0 = ix86_zero_extend_to_Pmode (op0);
36908 emit_insn (gen_clflushopt (op0));
36909 return 0;
36911 case IX86_BUILTIN_MONITOR:
36912 case IX86_BUILTIN_MONITORX:
36913 arg0 = CALL_EXPR_ARG (exp, 0);
36914 arg1 = CALL_EXPR_ARG (exp, 1);
36915 arg2 = CALL_EXPR_ARG (exp, 2);
36916 op0 = expand_normal (arg0);
36917 op1 = expand_normal (arg1);
36918 op2 = expand_normal (arg2);
36919 if (!REG_P (op0))
36920 op0 = ix86_zero_extend_to_Pmode (op0);
36921 if (!REG_P (op1))
36922 op1 = copy_to_mode_reg (SImode, op1);
36923 if (!REG_P (op2))
36924 op2 = copy_to_mode_reg (SImode, op2);
36926 emit_insn (fcode == IX86_BUILTIN_MONITOR
36927 ? ix86_gen_monitor (op0, op1, op2)
36928 : ix86_gen_monitorx (op0, op1, op2));
36929 return 0;
36931 case IX86_BUILTIN_MWAIT:
36932 arg0 = CALL_EXPR_ARG (exp, 0);
36933 arg1 = CALL_EXPR_ARG (exp, 1);
36934 op0 = expand_normal (arg0);
36935 op1 = expand_normal (arg1);
36936 if (!REG_P (op0))
36937 op0 = copy_to_mode_reg (SImode, op0);
36938 if (!REG_P (op1))
36939 op1 = copy_to_mode_reg (SImode, op1);
36940 emit_insn (gen_sse3_mwait (op0, op1));
36941 return 0;
36943 case IX86_BUILTIN_MWAITX:
36944 arg0 = CALL_EXPR_ARG (exp, 0);
36945 arg1 = CALL_EXPR_ARG (exp, 1);
36946 arg2 = CALL_EXPR_ARG (exp, 2);
36947 op0 = expand_normal (arg0);
36948 op1 = expand_normal (arg1);
36949 op2 = expand_normal (arg2);
36950 if (!REG_P (op0))
36951 op0 = copy_to_mode_reg (SImode, op0);
36952 if (!REG_P (op1))
36953 op1 = copy_to_mode_reg (SImode, op1);
36954 if (!REG_P (op2))
36955 op2 = copy_to_mode_reg (SImode, op2);
36956 emit_insn (gen_mwaitx (op0, op1, op2));
36957 return 0;
36959 case IX86_BUILTIN_CLZERO:
36960 arg0 = CALL_EXPR_ARG (exp, 0);
36961 op0 = expand_normal (arg0);
36962 if (!REG_P (op0))
36963 op0 = ix86_zero_extend_to_Pmode (op0);
36964 emit_insn (ix86_gen_clzero (op0));
36965 return 0;
36967 case IX86_BUILTIN_VEC_INIT_V2SI:
36968 case IX86_BUILTIN_VEC_INIT_V4HI:
36969 case IX86_BUILTIN_VEC_INIT_V8QI:
36970 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
36972 case IX86_BUILTIN_VEC_EXT_V2DF:
36973 case IX86_BUILTIN_VEC_EXT_V2DI:
36974 case IX86_BUILTIN_VEC_EXT_V4SF:
36975 case IX86_BUILTIN_VEC_EXT_V4SI:
36976 case IX86_BUILTIN_VEC_EXT_V8HI:
36977 case IX86_BUILTIN_VEC_EXT_V2SI:
36978 case IX86_BUILTIN_VEC_EXT_V4HI:
36979 case IX86_BUILTIN_VEC_EXT_V16QI:
36980 return ix86_expand_vec_ext_builtin (exp, target);
36982 case IX86_BUILTIN_VEC_SET_V2DI:
36983 case IX86_BUILTIN_VEC_SET_V4SF:
36984 case IX86_BUILTIN_VEC_SET_V4SI:
36985 case IX86_BUILTIN_VEC_SET_V8HI:
36986 case IX86_BUILTIN_VEC_SET_V4HI:
36987 case IX86_BUILTIN_VEC_SET_V16QI:
36988 return ix86_expand_vec_set_builtin (exp);
36990 case IX86_BUILTIN_NANQ:
36991 case IX86_BUILTIN_NANSQ:
36992 return expand_call (exp, target, ignore);
36994 case IX86_BUILTIN_RDPID:
36996 op0 = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
36998 if (TARGET_64BIT)
37000 insn = gen_rdpid_rex64 (op0);
37001 op0 = convert_to_mode (SImode, op0, 1);
37003 else
37004 insn = gen_rdpid (op0);
37005 emit_insn (insn);
37007 if (target == 0)
37009 /* mode is VOIDmode if __builtin_rdpid has been called
37010 without lhs. */
37011 if (mode == VOIDmode)
37012 return target;
37013 target = gen_reg_rtx (mode);
37015 emit_move_insn (target, op0);
37016 return target;
37017 case IX86_BUILTIN_RDPMC:
37018 case IX86_BUILTIN_RDTSC:
37019 case IX86_BUILTIN_RDTSCP:
37020 case IX86_BUILTIN_XGETBV:
37022 op0 = gen_reg_rtx (DImode);
37023 op1 = gen_reg_rtx (DImode);
37025 if (fcode == IX86_BUILTIN_RDPMC)
37027 arg0 = CALL_EXPR_ARG (exp, 0);
37028 op2 = expand_normal (arg0);
37029 if (!register_operand (op2, SImode))
37030 op2 = copy_to_mode_reg (SImode, op2);
37032 insn = (TARGET_64BIT
37033 ? gen_rdpmc_rex64 (op0, op1, op2)
37034 : gen_rdpmc (op0, op2));
37035 emit_insn (insn);
37037 else if (fcode == IX86_BUILTIN_XGETBV)
37039 arg0 = CALL_EXPR_ARG (exp, 0);
37040 op2 = expand_normal (arg0);
37041 if (!register_operand (op2, SImode))
37042 op2 = copy_to_mode_reg (SImode, op2);
37044 insn = (TARGET_64BIT
37045 ? gen_xgetbv_rex64 (op0, op1, op2)
37046 : gen_xgetbv (op0, op2));
37047 emit_insn (insn);
37049 else if (fcode == IX86_BUILTIN_RDTSC)
37051 insn = (TARGET_64BIT
37052 ? gen_rdtsc_rex64 (op0, op1)
37053 : gen_rdtsc (op0));
37054 emit_insn (insn);
37056 else
37058 op2 = gen_reg_rtx (SImode);
37060 insn = (TARGET_64BIT
37061 ? gen_rdtscp_rex64 (op0, op1, op2)
37062 : gen_rdtscp (op0, op2));
37063 emit_insn (insn);
37065 arg0 = CALL_EXPR_ARG (exp, 0);
37066 op4 = expand_normal (arg0);
37067 if (!address_operand (op4, VOIDmode))
37069 op4 = convert_memory_address (Pmode, op4);
37070 op4 = copy_addr_to_reg (op4);
37072 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
37075 if (target == 0)
37077 /* mode is VOIDmode if __builtin_rd* has been called
37078 without lhs. */
37079 if (mode == VOIDmode)
37080 return target;
37081 target = gen_reg_rtx (mode);
37084 if (TARGET_64BIT)
37086 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
37087 op1, 1, OPTAB_DIRECT);
37088 op0 = expand_simple_binop (DImode, IOR, op0, op1,
37089 op0, 1, OPTAB_DIRECT);
37092 emit_move_insn (target, op0);
37093 return target;
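/* On 64-bit targets the result arrives as two 32-bit halves; the code
   above shifts the high half left by 32 and IORs it with the low half to
   form the full DImode value before copying it to TARGET.  */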
37095 case IX86_BUILTIN_FXSAVE:
37096 case IX86_BUILTIN_FXRSTOR:
37097 case IX86_BUILTIN_FXSAVE64:
37098 case IX86_BUILTIN_FXRSTOR64:
37099 case IX86_BUILTIN_FNSTENV:
37100 case IX86_BUILTIN_FLDENV:
37101 mode0 = BLKmode;
37102 switch (fcode)
37104 case IX86_BUILTIN_FXSAVE:
37105 icode = CODE_FOR_fxsave;
37106 break;
37107 case IX86_BUILTIN_FXRSTOR:
37108 icode = CODE_FOR_fxrstor;
37109 break;
37110 case IX86_BUILTIN_FXSAVE64:
37111 icode = CODE_FOR_fxsave64;
37112 break;
37113 case IX86_BUILTIN_FXRSTOR64:
37114 icode = CODE_FOR_fxrstor64;
37115 break;
37116 case IX86_BUILTIN_FNSTENV:
37117 icode = CODE_FOR_fnstenv;
37118 break;
37119 case IX86_BUILTIN_FLDENV:
37120 icode = CODE_FOR_fldenv;
37121 break;
37122 default:
37123 gcc_unreachable ();
37126 arg0 = CALL_EXPR_ARG (exp, 0);
37127 op0 = expand_normal (arg0);
37129 if (!address_operand (op0, VOIDmode))
37131 op0 = convert_memory_address (Pmode, op0);
37132 op0 = copy_addr_to_reg (op0);
37134 op0 = gen_rtx_MEM (mode0, op0);
37136 pat = GEN_FCN (icode) (op0);
37137 if (pat)
37138 emit_insn (pat);
37139 return 0;
37141 case IX86_BUILTIN_XSETBV:
37142 arg0 = CALL_EXPR_ARG (exp, 0);
37143 arg1 = CALL_EXPR_ARG (exp, 1);
37144 op0 = expand_normal (arg0);
37145 op1 = expand_normal (arg1);
37147 if (!REG_P (op0))
37148 op0 = copy_to_mode_reg (SImode, op0);
37150 if (TARGET_64BIT)
37152 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37153 NULL, 1, OPTAB_DIRECT);
37155 op2 = gen_lowpart (SImode, op2);
37156 op1 = gen_lowpart (SImode, op1);
37157 if (!REG_P (op1))
37158 op1 = copy_to_mode_reg (SImode, op1);
37159 if (!REG_P (op2))
37160 op2 = copy_to_mode_reg (SImode, op2);
37161 icode = CODE_FOR_xsetbv_rex64;
37162 pat = GEN_FCN (icode) (op0, op1, op2);
37164 else
37166 if (!REG_P (op1))
37167 op1 = copy_to_mode_reg (DImode, op1);
37168 icode = CODE_FOR_xsetbv;
37169 pat = GEN_FCN (icode) (op0, op1);
37171 if (pat)
37172 emit_insn (pat);
37173 return 0;
37175 case IX86_BUILTIN_XSAVE:
37176 case IX86_BUILTIN_XRSTOR:
37177 case IX86_BUILTIN_XSAVE64:
37178 case IX86_BUILTIN_XRSTOR64:
37179 case IX86_BUILTIN_XSAVEOPT:
37180 case IX86_BUILTIN_XSAVEOPT64:
37181 case IX86_BUILTIN_XSAVES:
37182 case IX86_BUILTIN_XRSTORS:
37183 case IX86_BUILTIN_XSAVES64:
37184 case IX86_BUILTIN_XRSTORS64:
37185 case IX86_BUILTIN_XSAVEC:
37186 case IX86_BUILTIN_XSAVEC64:
37187 arg0 = CALL_EXPR_ARG (exp, 0);
37188 arg1 = CALL_EXPR_ARG (exp, 1);
37189 op0 = expand_normal (arg0);
37190 op1 = expand_normal (arg1);
37192 if (!address_operand (op0, VOIDmode))
37194 op0 = convert_memory_address (Pmode, op0);
37195 op0 = copy_addr_to_reg (op0);
37197 op0 = gen_rtx_MEM (BLKmode, op0);
37199 op1 = force_reg (DImode, op1);
37201 if (TARGET_64BIT)
37203 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37204 NULL, 1, OPTAB_DIRECT);
37205 switch (fcode)
37207 case IX86_BUILTIN_XSAVE:
37208 icode = CODE_FOR_xsave_rex64;
37209 break;
37210 case IX86_BUILTIN_XRSTOR:
37211 icode = CODE_FOR_xrstor_rex64;
37212 break;
37213 case IX86_BUILTIN_XSAVE64:
37214 icode = CODE_FOR_xsave64;
37215 break;
37216 case IX86_BUILTIN_XRSTOR64:
37217 icode = CODE_FOR_xrstor64;
37218 break;
37219 case IX86_BUILTIN_XSAVEOPT:
37220 icode = CODE_FOR_xsaveopt_rex64;
37221 break;
37222 case IX86_BUILTIN_XSAVEOPT64:
37223 icode = CODE_FOR_xsaveopt64;
37224 break;
37225 case IX86_BUILTIN_XSAVES:
37226 icode = CODE_FOR_xsaves_rex64;
37227 break;
37228 case IX86_BUILTIN_XRSTORS:
37229 icode = CODE_FOR_xrstors_rex64;
37230 break;
37231 case IX86_BUILTIN_XSAVES64:
37232 icode = CODE_FOR_xsaves64;
37233 break;
37234 case IX86_BUILTIN_XRSTORS64:
37235 icode = CODE_FOR_xrstors64;
37236 break;
37237 case IX86_BUILTIN_XSAVEC:
37238 icode = CODE_FOR_xsavec_rex64;
37239 break;
37240 case IX86_BUILTIN_XSAVEC64:
37241 icode = CODE_FOR_xsavec64;
37242 break;
37243 default:
37244 gcc_unreachable ();
37247 op2 = gen_lowpart (SImode, op2);
37248 op1 = gen_lowpart (SImode, op1);
37249 pat = GEN_FCN (icode) (op0, op1, op2);
37251 else
37253 switch (fcode)
37255 case IX86_BUILTIN_XSAVE:
37256 icode = CODE_FOR_xsave;
37257 break;
37258 case IX86_BUILTIN_XRSTOR:
37259 icode = CODE_FOR_xrstor;
37260 break;
37261 case IX86_BUILTIN_XSAVEOPT:
37262 icode = CODE_FOR_xsaveopt;
37263 break;
37264 case IX86_BUILTIN_XSAVES:
37265 icode = CODE_FOR_xsaves;
37266 break;
37267 case IX86_BUILTIN_XRSTORS:
37268 icode = CODE_FOR_xrstors;
37269 break;
37270 case IX86_BUILTIN_XSAVEC:
37271 icode = CODE_FOR_xsavec;
37272 break;
37273 default:
37274 gcc_unreachable ();
37276 pat = GEN_FCN (icode) (op0, op1);
37279 if (pat)
37280 emit_insn (pat);
37281 return 0;
37283 case IX86_BUILTIN_LLWPCB:
37284 arg0 = CALL_EXPR_ARG (exp, 0);
37285 op0 = expand_normal (arg0);
37286 icode = CODE_FOR_lwp_llwpcb;
37287 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37288 op0 = ix86_zero_extend_to_Pmode (op0);
37289 emit_insn (gen_lwp_llwpcb (op0));
37290 return 0;
37292 case IX86_BUILTIN_SLWPCB:
37293 icode = CODE_FOR_lwp_slwpcb;
37294 if (!target
37295 || !insn_data[icode].operand[0].predicate (target, Pmode))
37296 target = gen_reg_rtx (Pmode);
37297 emit_insn (gen_lwp_slwpcb (target));
37298 return target;
37300 case IX86_BUILTIN_BEXTRI32:
37301 case IX86_BUILTIN_BEXTRI64:
37302 arg0 = CALL_EXPR_ARG (exp, 0);
37303 arg1 = CALL_EXPR_ARG (exp, 1);
37304 op0 = expand_normal (arg0);
37305 op1 = expand_normal (arg1);
37306 icode = (fcode == IX86_BUILTIN_BEXTRI32
37307 ? CODE_FOR_tbm_bextri_si
37308 : CODE_FOR_tbm_bextri_di);
37309 if (!CONST_INT_P (op1))
37311 error ("last argument must be an immediate");
37312 return const0_rtx;
37314 else
37316 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
37317 unsigned char lsb_index = INTVAL (op1) & 0xFF;
37318 op1 = GEN_INT (length);
37319 op2 = GEN_INT (lsb_index);
37320 pat = GEN_FCN (icode) (target, op0, op1, op2);
37321 if (pat)
37322 emit_insn (pat);
37323 return target;
37326 case IX86_BUILTIN_RDRAND16_STEP:
37327 icode = CODE_FOR_rdrandhi_1;
37328 mode0 = HImode;
37329 goto rdrand_step;
37331 case IX86_BUILTIN_RDRAND32_STEP:
37332 icode = CODE_FOR_rdrandsi_1;
37333 mode0 = SImode;
37334 goto rdrand_step;
37336 case IX86_BUILTIN_RDRAND64_STEP:
37337 icode = CODE_FOR_rdranddi_1;
37338 mode0 = DImode;
37340 rdrand_step:
37341 arg0 = CALL_EXPR_ARG (exp, 0);
37342 op1 = expand_normal (arg0);
37343 if (!address_operand (op1, VOIDmode))
37345 op1 = convert_memory_address (Pmode, op1);
37346 op1 = copy_addr_to_reg (op1);
37349 op0 = gen_reg_rtx (mode0);
37350 emit_insn (GEN_FCN (icode) (op0));
37352 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37354 op1 = gen_reg_rtx (SImode);
37355 emit_move_insn (op1, CONST1_RTX (SImode));
37357 /* Emit SImode conditional move. */
37358 if (mode0 == HImode)
37360 if (TARGET_ZERO_EXTEND_WITH_AND
37361 && optimize_function_for_speed_p (cfun))
37363 op2 = force_reg (SImode, const0_rtx);
37365 emit_insn (gen_movstricthi
37366 (gen_lowpart (HImode, op2), op0));
37368 else
37370 op2 = gen_reg_rtx (SImode);
37372 emit_insn (gen_zero_extendhisi2 (op2, op0));
37375 else if (mode0 == SImode)
37376 op2 = op0;
37377 else
37378 op2 = gen_rtx_SUBREG (SImode, op0, 0);
37380 if (target == 0
37381 || !register_operand (target, SImode))
37382 target = gen_reg_rtx (SImode);
37384 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
37385 const0_rtx);
37386 emit_insn (gen_rtx_SET (target,
37387 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
37388 return target;
37390 case IX86_BUILTIN_RDSEED16_STEP:
37391 icode = CODE_FOR_rdseedhi_1;
37392 mode0 = HImode;
37393 goto rdseed_step;
37395 case IX86_BUILTIN_RDSEED32_STEP:
37396 icode = CODE_FOR_rdseedsi_1;
37397 mode0 = SImode;
37398 goto rdseed_step;
37400 case IX86_BUILTIN_RDSEED64_STEP:
37401 icode = CODE_FOR_rdseeddi_1;
37402 mode0 = DImode;
37404 rdseed_step:
37405 arg0 = CALL_EXPR_ARG (exp, 0);
37406 op1 = expand_normal (arg0);
37407 if (!address_operand (op1, VOIDmode))
37409 op1 = convert_memory_address (Pmode, op1);
37410 op1 = copy_addr_to_reg (op1);
37413 op0 = gen_reg_rtx (mode0);
37414 emit_insn (GEN_FCN (icode) (op0));
37416 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37418 op2 = gen_reg_rtx (QImode);
37420 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
37421 const0_rtx);
37422 emit_insn (gen_rtx_SET (op2, pat));
37424 if (target == 0
37425 || !register_operand (target, SImode))
37426 target = gen_reg_rtx (SImode);
37428 emit_insn (gen_zero_extendqisi2 (target, op2));
37429 return target;
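/* The rdseed step builtins report success through the carry flag: the
   LTU test on CCCmode captures CF with a setcc into a QImode register,
   which is then zero-extended, so the builtin evaluates to 1 when a
   random value was delivered and 0 otherwise.  */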
37431 case IX86_BUILTIN_SBB32:
37432 icode = CODE_FOR_subborrowsi;
37433 icode2 = CODE_FOR_subborrowsi_0;
37434 mode0 = SImode;
37435 mode1 = DImode;
37436 mode2 = CCmode;
37437 goto handlecarry;
37439 case IX86_BUILTIN_SBB64:
37440 icode = CODE_FOR_subborrowdi;
37441 icode2 = CODE_FOR_subborrowdi_0;
37442 mode0 = DImode;
37443 mode1 = TImode;
37444 mode2 = CCmode;
37445 goto handlecarry;
37447 case IX86_BUILTIN_ADDCARRYX32:
37448 icode = CODE_FOR_addcarrysi;
37449 icode2 = CODE_FOR_addcarrysi_0;
37450 mode0 = SImode;
37451 mode1 = DImode;
37452 mode2 = CCCmode;
37453 goto handlecarry;
37455 case IX86_BUILTIN_ADDCARRYX64:
37456 icode = CODE_FOR_addcarrydi;
37457 icode2 = CODE_FOR_addcarrydi_0;
37458 mode0 = DImode;
37459 mode1 = TImode;
37460 mode2 = CCCmode;
37462 handlecarry:
37463 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
37464 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
37465 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
37466 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
37468 op1 = expand_normal (arg0);
37469 if (!integer_zerop (arg0))
37470 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
37472 op2 = expand_normal (arg1);
37473 if (!register_operand (op2, mode0))
37474 op2 = copy_to_mode_reg (mode0, op2);
37476 op3 = expand_normal (arg2);
37477 if (!register_operand (op3, mode0))
37478 op3 = copy_to_mode_reg (mode0, op3);
37480 op4 = expand_normal (arg3);
37481 if (!address_operand (op4, VOIDmode))
37483 op4 = convert_memory_address (Pmode, op4);
37484 op4 = copy_addr_to_reg (op4);
37487 op0 = gen_reg_rtx (mode0);
37488 if (integer_zerop (arg0))
37490 /* If arg0 is 0, optimize right away into an add or sub
37491 instruction that sets the CCCmode flags. */
37492 op1 = gen_rtx_REG (mode2, FLAGS_REG);
37493 emit_insn (GEN_FCN (icode2) (op0, op2, op3));
37495 else
37497 /* Generate CF from input operand. */
37498 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
37500 /* Generate instruction that consumes CF. */
37501 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
37502 pat = gen_rtx_LTU (mode1, op1, const0_rtx);
37503 pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
37504 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
37507 /* Return current CF value. */
37508 if (target == 0)
37509 target = gen_reg_rtx (QImode);
37511 pat = gen_rtx_LTU (QImode, op1, const0_rtx);
37512 emit_insn (gen_rtx_SET (target, pat));
37514 /* Store the result. */
37515 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
37517 return target;
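/* Illustrative mapping (intrinsic name is an assumption): something like
   _addcarry_u32 (c_in, a, b, &out) stores a + b + c_in in *out and
   evaluates to the carry out.  When the incoming carry is the constant 0,
   the icode2 form above emits a plain add/sub that sets the flags
   directly instead of first materialising CF.  */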
37519 case IX86_BUILTIN_READ_FLAGS:
37520 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
37522 if (optimize
37523 || target == NULL_RTX
37524 || !nonimmediate_operand (target, word_mode)
37525 || GET_MODE (target) != word_mode)
37526 target = gen_reg_rtx (word_mode);
37528 emit_insn (gen_pop (target));
37529 return target;
37531 case IX86_BUILTIN_WRITE_FLAGS:
37533 arg0 = CALL_EXPR_ARG (exp, 0);
37534 op0 = expand_normal (arg0);
37535 if (!general_no_elim_operand (op0, word_mode))
37536 op0 = copy_to_mode_reg (word_mode, op0);
37538 emit_insn (gen_push (op0));
37539 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
37540 return 0;
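/* READ_FLAGS materialises the flags register by pushing it and popping
   the value into TARGET; WRITE_FLAGS performs the inverse, pushing the
   supplied word and popping it back into the flags register.  */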
37542 case IX86_BUILTIN_KTESTC8:
37543 icode = CODE_FOR_ktestqi;
37544 mode3 = CCCmode;
37545 goto kortest;
37547 case IX86_BUILTIN_KTESTZ8:
37548 icode = CODE_FOR_ktestqi;
37549 mode3 = CCZmode;
37550 goto kortest;
37552 case IX86_BUILTIN_KTESTC16:
37553 icode = CODE_FOR_ktesthi;
37554 mode3 = CCCmode;
37555 goto kortest;
37557 case IX86_BUILTIN_KTESTZ16:
37558 icode = CODE_FOR_ktesthi;
37559 mode3 = CCZmode;
37560 goto kortest;
37562 case IX86_BUILTIN_KTESTC32:
37563 icode = CODE_FOR_ktestsi;
37564 mode3 = CCCmode;
37565 goto kortest;
37567 case IX86_BUILTIN_KTESTZ32:
37568 icode = CODE_FOR_ktestsi;
37569 mode3 = CCZmode;
37570 goto kortest;
37572 case IX86_BUILTIN_KTESTC64:
37573 icode = CODE_FOR_ktestdi;
37574 mode3 = CCCmode;
37575 goto kortest;
37577 case IX86_BUILTIN_KTESTZ64:
37578 icode = CODE_FOR_ktestdi;
37579 mode3 = CCZmode;
37580 goto kortest;
37582 case IX86_BUILTIN_KORTESTC8:
37583 icode = CODE_FOR_kortestqi;
37584 mode3 = CCCmode;
37585 goto kortest;
37587 case IX86_BUILTIN_KORTESTZ8:
37588 icode = CODE_FOR_kortestqi;
37589 mode3 = CCZmode;
37590 goto kortest;
37592 case IX86_BUILTIN_KORTESTC16:
37593 icode = CODE_FOR_kortesthi;
37594 mode3 = CCCmode;
37595 goto kortest;
37597 case IX86_BUILTIN_KORTESTZ16:
37598 icode = CODE_FOR_kortesthi;
37599 mode3 = CCZmode;
37600 goto kortest;
37602 case IX86_BUILTIN_KORTESTC32:
37603 icode = CODE_FOR_kortestsi;
37604 mode3 = CCCmode;
37605 goto kortest;
37607 case IX86_BUILTIN_KORTESTZ32:
37608 icode = CODE_FOR_kortestsi;
37609 mode3 = CCZmode;
37610 goto kortest;
37612 case IX86_BUILTIN_KORTESTC64:
37613 icode = CODE_FOR_kortestdi;
37614 mode3 = CCCmode;
37615 goto kortest;
37617 case IX86_BUILTIN_KORTESTZ64:
37618 icode = CODE_FOR_kortestdi;
37619 mode3 = CCZmode;
37621 kortest:
37622 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
37623 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
37624 op0 = expand_normal (arg0);
37625 op1 = expand_normal (arg1);
37627 mode0 = insn_data[icode].operand[0].mode;
37628 mode1 = insn_data[icode].operand[1].mode;
37630 if (GET_MODE (op0) != VOIDmode)
37631 op0 = force_reg (GET_MODE (op0), op0);
37633 op0 = gen_lowpart (mode0, op0);
37635 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37636 op0 = copy_to_mode_reg (mode0, op0);
37638 if (GET_MODE (op1) != VOIDmode)
37639 op1 = force_reg (GET_MODE (op1), op1);
37641 op1 = gen_lowpart (mode1, op1);
37643 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37644 op1 = copy_to_mode_reg (mode1, op1);
37646 target = gen_reg_rtx (QImode);
37648 /* Emit kortest. */
37649 emit_insn (GEN_FCN (icode) (op0, op1));
37650 /* And use setcc to return result from flags. */
37651 ix86_expand_setcc (target, EQ,
37652 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
37653 return target;
37655 case IX86_BUILTIN_GATHERSIV2DF:
37656 icode = CODE_FOR_avx2_gathersiv2df;
37657 goto gather_gen;
37658 case IX86_BUILTIN_GATHERSIV4DF:
37659 icode = CODE_FOR_avx2_gathersiv4df;
37660 goto gather_gen;
37661 case IX86_BUILTIN_GATHERDIV2DF:
37662 icode = CODE_FOR_avx2_gatherdiv2df;
37663 goto gather_gen;
37664 case IX86_BUILTIN_GATHERDIV4DF:
37665 icode = CODE_FOR_avx2_gatherdiv4df;
37666 goto gather_gen;
37667 case IX86_BUILTIN_GATHERSIV4SF:
37668 icode = CODE_FOR_avx2_gathersiv4sf;
37669 goto gather_gen;
37670 case IX86_BUILTIN_GATHERSIV8SF:
37671 icode = CODE_FOR_avx2_gathersiv8sf;
37672 goto gather_gen;
37673 case IX86_BUILTIN_GATHERDIV4SF:
37674 icode = CODE_FOR_avx2_gatherdiv4sf;
37675 goto gather_gen;
37676 case IX86_BUILTIN_GATHERDIV8SF:
37677 icode = CODE_FOR_avx2_gatherdiv8sf;
37678 goto gather_gen;
37679 case IX86_BUILTIN_GATHERSIV2DI:
37680 icode = CODE_FOR_avx2_gathersiv2di;
37681 goto gather_gen;
37682 case IX86_BUILTIN_GATHERSIV4DI:
37683 icode = CODE_FOR_avx2_gathersiv4di;
37684 goto gather_gen;
37685 case IX86_BUILTIN_GATHERDIV2DI:
37686 icode = CODE_FOR_avx2_gatherdiv2di;
37687 goto gather_gen;
37688 case IX86_BUILTIN_GATHERDIV4DI:
37689 icode = CODE_FOR_avx2_gatherdiv4di;
37690 goto gather_gen;
37691 case IX86_BUILTIN_GATHERSIV4SI:
37692 icode = CODE_FOR_avx2_gathersiv4si;
37693 goto gather_gen;
37694 case IX86_BUILTIN_GATHERSIV8SI:
37695 icode = CODE_FOR_avx2_gathersiv8si;
37696 goto gather_gen;
37697 case IX86_BUILTIN_GATHERDIV4SI:
37698 icode = CODE_FOR_avx2_gatherdiv4si;
37699 goto gather_gen;
37700 case IX86_BUILTIN_GATHERDIV8SI:
37701 icode = CODE_FOR_avx2_gatherdiv8si;
37702 goto gather_gen;
37703 case IX86_BUILTIN_GATHERALTSIV4DF:
37704 icode = CODE_FOR_avx2_gathersiv4df;
37705 goto gather_gen;
37706 case IX86_BUILTIN_GATHERALTDIV8SF:
37707 icode = CODE_FOR_avx2_gatherdiv8sf;
37708 goto gather_gen;
37709 case IX86_BUILTIN_GATHERALTSIV4DI:
37710 icode = CODE_FOR_avx2_gathersiv4di;
37711 goto gather_gen;
37712 case IX86_BUILTIN_GATHERALTDIV8SI:
37713 icode = CODE_FOR_avx2_gatherdiv8si;
37714 goto gather_gen;
37715 case IX86_BUILTIN_GATHER3SIV16SF:
37716 icode = CODE_FOR_avx512f_gathersiv16sf;
37717 goto gather_gen;
37718 case IX86_BUILTIN_GATHER3SIV8DF:
37719 icode = CODE_FOR_avx512f_gathersiv8df;
37720 goto gather_gen;
37721 case IX86_BUILTIN_GATHER3DIV16SF:
37722 icode = CODE_FOR_avx512f_gatherdiv16sf;
37723 goto gather_gen;
37724 case IX86_BUILTIN_GATHER3DIV8DF:
37725 icode = CODE_FOR_avx512f_gatherdiv8df;
37726 goto gather_gen;
37727 case IX86_BUILTIN_GATHER3SIV16SI:
37728 icode = CODE_FOR_avx512f_gathersiv16si;
37729 goto gather_gen;
37730 case IX86_BUILTIN_GATHER3SIV8DI:
37731 icode = CODE_FOR_avx512f_gathersiv8di;
37732 goto gather_gen;
37733 case IX86_BUILTIN_GATHER3DIV16SI:
37734 icode = CODE_FOR_avx512f_gatherdiv16si;
37735 goto gather_gen;
37736 case IX86_BUILTIN_GATHER3DIV8DI:
37737 icode = CODE_FOR_avx512f_gatherdiv8di;
37738 goto gather_gen;
37739 case IX86_BUILTIN_GATHER3ALTSIV8DF:
37740 icode = CODE_FOR_avx512f_gathersiv8df;
37741 goto gather_gen;
37742 case IX86_BUILTIN_GATHER3ALTDIV16SF:
37743 icode = CODE_FOR_avx512f_gatherdiv16sf;
37744 goto gather_gen;
37745 case IX86_BUILTIN_GATHER3ALTSIV8DI:
37746 icode = CODE_FOR_avx512f_gathersiv8di;
37747 goto gather_gen;
37748 case IX86_BUILTIN_GATHER3ALTDIV16SI:
37749 icode = CODE_FOR_avx512f_gatherdiv16si;
37750 goto gather_gen;
37751 case IX86_BUILTIN_GATHER3SIV2DF:
37752 icode = CODE_FOR_avx512vl_gathersiv2df;
37753 goto gather_gen;
37754 case IX86_BUILTIN_GATHER3SIV4DF:
37755 icode = CODE_FOR_avx512vl_gathersiv4df;
37756 goto gather_gen;
37757 case IX86_BUILTIN_GATHER3DIV2DF:
37758 icode = CODE_FOR_avx512vl_gatherdiv2df;
37759 goto gather_gen;
37760 case IX86_BUILTIN_GATHER3DIV4DF:
37761 icode = CODE_FOR_avx512vl_gatherdiv4df;
37762 goto gather_gen;
37763 case IX86_BUILTIN_GATHER3SIV4SF:
37764 icode = CODE_FOR_avx512vl_gathersiv4sf;
37765 goto gather_gen;
37766 case IX86_BUILTIN_GATHER3SIV8SF:
37767 icode = CODE_FOR_avx512vl_gathersiv8sf;
37768 goto gather_gen;
37769 case IX86_BUILTIN_GATHER3DIV4SF:
37770 icode = CODE_FOR_avx512vl_gatherdiv4sf;
37771 goto gather_gen;
37772 case IX86_BUILTIN_GATHER3DIV8SF:
37773 icode = CODE_FOR_avx512vl_gatherdiv8sf;
37774 goto gather_gen;
37775 case IX86_BUILTIN_GATHER3SIV2DI:
37776 icode = CODE_FOR_avx512vl_gathersiv2di;
37777 goto gather_gen;
37778 case IX86_BUILTIN_GATHER3SIV4DI:
37779 icode = CODE_FOR_avx512vl_gathersiv4di;
37780 goto gather_gen;
37781 case IX86_BUILTIN_GATHER3DIV2DI:
37782 icode = CODE_FOR_avx512vl_gatherdiv2di;
37783 goto gather_gen;
37784 case IX86_BUILTIN_GATHER3DIV4DI:
37785 icode = CODE_FOR_avx512vl_gatherdiv4di;
37786 goto gather_gen;
37787 case IX86_BUILTIN_GATHER3SIV4SI:
37788 icode = CODE_FOR_avx512vl_gathersiv4si;
37789 goto gather_gen;
37790 case IX86_BUILTIN_GATHER3SIV8SI:
37791 icode = CODE_FOR_avx512vl_gathersiv8si;
37792 goto gather_gen;
37793 case IX86_BUILTIN_GATHER3DIV4SI:
37794 icode = CODE_FOR_avx512vl_gatherdiv4si;
37795 goto gather_gen;
37796 case IX86_BUILTIN_GATHER3DIV8SI:
37797 icode = CODE_FOR_avx512vl_gatherdiv8si;
37798 goto gather_gen;
37799 case IX86_BUILTIN_GATHER3ALTSIV4DF:
37800 icode = CODE_FOR_avx512vl_gathersiv4df;
37801 goto gather_gen;
37802 case IX86_BUILTIN_GATHER3ALTDIV8SF:
37803 icode = CODE_FOR_avx512vl_gatherdiv8sf;
37804 goto gather_gen;
37805 case IX86_BUILTIN_GATHER3ALTSIV4DI:
37806 icode = CODE_FOR_avx512vl_gathersiv4di;
37807 goto gather_gen;
37808 case IX86_BUILTIN_GATHER3ALTDIV8SI:
37809 icode = CODE_FOR_avx512vl_gatherdiv8si;
37810 goto gather_gen;
37811 case IX86_BUILTIN_SCATTERSIV16SF:
37812 icode = CODE_FOR_avx512f_scattersiv16sf;
37813 goto scatter_gen;
37814 case IX86_BUILTIN_SCATTERSIV8DF:
37815 icode = CODE_FOR_avx512f_scattersiv8df;
37816 goto scatter_gen;
37817 case IX86_BUILTIN_SCATTERDIV16SF:
37818 icode = CODE_FOR_avx512f_scatterdiv16sf;
37819 goto scatter_gen;
37820 case IX86_BUILTIN_SCATTERDIV8DF:
37821 icode = CODE_FOR_avx512f_scatterdiv8df;
37822 goto scatter_gen;
37823 case IX86_BUILTIN_SCATTERSIV16SI:
37824 icode = CODE_FOR_avx512f_scattersiv16si;
37825 goto scatter_gen;
37826 case IX86_BUILTIN_SCATTERSIV8DI:
37827 icode = CODE_FOR_avx512f_scattersiv8di;
37828 goto scatter_gen;
37829 case IX86_BUILTIN_SCATTERDIV16SI:
37830 icode = CODE_FOR_avx512f_scatterdiv16si;
37831 goto scatter_gen;
37832 case IX86_BUILTIN_SCATTERDIV8DI:
37833 icode = CODE_FOR_avx512f_scatterdiv8di;
37834 goto scatter_gen;
37835 case IX86_BUILTIN_SCATTERSIV8SF:
37836 icode = CODE_FOR_avx512vl_scattersiv8sf;
37837 goto scatter_gen;
37838 case IX86_BUILTIN_SCATTERSIV4SF:
37839 icode = CODE_FOR_avx512vl_scattersiv4sf;
37840 goto scatter_gen;
37841 case IX86_BUILTIN_SCATTERSIV4DF:
37842 icode = CODE_FOR_avx512vl_scattersiv4df;
37843 goto scatter_gen;
37844 case IX86_BUILTIN_SCATTERSIV2DF:
37845 icode = CODE_FOR_avx512vl_scattersiv2df;
37846 goto scatter_gen;
37847 case IX86_BUILTIN_SCATTERDIV8SF:
37848 icode = CODE_FOR_avx512vl_scatterdiv8sf;
37849 goto scatter_gen;
37850 case IX86_BUILTIN_SCATTERDIV4SF:
37851 icode = CODE_FOR_avx512vl_scatterdiv4sf;
37852 goto scatter_gen;
37853 case IX86_BUILTIN_SCATTERDIV4DF:
37854 icode = CODE_FOR_avx512vl_scatterdiv4df;
37855 goto scatter_gen;
37856 case IX86_BUILTIN_SCATTERDIV2DF:
37857 icode = CODE_FOR_avx512vl_scatterdiv2df;
37858 goto scatter_gen;
37859 case IX86_BUILTIN_SCATTERSIV8SI:
37860 icode = CODE_FOR_avx512vl_scattersiv8si;
37861 goto scatter_gen;
37862 case IX86_BUILTIN_SCATTERSIV4SI:
37863 icode = CODE_FOR_avx512vl_scattersiv4si;
37864 goto scatter_gen;
37865 case IX86_BUILTIN_SCATTERSIV4DI:
37866 icode = CODE_FOR_avx512vl_scattersiv4di;
37867 goto scatter_gen;
37868 case IX86_BUILTIN_SCATTERSIV2DI:
37869 icode = CODE_FOR_avx512vl_scattersiv2di;
37870 goto scatter_gen;
37871 case IX86_BUILTIN_SCATTERDIV8SI:
37872 icode = CODE_FOR_avx512vl_scatterdiv8si;
37873 goto scatter_gen;
37874 case IX86_BUILTIN_SCATTERDIV4SI:
37875 icode = CODE_FOR_avx512vl_scatterdiv4si;
37876 goto scatter_gen;
37877 case IX86_BUILTIN_SCATTERDIV4DI:
37878 icode = CODE_FOR_avx512vl_scatterdiv4di;
37879 goto scatter_gen;
37880 case IX86_BUILTIN_SCATTERDIV2DI:
37881 icode = CODE_FOR_avx512vl_scatterdiv2di;
37882 goto scatter_gen;
37883 case IX86_BUILTIN_GATHERPFDPD:
37884 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
37885 goto vec_prefetch_gen;
37886 case IX86_BUILTIN_SCATTERALTSIV8DF:
37887 icode = CODE_FOR_avx512f_scattersiv8df;
37888 goto scatter_gen;
37889 case IX86_BUILTIN_SCATTERALTDIV16SF:
37890 icode = CODE_FOR_avx512f_scatterdiv16sf;
37891 goto scatter_gen;
37892 case IX86_BUILTIN_SCATTERALTSIV8DI:
37893 icode = CODE_FOR_avx512f_scattersiv8di;
37894 goto scatter_gen;
37895 case IX86_BUILTIN_SCATTERALTDIV16SI:
37896 icode = CODE_FOR_avx512f_scatterdiv16si;
37897 goto scatter_gen;
37898 case IX86_BUILTIN_GATHERPFDPS:
37899 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
37900 goto vec_prefetch_gen;
37901 case IX86_BUILTIN_GATHERPFQPD:
37902 icode = CODE_FOR_avx512pf_gatherpfv8didf;
37903 goto vec_prefetch_gen;
37904 case IX86_BUILTIN_GATHERPFQPS:
37905 icode = CODE_FOR_avx512pf_gatherpfv8disf;
37906 goto vec_prefetch_gen;
37907 case IX86_BUILTIN_SCATTERPFDPD:
37908 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
37909 goto vec_prefetch_gen;
37910 case IX86_BUILTIN_SCATTERPFDPS:
37911 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
37912 goto vec_prefetch_gen;
37913 case IX86_BUILTIN_SCATTERPFQPD:
37914 icode = CODE_FOR_avx512pf_scatterpfv8didf;
37915 goto vec_prefetch_gen;
37916 case IX86_BUILTIN_SCATTERPFQPS:
37917 icode = CODE_FOR_avx512pf_scatterpfv8disf;
37918 goto vec_prefetch_gen;
37920 gather_gen:
37921 rtx half;
37922 rtx (*gen) (rtx, rtx);
37924 arg0 = CALL_EXPR_ARG (exp, 0);
37925 arg1 = CALL_EXPR_ARG (exp, 1);
37926 arg2 = CALL_EXPR_ARG (exp, 2);
37927 arg3 = CALL_EXPR_ARG (exp, 3);
37928 arg4 = CALL_EXPR_ARG (exp, 4);
37929 op0 = expand_normal (arg0);
37930 op1 = expand_normal (arg1);
37931 op2 = expand_normal (arg2);
37932 op3 = expand_normal (arg3);
37933 op4 = expand_normal (arg4);
37934 /* Note the arg order is different from the operand order. */
37935 mode0 = insn_data[icode].operand[1].mode;
37936 mode2 = insn_data[icode].operand[3].mode;
37937 mode3 = insn_data[icode].operand[4].mode;
37938 mode4 = insn_data[icode].operand[5].mode;
37940 if (target == NULL_RTX
37941 || GET_MODE (target) != insn_data[icode].operand[0].mode
37942 || !insn_data[icode].operand[0].predicate (target,
37943 GET_MODE (target)))
37944 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
37945 else
37946 subtarget = target;
37948 switch (fcode)
37950 case IX86_BUILTIN_GATHER3ALTSIV8DF:
37951 case IX86_BUILTIN_GATHER3ALTSIV8DI:
37952 half = gen_reg_rtx (V8SImode);
37953 if (!nonimmediate_operand (op2, V16SImode))
37954 op2 = copy_to_mode_reg (V16SImode, op2);
37955 emit_insn (gen_vec_extract_lo_v16si (half, op2));
37956 op2 = half;
37957 break;
37958 case IX86_BUILTIN_GATHER3ALTSIV4DF:
37959 case IX86_BUILTIN_GATHER3ALTSIV4DI:
37960 case IX86_BUILTIN_GATHERALTSIV4DF:
37961 case IX86_BUILTIN_GATHERALTSIV4DI:
37962 half = gen_reg_rtx (V4SImode);
37963 if (!nonimmediate_operand (op2, V8SImode))
37964 op2 = copy_to_mode_reg (V8SImode, op2);
37965 emit_insn (gen_vec_extract_lo_v8si (half, op2));
37966 op2 = half;
37967 break;
37968 case IX86_BUILTIN_GATHER3ALTDIV16SF:
37969 case IX86_BUILTIN_GATHER3ALTDIV16SI:
37970 half = gen_reg_rtx (mode0);
37971 if (mode0 == V8SFmode)
37972 gen = gen_vec_extract_lo_v16sf;
37973 else
37974 gen = gen_vec_extract_lo_v16si;
37975 if (!nonimmediate_operand (op0, GET_MODE (op0)))
37976 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
37977 emit_insn (gen (half, op0));
37978 op0 = half;
37979 if (GET_MODE (op3) != VOIDmode)
37981 if (!nonimmediate_operand (op3, GET_MODE (op3)))
37982 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
37983 emit_insn (gen (half, op3));
37984 op3 = half;
37986 break;
37987 case IX86_BUILTIN_GATHER3ALTDIV8SF:
37988 case IX86_BUILTIN_GATHER3ALTDIV8SI:
37989 case IX86_BUILTIN_GATHERALTDIV8SF:
37990 case IX86_BUILTIN_GATHERALTDIV8SI:
37991 half = gen_reg_rtx (mode0);
37992 if (mode0 == V4SFmode)
37993 gen = gen_vec_extract_lo_v8sf;
37994 else
37995 gen = gen_vec_extract_lo_v8si;
37996 if (!nonimmediate_operand (op0, GET_MODE (op0)))
37997 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
37998 emit_insn (gen (half, op0));
37999 op0 = half;
38000 if (GET_MODE (op3) != VOIDmode)
38002 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38003 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38004 emit_insn (gen (half, op3));
38005 op3 = half;
38007 break;
38008 default:
38009 break;
38012 /* Force the memory operand to use only a base register here. But we
38013 don't want to do that on the memory operand of other builtin
38014 functions. */
38015 op1 = ix86_zero_extend_to_Pmode (op1);
38017 if (!insn_data[icode].operand[1].predicate (op0, mode0))
38018 op0 = copy_to_mode_reg (mode0, op0);
38019 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
38020 op1 = copy_to_mode_reg (Pmode, op1);
38021 if (!insn_data[icode].operand[3].predicate (op2, mode2))
38022 op2 = copy_to_mode_reg (mode2, op2);
38024 op3 = fixup_modeless_constant (op3, mode3);
38026 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
38028 if (!insn_data[icode].operand[4].predicate (op3, mode3))
38029 op3 = copy_to_mode_reg (mode3, op3);
38031 else
38033 op3 = copy_to_reg (op3);
38034 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
38036 if (!insn_data[icode].operand[5].predicate (op4, mode4))
38038 error ("the last argument must be scale 1, 2, 4, 8");
38039 return const0_rtx;
38042 /* Optimize. If mask is known to have all high bits set,
38043 replace op0 with pc_rtx to signal that the instruction
38044 overwrites the whole destination and doesn't use its
38045 previous contents. */
38046 if (optimize)
38048 if (TREE_CODE (arg3) == INTEGER_CST)
38050 if (integer_all_onesp (arg3))
38051 op0 = pc_rtx;
38053 else if (TREE_CODE (arg3) == VECTOR_CST)
38055 unsigned int negative = 0;
38056 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
38058 tree cst = VECTOR_CST_ELT (arg3, i);
38059 if (TREE_CODE (cst) == INTEGER_CST
38060 && tree_int_cst_sign_bit (cst))
38061 negative++;
38062 else if (TREE_CODE (cst) == REAL_CST
38063 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
38064 negative++;
38066 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
38067 op0 = pc_rtx;
38069 else if (TREE_CODE (arg3) == SSA_NAME
38070 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
38072 /* Recognize also when mask is like:
38073 __v2df src = _mm_setzero_pd ();
38074 __v2df mask = _mm_cmpeq_pd (src, src);
38076 __v8sf src = _mm256_setzero_ps ();
38077 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
38078 as that is a cheaper way to load all ones into
38079 a register than having to load a constant from
38080 memory. */
38081 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
38082 if (is_gimple_call (def_stmt))
38084 tree fndecl = gimple_call_fndecl (def_stmt);
38085 if (fndecl
38086 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
38087 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
38089 case IX86_BUILTIN_CMPPD:
38090 case IX86_BUILTIN_CMPPS:
38091 case IX86_BUILTIN_CMPPD256:
38092 case IX86_BUILTIN_CMPPS256:
38093 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
38094 break;
38095 /* FALLTHRU */
38096 case IX86_BUILTIN_CMPEQPD:
38097 case IX86_BUILTIN_CMPEQPS:
38098 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
38099 && initializer_zerop (gimple_call_arg (def_stmt,
38100 1)))
38101 op0 = pc_rtx;
38102 break;
38103 default:
38104 break;
38110 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
38111 if (! pat)
38112 return const0_rtx;
38113 emit_insn (pat);
38115 switch (fcode)
38117 case IX86_BUILTIN_GATHER3DIV16SF:
38118 if (target == NULL_RTX)
38119 target = gen_reg_rtx (V8SFmode);
38120 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
38121 break;
38122 case IX86_BUILTIN_GATHER3DIV16SI:
38123 if (target == NULL_RTX)
38124 target = gen_reg_rtx (V8SImode);
38125 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
38126 break;
38127 case IX86_BUILTIN_GATHER3DIV8SF:
38128 case IX86_BUILTIN_GATHERDIV8SF:
38129 if (target == NULL_RTX)
38130 target = gen_reg_rtx (V4SFmode);
38131 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
38132 break;
38133 case IX86_BUILTIN_GATHER3DIV8SI:
38134 case IX86_BUILTIN_GATHERDIV8SI:
38135 if (target == NULL_RTX)
38136 target = gen_reg_rtx (V4SImode);
38137 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
38138 break;
38139 default:
38140 target = subtarget;
38141 break;
38143 return target;
38145 scatter_gen:
38146 arg0 = CALL_EXPR_ARG (exp, 0);
38147 arg1 = CALL_EXPR_ARG (exp, 1);
38148 arg2 = CALL_EXPR_ARG (exp, 2);
38149 arg3 = CALL_EXPR_ARG (exp, 3);
38150 arg4 = CALL_EXPR_ARG (exp, 4);
38151 op0 = expand_normal (arg0);
38152 op1 = expand_normal (arg1);
38153 op2 = expand_normal (arg2);
38154 op3 = expand_normal (arg3);
38155 op4 = expand_normal (arg4);
38156 mode1 = insn_data[icode].operand[1].mode;
38157 mode2 = insn_data[icode].operand[2].mode;
38158 mode3 = insn_data[icode].operand[3].mode;
38159 mode4 = insn_data[icode].operand[4].mode;
38161 /* Scatter instruction stores operand op3 to memory with
38162 indices from op2 and scale from op4 under writemask op1.
38163 If index operand op2 has more elements than source operand
38164 op3, one needs to use only its low half. And vice versa. */
38165 switch (fcode)
38167 case IX86_BUILTIN_SCATTERALTSIV8DF:
38168 case IX86_BUILTIN_SCATTERALTSIV8DI:
38169 half = gen_reg_rtx (V8SImode);
38170 if (!nonimmediate_operand (op2, V16SImode))
38171 op2 = copy_to_mode_reg (V16SImode, op2);
38172 emit_insn (gen_vec_extract_lo_v16si (half, op2));
38173 op2 = half;
38174 break;
38175 case IX86_BUILTIN_SCATTERALTDIV16SF:
38176 case IX86_BUILTIN_SCATTERALTDIV16SI:
38177 half = gen_reg_rtx (mode3);
38178 if (mode3 == V8SFmode)
38179 gen = gen_vec_extract_lo_v16sf;
38180 else
38181 gen = gen_vec_extract_lo_v16si;
38182 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38183 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38184 emit_insn (gen (half, op3));
38185 op3 = half;
38186 break;
38187 default:
38188 break;
38191 /* Force the memory operand to use only a base register here. But we
38192 don't want to do that on the memory operand of other builtin
38193 functions. */
38194 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
38196 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38197 op0 = copy_to_mode_reg (Pmode, op0);
38199 op1 = fixup_modeless_constant (op1, mode1);
38201 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
38203 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38204 op1 = copy_to_mode_reg (mode1, op1);
38206 else
38208 op1 = copy_to_reg (op1);
38209 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
38212 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38213 op2 = copy_to_mode_reg (mode2, op2);
38215 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38216 op3 = copy_to_mode_reg (mode3, op3);
38218 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38220 error ("the last argument must be scale 1, 2, 4, 8");
38221 return const0_rtx;
38224 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38225 if (! pat)
38226 return const0_rtx;
38228 emit_insn (pat);
38229 return 0;
38231 vec_prefetch_gen:
38232 arg0 = CALL_EXPR_ARG (exp, 0);
38233 arg1 = CALL_EXPR_ARG (exp, 1);
38234 arg2 = CALL_EXPR_ARG (exp, 2);
38235 arg3 = CALL_EXPR_ARG (exp, 3);
38236 arg4 = CALL_EXPR_ARG (exp, 4);
38237 op0 = expand_normal (arg0);
38238 op1 = expand_normal (arg1);
38239 op2 = expand_normal (arg2);
38240 op3 = expand_normal (arg3);
38241 op4 = expand_normal (arg4);
38242 mode0 = insn_data[icode].operand[0].mode;
38243 mode1 = insn_data[icode].operand[1].mode;
38244 mode3 = insn_data[icode].operand[3].mode;
38245 mode4 = insn_data[icode].operand[4].mode;
38247 op0 = fixup_modeless_constant (op0, mode0);
38249 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
38251 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38252 op0 = copy_to_mode_reg (mode0, op0);
38254 else
38256 op0 = copy_to_reg (op0);
38257 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
38260 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38261 op1 = copy_to_mode_reg (mode1, op1);
38263 /* Force the memory operand to use only a base register here. But we
38264 don't want to do that on the memory operand of other builtin
38265 functions. */
38266 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
38268 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
38269 op2 = copy_to_mode_reg (Pmode, op2);
38271 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38273 error ("the fourth argument must be scale 1, 2, 4, 8");
38274 return const0_rtx;
38277 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38279 error ("incorrect hint operand");
38280 return const0_rtx;
38283 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38284 if (! pat)
38285 return const0_rtx;
38287 emit_insn (pat);
38289 return 0;
38291 case IX86_BUILTIN_XABORT:
38292 icode = CODE_FOR_xabort;
38293 arg0 = CALL_EXPR_ARG (exp, 0);
38294 op0 = expand_normal (arg0);
38295 mode0 = insn_data[icode].operand[0].mode;
38296 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38298 error ("the xabort's argument must be an 8-bit immediate");
38299 return const0_rtx;
38301 emit_insn (gen_xabort (op0));
38302 return 0;
38304 case IX86_BUILTIN_RSTORSSP:
38305 case IX86_BUILTIN_CLRSSBSY:
38306 arg0 = CALL_EXPR_ARG (exp, 0);
38307 op0 = expand_normal (arg0);
38308 icode = (fcode == IX86_BUILTIN_RSTORSSP
38309 ? CODE_FOR_rstorssp
38310 : CODE_FOR_clrssbsy);
38311 if (!address_operand (op0, VOIDmode))
38313 op1 = convert_memory_address (Pmode, op0);
38314 op0 = copy_addr_to_reg (op1);
38316 emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
38317 return 0;
38319 case IX86_BUILTIN_WRSSD:
38320 case IX86_BUILTIN_WRSSQ:
38321 case IX86_BUILTIN_WRUSSD:
38322 case IX86_BUILTIN_WRUSSQ:
38323 arg0 = CALL_EXPR_ARG (exp, 0);
38324 op0 = expand_normal (arg0);
38325 arg1 = CALL_EXPR_ARG (exp, 1);
38326 op1 = expand_normal (arg1);
38327 switch (fcode)
38329 case IX86_BUILTIN_WRSSD:
38330 icode = CODE_FOR_wrsssi;
38331 mode = SImode;
38332 break;
38333 case IX86_BUILTIN_WRSSQ:
38334 icode = CODE_FOR_wrssdi;
38335 mode = DImode;
38336 break;
38337 case IX86_BUILTIN_WRUSSD:
38338 icode = CODE_FOR_wrusssi;
38339 mode = SImode;
38340 break;
38341 case IX86_BUILTIN_WRUSSQ:
38342 icode = CODE_FOR_wrussdi;
38343 mode = DImode;
38344 break;
38346 op0 = force_reg (mode, op0);
38347 if (!address_operand (op1, VOIDmode))
38349 op2 = convert_memory_address (Pmode, op1);
38350 op1 = copy_addr_to_reg (op2);
38352 emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
38353 return 0;
38355 default:
38356 break;
38359 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
38360 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
38362 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
38363 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
38364 target);
38367 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
38368 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
38370 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
38371 switch (fcode)
38373 case IX86_BUILTIN_FABSQ:
38374 case IX86_BUILTIN_COPYSIGNQ:
38375 if (!TARGET_SSE)
38376 /* Emit a normal call if SSE isn't available. */
38377 return expand_call (exp, target, ignore);
38378 /* FALLTHRU */
38379 default:
38380 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
38384 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
38385 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
38387 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
38388 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
38389 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
38390 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
38391 int masked = 1;
38392 machine_mode mode, wide_mode, nar_mode;
38394 nar_mode = V4SFmode;
38395 mode = V16SFmode;
38396 wide_mode = V64SFmode;
38397 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
38398 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
38400 switch (fcode)
38402 case IX86_BUILTIN_4FMAPS:
38403 fcn = gen_avx5124fmaddps_4fmaddps;
38404 masked = 0;
38405 goto v4fma_expand;
38407 case IX86_BUILTIN_4DPWSSD:
38408 nar_mode = V4SImode;
38409 mode = V16SImode;
38410 wide_mode = V64SImode;
38411 fcn = gen_avx5124vnniw_vp4dpwssd;
38412 masked = 0;
38413 goto v4fma_expand;
38415 case IX86_BUILTIN_4DPWSSDS:
38416 nar_mode = V4SImode;
38417 mode = V16SImode;
38418 wide_mode = V64SImode;
38419 fcn = gen_avx5124vnniw_vp4dpwssds;
38420 masked = 0;
38421 goto v4fma_expand;
38423 case IX86_BUILTIN_4FNMAPS:
38424 fcn = gen_avx5124fmaddps_4fnmaddps;
38425 masked = 0;
38426 goto v4fma_expand;
38428 case IX86_BUILTIN_4FNMAPS_MASK:
38429 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
38430 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
38431 goto v4fma_expand;
38433 case IX86_BUILTIN_4DPWSSD_MASK:
38434 nar_mode = V4SImode;
38435 mode = V16SImode;
38436 wide_mode = V64SImode;
38437 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
38438 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
38439 goto v4fma_expand;
38441 case IX86_BUILTIN_4DPWSSDS_MASK:
38442 nar_mode = V4SImode;
38443 mode = V16SImode;
38444 wide_mode = V64SImode;
38445 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
38446 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
38447 goto v4fma_expand;
38449 case IX86_BUILTIN_4FMAPS_MASK:
38451 tree args[4];
38452 rtx ops[4];
38453 rtx wide_reg;
38454 rtx accum;
38455 rtx addr;
38456 rtx mem;
38458 v4fma_expand:
38459 wide_reg = gen_reg_rtx (wide_mode);
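/* Pack the four narrow source operands into the single wide pseudo
   WIDE_REG; each occupies a consecutive 64-byte lane, which is the
   layout the 4FMAPS/4DPWSSD insn patterns expect. */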
38460 for (i = 0; i < 4; i++)
38462 args[i] = CALL_EXPR_ARG (exp, i);
38463 ops[i] = expand_normal (args[i]);
38465 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
38466 ops[i]);
38469 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38470 accum = force_reg (mode, accum);
38472 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38473 addr = force_reg (Pmode, addr);
38475 mem = gen_rtx_MEM (nar_mode, addr);
38477 target = gen_reg_rtx (mode);
38479 emit_move_insn (target, accum);
38481 if (! masked)
38482 emit_insn (fcn (target, accum, wide_reg, mem));
38483 else
38485 rtx merge, mask;
38486 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38488 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38490 if (CONST_INT_P (mask))
38491 mask = fixup_modeless_constant (mask, HImode);
38493 mask = force_reg (HImode, mask);
38495 if (GET_MODE (mask) != HImode)
38496 mask = gen_rtx_SUBREG (HImode, mask, 0);
38498 /* If merge is 0 then we're about to emit z-masked variant. */
38499 if (const0_operand (merge, mode))
38500 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38501 /* If merge is the same as accum then emit merge-masked variant. */
38502 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38504 merge = force_reg (mode, merge);
38505 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38507 /* Merge with something unknown might happen if we z-mask w/ -O0. */
38508 else
38510 target = gen_reg_rtx (mode);
38511 emit_move_insn (target, merge);
38512 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38515 return target;
38518 case IX86_BUILTIN_4FNMASS:
38519 fcn = gen_avx5124fmaddps_4fnmaddss;
38520 masked = 0;
38521 goto s4fma_expand;
38523 case IX86_BUILTIN_4FMASS:
38524 fcn = gen_avx5124fmaddps_4fmaddss;
38525 masked = 0;
38526 goto s4fma_expand;
38528 case IX86_BUILTIN_4FNMASS_MASK:
38529 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
38530 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
38531 goto s4fma_expand;
38533 case IX86_BUILTIN_4FMASS_MASK:
38535 tree args[4];
38536 rtx ops[4];
38537 rtx wide_reg;
38538 rtx accum;
38539 rtx addr;
38540 rtx mem;
38542 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
38543 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
38545 s4fma_expand:
38546 mode = V4SFmode;
38547 wide_reg = gen_reg_rtx (V64SFmode);
38548 for (i = 0; i < 4; i++)
38550 rtx tmp;
38551 args[i] = CALL_EXPR_ARG (exp, i);
38552 ops[i] = expand_normal (args[i]);
38554 tmp = gen_reg_rtx (SFmode);
38555 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
38557 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
38558 gen_rtx_SUBREG (V16SFmode, tmp, 0));
38561 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38562 accum = force_reg (V4SFmode, accum);
38564 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38565 addr = force_reg (Pmode, addr);
38567 mem = gen_rtx_MEM (V4SFmode, addr);
38569 target = gen_reg_rtx (V4SFmode);
38571 emit_move_insn (target, accum);
38573 if (! masked)
38574 emit_insn (fcn (target, accum, wide_reg, mem));
38575 else
38577 rtx merge, mask;
38578 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38580 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38582 if (CONST_INT_P (mask))
38583 mask = fixup_modeless_constant (mask, QImode);
38585 mask = force_reg (QImode, mask);
38587 if (GET_MODE (mask) != QImode)
38588 mask = gen_rtx_SUBREG (QImode, mask, 0);
38590 /* If merge is 0 then we're about to emit z-masked variant. */
38591 if (const0_operand (merge, mode))
38592 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38593 /* If merge is the same as accum then emit merge-masked
38594 variant. */
38595 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38597 merge = force_reg (mode, merge);
38598 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38600 /* Merge with something unknown might happen if we z-mask
38601 w/ -O0. */
38602 else
38604 target = gen_reg_rtx (mode);
38605 emit_move_insn (target, merge);
38606 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38609 return target;
38611 case IX86_BUILTIN_RDPID:
38612 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
38613 target);
38614 default:
38615 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
38619 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
38620 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
38622 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
38623 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
38626 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
38627 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
38629 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
38630 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
38633 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
38634 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
38636 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
38637 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
38640 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
38641 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
38643 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
38644 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
38647 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
38648 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
38650 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
38651 const struct builtin_description *d = bdesc_multi_arg + i;
38652 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
38653 (enum ix86_builtin_func_type)
38654 d->flag, d->comparison);
38657 if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
38658 && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
38660 i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
38661 return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
38662 target);
38665 if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
38666 && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
38668 i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
38669 return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp,
38670 target);
38673 gcc_unreachable ();
38676 /* This returns the target-specific builtin with code CODE if
38677 current_function_decl has visibility on this builtin, which is checked
38678 using isa flags. Returns NULL_TREE otherwise. */
38680 static tree ix86_get_builtin (enum ix86_builtins code)
38682 struct cl_target_option *opts;
38683 tree target_tree = NULL_TREE;
38685 /* Determine the isa flags of current_function_decl. */
38687 if (current_function_decl)
38688 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
38690 if (target_tree == NULL)
38691 target_tree = target_option_default_node;
38693 opts = TREE_TARGET_OPTION (target_tree);
38695 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
38696 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
38697 return ix86_builtin_decl (code, true);
38698 else
38699 return NULL_TREE;
38702 /* Return function decl for target specific builtin
38703 for the given MPX builtin passed in FCODE. */
38704 static tree
38705 ix86_builtin_mpx_function (unsigned fcode)
38707 switch (fcode)
38709 case BUILT_IN_CHKP_BNDMK:
38710 return ix86_builtins[IX86_BUILTIN_BNDMK];
38712 case BUILT_IN_CHKP_BNDSTX:
38713 return ix86_builtins[IX86_BUILTIN_BNDSTX];
38715 case BUILT_IN_CHKP_BNDLDX:
38716 return ix86_builtins[IX86_BUILTIN_BNDLDX];
38718 case BUILT_IN_CHKP_BNDCL:
38719 return ix86_builtins[IX86_BUILTIN_BNDCL];
38721 case BUILT_IN_CHKP_BNDCU:
38722 return ix86_builtins[IX86_BUILTIN_BNDCU];
38724 case BUILT_IN_CHKP_BNDRET:
38725 return ix86_builtins[IX86_BUILTIN_BNDRET];
38727 case BUILT_IN_CHKP_INTERSECT:
38728 return ix86_builtins[IX86_BUILTIN_BNDINT];
38730 case BUILT_IN_CHKP_NARROW:
38731 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
38733 case BUILT_IN_CHKP_SIZEOF:
38734 return ix86_builtins[IX86_BUILTIN_SIZEOF];
38736 case BUILT_IN_CHKP_EXTRACT_LOWER:
38737 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
38739 case BUILT_IN_CHKP_EXTRACT_UPPER:
38740 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
38742 default:
38743 return NULL_TREE;
38746 gcc_unreachable ();
38749 /* Helper function for ix86_load_bounds and ix86_store_bounds.
38751 Return an address to be used to load/store bounds for pointer
38752 passed in SLOT.
38754 SLOT_NO is an integer constant holding the number of a target-
38755 dependent special slot to be used in case SLOT is not a memory.
38757 SPECIAL_BASE is a pointer to be used as a base of fake address
38758 to access special slots in Bounds Table. SPECIAL_BASE[-1],
38759 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
38761 static rtx
38762 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
38764 rtx addr = NULL;
38766 /* A NULL slot means we pass bounds for a pointer not passed to the
38767 function at all. A register slot means we pass the pointer in a
38768 register. In both these cases bounds are passed via the Bounds
38769 Table. Since we do not have the actual pointer stored in memory,
38770 we have to use fake addresses to access the Bounds Table. We
38771 start with (special_base - sizeof (void*)) and decrease this
38772 address by the pointer size to get addresses for other slots. */
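/* E.g. with 64-bit pointers, slot 0 is addressed as special_base - 8,
   slot 1 as special_base - 16, and so on. */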
38773 if (!slot || REG_P (slot))
38775 gcc_assert (CONST_INT_P (slot_no));
38776 addr = plus_constant (Pmode, special_base,
38777 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
38779 /* If pointer is passed in a memory then its address is used to
38780 access Bounds Table. */
38781 else if (MEM_P (slot))
38783 addr = XEXP (slot, 0);
38784 if (!register_operand (addr, Pmode))
38785 addr = copy_addr_to_reg (addr);
38787 else
38788 gcc_unreachable ();
38790 return addr;
38793 /* Expand pass uses this hook to load bounds for function parameter
38794 PTR passed in SLOT in case its bounds are not passed in a register.
38796 If SLOT is a memory, then bounds are loaded as for a regular pointer
38797 loaded from memory. PTR may be NULL in case SLOT is a memory;
38798 in such a case the value of PTR (if required) may be loaded from SLOT.
38800 If SLOT is NULL or a register then SLOT_NO is an integer constant
38801 holding the number of the target-dependent special slot which should be
38802 used to obtain bounds.
38804 Return loaded bounds. */
38806 static rtx
38807 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
38809 rtx reg = gen_reg_rtx (BNDmode);
38810 rtx addr;
38812 /* Get address to be used to access Bounds Table. Special slots start
38813 at the location of return address of the current function. */
38814 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
38816 /* Load pointer value from a memory if we don't have it. */
38817 if (!ptr)
38819 gcc_assert (MEM_P (slot));
38820 ptr = copy_addr_to_reg (slot);
38823 if (!register_operand (ptr, Pmode))
38824 ptr = ix86_zero_extend_to_Pmode (ptr);
38826 emit_insn (BNDmode == BND64mode
38827 ? gen_bnd64_ldx (reg, addr, ptr)
38828 : gen_bnd32_ldx (reg, addr, ptr));
38830 return reg;
38833 /* Expand pass uses this hook to store BOUNDS for call argument PTR
38834 passed in SLOT in case BOUNDS are not passed in a register.
38836 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
38837 stored in memory. PTR may be NULL in case SLOT is a memory;
38838 in such a case the value of PTR (if required) may be loaded from SLOT.
38840 If SLOT is NULL or a register then SLOT_NO is an integer constant
38841 holding the number of the target-dependent special slot which should be
38842 used to store BOUNDS. */
38844 static void
38845 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
38847 rtx addr;
38849 /* Get address to be used to access Bounds Table. Special slots start
38850 at the location of return address of a called function. */
38851 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
38853 /* Load pointer value from a memory if we don't have it. */
38854 if (!ptr)
38856 gcc_assert (MEM_P (slot));
38857 ptr = copy_addr_to_reg (slot);
38860 if (!register_operand (ptr, Pmode))
38861 ptr = ix86_zero_extend_to_Pmode (ptr);
38863 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
38864 if (!register_operand (bounds, BNDmode))
38865 bounds = copy_to_mode_reg (BNDmode, bounds);
38867 emit_insn (BNDmode == BND64mode
38868 ? gen_bnd64_stx (addr, ptr, bounds)
38869 : gen_bnd32_stx (addr, ptr, bounds));
38872 /* Load and return bounds returned by function in SLOT. */
38874 static rtx
38875 ix86_load_returned_bounds (rtx slot)
38877 rtx res;
38879 gcc_assert (REG_P (slot));
38880 res = gen_reg_rtx (BNDmode);
38881 emit_move_insn (res, slot);
38883 return res;
38886 /* Store BOUNDS returned by function into SLOT. */
38888 static void
38889 ix86_store_returned_bounds (rtx slot, rtx bounds)
38891 gcc_assert (REG_P (slot));
38892 emit_move_insn (slot, bounds);
38895 /* Returns a function decl for a vectorized version of the combined function
38896 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
38897 if it is not available. */
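/* E.g. a V4DF floor maps to IX86_BUILTIN_FLOORPD256 below, provided
   -fno-trapping-math and SSE4.1 (plus the builtin's own ISA
   requirements) are in effect. */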
38899 static tree
38900 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
38901 tree type_in)
38903 machine_mode in_mode, out_mode;
38904 int in_n, out_n;
38906 if (TREE_CODE (type_out) != VECTOR_TYPE
38907 || TREE_CODE (type_in) != VECTOR_TYPE)
38908 return NULL_TREE;
38910 out_mode = TYPE_MODE (TREE_TYPE (type_out));
38911 out_n = TYPE_VECTOR_SUBPARTS (type_out);
38912 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38913 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38915 switch (fn)
38917 CASE_CFN_EXP2:
38918 if (out_mode == SFmode && in_mode == SFmode)
38920 if (out_n == 16 && in_n == 16)
38921 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
38923 break;
38925 CASE_CFN_IFLOOR:
38926 CASE_CFN_LFLOOR:
38927 CASE_CFN_LLFLOOR:
38928 /* The round insn does not trap on denormals. */
38929 if (flag_trapping_math || !TARGET_SSE4_1)
38930 break;
38932 if (out_mode == SImode && in_mode == DFmode)
38934 if (out_n == 4 && in_n == 2)
38935 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
38936 else if (out_n == 8 && in_n == 4)
38937 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
38938 else if (out_n == 16 && in_n == 8)
38939 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
38941 if (out_mode == SImode && in_mode == SFmode)
38943 if (out_n == 4 && in_n == 4)
38944 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
38945 else if (out_n == 8 && in_n == 8)
38946 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
38947 else if (out_n == 16 && in_n == 16)
38948 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
38950 break;
38952 CASE_CFN_ICEIL:
38953 CASE_CFN_LCEIL:
38954 CASE_CFN_LLCEIL:
38955 /* The round insn does not trap on denormals. */
38956 if (flag_trapping_math || !TARGET_SSE4_1)
38957 break;
38959 if (out_mode == SImode && in_mode == DFmode)
38961 if (out_n == 4 && in_n == 2)
38962 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
38963 else if (out_n == 8 && in_n == 4)
38964 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
38965 else if (out_n == 16 && in_n == 8)
38966 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
38968 if (out_mode == SImode && in_mode == SFmode)
38970 if (out_n == 4 && in_n == 4)
38971 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
38972 else if (out_n == 8 && in_n == 8)
38973 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
38974 else if (out_n == 16 && in_n == 16)
38975 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
38977 break;
38979 CASE_CFN_IRINT:
38980 CASE_CFN_LRINT:
38981 CASE_CFN_LLRINT:
38982 if (out_mode == SImode && in_mode == DFmode)
38984 if (out_n == 4 && in_n == 2)
38985 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
38986 else if (out_n == 8 && in_n == 4)
38987 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
38988 else if (out_n == 16 && in_n == 8)
38989 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
38991 if (out_mode == SImode && in_mode == SFmode)
38993 if (out_n == 4 && in_n == 4)
38994 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
38995 else if (out_n == 8 && in_n == 8)
38996 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
38997 else if (out_n == 16 && in_n == 16)
38998 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
39000 break;
39002 CASE_CFN_IROUND:
39003 CASE_CFN_LROUND:
39004 CASE_CFN_LLROUND:
39005 /* The round insn does not trap on denormals. */
39006 if (flag_trapping_math || !TARGET_SSE4_1)
39007 break;
39009 if (out_mode == SImode && in_mode == DFmode)
39011 if (out_n == 4 && in_n == 2)
39012 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
39013 else if (out_n == 8 && in_n == 4)
39014 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
39015 else if (out_n == 16 && in_n == 8)
39016 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
39018 if (out_mode == SImode && in_mode == SFmode)
39020 if (out_n == 4 && in_n == 4)
39021 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
39022 else if (out_n == 8 && in_n == 8)
39023 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
39024 else if (out_n == 16 && in_n == 16)
39025 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
39027 break;
39029 CASE_CFN_FLOOR:
39030 /* The round insn does not trap on denormals. */
39031 if (flag_trapping_math || !TARGET_SSE4_1)
39032 break;
39034 if (out_mode == DFmode && in_mode == DFmode)
39036 if (out_n == 2 && in_n == 2)
39037 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
39038 else if (out_n == 4 && in_n == 4)
39039 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
39040 else if (out_n == 8 && in_n == 8)
39041 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
39043 if (out_mode == SFmode && in_mode == SFmode)
39045 if (out_n == 4 && in_n == 4)
39046 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
39047 else if (out_n == 8 && in_n == 8)
39048 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
39049 else if (out_n == 16 && in_n == 16)
39050 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
39052 break;
39054 CASE_CFN_CEIL:
39055 /* The round insn does not trap on denormals. */
39056 if (flag_trapping_math || !TARGET_SSE4_1)
39057 break;
39059 if (out_mode == DFmode && in_mode == DFmode)
39061 if (out_n == 2 && in_n == 2)
39062 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
39063 else if (out_n == 4 && in_n == 4)
39064 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
39065 else if (out_n == 8 && in_n == 8)
39066 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
39068 if (out_mode == SFmode && in_mode == SFmode)
39070 if (out_n == 4 && in_n == 4)
39071 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
39072 else if (out_n == 8 && in_n == 8)
39073 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
39074 else if (out_n == 16 && in_n == 16)
39075 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
39077 break;
39079 CASE_CFN_TRUNC:
39080 /* The round insn does not trap on denormals. */
39081 if (flag_trapping_math || !TARGET_SSE4_1)
39082 break;
39084 if (out_mode == DFmode && in_mode == DFmode)
39086 if (out_n == 2 && in_n == 2)
39087 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
39088 else if (out_n == 4 && in_n == 4)
39089 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
39090 else if (out_n == 8 && in_n == 8)
39091 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
39093 if (out_mode == SFmode && in_mode == SFmode)
39095 if (out_n == 4 && in_n == 4)
39096 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
39097 else if (out_n == 8 && in_n == 8)
39098 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
39099 else if (out_n == 16 && in_n == 16)
39100 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
39102 break;
39104 CASE_CFN_RINT:
39105 /* The round insn does not trap on denormals. */
39106 if (flag_trapping_math || !TARGET_SSE4_1)
39107 break;
39109 if (out_mode == DFmode && in_mode == DFmode)
39111 if (out_n == 2 && in_n == 2)
39112 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
39113 else if (out_n == 4 && in_n == 4)
39114 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
39116 if (out_mode == SFmode && in_mode == SFmode)
39118 if (out_n == 4 && in_n == 4)
39119 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
39120 else if (out_n == 8 && in_n == 8)
39121 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
39123 break;
39125 CASE_CFN_FMA:
39126 if (out_mode == DFmode && in_mode == DFmode)
39128 if (out_n == 2 && in_n == 2)
39129 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
39130 if (out_n == 4 && in_n == 4)
39131 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
39133 if (out_mode == SFmode && in_mode == SFmode)
39135 if (out_n == 4 && in_n == 4)
39136 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
39137 if (out_n == 8 && in_n == 8)
39138 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
39140 break;
39142 default:
39143 break;
39146 /* Dispatch to a handler for a vectorization library. */
39147 if (ix86_veclib_handler)
39148 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
39150 return NULL_TREE;
39153 /* Handler for an SVML-style interface to
39154 a library with vectorized intrinsics. */
39156 static tree
39157 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
39159 char name[20];
39160 tree fntype, new_fndecl, args;
39161 unsigned arity;
39162 const char *bname;
39163 machine_mode el_mode, in_mode;
39164 int n, in_n;
39166 /* The SVML is suitable for unsafe math only. */
39167 if (!flag_unsafe_math_optimizations)
39168 return NULL_TREE;
39170 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39171 n = TYPE_VECTOR_SUBPARTS (type_out);
39172 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39173 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39174 if (el_mode != in_mode
39175 || n != in_n)
39176 return NULL_TREE;
39178 switch (fn)
39180 CASE_CFN_EXP:
39181 CASE_CFN_LOG:
39182 CASE_CFN_LOG10:
39183 CASE_CFN_POW:
39184 CASE_CFN_TANH:
39185 CASE_CFN_TAN:
39186 CASE_CFN_ATAN:
39187 CASE_CFN_ATAN2:
39188 CASE_CFN_ATANH:
39189 CASE_CFN_CBRT:
39190 CASE_CFN_SINH:
39191 CASE_CFN_SIN:
39192 CASE_CFN_ASINH:
39193 CASE_CFN_ASIN:
39194 CASE_CFN_COSH:
39195 CASE_CFN_COS:
39196 CASE_CFN_ACOSH:
39197 CASE_CFN_ACOS:
39198 if ((el_mode != DFmode || n != 2)
39199 && (el_mode != SFmode || n != 4))
39200 return NULL_TREE;
39201 break;
39203 default:
39204 return NULL_TREE;
39207 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39208 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39210 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
39211 strcpy (name, "vmlsLn4");
39212 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
39213 strcpy (name, "vmldLn2");
39214 else if (n == 4)
39216 sprintf (name, "vmls%s", bname+10);
39217 name[strlen (name)-1] = '4';
39219 else
39220 sprintf (name, "vmld%s2", bname+10);
39222 /* Convert to uppercase. */
39223 name[4] &= ~0x20;
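/* E.g. for __builtin_sinf with n == 4 the name is built as "vmlssinf",
   then "vmlssin4", and finally "vmlsSin4" after uppercasing. */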
39225 arity = 0;
39226 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39227 arity++;
39229 if (arity == 1)
39230 fntype = build_function_type_list (type_out, type_in, NULL);
39231 else
39232 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39234 /* Build a function declaration for the vectorized function. */
39235 new_fndecl = build_decl (BUILTINS_LOCATION,
39236 FUNCTION_DECL, get_identifier (name), fntype);
39237 TREE_PUBLIC (new_fndecl) = 1;
39238 DECL_EXTERNAL (new_fndecl) = 1;
39239 DECL_IS_NOVOPS (new_fndecl) = 1;
39240 TREE_READONLY (new_fndecl) = 1;
39242 return new_fndecl;
39245 /* Handler for an ACML-style interface to
39246 a library with vectorized intrinsics. */
39248 static tree
39249 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
39251 char name[20] = "__vr.._";
39252 tree fntype, new_fndecl, args;
39253 unsigned arity;
39254 const char *bname;
39255 machine_mode el_mode, in_mode;
39256 int n, in_n;
39258 /* The ACML is 64-bit only and suitable for unsafe math only, as
39259 it does not correctly support parts of IEEE with the required
39260 precision such as denormals. */
39261 if (!TARGET_64BIT
39262 || !flag_unsafe_math_optimizations)
39263 return NULL_TREE;
39265 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39266 n = TYPE_VECTOR_SUBPARTS (type_out);
39267 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39268 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39269 if (el_mode != in_mode
39270 || n != in_n)
39271 return NULL_TREE;
39273 switch (fn)
39275 CASE_CFN_SIN:
39276 CASE_CFN_COS:
39277 CASE_CFN_EXP:
39278 CASE_CFN_LOG:
39279 CASE_CFN_LOG2:
39280 CASE_CFN_LOG10:
39281 if (el_mode == DFmode && n == 2)
39283 name[4] = 'd';
39284 name[5] = '2';
39286 else if (el_mode == SFmode && n == 4)
39288 name[4] = 's';
39289 name[5] = '4';
39291 else
39292 return NULL_TREE;
39293 break;
39295 default:
39296 return NULL_TREE;
39299 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39300 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39301 sprintf (name + 7, "%s", bname+10);
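/* E.g. __builtin_sin on a 2-element DFmode vector yields "__vrd2_sin". */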
39303 arity = 0;
39304 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39305 arity++;
39307 if (arity == 1)
39308 fntype = build_function_type_list (type_out, type_in, NULL);
39309 else
39310 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39312 /* Build a function declaration for the vectorized function. */
39313 new_fndecl = build_decl (BUILTINS_LOCATION,
39314 FUNCTION_DECL, get_identifier (name), fntype);
39315 TREE_PUBLIC (new_fndecl) = 1;
39316 DECL_EXTERNAL (new_fndecl) = 1;
39317 DECL_IS_NOVOPS (new_fndecl) = 1;
39318 TREE_READONLY (new_fndecl) = 1;
39320 return new_fndecl;
39323 /* Returns a decl of a function that implements gather load with
39324 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
39325 Return NULL_TREE if it is not available. */
39327 static tree
39328 ix86_vectorize_builtin_gather (const_tree mem_vectype,
39329 const_tree index_type, int scale)
39331 bool si;
39332 enum ix86_builtins code;
39334 if (! TARGET_AVX2 || !TARGET_USE_GATHER)
39335 return NULL_TREE;
39337 if ((TREE_CODE (index_type) != INTEGER_TYPE
39338 && !POINTER_TYPE_P (index_type))
39339 || (TYPE_MODE (index_type) != SImode
39340 && TYPE_MODE (index_type) != DImode))
39341 return NULL_TREE;
39343 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39344 return NULL_TREE;
39346 /* v*gather* insn sign extends index to pointer mode. */
39347 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39348 && TYPE_UNSIGNED (index_type))
39349 return NULL_TREE;
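/* Scale can be 1, 2, 4 or 8. */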
39351 if (scale <= 0
39352 || scale > 8
39353 || (scale & (scale - 1)) != 0)
39354 return NULL_TREE;
39356 si = TYPE_MODE (index_type) == SImode;
39357 switch (TYPE_MODE (mem_vectype))
39359 case E_V2DFmode:
39360 if (TARGET_AVX512VL)
39361 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
39362 else
39363 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
39364 break;
39365 case E_V4DFmode:
39366 if (TARGET_AVX512VL)
39367 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
39368 else
39369 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
39370 break;
39371 case E_V2DImode:
39372 if (TARGET_AVX512VL)
39373 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
39374 else
39375 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
39376 break;
39377 case E_V4DImode:
39378 if (TARGET_AVX512VL)
39379 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
39380 else
39381 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
39382 break;
39383 case E_V4SFmode:
39384 if (TARGET_AVX512VL)
39385 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
39386 else
39387 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
39388 break;
39389 case E_V8SFmode:
39390 if (TARGET_AVX512VL)
39391 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
39392 else
39393 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
39394 break;
39395 case E_V4SImode:
39396 if (TARGET_AVX512VL)
39397 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
39398 else
39399 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
39400 break;
39401 case E_V8SImode:
39402 if (TARGET_AVX512VL)
39403 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
39404 else
39405 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
39406 break;
39407 case E_V8DFmode:
39408 if (TARGET_AVX512F)
39409 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
39410 else
39411 return NULL_TREE;
39412 break;
39413 case E_V8DImode:
39414 if (TARGET_AVX512F)
39415 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
39416 else
39417 return NULL_TREE;
39418 break;
39419 case E_V16SFmode:
39420 if (TARGET_AVX512F)
39421 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
39422 else
39423 return NULL_TREE;
39424 break;
39425 case E_V16SImode:
39426 if (TARGET_AVX512F)
39427 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
39428 else
39429 return NULL_TREE;
39430 break;
39431 default:
39432 return NULL_TREE;
39435 return ix86_get_builtin (code);
39438 /* Returns a decl of a function that implements scatter store with
39439 register type VECTYPE and index type INDEX_TYPE and SCALE.
39440 Return NULL_TREE if it is not available. */
39442 static tree
39443 ix86_vectorize_builtin_scatter (const_tree vectype,
39444 const_tree index_type, int scale)
39446 bool si;
39447 enum ix86_builtins code;
39449 if (!TARGET_AVX512F)
39450 return NULL_TREE;
39452 if ((TREE_CODE (index_type) != INTEGER_TYPE
39453 && !POINTER_TYPE_P (index_type))
39454 || (TYPE_MODE (index_type) != SImode
39455 && TYPE_MODE (index_type) != DImode))
39456 return NULL_TREE;
39458 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39459 return NULL_TREE;
39461 /* v*scatter* insn sign extends index to pointer mode. */
39462 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39463 && TYPE_UNSIGNED (index_type))
39464 return NULL_TREE;
39466 /* Scale can be 1, 2, 4 or 8. */
39467 if (scale <= 0
39468 || scale > 8
39469 || (scale & (scale - 1)) != 0)
39470 return NULL_TREE;
39472 si = TYPE_MODE (index_type) == SImode;
39473 switch (TYPE_MODE (vectype))
39475 case E_V8DFmode:
39476 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
39477 break;
39478 case E_V8DImode:
39479 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
39480 break;
39481 case E_V16SFmode:
39482 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
39483 break;
39484 case E_V16SImode:
39485 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
39486 break;
39487 default:
39488 return NULL_TREE;
39491 return ix86_builtins[code];
39494 /* Return true if it is safe to use the rsqrt optabs to optimize
39495 1.0/sqrt. */
39497 static bool
39498 use_rsqrt_p ()
39500 return (TARGET_SSE_MATH
39501 && flag_finite_math_only
39502 && !flag_trapping_math
39503 && flag_unsafe_math_optimizations);
39506 /* Returns a code for a target-specific builtin that implements
39507 reciprocal of the function, or NULL_TREE if not available. */
39509 static tree
39510 ix86_builtin_reciprocal (tree fndecl)
39512 switch (DECL_FUNCTION_CODE (fndecl))
39514 /* Vectorized version of sqrt to rsqrt conversion. */
39515 case IX86_BUILTIN_SQRTPS_NR:
39516 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
39518 case IX86_BUILTIN_SQRTPS_NR256:
39519 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
39521 default:
39522 return NULL_TREE;
39526 /* Helper for avx_vpermilps256_operand et al. This is also used by
39527 the expansion functions to turn the parallel back into a mask.
39528 The return value is 0 for no match and the imm8+1 for a match. */
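/* E.g. in V4SFmode the identity parallel (0 1 2 3) encodes to imm8 0xe4,
   so 0xe5 is returned. */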
39530 int
39531 avx_vpermilp_parallel (rtx par, machine_mode mode)
39533 unsigned i, nelt = GET_MODE_NUNITS (mode);
39534 unsigned mask = 0;
39535 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
39537 if (XVECLEN (par, 0) != (int) nelt)
39538 return 0;
39540 /* Validate that all of the elements are constants, and not totally
39541 out of range. Copy the data into an integral array to make the
39542 subsequent checks easier. */
39543 for (i = 0; i < nelt; ++i)
39545 rtx er = XVECEXP (par, 0, i);
39546 unsigned HOST_WIDE_INT ei;
39548 if (!CONST_INT_P (er))
39549 return 0;
39550 ei = INTVAL (er);
39551 if (ei >= nelt)
39552 return 0;
39553 ipar[i] = ei;
39556 switch (mode)
39558 case E_V8DFmode:
39559 /* In the 512-bit DFmode case, we can only move elements within
39560 a 128-bit lane. First fill the second part of the mask,
39561 then fallthru. */
39562 for (i = 4; i < 6; ++i)
39564 if (ipar[i] < 4 || ipar[i] >= 6)
39565 return 0;
39566 mask |= (ipar[i] - 4) << i;
39568 for (i = 6; i < 8; ++i)
39570 if (ipar[i] < 6)
39571 return 0;
39572 mask |= (ipar[i] - 6) << i;
39574 /* FALLTHRU */
39576 case E_V4DFmode:
39577 /* In the 256-bit DFmode case, we can only move elements within
39578 a 128-bit lane. */
39579 for (i = 0; i < 2; ++i)
39581 if (ipar[i] >= 2)
39582 return 0;
39583 mask |= ipar[i] << i;
39585 for (i = 2; i < 4; ++i)
39587 if (ipar[i] < 2)
39588 return 0;
39589 mask |= (ipar[i] - 2) << i;
39591 break;
39593 case E_V16SFmode:
39594 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
39595 must mirror the permutation in the lower 256 bits. */
39596 for (i = 0; i < 8; ++i)
39597 if (ipar[i] + 8 != ipar[i + 8])
39598 return 0;
39599 /* FALLTHRU */
39601 case E_V8SFmode:
39602 /* In the 256-bit SFmode case, we have full freedom of
39603 movement within the low 128-bit lane, but the high 128-bit
39604 lane must mirror the exact same pattern. */
39605 for (i = 0; i < 4; ++i)
39606 if (ipar[i] + 4 != ipar[i + 4])
39607 return 0;
39608 nelt = 4;
39609 /* FALLTHRU */
39611 case E_V2DFmode:
39612 case E_V4SFmode:
39613 /* In the 128-bit case, we have full freedom in the placement
39614 the elements from the source operand. */
39615 for (i = 0; i < nelt; ++i)
39616 mask |= ipar[i] << (i * (nelt / 2));
39617 break;
39619 default:
39620 gcc_unreachable ();
39623 /* Make sure success has a non-zero value by adding one. */
39624 return mask + 1;
39627 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
39628 the expansion functions to turn the parallel back into a mask.
39629 The return value is 0 for no match and the imm8+1 for a match. */
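/* E.g. in V8SFmode the parallel (0 1 2 3 8 9 10 11), i.e. the low halves
   of both operands, encodes to imm8 0x20, so 0x21 is returned. */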
39631 int
39632 avx_vperm2f128_parallel (rtx par, machine_mode mode)
39634 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
39635 unsigned mask = 0;
39636 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
39638 if (XVECLEN (par, 0) != (int) nelt)
39639 return 0;
39641 /* Validate that all of the elements are constants, and not totally
39642 out of range. Copy the data into an integral array to make the
39643 subsequent checks easier. */
39644 for (i = 0; i < nelt; ++i)
39646 rtx er = XVECEXP (par, 0, i);
39647 unsigned HOST_WIDE_INT ei;
39649 if (!CONST_INT_P (er))
39650 return 0;
39651 ei = INTVAL (er);
39652 if (ei >= 2 * nelt)
39653 return 0;
39654 ipar[i] = ei;
39657 /* Validate that the halves of the permute are halves. */
39658 for (i = 0; i < nelt2 - 1; ++i)
39659 if (ipar[i] + 1 != ipar[i + 1])
39660 return 0;
39661 for (i = nelt2; i < nelt - 1; ++i)
39662 if (ipar[i] + 1 != ipar[i + 1])
39663 return 0;
39665 /* Reconstruct the mask. */
39666 for (i = 0; i < 2; ++i)
39668 unsigned e = ipar[i * nelt2];
39669 if (e % nelt2)
39670 return 0;
39671 e /= nelt2;
39672 mask |= e << (i * 4);
39675 /* Make sure success has a non-zero value by adding one. */
39676 return mask + 1;
39679 /* Return a register priority for hard reg REGNO. */
39680 static int
39681 ix86_register_priority (int hard_regno)
39683 /* ebp and r13 as the base always wants a displacement, r12 as the
39684 base always wants an index. So discourage their usage in an
39685 address. */
39686 if (hard_regno == R12_REG || hard_regno == R13_REG)
39687 return 0;
39688 if (hard_regno == BP_REG)
39689 return 1;
39690 /* New x86-64 int registers result in bigger code size. Discourage
39691 them. */
39692 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
39693 return 2;
39694 /* New x86-64 SSE registers result in bigger code size. Discourage
39695 them. */
39696 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
39697 return 2;
39698 /* Usage of AX register results in smaller code. Prefer it. */
39699 if (hard_regno == AX_REG)
39700 return 4;
39701 return 3;
39704 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
39706 Put float CONST_DOUBLE in the constant pool instead of fp regs.
39707 QImode must go into class Q_REGS.
39708 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
39709 movdf to do mem-to-mem moves through integer regs. */
39711 static reg_class_t
39712 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
39714 machine_mode mode = GET_MODE (x);
39716 /* We're only allowed to return a subclass of CLASS. Many of the
39717 following checks fail for NO_REGS, so eliminate that early. */
39718 if (regclass == NO_REGS)
39719 return NO_REGS;
39721 /* All classes can load zeros. */
39722 if (x == CONST0_RTX (mode))
39723 return regclass;
39725 /* Force constants into memory if we are loading a (nonzero) constant into
39726 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
39727 instructions to load from a constant. */
39728 if (CONSTANT_P (x)
39729 && (MAYBE_MMX_CLASS_P (regclass)
39730 || MAYBE_SSE_CLASS_P (regclass)
39731 || MAYBE_MASK_CLASS_P (regclass)))
39732 return NO_REGS;
39734 /* Floating-point constants need more complex checks. */
39735 if (CONST_DOUBLE_P (x))
39737 /* General regs can load everything. */
39738 if (INTEGER_CLASS_P (regclass))
39739 return regclass;
39741 /* Floats can load 0 and 1 plus some others. Note that we eliminated
39742 zero above. We only want to wind up preferring 80387 registers if
39743 we plan on doing computation with them. */
39744 if (IS_STACK_MODE (mode)
39745 && standard_80387_constant_p (x) > 0)
39747 /* Limit class to FP regs. */
39748 if (FLOAT_CLASS_P (regclass))
39749 return FLOAT_REGS;
39750 else if (regclass == FP_TOP_SSE_REGS)
39751 return FP_TOP_REG;
39752 else if (regclass == FP_SECOND_SSE_REGS)
39753 return FP_SECOND_REG;
39756 return NO_REGS;
39759 /* Prefer SSE regs only, if we can use them for math. */
39760 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39761 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
39763 /* Generally when we see PLUS here, it's the function invariant
39764 (plus soft-fp const_int). Which can only be computed into general
39765 regs. */
39766 if (GET_CODE (x) == PLUS)
39767 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
39769 /* QImode constants are easy to load, but non-constant QImode data
39770 must go into Q_REGS. */
39771 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
39773 if (Q_CLASS_P (regclass))
39774 return regclass;
39775 else if (reg_class_subset_p (Q_REGS, regclass))
39776 return Q_REGS;
39777 else
39778 return NO_REGS;
39781 return regclass;
39784 /* Discourage putting floating-point values in SSE registers unless
39785 SSE math is being used, and likewise for the 387 registers. */
39786 static reg_class_t
39787 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
39789 machine_mode mode = GET_MODE (x);
39791 /* Restrict the output reload class to the register bank that we are doing
39792 math on. If we would like not to return a subset of CLASS, reject this
39793 alternative: if reload cannot do this, it will still use its choice. */
39794 mode = GET_MODE (x);
39795 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39796 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
39798 if (IS_STACK_MODE (mode))
39800 if (regclass == FP_TOP_SSE_REGS)
39801 return FP_TOP_REG;
39802 else if (regclass == FP_SECOND_SSE_REGS)
39803 return FP_SECOND_REG;
39804 else
39805 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
39808 return regclass;
39811 static reg_class_t
39812 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
39813 machine_mode mode, secondary_reload_info *sri)
39815 /* Double-word spills from general registers to non-offsettable memory
39816 references (zero-extended addresses) require special handling. */
39817 if (TARGET_64BIT
39818 && MEM_P (x)
39819 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
39820 && INTEGER_CLASS_P (rclass)
39821 && !offsettable_memref_p (x))
39823 sri->icode = (in_p
39824 ? CODE_FOR_reload_noff_load
39825 : CODE_FOR_reload_noff_store);
39826 /* Add the cost of moving address to a temporary. */
39827 sri->extra_cost = 1;
39829 return NO_REGS;
39832 /* QImode spills from non-QI registers require
39833 intermediate register on 32bit targets. */
39834 if (mode == QImode
39835 && ((!TARGET_64BIT && !in_p
39836 && INTEGER_CLASS_P (rclass)
39837 && MAYBE_NON_Q_CLASS_P (rclass))
39838 || (!TARGET_AVX512DQ
39839 && MAYBE_MASK_CLASS_P (rclass))))
39841 int regno = true_regnum (x);
39843 /* Return Q_REGS if the operand is in memory. */
39844 if (regno == -1)
39845 return Q_REGS;
39847 return NO_REGS;
39850 /* This condition handles the corner case where an expression involving
39851 pointers gets vectorized. We're trying to use the address of a
39852 stack slot as a vector initializer.
39854 (set (reg:V2DI 74 [ vect_cst_.2 ])
39855 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
39857 Eventually frame gets turned into sp+offset like this:
39859 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39860 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39861 (const_int 392 [0x188]))))
39863 That later gets turned into:
39865 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39866 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39867 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
39869 We'll have the following reload recorded:
39871 Reload 0: reload_in (DI) =
39872 (plus:DI (reg/f:DI 7 sp)
39873 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
39874 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39875 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
39876 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
39877 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39878 reload_reg_rtx: (reg:V2DI 22 xmm1)
39880 Which isn't going to work since SSE instructions can't handle scalar
39881 additions. Returning GENERAL_REGS forces the addition into integer
39882 register and reload can handle subsequent reloads without problems. */
39884 if (in_p && GET_CODE (x) == PLUS
39885 && SSE_CLASS_P (rclass)
39886 && SCALAR_INT_MODE_P (mode))
39887 return GENERAL_REGS;
39889 return NO_REGS;
39892 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
39894 static bool
39895 ix86_class_likely_spilled_p (reg_class_t rclass)
39897 switch (rclass)
39899 case AREG:
39900 case DREG:
39901 case CREG:
39902 case BREG:
39903 case AD_REGS:
39904 case SIREG:
39905 case DIREG:
39906 case SSE_FIRST_REG:
39907 case FP_TOP_REG:
39908 case FP_SECOND_REG:
39909 case BND_REGS:
39910 return true;
39912 default:
39913 break;
39916 return false;
39919 /* If we are copying between registers from different register sets
39920 (e.g. FP and integer), we may need a memory location.
39922 The function can't work reliably when one of the CLASSES is a class
39923 containing registers from multiple sets. We avoid this by never combining
39924 different sets in a single alternative in the machine description.
39925 Ensure that this constraint holds to avoid unexpected surprises.
39927 When STRICT is false, we are being called from REGISTER_MOVE_COST,
39928 so do not enforce these sanity checks.
39930 To optimize register_move_cost performance, we define an inline variant. */
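/* For illustration (a reading of the checks below, not an exhaustive
   list): a DFmode copy between FLOAT_REGS and an SSE class must go
   through memory, as must any copy wider than a word between mask and
   general registers, and any MMX/non-MMX copy.  */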
39932 static inline bool
39933 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
39934 reg_class_t class2, int strict)
39936 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
39937 return false;
39939 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
39940 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
39941 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
39942 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
39943 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
39944 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
39945 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
39946 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
39948 gcc_assert (!strict || lra_in_progress);
39949 return true;
39952 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
39953 return true;
39955 /* Between mask and general, we have moves no larger than word size. */
39956 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
39957 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
39958 return true;
39960 /* ??? This is a lie. We do have moves between mmx/general and between
39961 mmx/sse2. But by saying we need secondary memory we discourage the
39962 register allocator from using the mmx registers unless needed. */
39963 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
39964 return true;
39966 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
39968 /* SSE1 doesn't have any direct moves from other classes. */
39969 if (!TARGET_SSE2)
39970 return true;
39972 /* If the target says that inter-unit moves are more expensive
39973 than moving through memory, then don't generate them. */
39974 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
39975 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
39976 return true;
39978 /* Between SSE and general, we have moves no larger than word size. */
39979 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39980 return true;
39983 return false;
39986 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
39988 static bool
39989 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
39990 reg_class_t class2)
39992 return inline_secondary_memory_needed (mode, class1, class2, true);
39995 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
39997 get_secondary_mem widens integral modes to BITS_PER_WORD.
39998 There is no need to emit a full 64-bit move on 64-bit targets
39999 for integral modes that can be moved using a 32-bit move. */
40001 static machine_mode
40002 ix86_secondary_memory_needed_mode (machine_mode mode)
40004 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
40005 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
40006 return mode;
40009 /* Implement the TARGET_CLASS_MAX_NREGS hook.
40011 On the 80386, this is the size of MODE in words,
40012 except in the FP regs, where a single reg is always enough. */
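/* Illustrative values implied by the code below: in 32-bit mode an
   XFmode value occupies 3 general registers but only one x87 register,
   and an XCmode value occupies 6 general registers but 2 FP registers.  */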
40014 static unsigned char
40015 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
40017 if (MAYBE_INTEGER_CLASS_P (rclass))
40019 if (mode == XFmode)
40020 return (TARGET_64BIT ? 2 : 3);
40021 else if (mode == XCmode)
40022 return (TARGET_64BIT ? 4 : 6);
40023 else
40024 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40026 else
40028 if (COMPLEX_MODE_P (mode))
40029 return 2;
40030 else
40031 return 1;
40035 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
40037 static bool
40038 ix86_can_change_mode_class (machine_mode from, machine_mode to,
40039 reg_class_t regclass)
40041 if (from == to)
40042 return true;
40044 /* x87 registers can't do subreg at all, as all values are reformatted
40045 to extended precision. */
40046 if (MAYBE_FLOAT_CLASS_P (regclass))
40047 return false;
40049 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
40051 /* Vector registers do not support QI or HImode loads. If we don't
40052 disallow a change to these modes, reload will assume it's ok to
40053 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
40054 the vec_dupv4hi pattern. */
40055 if (GET_MODE_SIZE (from) < 4)
40056 return false;
40059 return true;
40062 /* Return index of MODE in the sse load/store tables. */
40064 static inline int
40065 sse_store_index (machine_mode mode)
40067 switch (GET_MODE_SIZE (mode))
40069 case 4:
40070 return 0;
40071 case 8:
40072 return 1;
40073 case 16:
40074 return 2;
40075 case 32:
40076 return 3;
40077 case 64:
40078 return 4;
40079 default:
40080 return -1;
40084 /* Return the cost of moving data of mode M between a
40085 register and memory. A value of 2 is the default; this cost is
40086 relative to those in `REGISTER_MOVE_COST'.
40088 This function is used extensively by register_move_cost that is used to
40089 build tables at startup. Make it inline in this case.
40090 When IN is 2, return maximum of in and out move cost.
40092 If moving between registers and memory is more expensive than
40093 between two registers, you should define this macro to express the
40094 relative cost.
40096 Also model the increased moving costs of QImode registers in
40097 non Q_REGS classes. */
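/* A sketch of the lookups below: an SFmode value in a FLOAT class uses
   ix86_cost->fp_load[0] / fp_store[0], and with IN == 2 the maximum of
   the two is returned.  The concrete numbers come from the per-CPU
   tables in x86-tune-costs.h.  */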
40099 static inline int
40100 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
40101 int in)
40103 int cost;
40104 if (FLOAT_CLASS_P (regclass))
40106 int index;
40107 switch (mode)
40109 case E_SFmode:
40110 index = 0;
40111 break;
40112 case E_DFmode:
40113 index = 1;
40114 break;
40115 case E_XFmode:
40116 index = 2;
40117 break;
40118 default:
40119 return 100;
40121 if (in == 2)
40122 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
40123 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
40125 if (SSE_CLASS_P (regclass))
40127 int index = sse_store_index (mode);
40128 if (index == -1)
40129 return 100;
40130 if (in == 2)
40131 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
40132 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
40134 if (MMX_CLASS_P (regclass))
40136 int index;
40137 switch (GET_MODE_SIZE (mode))
40139 case 4:
40140 index = 0;
40141 break;
40142 case 8:
40143 index = 1;
40144 break;
40145 default:
40146 return 100;
40148 if (in == 2)
40149 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
40150 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
40152 switch (GET_MODE_SIZE (mode))
40154 case 1:
40155 if (Q_CLASS_P (regclass) || TARGET_64BIT)
40157 if (!in)
40158 return ix86_cost->int_store[0];
40159 if (TARGET_PARTIAL_REG_DEPENDENCY
40160 && optimize_function_for_speed_p (cfun))
40161 cost = ix86_cost->movzbl_load;
40162 else
40163 cost = ix86_cost->int_load[0];
40164 if (in == 2)
40165 return MAX (cost, ix86_cost->int_store[0]);
40166 return cost;
40168 else
40170 if (in == 2)
40171 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
40172 if (in)
40173 return ix86_cost->movzbl_load;
40174 else
40175 return ix86_cost->int_store[0] + 4;
40177 break;
40178 case 2:
40179 if (in == 2)
40180 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
40181 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
40182 default:
40183 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
40184 if (mode == TFmode)
40185 mode = XFmode;
40186 if (in == 2)
40187 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
40188 else if (in)
40189 cost = ix86_cost->int_load[2];
40190 else
40191 cost = ix86_cost->int_store[2];
40192 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
40196 static int
40197 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
40198 bool in)
40200 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
40204 /* Return the cost of moving data from a register in class CLASS1 to
40205 one in class CLASS2.
40207 It is not required that the cost always equal 2 when FROM is the same as TO;
40208 on some machines it is expensive to move between registers if they are not
40209 general registers. */
40211 static int
40212 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
40213 reg_class_t class2_i)
40215 enum reg_class class1 = (enum reg_class) class1_i;
40216 enum reg_class class2 = (enum reg_class) class2_i;
40218 /* In case we require secondary memory, compute cost of the store followed
40219 by load. In order to avoid bad register allocation choices, we need
40220 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
40222 if (inline_secondary_memory_needed (mode, class1, class2, false))
40224 int cost = 1;
40226 cost += inline_memory_move_cost (mode, class1, 2);
40227 cost += inline_memory_move_cost (mode, class2, 2);
40229 /* In the case of copying from a general purpose register we may emit
40230 multiple stores followed by a single load, causing a memory size
40231 mismatch stall. Count this as an arbitrarily high cost of 20. */
40232 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
40233 && TARGET_MEMORY_MISMATCH_STALL
40234 && targetm.class_max_nregs (class1, mode)
40235 > targetm.class_max_nregs (class2, mode))
40236 cost += 20;
40238 /* In the case of FP/MMX moves, the registers actually overlap, and we
40239 have to switch modes in order to treat them differently. */
40240 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
40241 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
40242 cost += 20;
40244 return cost;
40247 /* Moves between SSE/MMX and integer unit are expensive. */
40248 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
40249 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40251 /* ??? By keeping the returned value relatively high, we limit the number
40252 of moves between integer and MMX/SSE registers for all targets.
40253 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
40254 where integer modes in MMX/SSE registers are not tieable
40255 because of missing QImode and HImode moves to, from or between
40256 MMX/SSE registers. */
40257 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
40258 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
40260 if (MAYBE_FLOAT_CLASS_P (class1))
40261 return ix86_cost->fp_move;
40262 if (MAYBE_SSE_CLASS_P (class1))
40264 if (GET_MODE_BITSIZE (mode) <= 128)
40265 return ix86_cost->xmm_move;
40266 if (GET_MODE_BITSIZE (mode) <= 256)
40267 return ix86_cost->ymm_move;
40268 return ix86_cost->zmm_move;
40270 if (MAYBE_MMX_CLASS_P (class1))
40271 return ix86_cost->mmx_move;
40272 return 2;
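/* Illustrative cost computation (symbolic; actual values depend on the
   active cost table): a DFmode move between FLOAT_REGS and SSE_REGS
   needs secondary memory, so its cost is roughly
     1 + inline_memory_move_cost (DFmode, FLOAT_REGS, 2)
       + inline_memory_move_cost (DFmode, SSE_REGS, 2)
   whereas a 128-bit move within SSE_REGS is simply ix86_cost->xmm_move.  */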
40275 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
40276 words of a value of mode MODE but can be less for certain modes in
40277 special long registers.
40279 Actually there are no two word move instructions for consecutive
40280 registers. And only registers 0-3 may have mov byte instructions
40281 applied to them. */
40283 static unsigned int
40284 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
40286 if (GENERAL_REGNO_P (regno))
40288 if (mode == XFmode)
40289 return TARGET_64BIT ? 2 : 3;
40290 if (mode == XCmode)
40291 return TARGET_64BIT ? 4 : 6;
40292 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40294 if (COMPLEX_MODE_P (mode))
40295 return 2;
40296 if (mode == V64SFmode || mode == V64SImode)
40297 return 4;
40298 return 1;
40301 /* Implement TARGET_HARD_REGNO_MODE_OK. */
40303 static bool
40304 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
40306 /* Flags and only flags can hold CCmode values, and they can hold nothing else. */
40307 if (CC_REGNO_P (regno))
40308 return GET_MODE_CLASS (mode) == MODE_CC;
40309 if (GET_MODE_CLASS (mode) == MODE_CC
40310 || GET_MODE_CLASS (mode) == MODE_RANDOM
40311 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
40312 return false;
40313 if (STACK_REGNO_P (regno))
40314 return VALID_FP_MODE_P (mode);
40315 if (MASK_REGNO_P (regno))
40316 return (VALID_MASK_REG_MODE (mode)
40317 || (TARGET_AVX512BW
40318 && VALID_MASK_AVX512BW_MODE (mode)));
40319 if (BND_REGNO_P (regno))
40320 return VALID_BND_REG_MODE (mode);
40321 if (SSE_REGNO_P (regno))
40323 /* We implement the move patterns for all vector modes into and
40324 out of SSE registers, even when no operation instructions
40325 are available. */
40327 /* For AVX-512 we allow, regardless of regno:
40328 - XI mode
40329 - any 512-bit wide vector mode
40330 - any scalar mode. */
40331 if (TARGET_AVX512F
40332 && (mode == XImode
40333 || VALID_AVX512F_REG_MODE (mode)
40334 || VALID_AVX512F_SCALAR_MODE (mode)))
40335 return true;
40337 /* For AVX-5124FMAPS allow V64SFmode for special regnos. */
40338 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40339 && MOD4_SSE_REGNO_P (regno)
40340 && mode == V64SFmode)
40341 return true;
40343 /* For AVX-5124VNNIW allow V64SImode for special regnos. */
40344 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40345 && MOD4_SSE_REGNO_P (regno)
40346 && mode == V64SImode)
40347 return true;
40349 /* TODO check for QI/HI scalars. */
40350 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
40351 if (TARGET_AVX512VL
40352 && (mode == OImode
40353 || mode == TImode
40354 || VALID_AVX256_REG_MODE (mode)
40355 || VALID_AVX512VL_128_REG_MODE (mode)))
40356 return true;
40358 /* xmm16-xmm31 are only available for AVX-512. */
40359 if (EXT_REX_SSE_REGNO_P (regno))
40360 return false;
40362 /* OImode and AVX modes are available only when AVX is enabled. */
40363 return ((TARGET_AVX
40364 && VALID_AVX256_REG_OR_OI_MODE (mode))
40365 || VALID_SSE_REG_MODE (mode)
40366 || VALID_SSE2_REG_MODE (mode)
40367 || VALID_MMX_REG_MODE (mode)
40368 || VALID_MMX_REG_MODE_3DNOW (mode));
40370 if (MMX_REGNO_P (regno))
40372 /* We implement the move patterns for 3DNOW modes even in MMX mode,
40373 so if the register is available at all, then we can move data of
40374 the given mode into or out of it. */
40375 return (VALID_MMX_REG_MODE (mode)
40376 || VALID_MMX_REG_MODE_3DNOW (mode));
40379 if (mode == QImode)
40381 /* Be careful with QImode values - they can be in non-QI regs,
40382 but then they do cause partial register stalls. */
40383 if (ANY_QI_REGNO_P (regno))
40384 return true;
40385 if (!TARGET_PARTIAL_REG_STALL)
40386 return true;
40387 /* LRA checks if the hard register is OK for the given mode.
40388 QImode values can live in non-QI regs, so we allow all
40389 registers here. */
40390 if (lra_in_progress)
40391 return true;
40392 return !can_create_pseudo_p ();
40394 /* We handle both integer and floats in the general purpose registers. */
40395 else if (VALID_INT_MODE_P (mode))
40396 return true;
40397 else if (VALID_FP_MODE_P (mode))
40398 return true;
40399 else if (VALID_DFP_MODE_P (mode))
40400 return true;
40401 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
40402 on to use that value in smaller contexts, this can easily force a
40403 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
40404 supporting DImode, allow it. */
40405 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
40406 return true;
40408 return false;
40411 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
40412 saves SSE registers across calls is Win64 (thus no need to check the
40413 current ABI here), and with AVX enabled Win64 only guarantees that
40414 the low 16 bytes are saved. */
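/* For example, the hook answers true for (%xmm6, V4DFmode): Win64
   preserves only the low 16 bytes of %xmm6 across a call, so the upper
   half of a 256-bit value must be treated as clobbered.  */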
40416 static bool
40417 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
40419 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
40422 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
40423 tieable integer mode. */
40425 static bool
40426 ix86_tieable_integer_mode_p (machine_mode mode)
40428 switch (mode)
40430 case E_HImode:
40431 case E_SImode:
40432 return true;
40434 case E_QImode:
40435 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
40437 case E_DImode:
40438 return TARGET_64BIT;
40440 default:
40441 return false;
40445 /* Implement TARGET_MODES_TIEABLE_P.
40447 Return true if MODE1 is accessible in a register that can hold MODE2
40448 without copying. That is, all register classes that can hold MODE2
40449 can also hold MODE1. */
40451 static bool
40452 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
40454 if (mode1 == mode2)
40455 return true;
40457 if (ix86_tieable_integer_mode_p (mode1)
40458 && ix86_tieable_integer_mode_p (mode2))
40459 return true;
40461 /* MODE2 being XFmode implies fp stack or general regs, which means we
40462 can tie any smaller floating point modes to it. Note that we do not
40463 tie this with TFmode. */
40464 if (mode2 == XFmode)
40465 return mode1 == SFmode || mode1 == DFmode;
40467 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
40468 that we can tie it with SFmode. */
40469 if (mode2 == DFmode)
40470 return mode1 == SFmode;
40472 /* If MODE2 is only appropriate for an SSE register, then tie with
40473 any other mode acceptable to SSE registers. */
40474 if (GET_MODE_SIZE (mode2) == 32
40475 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40476 return (GET_MODE_SIZE (mode1) == 32
40477 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40478 if (GET_MODE_SIZE (mode2) == 16
40479 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40480 return (GET_MODE_SIZE (mode1) == 16
40481 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40483 /* If MODE2 is appropriate for an MMX register, then tie
40484 with any other mode acceptable to MMX registers. */
40485 if (GET_MODE_SIZE (mode2) == 8
40486 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
40487 return (GET_MODE_SIZE (mode1) == 8
40488 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
40490 return false;
40493 /* Return the cost of moving between two registers of mode MODE. */
40495 static int
40496 ix86_set_reg_reg_cost (machine_mode mode)
40498 unsigned int units = UNITS_PER_WORD;
40500 switch (GET_MODE_CLASS (mode))
40502 default:
40503 break;
40505 case MODE_CC:
40506 units = GET_MODE_SIZE (CCmode);
40507 break;
40509 case MODE_FLOAT:
40510 if ((TARGET_SSE && mode == TFmode)
40511 || (TARGET_80387 && mode == XFmode)
40512 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
40513 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
40514 units = GET_MODE_SIZE (mode);
40515 break;
40517 case MODE_COMPLEX_FLOAT:
40518 if ((TARGET_SSE && mode == TCmode)
40519 || (TARGET_80387 && mode == XCmode)
40520 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
40521 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
40522 units = GET_MODE_SIZE (mode);
40523 break;
40525 case MODE_VECTOR_INT:
40526 case MODE_VECTOR_FLOAT:
40527 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40528 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40529 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40530 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40531 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
40532 units = GET_MODE_SIZE (mode);
40535 /* Return the cost of moving between two registers of mode MODE,
40536 assuming that the move will be in pieces of at most UNITS bytes. */
40537 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
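/* Worked example of the formula above: a DImode copy in 32-bit mode
   moves 8 bytes in word-sized (4-byte) pieces and costs
   COSTS_N_INSNS (2), while a V4SFmode copy with SSE enabled moves in
   one 16-byte piece and costs COSTS_N_INSNS (1).  */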
40540 /* Return cost of vector operation in MODE given that scalar version has
40541 COST. If PARALLEL is true, assume that the CPU has more than one unit
40542 performing the operation. */
40544 static int
40545 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
40547 if (!VECTOR_MODE_P (mode))
40548 return cost;
40550 if (!parallel)
40551 return cost * GET_MODE_NUNITS (mode);
40552 if (GET_MODE_BITSIZE (mode) == 128
40553 && TARGET_SSE_SPLIT_REGS)
40554 return cost * 2;
40555 if (GET_MODE_BITSIZE (mode) > 128
40556 && TARGET_AVX128_OPTIMAL)
40557 return cost * GET_MODE_BITSIZE (mode) / 128;
40558 return cost;
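/* Example readings of the rules above: with PARALLEL false a V4SImode
   operation costs 4x the scalar COST; a 256-bit operation under
   TARGET_AVX128_OPTIMAL (split into two 128-bit halves) costs 2x;
   otherwise the scalar COST is returned unchanged.  */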
40561 /* Return cost of multiplication in MODE. */
40563 static int
40564 ix86_multiplication_cost (const struct processor_costs *cost,
40565 enum machine_mode mode)
40567 machine_mode inner_mode = mode;
40568 if (VECTOR_MODE_P (mode))
40569 inner_mode = GET_MODE_INNER (mode);
40571 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40572 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
40573 else if (X87_FLOAT_MODE_P (mode))
40574 return cost->fmul;
40575 else if (FLOAT_MODE_P (mode))
40576 return ix86_vec_cost (mode,
40577 inner_mode == DFmode
40578 ? cost->mulsd : cost->mulss, true);
40579 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40581 /* vpmullq is used in this case. No emulation is needed. */
40582 if (TARGET_AVX512DQ)
40583 return ix86_vec_cost (mode, cost->mulss, true);
40585 /* V*QImode is emulated with 7-13 insns. */
40586 if (mode == V16QImode || mode == V32QImode)
40588 int extra = 11;
40589 if (TARGET_XOP && mode == V16QImode)
40590 extra = 5;
40591 else if (TARGET_SSSE3)
40592 extra = 6;
40593 return ix86_vec_cost (mode,
40594 cost->mulss * 2 + cost->sse_op * extra,
40595 true);
40597 /* V*DImode is emulated with 5-8 insns. */
40598 else if (mode == V2DImode || mode == V4DImode)
40600 if (TARGET_XOP && mode == V2DImode)
40601 return ix86_vec_cost (mode,
40602 cost->mulss * 2 + cost->sse_op * 3,
40603 true);
40604 else
40605 return ix86_vec_cost (mode,
40606 cost->mulss * 3 + cost->sse_op * 5,
40607 true);
40609 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
40610 insns, including two PMULUDQ. */
40611 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
40612 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
40613 true);
40614 else
40615 return ix86_vec_cost (mode, cost->mulss, true);
40617 else
40618 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
40621 /* Return cost of division in MODE. */
40623 static int
40624 ix86_division_cost (const struct processor_costs *cost,
40625 enum machine_mode mode)
40627 machine_mode inner_mode = mode;
40628 if (VECTOR_MODE_P (mode))
40629 inner_mode = GET_MODE_INNER (mode);
40631 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40632 return inner_mode == DFmode ? cost->divsd : cost->divss;
40633 else if (X87_FLOAT_MODE_P (mode))
40634 return cost->fdiv;
40635 else if (FLOAT_MODE_P (mode))
40636 return ix86_vec_cost (mode,
40637 inner_mode == DFmode ? cost->divsd : cost->divss,
40638 true);
40639 else
40640 return cost->divide[MODE_INDEX (mode)];
40643 /* Return cost of shift in MODE.
40644 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
40645 AND_IN_OP1 specifies whether op1 is the result of an AND, and
40646 SHIFT_AND_TRUNCATE whether op1 is a subreg of an AND (a truncated count).
40648 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
40650 static int
40651 ix86_shift_rotate_cost (const struct processor_costs *cost,
40652 enum machine_mode mode, bool constant_op1,
40653 HOST_WIDE_INT op1_val,
40654 bool speed,
40655 bool and_in_op1,
40656 bool shift_and_truncate,
40657 bool *skip_op0, bool *skip_op1)
40659 if (skip_op0)
40660 *skip_op0 = *skip_op1 = false;
40661 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40663 /* V*QImode is emulated with 1-11 insns. */
40664 if (mode == V16QImode || mode == V32QImode)
40666 int count = 11;
40667 if (TARGET_XOP && mode == V16QImode)
40669 /* For XOP we use vpshab, which requires a broadcast of the
40670 value to the variable shift insn. For constants this
40671 means a V16QImode constant in memory; even when we can perform the
40672 shift with one insn, set the cost to prefer paddb.
40673 if (constant_op1)
40675 if (skip_op1)
40676 *skip_op1 = true;
40677 return ix86_vec_cost (mode,
40678 cost->sse_op
40679 + (speed
40680 ? COSTS_N_INSNS (1)
40681 : COSTS_N_BYTES
40682 (GET_MODE_UNIT_SIZE (mode))), true);
40684 count = 3;
40686 else if (TARGET_SSSE3)
40687 count = 7;
40688 return ix86_vec_cost (mode, cost->sse_op * count, true);
40690 else
40691 return ix86_vec_cost (mode, cost->sse_op, true);
40693 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40695 if (constant_op1)
40697 if (op1_val > 32)
40698 return cost->shift_const + COSTS_N_INSNS (2);
40699 else
40700 return cost->shift_const * 2;
40702 else
40704 if (and_in_op1)
40705 return cost->shift_var * 2;
40706 else
40707 return cost->shift_var * 6 + COSTS_N_INSNS (2);
40710 else
40712 if (constant_op1)
40713 return cost->shift_const;
40714 else if (shift_and_truncate)
40716 if (skip_op0)
40717 *skip_op0 = *skip_op1 = true;
40718 /* Return the cost after shift-and truncation. */
40719 return cost->shift_var;
40721 else
40722 return cost->shift_var;
40724 return cost->shift_const;
40727 /* Compute a (partial) cost for rtx X. Return true if the complete
40728 cost has been computed, and false if subexpressions should be
40729 scanned. In either case, *TOTAL contains the cost result. */
40731 static bool
40732 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
40733 int *total, bool speed)
40735 rtx mask;
40736 enum rtx_code code = GET_CODE (x);
40737 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
40738 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
40739 int src_cost;
40741 switch (code)
40743 case SET:
40744 if (register_operand (SET_DEST (x), VOIDmode)
40745 && reg_or_0_operand (SET_SRC (x), VOIDmode))
40747 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
40748 return true;
40751 if (register_operand (SET_SRC (x), VOIDmode))
40752 /* Avoid potentially incorrect high cost from rtx_costs
40753 for non-tieable SUBREGs. */
40754 src_cost = 0;
40755 else
40757 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
40759 if (CONSTANT_P (SET_SRC (x)))
40760 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
40761 a small value, possibly zero for cheap constants. */
40762 src_cost += COSTS_N_INSNS (1);
40765 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
40766 return true;
40768 case CONST_INT:
40769 case CONST:
40770 case LABEL_REF:
40771 case SYMBOL_REF:
40772 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
40773 *total = 3;
40774 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
40775 *total = 2;
40776 else if (flag_pic && SYMBOLIC_CONST (x)
40777 && !(TARGET_64BIT
40778 && (GET_CODE (x) == LABEL_REF
40779 || (GET_CODE (x) == SYMBOL_REF
40780 && SYMBOL_REF_LOCAL_P (x))))
40781 /* Use 0 cost for CONST to improve its propagation. */
40782 && (TARGET_64BIT || GET_CODE (x) != CONST))
40783 *total = 1;
40784 else
40785 *total = 0;
40786 return true;
40788 case CONST_DOUBLE:
40789 if (IS_STACK_MODE (mode))
40790 switch (standard_80387_constant_p (x))
40792 case -1:
40793 case 0:
40794 break;
40795 case 1: /* 0.0 */
40796 *total = 1;
40797 return true;
40798 default: /* Other constants */
40799 *total = 2;
40800 return true;
40802 /* FALLTHRU */
40804 case CONST_VECTOR:
40805 switch (standard_sse_constant_p (x, mode))
40807 case 0:
40808 break;
40809 case 1: /* 0: xor eliminates false dependency */
40810 *total = 0;
40811 return true;
40812 default: /* -1: cmp contains false dependency */
40813 *total = 1;
40814 return true;
40816 /* FALLTHRU */
40818 case CONST_WIDE_INT:
40819 /* Fall back to (MEM (SYMBOL_REF)), since that's where
40820 it'll probably end up. Add a penalty for size. */
40821 *total = (COSTS_N_INSNS (1)
40822 + (!TARGET_64BIT && flag_pic)
40823 + (GET_MODE_SIZE (mode) <= 4
40824 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
40825 return true;
40827 case ZERO_EXTEND:
40828 /* The zero extension is often completely free on x86_64, so make
40829 it as cheap as possible. */
40830 if (TARGET_64BIT && mode == DImode
40831 && GET_MODE (XEXP (x, 0)) == SImode)
40832 *total = 1;
40833 else if (TARGET_ZERO_EXTEND_WITH_AND)
40834 *total = cost->add;
40835 else
40836 *total = cost->movzx;
40837 return false;
40839 case SIGN_EXTEND:
40840 *total = cost->movsx;
40841 return false;
40843 case ASHIFT:
40844 if (SCALAR_INT_MODE_P (mode)
40845 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
40846 && CONST_INT_P (XEXP (x, 1)))
40848 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40849 if (value == 1)
40851 *total = cost->add;
40852 return false;
40854 if ((value == 2 || value == 3)
40855 && cost->lea <= cost->shift_const)
40857 *total = cost->lea;
40858 return false;
40861 /* FALLTHRU */
40863 case ROTATE:
40864 case ASHIFTRT:
40865 case LSHIFTRT:
40866 case ROTATERT:
40867 bool skip_op0, skip_op1;
40868 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
40869 CONST_INT_P (XEXP (x, 1))
40870 ? INTVAL (XEXP (x, 1)) : -1,
40871 speed,
40872 GET_CODE (XEXP (x, 1)) == AND,
40873 SUBREG_P (XEXP (x, 1))
40874 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
40875 &skip_op0, &skip_op1);
40876 if (skip_op0 || skip_op1)
40878 if (!skip_op0)
40879 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
40880 if (!skip_op1)
40881 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
40882 return true;
40884 return false;
40886 case FMA:
40888 rtx sub;
40890 gcc_assert (FLOAT_MODE_P (mode));
40891 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
40893 *total = ix86_vec_cost (mode,
40894 mode == SFmode ? cost->fmass : cost->fmasd,
40895 true);
40896 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
40898 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
40899 sub = XEXP (x, 0);
40900 if (GET_CODE (sub) == NEG)
40901 sub = XEXP (sub, 0);
40902 *total += rtx_cost (sub, mode, FMA, 0, speed);
40904 sub = XEXP (x, 2);
40905 if (GET_CODE (sub) == NEG)
40906 sub = XEXP (sub, 0);
40907 *total += rtx_cost (sub, mode, FMA, 2, speed);
40908 return true;
40911 case MULT:
40912 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
40914 rtx op0 = XEXP (x, 0);
40915 rtx op1 = XEXP (x, 1);
40916 int nbits;
40917 if (CONST_INT_P (XEXP (x, 1)))
40919 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40920 for (nbits = 0; value != 0; value &= value - 1)
40921 nbits++;
40923 else
40924 /* This is arbitrary. */
40925 nbits = 7;
40927 /* Compute costs correctly for widening multiplication. */
40928 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
40929 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
40930 == GET_MODE_SIZE (mode))
40932 int is_mulwiden = 0;
40933 machine_mode inner_mode = GET_MODE (op0);
40935 if (GET_CODE (op0) == GET_CODE (op1))
40936 is_mulwiden = 1, op1 = XEXP (op1, 0);
40937 else if (CONST_INT_P (op1))
40939 if (GET_CODE (op0) == SIGN_EXTEND)
40940 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
40941 == INTVAL (op1);
40942 else
40943 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
40946 if (is_mulwiden)
40947 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
40950 *total = (cost->mult_init[MODE_INDEX (mode)]
40951 + nbits * cost->mult_bit
40952 + rtx_cost (op0, mode, outer_code, opno, speed)
40953 + rtx_cost (op1, mode, outer_code, opno, speed));
40955 return true;
40957 *total = ix86_multiplication_cost (cost, mode);
40958 return false;
40960 case DIV:
40961 case UDIV:
40962 case MOD:
40963 case UMOD:
40964 *total = ix86_division_cost (cost, mode);
40965 return false;
40967 case PLUS:
40968 if (GET_MODE_CLASS (mode) == MODE_INT
40969 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
40971 if (GET_CODE (XEXP (x, 0)) == PLUS
40972 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
40973 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
40974 && CONSTANT_P (XEXP (x, 1)))
40976 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
40977 if (val == 2 || val == 4 || val == 8)
40979 *total = cost->lea;
40980 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
40981 outer_code, opno, speed);
40982 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
40983 outer_code, opno, speed);
40984 *total += rtx_cost (XEXP (x, 1), mode,
40985 outer_code, opno, speed);
40986 return true;
40989 else if (GET_CODE (XEXP (x, 0)) == MULT
40990 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
40992 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
40993 if (val == 2 || val == 4 || val == 8)
40995 *total = cost->lea;
40996 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
40997 outer_code, opno, speed);
40998 *total += rtx_cost (XEXP (x, 1), mode,
40999 outer_code, opno, speed);
41000 return true;
41003 else if (GET_CODE (XEXP (x, 0)) == PLUS)
41005 /* Add with carry, ignore the cost of adding a carry flag. */
41006 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
41007 *total = cost->add;
41008 else
41010 *total = cost->lea;
41011 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41012 outer_code, opno, speed);
41015 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
41016 outer_code, opno, speed);
41017 *total += rtx_cost (XEXP (x, 1), mode,
41018 outer_code, opno, speed);
41019 return true;
41022 /* FALLTHRU */
41024 case MINUS:
41025 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
41026 if (GET_MODE_CLASS (mode) == MODE_INT
41027 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
41028 && GET_CODE (XEXP (x, 0)) == MINUS
41029 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
41031 *total = cost->add;
41032 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41033 outer_code, opno, speed);
41034 *total += rtx_cost (XEXP (x, 1), mode,
41035 outer_code, opno, speed);
41036 return true;
41039 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41041 *total = cost->addss;
41042 return false;
41044 else if (X87_FLOAT_MODE_P (mode))
41046 *total = cost->fadd;
41047 return false;
41049 else if (FLOAT_MODE_P (mode))
41051 *total = ix86_vec_cost (mode, cost->addss, true);
41052 return false;
41054 /* FALLTHRU */
41056 case AND:
41057 case IOR:
41058 case XOR:
41059 if (GET_MODE_CLASS (mode) == MODE_INT
41060 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41062 *total = (cost->add * 2
41063 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
41064 << (GET_MODE (XEXP (x, 0)) != DImode))
41065 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
41066 << (GET_MODE (XEXP (x, 1)) != DImode)));
41067 return true;
41069 /* FALLTHRU */
41071 case NEG:
41072 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41074 *total = cost->sse_op;
41075 return false;
41077 else if (X87_FLOAT_MODE_P (mode))
41079 *total = cost->fchs;
41080 return false;
41082 else if (FLOAT_MODE_P (mode))
41084 *total = ix86_vec_cost (mode, cost->sse_op, true);
41085 return false;
41087 /* FALLTHRU */
41089 case NOT:
41090 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41091 *total = ix86_vec_cost (mode, cost->sse_op, true);
41092 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41093 *total = cost->add * 2;
41094 else
41095 *total = cost->add;
41096 return false;
41098 case COMPARE:
41099 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
41100 && XEXP (XEXP (x, 0), 1) == const1_rtx
41101 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
41102 && XEXP (x, 1) == const0_rtx)
41104 /* This kind of construct is implemented using test[bwl].
41105 Treat it as if we had an AND. */
41106 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
41107 *total = (cost->add
41108 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
41109 opno, speed)
41110 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
41111 return true;
41114 /* The embedded comparison operand is completely free. */
41115 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
41116 && XEXP (x, 1) == const0_rtx)
41117 *total = 0;
41119 return false;
41121 case FLOAT_EXTEND:
41122 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41123 *total = 0;
41124 else
41125 *total = ix86_vec_cost (mode, cost->addss, true);
41126 return false;
41128 case FLOAT_TRUNCATE:
41129 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41130 *total = cost->fadd;
41131 else
41132 *total = ix86_vec_cost (mode, cost->addss, true);
41133 return false;
41135 case ABS:
41136 /* SSE requires memory load for the constant operand. It may make
41137 sense to account for this. Of course the constant operand may or
41138 may not be reused. */
41139 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41140 *total = cost->sse_op;
41141 else if (X87_FLOAT_MODE_P (mode))
41142 *total = cost->fabs;
41143 else if (FLOAT_MODE_P (mode))
41144 *total = ix86_vec_cost (mode, cost->sse_op, true);
41145 return false;
41147 case SQRT:
41148 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41149 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
41150 else if (X87_FLOAT_MODE_P (mode))
41151 *total = cost->fsqrt;
41152 else if (FLOAT_MODE_P (mode))
41153 *total = ix86_vec_cost (mode,
41154 mode == SFmode ? cost->sqrtss : cost->sqrtsd,
41155 true);
41156 return false;
41158 case UNSPEC:
41159 if (XINT (x, 1) == UNSPEC_TP)
41160 *total = 0;
41161 return false;
41163 case VEC_SELECT:
41164 case VEC_CONCAT:
41165 case VEC_DUPLICATE:
41166 /* ??? Assume all of these vector manipulation patterns are
41167 recognizable, in which case they all have pretty much the
41168 same cost. */
41169 *total = cost->sse_op;
41170 return true;
41171 case VEC_MERGE:
41172 mask = XEXP (x, 2);
41173 /* This is a masked instruction; assume the same cost
41174 as the non-masked variant. */
41175 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
41176 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
41177 else
41178 *total = cost->sse_op;
41179 return true;
41181 default:
41182 return false;
41186 #if TARGET_MACHO
41188 static int current_machopic_label_num;
41190 /* Given a symbol name and its associated stub, write out the
41191 definition of the stub. */
41193 void
41194 machopic_output_stub (FILE *file, const char *symb, const char *stub)
41196 unsigned int length;
41197 char *binder_name, *symbol_name, lazy_ptr_name[32];
41198 int label = ++current_machopic_label_num;
41200 /* For 64-bit we shouldn't get here. */
41201 gcc_assert (!TARGET_64BIT);
41203 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
41204 symb = targetm.strip_name_encoding (symb);
41206 length = strlen (stub);
41207 binder_name = XALLOCAVEC (char, length + 32);
41208 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
41210 length = strlen (symb);
41211 symbol_name = XALLOCAVEC (char, length + 32);
41212 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
41214 sprintf (lazy_ptr_name, "L%d$lz", label);
41216 if (MACHOPIC_ATT_STUB)
41217 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
41218 else if (MACHOPIC_PURE)
41219 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
41220 else
41221 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
41223 fprintf (file, "%s:\n", stub);
41224 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41226 if (MACHOPIC_ATT_STUB)
41228 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
41230 else if (MACHOPIC_PURE)
41232 /* PIC stub. */
41233 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41234 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
41235 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
41236 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
41237 label, lazy_ptr_name, label);
41238 fprintf (file, "\tjmp\t*%%ecx\n");
41240 else
41241 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
41243 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
41244 it needs no stub-binding-helper. */
41245 if (MACHOPIC_ATT_STUB)
41246 return;
41248 fprintf (file, "%s:\n", binder_name);
41250 if (MACHOPIC_PURE)
41252 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
41253 fprintf (file, "\tpushl\t%%ecx\n");
41255 else
41256 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
41258 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
41260 /* N.B. Keep the correspondence of these
41261 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
41262 old-pic/new-pic/non-pic stubs; altering this will break
41263 compatibility with existing dylibs. */
41264 if (MACHOPIC_PURE)
41266 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41267 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
41269 else
41270 /* 16-byte -mdynamic-no-pic stub. */
41271 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
41273 fprintf (file, "%s:\n", lazy_ptr_name);
41274 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41275 fprintf (file, ASM_LONG "%s\n", binder_name);
41277 #endif /* TARGET_MACHO */
41279 /* Order the registers for register allocator. */
41281 void
41282 x86_order_regs_for_local_alloc (void)
41284 int pos = 0;
41285 int i;
41287 /* First allocate the local general purpose registers. */
41288 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41289 if (GENERAL_REGNO_P (i) && call_used_regs[i])
41290 reg_alloc_order [pos++] = i;
41292 /* Global general purpose registers. */
41293 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41294 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
41295 reg_alloc_order [pos++] = i;
41297 /* x87 registers come first in case we are doing FP math
41298 using them. */
41299 if (!TARGET_SSE_MATH)
41300 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41301 reg_alloc_order [pos++] = i;
41303 /* SSE registers. */
41304 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
41305 reg_alloc_order [pos++] = i;
41306 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
41307 reg_alloc_order [pos++] = i;
41309 /* Extended REX SSE registers. */
41310 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
41311 reg_alloc_order [pos++] = i;
41313 /* Mask register. */
41314 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
41315 reg_alloc_order [pos++] = i;
41317 /* MPX bound registers. */
41318 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
41319 reg_alloc_order [pos++] = i;
41321 /* x87 registers. */
41322 if (TARGET_SSE_MATH)
41323 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41324 reg_alloc_order [pos++] = i;
41326 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
41327 reg_alloc_order [pos++] = i;
41329 /* Initialize the rest of the array, as we do not allocate some registers
41330 at all. */
41331 while (pos < FIRST_PSEUDO_REGISTER)
41332 reg_alloc_order [pos++] = 0;
41335 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
41336 in struct attribute_spec handler. */
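/* Typical use of the attribute (illustrative declaration only):

     struct big { int v[8]; };
     struct big __attribute__ ((callee_pop_aggregate_return (0)))
       get_big (void);

   The argument selects whether the caller (0) or the callee (1) pops
   the hidden pointer to the returned aggregate; 32-bit targets only.  */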
41337 static tree
41338 ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int,
41339 bool *no_add_attrs)
41341 if (TREE_CODE (*node) != FUNCTION_TYPE
41342 && TREE_CODE (*node) != METHOD_TYPE
41343 && TREE_CODE (*node) != FIELD_DECL
41344 && TREE_CODE (*node) != TYPE_DECL)
41346 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41347 name);
41348 *no_add_attrs = true;
41349 return NULL_TREE;
41351 if (TARGET_64BIT)
41353 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
41354 name);
41355 *no_add_attrs = true;
41356 return NULL_TREE;
41358 if (is_attribute_p ("callee_pop_aggregate_return", name))
41360 tree cst;
41362 cst = TREE_VALUE (args);
41363 if (TREE_CODE (cst) != INTEGER_CST)
41365 warning (OPT_Wattributes,
41366 "%qE attribute requires an integer constant argument",
41367 name);
41368 *no_add_attrs = true;
41370 else if (compare_tree_int (cst, 0) != 0
41371 && compare_tree_int (cst, 1) != 0)
41373 warning (OPT_Wattributes,
41374 "argument to %qE attribute is neither zero, nor one",
41375 name);
41376 *no_add_attrs = true;
41379 return NULL_TREE;
41382 return NULL_TREE;
41385 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
41386 struct attribute_spec.handler. */
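/* Typical use (illustrative): a single function on a 64-bit target can
   be given the other ABI's calling convention, e.g.

     int __attribute__ ((ms_abi)) win64_callback (int, void *);
     int __attribute__ ((sysv_abi)) sysv_callback (int, void *);

   The handler below only has to reject combining both attributes on
   one declaration.  */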
41387 static tree
41388 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
41389 bool *no_add_attrs)
41391 if (TREE_CODE (*node) != FUNCTION_TYPE
41392 && TREE_CODE (*node) != METHOD_TYPE
41393 && TREE_CODE (*node) != FIELD_DECL
41394 && TREE_CODE (*node) != TYPE_DECL)
41396 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41397 name);
41398 *no_add_attrs = true;
41399 return NULL_TREE;
41402 /* Can combine regparm with all attributes but fastcall. */
41403 if (is_attribute_p ("ms_abi", name))
41405 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
41407 error ("ms_abi and sysv_abi attributes are not compatible");
41410 return NULL_TREE;
41412 else if (is_attribute_p ("sysv_abi", name))
41414 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
41416 error ("ms_abi and sysv_abi attributes are not compatible");
41419 return NULL_TREE;
41422 return NULL_TREE;
41425 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
41426 struct attribute_spec.handler. */
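/* Typical use (illustrative):

     struct __attribute__ ((ms_struct)) S { char c; long long l; };

   selects the Microsoft record layout for S, while gcc_struct forces
   the traditional GCC layout even under -mms-bitfields.  */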
41427 static tree
41428 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
41429 bool *no_add_attrs)
41431 tree *type = NULL;
41432 if (DECL_P (*node))
41434 if (TREE_CODE (*node) == TYPE_DECL)
41435 type = &TREE_TYPE (*node);
41437 else
41438 type = node;
41440 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
41442 warning (OPT_Wattributes, "%qE attribute ignored",
41443 name);
41444 *no_add_attrs = true;
41447 else if ((is_attribute_p ("ms_struct", name)
41448 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
41449 || ((is_attribute_p ("gcc_struct", name)
41450 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
41452 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
41453 name);
41454 *no_add_attrs = true;
41457 return NULL_TREE;
41460 static tree
41461 ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
41462 bool *no_add_attrs)
41464 if (TREE_CODE (*node) != FUNCTION_DECL)
41466 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41467 name);
41468 *no_add_attrs = true;
41471 if (is_attribute_p ("indirect_branch", name))
41473 tree cst = TREE_VALUE (args);
41474 if (TREE_CODE (cst) != STRING_CST)
41476 warning (OPT_Wattributes,
41477 "%qE attribute requires a string constant argument",
41478 name);
41479 *no_add_attrs = true;
41481 else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41482 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41483 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41484 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41486 warning (OPT_Wattributes,
41487 "argument to %qE attribute is not "
41488 "(keep|thunk|thunk-inline|thunk-extern)", name);
41489 *no_add_attrs = true;
41493 if (is_attribute_p ("function_return", name))
41495 tree cst = TREE_VALUE (args);
41496 if (TREE_CODE (cst) != STRING_CST)
41498 warning (OPT_Wattributes,
41499 "%qE attribute requires a string constant argument",
41500 name);
41501 *no_add_attrs = true;
41503 else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41504 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41505 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41506 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41508 warning (OPT_Wattributes,
41509 "argument to %qE attribute is not "
41510 "(keep|thunk|thunk-inline|thunk-extern)", name);
41511 *no_add_attrs = true;
41515 return NULL_TREE;
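/* Illustrative use of the two attributes validated above, mirroring
   the -mindirect-branch= and -mfunction-return= options:

     void __attribute__ ((indirect_branch ("thunk"),
			  function_return ("thunk-extern")))
       sensitive_function (void);

   Any argument other than keep/thunk/thunk-inline/thunk-extern is
   rejected with the warnings above.  */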
41518 static tree
41519 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
41520 int, bool *)
41522 return NULL_TREE;
41525 static tree
41526 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
41528 /* DECL_RESULT and DECL_ARGUMENTS do not exist here yet,
41529 but the function type contains args and return type data. */
41530 tree func_type = *node;
41531 tree return_type = TREE_TYPE (func_type);
41533 int nargs = 0;
41534 tree current_arg_type = TYPE_ARG_TYPES (func_type);
41535 while (current_arg_type
41536 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
41538 if (nargs == 0)
41540 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
41541 error ("interrupt service routine should have a pointer "
41542 "as the first argument");
41544 else if (nargs == 1)
41546 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
41547 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
41548 error ("interrupt service routine should have unsigned %s"
41549 "int as the second argument",
41550 TARGET_64BIT
41551 ? (TARGET_X32 ? "long long " : "long ")
41552 : "");
41554 nargs++;
41555 current_arg_type = TREE_CHAIN (current_arg_type);
41557 if (!nargs || nargs > 2)
41558 error ("interrupt service routine can only have a pointer argument "
41559 "and an optional integer argument");
41560 if (! VOID_TYPE_P (return_type))
41561 error ("interrupt service routine can't have non-void return value");
41563 return NULL_TREE;
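/* Illustrative prototypes accepted by the checks above (the frame
   structure tag is the user's choice):

     struct interrupt_frame;
     void __attribute__ ((interrupt)) isr (struct interrupt_frame *);
     void __attribute__ ((interrupt)) fault (struct interrupt_frame *,
					     unsigned long error_code);

   The optional second argument must have word_mode width: unsigned int
   on 32-bit targets, unsigned long on 64-bit, unsigned long long on x32.  */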
41566 static bool
41567 ix86_ms_bitfield_layout_p (const_tree record_type)
41569 return ((TARGET_MS_BITFIELD_LAYOUT
41570 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
41571 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
41574 /* Returns an expression indicating where the this parameter is
41575 located on entry to the FUNCTION. */
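/* For example (an illustrative summary of the code below): with the
   64-bit SysV ABI `this' arrives in %rdi, or in %rsi when the value is
   returned through a hidden aggregate pointer; with -m32 and no regparm
   it is normally found at 4(%esp), or 8(%esp) for aggregate returns.  */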
41577 static rtx
41578 x86_this_parameter (tree function)
41580 tree type = TREE_TYPE (function);
41581 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
41582 int nregs;
41584 if (TARGET_64BIT)
41586 const int *parm_regs;
41588 if (ix86_function_type_abi (type) == MS_ABI)
41589 parm_regs = x86_64_ms_abi_int_parameter_registers;
41590 else
41591 parm_regs = x86_64_int_parameter_registers;
41592 return gen_rtx_REG (Pmode, parm_regs[aggr]);
41595 nregs = ix86_function_regparm (type, function);
41597 if (nregs > 0 && !stdarg_p (type))
41599 int regno;
41600 unsigned int ccvt = ix86_get_callcvt (type);
41602 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41603 regno = aggr ? DX_REG : CX_REG;
41604 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41606 regno = CX_REG;
41607 if (aggr)
41608 return gen_rtx_MEM (SImode,
41609 plus_constant (Pmode, stack_pointer_rtx, 4));
41611 else
41613 regno = AX_REG;
41614 if (aggr)
41616 regno = DX_REG;
41617 if (nregs == 1)
41618 return gen_rtx_MEM (SImode,
41619 plus_constant (Pmode,
41620 stack_pointer_rtx, 4));
41623 return gen_rtx_REG (SImode, regno);
41626 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
41627 aggr ? 8 : 4));
41630 /* Determine whether x86_output_mi_thunk can succeed. */
41632 static bool
41633 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
41634 const_tree function)
41636 /* 64-bit can handle anything. */
41637 if (TARGET_64BIT)
41638 return true;
41640 /* For 32-bit, everything's fine if we have one free register. */
41641 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
41642 return true;
41644 /* Need a free register for vcall_offset. */
41645 if (vcall_offset)
41646 return false;
41648 /* Need a free register for GOT references. */
41649 if (flag_pic && !targetm.binds_local_p (function))
41650 return false;
41652 /* Otherwise ok. */
41653 return true;
41656 /* Output the assembler code for a thunk function. THUNK_DECL is the
41657 declaration for the thunk function itself, FUNCTION is the decl for
41658 the target function. DELTA is an immediate constant offset to be
41659 added to THIS. If VCALL_OFFSET is nonzero, the word at
41660 *(*this + vcall_offset) should be added to THIS. */
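/* A minimal sketch of the output for a simple 64-bit case (DELTA fits
   an immediate, no VCALL_OFFSET, FUNCTION bound locally):

	addq	$DELTA, %rdi
	jmp	FUNCTION

   The code below also handles large DELTA values, vtable-based
   VCALL_OFFSET adjustments, PIC references and the 32-bit calling
   conventions.  */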
41662 static void
41663 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
41664 HOST_WIDE_INT vcall_offset, tree function)
41666 rtx this_param = x86_this_parameter (function);
41667 rtx this_reg, tmp, fnaddr;
41668 unsigned int tmp_regno;
41669 rtx_insn *insn;
41671 if (TARGET_64BIT)
41672 tmp_regno = R10_REG;
41673 else
41675 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
41676 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41677 tmp_regno = AX_REG;
41678 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41679 tmp_regno = DX_REG;
41680 else
41681 tmp_regno = CX_REG;
41684 emit_note (NOTE_INSN_PROLOGUE_END);
41686 /* If CET is enabled, insert an ENDBR instruction. */
41687 if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
41688 emit_insn (gen_nop_endbr ());
41690 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
41691 pull it in now and let DELTA benefit. */
41692 if (REG_P (this_param))
41693 this_reg = this_param;
41694 else if (vcall_offset)
41696 /* Put the this parameter into %eax. */
41697 this_reg = gen_rtx_REG (Pmode, AX_REG);
41698 emit_move_insn (this_reg, this_param);
41700 else
41701 this_reg = NULL_RTX;
41703 /* Adjust the this parameter by a fixed constant. */
41704 if (delta)
41706 rtx delta_rtx = GEN_INT (delta);
41707 rtx delta_dst = this_reg ? this_reg : this_param;
41709 if (TARGET_64BIT)
41711 if (!x86_64_general_operand (delta_rtx, Pmode))
41713 tmp = gen_rtx_REG (Pmode, tmp_regno);
41714 emit_move_insn (tmp, delta_rtx);
41715 delta_rtx = tmp;
41719 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
41722 /* Adjust the this parameter by a value stored in the vtable. */
41723 if (vcall_offset)
41725 rtx vcall_addr, vcall_mem, this_mem;
41727 tmp = gen_rtx_REG (Pmode, tmp_regno);
41729 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
41730 if (Pmode != ptr_mode)
41731 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
41732 emit_move_insn (tmp, this_mem);
41734 /* Adjust the this parameter. */
41735 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
41736 if (TARGET_64BIT
41737 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
41739 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
41740 emit_move_insn (tmp2, GEN_INT (vcall_offset));
41741 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
41744 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
41745 if (Pmode != ptr_mode)
41746 emit_insn (gen_addsi_1_zext (this_reg,
41747 gen_rtx_REG (ptr_mode,
41748 REGNO (this_reg)),
41749 vcall_mem));
41750 else
41751 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
41754 /* If necessary, drop THIS back to its stack slot. */
41755 if (this_reg && this_reg != this_param)
41756 emit_move_insn (this_param, this_reg);
41758 fnaddr = XEXP (DECL_RTL (function), 0);
41759 if (TARGET_64BIT)
41761 if (!flag_pic || targetm.binds_local_p (function)
41762 || TARGET_PECOFF)
41764 else
41766 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
41767 tmp = gen_rtx_CONST (Pmode, tmp);
41768 fnaddr = gen_const_mem (Pmode, tmp);
41771 else
41773 if (!flag_pic || targetm.binds_local_p (function))
41775 #if TARGET_MACHO
41776 else if (TARGET_MACHO)
41778 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
41779 fnaddr = XEXP (fnaddr, 0);
41781 #endif /* TARGET_MACHO */
41782 else
41784 tmp = gen_rtx_REG (Pmode, CX_REG);
41785 output_set_got (tmp, NULL_RTX);
41787 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
41788 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
41789 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
41790 fnaddr = gen_const_mem (Pmode, fnaddr);
41794 /* Our sibling call patterns do not allow memories, because we have no
41795 predicate that can distinguish between frame and non-frame memory.
41796 For our purposes here, we can get away with (ab)using a jump pattern,
41797 because we're going to do no optimization. */
41798 if (MEM_P (fnaddr))
41800 if (sibcall_insn_operand (fnaddr, word_mode))
41802 fnaddr = XEXP (DECL_RTL (function), 0);
41803 tmp = gen_rtx_MEM (QImode, fnaddr);
41804 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41805 tmp = emit_call_insn (tmp);
41806 SIBLING_CALL_P (tmp) = 1;
41808 else
41809 emit_jump_insn (gen_indirect_jump (fnaddr));
41811 else
41813 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
41815 // CM_LARGE_PIC always uses pseudo PIC register which is
41816 // uninitialized. Since FUNCTION is local and calling it
41817 // doesn't go through PLT, we use scratch register %r11 as
41818 // PIC register and initialize it here.
41819 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
41820 ix86_init_large_pic_reg (tmp_regno);
41821 fnaddr = legitimize_pic_address (fnaddr,
41822 gen_rtx_REG (Pmode, tmp_regno));
41825 if (!sibcall_insn_operand (fnaddr, word_mode))
41827 tmp = gen_rtx_REG (word_mode, tmp_regno);
41828 if (GET_MODE (fnaddr) != word_mode)
41829 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
41830 emit_move_insn (tmp, fnaddr);
41831 fnaddr = tmp;
41834 tmp = gen_rtx_MEM (QImode, fnaddr);
41835 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41836 tmp = emit_call_insn (tmp);
41837 SIBLING_CALL_P (tmp) = 1;
41839 emit_barrier ();
41841 /* Emit just enough of rest_of_compilation to get the insns emitted.
41842 Note that use_thunk calls assemble_start_function et al. */
41843 insn = get_insns ();
41844 shorten_branches (insn);
41845 final_start_function (insn, file, 1);
41846 final (insn, file, 1);
41847 final_end_function ();
41850 static void
41851 x86_file_start (void)
41853 default_file_start ();
41854 if (TARGET_16BIT)
41855 fputs ("\t.code16gcc\n", asm_out_file);
41856 #if TARGET_MACHO
41857 darwin_file_start ();
41858 #endif
41859 if (X86_FILE_START_VERSION_DIRECTIVE)
41860 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
41861 if (X86_FILE_START_FLTUSED)
41862 fputs ("\t.global\t__fltused\n", asm_out_file);
41863 if (ix86_asm_dialect == ASM_INTEL)
41864 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
41868 x86_field_alignment (tree type, int computed)
41870 machine_mode mode;
41872 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
41873 return computed;
41874 if (TARGET_IAMCU)
41875 return iamcu_alignment (type, computed);
41876 mode = TYPE_MODE (strip_array_types (type));
41877 if (mode == DFmode || mode == DCmode
41878 || GET_MODE_CLASS (mode) == MODE_INT
41879 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
41880 return MIN (32, computed);
41881 return computed;
41884 /* Print call to TARGET to FILE. */
41886 static void
41887 x86_print_call_or_nop (FILE *file, const char *target)
41889 if (flag_nop_mcount)
41890 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
41891 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
41892 else
41893 fprintf (file, "1:\tcall\t%s\n", target);
41896 /* Output assembler code to FILE to increment profiler label # LABELNO
41897 for profiling a function entry. */
41898 void
41899 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
41901 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
41902 : MCOUNT_NAME);
41903 if (TARGET_64BIT)
41905 #ifndef NO_PROFILE_COUNTERS
41906 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
41907 #endif
41909 if (!TARGET_PECOFF && flag_pic)
41910 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
41911 else
41912 x86_print_call_or_nop (file, mcount_name);
41914 else if (flag_pic)
41916 #ifndef NO_PROFILE_COUNTERS
41917 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
41918 LPREFIX, labelno);
41919 #endif
41920 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
41922 else
41924 #ifndef NO_PROFILE_COUNTERS
41925 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
41926 LPREFIX, labelno);
41927 #endif
41928 x86_print_call_or_nop (file, mcount_name);
41931 if (flag_record_mcount)
41933 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
41934 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
41935 fprintf (file, "\t.previous\n");
41939 /* We don't have exact information about the insn sizes, but we may assume
41940 quite safely that we are informed about all 1 byte insns and memory
41941 address sizes. This is enough to eliminate unnecessary padding in
41942 99% of cases. */
41945 ix86_min_insn_size (rtx_insn *insn)
41947 int l = 0, len;
41949 if (!INSN_P (insn) || !active_insn_p (insn))
41950 return 0;
41952   /* Discard alignments we've emitted and jump instructions.  */
41953 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
41954 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
41955 return 0;
41957 /* Important case - calls are always 5 bytes.
41958      It is common to have many calls in a row.  */
41959 if (CALL_P (insn)
41960 && symbolic_reference_mentioned_p (PATTERN (insn))
41961 && !SIBLING_CALL_P (insn))
41962 return 5;
41963 len = get_attr_length (insn);
41964 if (len <= 1)
41965 return 1;
41967 /* For normal instructions we rely on get_attr_length being exact,
41968 with a few exceptions. */
41969 if (!JUMP_P (insn))
41971 enum attr_type type = get_attr_type (insn);
41973 switch (type)
41975 case TYPE_MULTI:
41976 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
41977 || asm_noperands (PATTERN (insn)) >= 0)
41978 return 0;
41979 break;
41980 case TYPE_OTHER:
41981 case TYPE_FCMP:
41982 break;
41983 default:
41984 /* Otherwise trust get_attr_length. */
41985 return len;
41988 l = get_attr_length_address (insn);
41989 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
41990 l = 4;
41992 if (l)
41993 return 1+l;
41994 else
41995 return 2;
41998 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42000 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
42001 window. */
42003 static void
42004 ix86_avoid_jump_mispredicts (void)
42006 rtx_insn *insn, *start = get_insns ();
42007 int nbytes = 0, njumps = 0;
42008 bool isjump = false;
42010 /* Look for all minimal intervals of instructions containing 4 jumps.
42011 The intervals are bounded by START and INSN. NBYTES is the total
42012 size of instructions in the interval including INSN and not including
42013      START.  When NBYTES is smaller than 16 bytes, it is possible
42014      that the end of START and INSN end up in the same 16 byte page.
42016      The smallest offset within the page at which INSN can start is the
42017      case where START ends at offset 0.  The offset of INSN is then
42018      NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window with
42018      maxskip 15 - NBYTES + sizeof (INSN).
42020      Don't consider an asm goto as a jump: while it can contain a jump, it
42021      doesn't have to, since control transfer to its label(s) can be performed
42022      through other means, and we also estimate the minimum length of all asm
42022      stmts as 0.  */
42023 for (insn = start; insn; insn = NEXT_INSN (insn))
42025 int min_size;
42027 if (LABEL_P (insn))
42029 int align = label_to_alignment (insn);
42030 int max_skip = label_to_max_skip (insn);
42032 if (max_skip > 15)
42033 max_skip = 15;
42034 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
42035 already in the current 16 byte page, because otherwise
42036 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
42037 	     bytes to reach a 16 byte boundary.  */
42038 if (align <= 0
42039 || (align <= 3 && max_skip != (1 << align) - 1))
42040 max_skip = 0;
42041 if (dump_file)
42042 fprintf (dump_file, "Label %i with max_skip %i\n",
42043 INSN_UID (insn), max_skip);
42044 if (max_skip)
42046 while (nbytes + max_skip >= 16)
42048 start = NEXT_INSN (start);
42049 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42050 || CALL_P (start))
42051 njumps--, isjump = true;
42052 else
42053 isjump = false;
42054 nbytes -= ix86_min_insn_size (start);
42057 continue;
42060 min_size = ix86_min_insn_size (insn);
42061 nbytes += min_size;
42062 if (dump_file)
42063 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
42064 INSN_UID (insn), min_size);
42065 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
42066 || CALL_P (insn))
42067 njumps++;
42068 else
42069 continue;
42071 while (njumps > 3)
42073 start = NEXT_INSN (start);
42074 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42075 || CALL_P (start))
42076 njumps--, isjump = true;
42077 else
42078 isjump = false;
42079 nbytes -= ix86_min_insn_size (start);
42081 gcc_assert (njumps >= 0);
42082 if (dump_file)
42083 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
42084 INSN_UID (start), INSN_UID (insn), nbytes);
42086 if (njumps == 3 && isjump && nbytes < 16)
42088 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
42090 if (dump_file)
42091 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
42092 INSN_UID (insn), padsize);
42093 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
42097 #endif
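/* Illustrative sketch, not part of GCC: the pass above pads with
   15 - NBYTES + sizeof (INSN) bytes, which is just enough to push the fourth
   jump out of the 16 byte window it would otherwise share with the previous
   three.  The insn sizes below are hypothetical; the arithmetic mirrors the
   padsize computation in ix86_avoid_jump_mispredicts.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Minimum sizes of four consecutive jumps (hypothetical values).  */
  int sizes[4] = { 2, 5, 3, 2 };
  int nbytes = 0, i;

  for (i = 0; i < 4; i++)
    nbytes += sizes[i];

  if (nbytes < 16)
    /* Same formula as above: 15 - nbytes + size of the last jump.  */
    printf ("pad by %d bytes\n", 15 - nbytes + sizes[3]);

  return 0;
}
#endif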
42099 /* AMD Athlon works faster
42100    when RET is not the destination of a conditional jump or directly preceded
42101    by another jump instruction.  We avoid the penalty by inserting a NOP just
42102    before the RET instruction in such cases.  */
42103 static void
42104 ix86_pad_returns (void)
42106 edge e;
42107 edge_iterator ei;
42109 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42111 basic_block bb = e->src;
42112 rtx_insn *ret = BB_END (bb);
42113 rtx_insn *prev;
42114 bool replace = false;
42116 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
42117 || optimize_bb_for_size_p (bb))
42118 continue;
42119 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
42120 if (active_insn_p (prev) || LABEL_P (prev))
42121 break;
42122 if (prev && LABEL_P (prev))
42124 edge e;
42125 edge_iterator ei;
42127 FOR_EACH_EDGE (e, ei, bb->preds)
42128 if (EDGE_FREQUENCY (e) && e->src->index >= 0
42129 && !(e->flags & EDGE_FALLTHRU))
42131 replace = true;
42132 break;
42135 if (!replace)
42137 prev = prev_active_insn (ret);
42138 if (prev
42139 && ((JUMP_P (prev) && any_condjump_p (prev))
42140 || CALL_P (prev)))
42141 replace = true;
42142 	  /* Empty functions get a branch mispredict even when
42143 the jump destination is not visible to us. */
42144 if (!prev && !optimize_function_for_size_p (cfun))
42145 replace = true;
42147 if (replace)
42149 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
42150 delete_insn (ret);
42155 /* Count the minimum number of instructions in BB. Return 4 if the
42156 number of instructions >= 4. */
42158 static int
42159 ix86_count_insn_bb (basic_block bb)
42161 rtx_insn *insn;
42162 int insn_count = 0;
42164 /* Count number of instructions in this block. Return 4 if the number
42165 of instructions >= 4. */
42166 FOR_BB_INSNS (bb, insn)
42168       /* Only happens in exit blocks.  */
42169 if (JUMP_P (insn)
42170 && ANY_RETURN_P (PATTERN (insn)))
42171 break;
42173 if (NONDEBUG_INSN_P (insn)
42174 && GET_CODE (PATTERN (insn)) != USE
42175 && GET_CODE (PATTERN (insn)) != CLOBBER)
42177 insn_count++;
42178 if (insn_count >= 4)
42179 return insn_count;
42183 return insn_count;
42187 /* Count the minimum number of instructions in a code path leading to BB.
42188    Return 4 if the number of instructions is >= 4.  */
42190 static int
42191 ix86_count_insn (basic_block bb)
42193 edge e;
42194 edge_iterator ei;
42195 int min_prev_count;
42197 /* Only bother counting instructions along paths with no
42198 more than 2 basic blocks between entry and exit. Given
42199 that BB has an edge to exit, determine if a predecessor
42200 of BB has an edge from entry. If so, compute the number
42201 of instructions in the predecessor block. If there
42202 happen to be multiple such blocks, compute the minimum. */
42203 min_prev_count = 4;
42204 FOR_EACH_EDGE (e, ei, bb->preds)
42206 edge prev_e;
42207 edge_iterator prev_ei;
42209 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42211 min_prev_count = 0;
42212 break;
42214 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
42216 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42218 int count = ix86_count_insn_bb (e->src);
42219 if (count < min_prev_count)
42220 min_prev_count = count;
42221 break;
42226 if (min_prev_count < 4)
42227 min_prev_count += ix86_count_insn_bb (bb);
42229 return min_prev_count;
42232 /* Pad short function to 4 instructions. */
42234 static void
42235 ix86_pad_short_function (void)
42237 edge e;
42238 edge_iterator ei;
42240 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42242 rtx_insn *ret = BB_END (e->src);
42243 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
42245 int insn_count = ix86_count_insn (e->src);
42247 /* Pad short function. */
42248 if (insn_count < 4)
42250 rtx_insn *insn = ret;
42252 /* Find epilogue. */
42253 while (insn
42254 && (!NOTE_P (insn)
42255 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
42256 insn = PREV_INSN (insn);
42258 if (!insn)
42259 insn = ret;
42261 /* Two NOPs count as one instruction. */
42262 insn_count = 2 * (4 - insn_count);
42263 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
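/* Illustrative sketch, not part of GCC: with tunings that enable
   TARGET_PAD_SHORT_FUNCTION, a function with fewer than four instructions is
   padded as above; since two NOPs are counted as one instruction there,
   2 * (4 - insn_count) NOPs are emitted.  The instruction count below is
   hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int insn_count = 2;	/* hypothetical instruction count of a short function */

  if (insn_count < 4)
    printf ("emit %d nops\n", 2 * (4 - insn_count));	/* prints 4 */

  return 0;
}
#endif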
42269 /* Fix up a Windows system unwinder issue. If an EH region falls through into
42270 the epilogue, the Windows system unwinder will apply epilogue logic and
42271 produce incorrect offsets. This can be avoided by adding a nop between
42272 the last insn that can throw and the first insn of the epilogue. */
42274 static void
42275 ix86_seh_fixup_eh_fallthru (void)
42277 edge e;
42278 edge_iterator ei;
42280 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42282 rtx_insn *insn, *next;
42284 /* Find the beginning of the epilogue. */
42285 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
42286 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
42287 break;
42288 if (insn == NULL)
42289 continue;
42291 /* We only care about preceding insns that can throw. */
42292 insn = prev_active_insn (insn);
42293 if (insn == NULL || !can_throw_internal (insn))
42294 continue;
42296 /* Do not separate calls from their debug information. */
42297 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
42298 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
42299 insn = next;
42300 else
42301 break;
42303 emit_insn_after (gen_nops (const1_rtx), insn);
42307 /* Given a register number BASE, the lowest of a group of registers, update
42308 regsets IN and OUT with the registers that should be avoided in input
42309 and output operands respectively when trying to avoid generating a modr/m
42310 byte for -mmitigate-rop. */
42312 static void
42313 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
42315 SET_HARD_REG_BIT (out, base);
42316 SET_HARD_REG_BIT (out, base + 1);
42317 SET_HARD_REG_BIT (in, base + 2);
42318 SET_HARD_REG_BIT (in, base + 3);
42321 /* Called if -mmitigate-rop is in effect. Try to rewrite instructions so
42322 that certain encodings of modr/m bytes do not occur. */
42323 static void
42324 ix86_mitigate_rop (void)
42326 HARD_REG_SET input_risky;
42327 HARD_REG_SET output_risky;
42328 HARD_REG_SET inout_risky;
42330 CLEAR_HARD_REG_SET (output_risky);
42331 CLEAR_HARD_REG_SET (input_risky);
42332 SET_HARD_REG_BIT (output_risky, AX_REG);
42333 SET_HARD_REG_BIT (output_risky, CX_REG);
42334 SET_HARD_REG_BIT (input_risky, BX_REG);
42335 SET_HARD_REG_BIT (input_risky, DX_REG);
42336 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
42337 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
42338 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
42339 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
42340 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
42341 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
42342 COPY_HARD_REG_SET (inout_risky, input_risky);
42343 IOR_HARD_REG_SET (inout_risky, output_risky);
42345 df_note_add_problem ();
42346 /* Fix up what stack-regs did. */
42347 df_insn_rescan_all ();
42348 df_analyze ();
42350 regrename_init (true);
42351 regrename_analyze (NULL);
42353 auto_vec<du_head_p> cands;
42355 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
42357 if (!NONDEBUG_INSN_P (insn))
42358 continue;
42360 if (GET_CODE (PATTERN (insn)) == USE
42361 || GET_CODE (PATTERN (insn)) == CLOBBER)
42362 continue;
42364 extract_insn (insn);
42366 int opno0, opno1;
42367 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
42368 recog_data.n_operands, &opno0,
42369 &opno1);
42371 if (!ix86_rop_should_change_byte_p (modrm))
42372 continue;
42374 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
42376 /* This happens when regrename has to fail a block. */
42377 if (!info->op_info)
42378 continue;
42380 if (info->op_info[opno0].n_chains != 0)
42382 gcc_assert (info->op_info[opno0].n_chains == 1);
42383 du_head_p op0c;
42384 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
42385 if (op0c->target_data_1 + op0c->target_data_2 == 0
42386 && !op0c->cannot_rename)
42387 cands.safe_push (op0c);
42389 op0c->target_data_1++;
42391 if (info->op_info[opno1].n_chains != 0)
42393 gcc_assert (info->op_info[opno1].n_chains == 1);
42394 du_head_p op1c;
42395 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
42396 if (op1c->target_data_1 + op1c->target_data_2 == 0
42397 && !op1c->cannot_rename)
42398 cands.safe_push (op1c);
42400 op1c->target_data_2++;
42404 int i;
42405 du_head_p head;
42406 FOR_EACH_VEC_ELT (cands, i, head)
42408 int old_reg, best_reg;
42409 HARD_REG_SET unavailable;
42411 CLEAR_HARD_REG_SET (unavailable);
42412 if (head->target_data_1)
42413 IOR_HARD_REG_SET (unavailable, output_risky);
42414 if (head->target_data_2)
42415 IOR_HARD_REG_SET (unavailable, input_risky);
42417 int n_uses;
42418 reg_class superclass = regrename_find_superclass (head, &n_uses,
42419 &unavailable);
42420 old_reg = head->regno;
42421 best_reg = find_rename_reg (head, superclass, &unavailable,
42422 old_reg, false);
42423 bool ok = regrename_do_replace (head, best_reg);
42424 gcc_assert (ok);
42425 if (dump_file)
42426 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
42427 reg_names[best_reg], reg_class_names[superclass]);
42431 regrename_finish ();
42433 df_analyze ();
42435 basic_block bb;
42436 regset_head live;
42438 INIT_REG_SET (&live);
42440 FOR_EACH_BB_FN (bb, cfun)
42442 rtx_insn *insn;
42444 COPY_REG_SET (&live, DF_LR_OUT (bb));
42445 df_simulate_initialize_backwards (bb, &live);
42447 FOR_BB_INSNS_REVERSE (bb, insn)
42449 if (!NONDEBUG_INSN_P (insn))
42450 continue;
42452 df_simulate_one_insn_backwards (bb, insn, &live);
42454 if (GET_CODE (PATTERN (insn)) == USE
42455 || GET_CODE (PATTERN (insn)) == CLOBBER)
42456 continue;
42458 extract_insn (insn);
42459 constrain_operands_cached (insn, reload_completed);
42460 int opno0, opno1;
42461 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
42462 recog_data.n_operands, &opno0,
42463 &opno1);
42464 if (modrm < 0
42465 || !ix86_rop_should_change_byte_p (modrm)
42466 || opno0 == opno1)
42467 continue;
42469 rtx oldreg = recog_data.operand[opno1];
42470 preprocess_constraints (insn);
42471 const operand_alternative *alt = which_op_alt ();
42473 int i;
42474 for (i = 0; i < recog_data.n_operands; i++)
42475 if (i != opno1
42476 && alt[i].earlyclobber
42477 && reg_overlap_mentioned_p (recog_data.operand[i],
42478 oldreg))
42479 break;
42481 if (i < recog_data.n_operands)
42482 continue;
42484 if (dump_file)
42485 fprintf (dump_file,
42486 "attempting to fix modrm byte in insn %d:"
42487 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
42488 reg_class_names[alt[opno1].cl]);
42490 HARD_REG_SET unavailable;
42491 REG_SET_TO_HARD_REG_SET (unavailable, &live);
42492 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
42493 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
42494 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
42495 IOR_HARD_REG_SET (unavailable, output_risky);
42496 IOR_COMPL_HARD_REG_SET (unavailable,
42497 reg_class_contents[alt[opno1].cl]);
42499 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42500 if (!TEST_HARD_REG_BIT (unavailable, i))
42501 break;
42502 if (i == FIRST_PSEUDO_REGISTER)
42504 if (dump_file)
42505 fprintf (dump_file, ", none available\n");
42506 continue;
42508 if (dump_file)
42509 fprintf (dump_file, " -> %d\n", i);
42510 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
42511 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
42512 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
42517 /* Implement machine specific optimizations. We implement padding of returns
42518    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
42519 static void
42520 ix86_reorg (void)
42522 /* We are freeing block_for_insn in the toplev to keep compatibility
42523 with old MDEP_REORGS that are not CFG based. Recompute it now. */
42524 compute_bb_for_insn ();
42526 if (flag_mitigate_rop)
42527 ix86_mitigate_rop ();
42529 if (TARGET_SEH && current_function_has_exception_handlers ())
42530 ix86_seh_fixup_eh_fallthru ();
42532 if (optimize && optimize_function_for_speed_p (cfun))
42534 if (TARGET_PAD_SHORT_FUNCTION)
42535 ix86_pad_short_function ();
42536 else if (TARGET_PAD_RETURNS)
42537 ix86_pad_returns ();
42538 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42539 if (TARGET_FOUR_JUMP_LIMIT)
42540 ix86_avoid_jump_mispredicts ();
42541 #endif
42545 /* Return nonzero when a QImode register that must be represented via a REX
42546    prefix is used.  */
42547 bool
42548 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
42550 int i;
42551 extract_insn_cached (insn);
42552 for (i = 0; i < recog_data.n_operands; i++)
42553 if (GENERAL_REG_P (recog_data.operand[i])
42554 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
42555 return true;
42556 return false;
42559 /* Return true when INSN mentions a register that must be encoded using a
42560    REX prefix.  */
42561 bool
42562 x86_extended_reg_mentioned_p (rtx insn)
42564 subrtx_iterator::array_type array;
42565 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
42567 const_rtx x = *iter;
42568 if (REG_P (x)
42569 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
42570 return true;
42572 return false;
42575 /* If profitable, negate (without causing overflow) integer constant
42576 of mode MODE at location LOC. Return true in this case. */
42577 bool
42578 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
42580 HOST_WIDE_INT val;
42582 if (!CONST_INT_P (*loc))
42583 return false;
42585 switch (mode)
42587 case E_DImode:
42588 /* DImode x86_64 constants must fit in 32 bits. */
42589 gcc_assert (x86_64_immediate_operand (*loc, mode));
42591 mode = SImode;
42592 break;
42594 case E_SImode:
42595 case E_HImode:
42596 case E_QImode:
42597 break;
42599 default:
42600 gcc_unreachable ();
42603 /* Avoid overflows. */
42604 if (mode_signbit_p (mode, *loc))
42605 return false;
42607 val = INTVAL (*loc);
42609   /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
42610      Exception: -128 encodes smaller than 128, so swap the sign and the op.  */
42611 if ((val < 0 && val != -128)
42612 || val == 128)
42614 *loc = GEN_INT (-val);
42615 return true;
42618 return false;
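/* Illustrative sketch, not part of GCC: the condition above flips the sign of
   the immediate (and correspondingly the add/sub opcode) for all negative
   constants except -128, and for +128, because -128 fits in a sign-extended
   8 bit immediate while +128 does not.  should_negate is a hypothetical
   stand-in for the test in x86_maybe_negate_const_int.  */
#if 0
#include <stdio.h>

static int
should_negate (long val)
{
  return (val < 0 && val != -128) || val == 128;
}

int
main (void)
{
  printf ("%d\n", should_negate (-4));		/* 1: addl $-4 -> subl $4 */
  printf ("%d\n", should_negate (-128));	/* 0: -128 already fits in imm8 */
  printf ("%d\n", should_negate (128));		/* 1: addl $128 -> subl $-128 */
  printf ("%d\n", should_negate (4));		/* 0: already positive */
  return 0;
}
#endif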
42621 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
42622 optabs would emit if we didn't have TFmode patterns. */
42624 void
42625 x86_emit_floatuns (rtx operands[2])
42627 rtx_code_label *neglab, *donelab;
42628 rtx i0, i1, f0, in, out;
42629 machine_mode mode, inmode;
42631 inmode = GET_MODE (operands[1]);
42632 gcc_assert (inmode == SImode || inmode == DImode);
42634 out = operands[0];
42635 in = force_reg (inmode, operands[1]);
42636 mode = GET_MODE (out);
42637 neglab = gen_label_rtx ();
42638 donelab = gen_label_rtx ();
42639 f0 = gen_reg_rtx (mode);
42641 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
42643 expand_float (out, in, 0);
42645 emit_jump_insn (gen_jump (donelab));
42646 emit_barrier ();
42648 emit_label (neglab);
42650 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
42651 1, OPTAB_DIRECT);
42652 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
42653 1, OPTAB_DIRECT);
42654 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
42656 expand_float (f0, i0, 0);
42658 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
42660 emit_label (donelab);
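/* Illustrative sketch, not part of GCC: the expansion above implements the
   usual unsigned-to-float trick.  Values that fit in the signed range are
   converted directly; otherwise the value is halved with the dropped low bit
   OR'ed back in (so the final rounding is unaffected), converted as a signed
   number, and doubled.  u64_to_double is a hypothetical standalone version.  */
#if 0
#include <stdio.h>

static double
u64_to_double (unsigned long long in)
{
  if ((long long) in >= 0)
    return (double) (long long) in;

  /* Same shift/and/ior sequence as emitted above.  */
  unsigned long long half = (in >> 1) | (in & 1);
  double f = (double) (long long) half;
  return f + f;
}

int
main (void)
{
  printf ("%.1f\n", u64_to_double (18446744073709551615ULL));
  return 0;
}
#endif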
42663 static bool canonicalize_perm (struct expand_vec_perm_d *d);
42664 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
42665 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
42666 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
42668 /* Get a vector mode of the same size as the original but with elements
42669 twice as wide. This is only guaranteed to apply to integral vectors. */
42671 static inline machine_mode
42672 get_mode_wider_vector (machine_mode o)
42674 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
42675 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
42676 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
42677 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
42678 return n;
42681 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
42682 fill target with val via vec_duplicate. */
42684 static bool
42685 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
42687 bool ok;
42688 rtx_insn *insn;
42689 rtx dup;
42691 /* First attempt to recognize VAL as-is. */
42692 dup = gen_vec_duplicate (mode, val);
42693 insn = emit_insn (gen_rtx_SET (target, dup));
42694 if (recog_memoized (insn) < 0)
42696 rtx_insn *seq;
42697 machine_mode innermode = GET_MODE_INNER (mode);
42698 rtx reg;
42700 /* If that fails, force VAL into a register. */
42702 start_sequence ();
42703 reg = force_reg (innermode, val);
42704 if (GET_MODE (reg) != innermode)
42705 reg = gen_lowpart (innermode, reg);
42706 SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
42707 seq = get_insns ();
42708 end_sequence ();
42709 if (seq)
42710 emit_insn_before (seq, insn);
42712 ok = recog_memoized (insn) >= 0;
42713 gcc_assert (ok);
42715 return true;
42718 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42719 with all elements equal to VAR. Return true if successful. */
42721 static bool
42722 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
42723 rtx target, rtx val)
42725 bool ok;
42727 switch (mode)
42729 case E_V2SImode:
42730 case E_V2SFmode:
42731 if (!mmx_ok)
42732 return false;
42733 /* FALLTHRU */
42735 case E_V4DFmode:
42736 case E_V4DImode:
42737 case E_V8SFmode:
42738 case E_V8SImode:
42739 case E_V2DFmode:
42740 case E_V2DImode:
42741 case E_V4SFmode:
42742 case E_V4SImode:
42743 case E_V16SImode:
42744 case E_V8DImode:
42745 case E_V16SFmode:
42746 case E_V8DFmode:
42747 return ix86_vector_duplicate_value (mode, target, val);
42749 case E_V4HImode:
42750 if (!mmx_ok)
42751 return false;
42752 if (TARGET_SSE || TARGET_3DNOW_A)
42754 rtx x;
42756 val = gen_lowpart (SImode, val);
42757 x = gen_rtx_TRUNCATE (HImode, val);
42758 x = gen_rtx_VEC_DUPLICATE (mode, x);
42759 emit_insn (gen_rtx_SET (target, x));
42760 return true;
42762 goto widen;
42764 case E_V8QImode:
42765 if (!mmx_ok)
42766 return false;
42767 goto widen;
42769 case E_V8HImode:
42770 if (TARGET_AVX2)
42771 return ix86_vector_duplicate_value (mode, target, val);
42773 if (TARGET_SSE2)
42775 struct expand_vec_perm_d dperm;
42776 rtx tmp1, tmp2;
42778 permute:
42779 memset (&dperm, 0, sizeof (dperm));
42780 dperm.target = target;
42781 dperm.vmode = mode;
42782 dperm.nelt = GET_MODE_NUNITS (mode);
42783 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
42784 dperm.one_operand_p = true;
42786 /* Extend to SImode using a paradoxical SUBREG. */
42787 tmp1 = gen_reg_rtx (SImode);
42788 emit_move_insn (tmp1, gen_lowpart (SImode, val));
42790 /* Insert the SImode value as low element of a V4SImode vector. */
42791 tmp2 = gen_reg_rtx (V4SImode);
42792 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
42793 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
42795 ok = (expand_vec_perm_1 (&dperm)
42796 || expand_vec_perm_broadcast_1 (&dperm));
42797 gcc_assert (ok);
42798 return ok;
42800 goto widen;
42802 case E_V16QImode:
42803 if (TARGET_AVX2)
42804 return ix86_vector_duplicate_value (mode, target, val);
42806 if (TARGET_SSE2)
42807 goto permute;
42808 goto widen;
42810 widen:
42811 /* Replicate the value once into the next wider mode and recurse. */
42813 machine_mode smode, wsmode, wvmode;
42814 rtx x;
42816 smode = GET_MODE_INNER (mode);
42817 wvmode = get_mode_wider_vector (mode);
42818 wsmode = GET_MODE_INNER (wvmode);
42820 val = convert_modes (wsmode, smode, val, true);
42821 x = expand_simple_binop (wsmode, ASHIFT, val,
42822 GEN_INT (GET_MODE_BITSIZE (smode)),
42823 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42824 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
42826 x = gen_reg_rtx (wvmode);
42827 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
42828 gcc_assert (ok);
42829 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
42830 return ok;
42833 case E_V16HImode:
42834 case E_V32QImode:
42835 if (TARGET_AVX2)
42836 return ix86_vector_duplicate_value (mode, target, val);
42837 else
42839 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
42840 rtx x = gen_reg_rtx (hvmode);
42842 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42843 gcc_assert (ok);
42845 x = gen_rtx_VEC_CONCAT (mode, x, x);
42846 emit_insn (gen_rtx_SET (target, x));
42848 return true;
42850 case E_V64QImode:
42851 case E_V32HImode:
42852 if (TARGET_AVX512BW)
42853 return ix86_vector_duplicate_value (mode, target, val);
42854 else
42856 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
42857 rtx x = gen_reg_rtx (hvmode);
42859 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42860 gcc_assert (ok);
42862 x = gen_rtx_VEC_CONCAT (mode, x, x);
42863 emit_insn (gen_rtx_SET (target, x));
42865 return true;
42867 default:
42868 return false;
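/* Illustrative sketch, not part of GCC: the `widen' case above replicates a
   narrow scalar into the next wider integer mode -- an 8 bit value V becomes
   the 16 bit value (V << 8) | V -- and then broadcasts that wider value in
   the corresponding wider vector mode, so each recursion halves the number of
   elements still to be handled.  The value below is hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned char v = 0xab;
  unsigned short wide = (unsigned short) ((v << 8) | v);

  printf ("0x%04x\n", wide);	/* 0xabab: two copies of the QImode value */
  return 0;
}
#endif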
42872 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42873 whose ONE_VAR element is VAR, and other elements are zero. Return true
42874 if successful. */
42876 static bool
42877 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
42878 rtx target, rtx var, int one_var)
42880 machine_mode vsimode;
42881 rtx new_target;
42882 rtx x, tmp;
42883 bool use_vector_set = false;
42884 rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
42886 switch (mode)
42888 case E_V2DImode:
42889 /* For SSE4.1, we normally use vector set. But if the second
42890 element is zero and inter-unit moves are OK, we use movq
42891 instead. */
42892 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
42893 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
42894 && one_var == 0));
42895 break;
42896 case E_V16QImode:
42897 case E_V4SImode:
42898 case E_V4SFmode:
42899 use_vector_set = TARGET_SSE4_1;
42900 break;
42901 case E_V8HImode:
42902 use_vector_set = TARGET_SSE2;
42903 break;
42904 case E_V4HImode:
42905 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
42906 break;
42907 case E_V32QImode:
42908 case E_V16HImode:
42909 use_vector_set = TARGET_AVX;
42910 break;
42911 case E_V8SImode:
42912 use_vector_set = TARGET_AVX;
42913 gen_vec_set_0 = gen_vec_setv8si_0;
42914 break;
42915 case E_V8SFmode:
42916 use_vector_set = TARGET_AVX;
42917 gen_vec_set_0 = gen_vec_setv8sf_0;
42918 break;
42919 case E_V4DFmode:
42920 use_vector_set = TARGET_AVX;
42921 gen_vec_set_0 = gen_vec_setv4df_0;
42922 break;
42923 case E_V4DImode:
42924 /* Use ix86_expand_vector_set in 64bit mode only. */
42925 use_vector_set = TARGET_AVX && TARGET_64BIT;
42926 gen_vec_set_0 = gen_vec_setv4di_0;
42927 break;
42928 case E_V16SImode:
42929 use_vector_set = TARGET_AVX512F && one_var == 0;
42930 gen_vec_set_0 = gen_vec_setv16si_0;
42931 break;
42932 case E_V16SFmode:
42933 use_vector_set = TARGET_AVX512F && one_var == 0;
42934 gen_vec_set_0 = gen_vec_setv16sf_0;
42935 break;
42936 case E_V8DFmode:
42937 use_vector_set = TARGET_AVX512F && one_var == 0;
42938 gen_vec_set_0 = gen_vec_setv8df_0;
42939 break;
42940 case E_V8DImode:
42941 /* Use ix86_expand_vector_set in 64bit mode only. */
42942 use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0;
42943 gen_vec_set_0 = gen_vec_setv8di_0;
42944 break;
42945 default:
42946 break;
42949 if (use_vector_set)
42951 if (gen_vec_set_0 && one_var == 0)
42953 var = force_reg (GET_MODE_INNER (mode), var);
42954 emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var));
42955 return true;
42957 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
42958 var = force_reg (GET_MODE_INNER (mode), var);
42959 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42960 return true;
42963 switch (mode)
42965 case E_V2SFmode:
42966 case E_V2SImode:
42967 if (!mmx_ok)
42968 return false;
42969 /* FALLTHRU */
42971 case E_V2DFmode:
42972 case E_V2DImode:
42973 if (one_var != 0)
42974 return false;
42975 var = force_reg (GET_MODE_INNER (mode), var);
42976 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
42977 emit_insn (gen_rtx_SET (target, x));
42978 return true;
42980 case E_V4SFmode:
42981 case E_V4SImode:
42982 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
42983 new_target = gen_reg_rtx (mode);
42984 else
42985 new_target = target;
42986 var = force_reg (GET_MODE_INNER (mode), var);
42987 x = gen_rtx_VEC_DUPLICATE (mode, var);
42988 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
42989 emit_insn (gen_rtx_SET (new_target, x));
42990 if (one_var != 0)
42992 /* We need to shuffle the value to the correct position, so
42993 create a new pseudo to store the intermediate result. */
42995 /* With SSE2, we can use the integer shuffle insns. */
42996 if (mode != V4SFmode && TARGET_SSE2)
42998 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
42999 const1_rtx,
43000 GEN_INT (one_var == 1 ? 0 : 1),
43001 GEN_INT (one_var == 2 ? 0 : 1),
43002 GEN_INT (one_var == 3 ? 0 : 1)));
43003 if (target != new_target)
43004 emit_move_insn (target, new_target);
43005 return true;
43008 /* Otherwise convert the intermediate result to V4SFmode and
43009 use the SSE1 shuffle instructions. */
43010 if (mode != V4SFmode)
43012 tmp = gen_reg_rtx (V4SFmode);
43013 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43015 else
43016 tmp = new_target;
43018 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43019 const1_rtx,
43020 GEN_INT (one_var == 1 ? 0 : 1),
43021 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43022 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43024 if (mode != V4SFmode)
43025 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43026 else if (tmp != target)
43027 emit_move_insn (target, tmp);
43029 else if (target != new_target)
43030 emit_move_insn (target, new_target);
43031 return true;
43033 case E_V8HImode:
43034 case E_V16QImode:
43035 vsimode = V4SImode;
43036 goto widen;
43037 case E_V4HImode:
43038 case E_V8QImode:
43039 if (!mmx_ok)
43040 return false;
43041 vsimode = V2SImode;
43042 goto widen;
43043 widen:
43044 if (one_var != 0)
43045 return false;
43047 /* Zero extend the variable element to SImode and recurse. */
43048 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43050 x = gen_reg_rtx (vsimode);
43051 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43052 var, one_var))
43053 gcc_unreachable ();
43055 emit_move_insn (target, gen_lowpart (mode, x));
43056 return true;
43058 default:
43059 return false;
43063 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43064 consisting of the values in VALS. It is known that all elements
43065 except ONE_VAR are constants. Return true if successful. */
43067 static bool
43068 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43069 rtx target, rtx vals, int one_var)
43071 rtx var = XVECEXP (vals, 0, one_var);
43072 machine_mode wmode;
43073 rtx const_vec, x;
43075 const_vec = copy_rtx (vals);
43076 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43077 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43079 switch (mode)
43081 case E_V2DFmode:
43082 case E_V2DImode:
43083 case E_V2SFmode:
43084 case E_V2SImode:
43085 /* For the two element vectors, it's just as easy to use
43086 the general case. */
43087 return false;
43089 case E_V4DImode:
43090 /* Use ix86_expand_vector_set in 64bit mode only. */
43091 if (!TARGET_64BIT)
43092 return false;
43093 /* FALLTHRU */
43094 case E_V4DFmode:
43095 case E_V8SFmode:
43096 case E_V8SImode:
43097 case E_V16HImode:
43098 case E_V32QImode:
43099 case E_V4SFmode:
43100 case E_V4SImode:
43101 case E_V8HImode:
43102 case E_V4HImode:
43103 break;
43105 case E_V16QImode:
43106 if (TARGET_SSE4_1)
43107 break;
43108 wmode = V8HImode;
43109 goto widen;
43110 case E_V8QImode:
43111 wmode = V4HImode;
43112 goto widen;
43113 widen:
43114 /* There's no way to set one QImode entry easily. Combine
43115 the variable value with its adjacent constant value, and
43116 promote to an HImode set. */
43117 x = XVECEXP (vals, 0, one_var ^ 1);
43118 if (one_var & 1)
43120 var = convert_modes (HImode, QImode, var, true);
43121 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43122 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43123 x = GEN_INT (INTVAL (x) & 0xff);
43125 else
43127 var = convert_modes (HImode, QImode, var, true);
43128 x = gen_int_mode (INTVAL (x) << 8, HImode);
43130 if (x != const0_rtx)
43131 var = expand_simple_binop (HImode, IOR, var, x, var,
43132 1, OPTAB_LIB_WIDEN);
43134 x = gen_reg_rtx (wmode);
43135 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43136 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43138 emit_move_insn (target, gen_lowpart (mode, x));
43139 return true;
43141 default:
43142 return false;
43145 emit_move_insn (target, const_vec);
43146 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43147 return true;
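/* Illustrative sketch, not part of GCC: the `widen' path above cannot set a
   single QImode element directly, so it packs the variable element together
   with its adjacent constant element into one HImode value and performs an
   HImode vector set instead.  The element values below are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned char var = 0x12;		/* the non-constant element */
  unsigned char neighbour = 0x34;	/* its adjacent constant element */
  int var_is_odd = 1;			/* corresponds to one_var & 1 above */
  unsigned short packed;

  if (var_is_odd)
    packed = (unsigned short) ((var << 8) | neighbour);
  else
    packed = (unsigned short) ((neighbour << 8) | var);

  printf ("0x%04x\n", packed);		/* 0x1234 */
  return 0;
}
#endif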
43150 /* A subroutine of ix86_expand_vector_init_general. Use vector
43151 concatenate to handle the most general case: all values variable,
43152 and none identical. */
43154 static void
43155 ix86_expand_vector_init_concat (machine_mode mode,
43156 rtx target, rtx *ops, int n)
43158 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43159 rtx first[16], second[8], third[4];
43160 rtvec v;
43161 int i, j;
43163 switch (n)
43165 case 2:
43166 switch (mode)
43168 case E_V16SImode:
43169 cmode = V8SImode;
43170 break;
43171 case E_V16SFmode:
43172 cmode = V8SFmode;
43173 break;
43174 case E_V8DImode:
43175 cmode = V4DImode;
43176 break;
43177 case E_V8DFmode:
43178 cmode = V4DFmode;
43179 break;
43180 case E_V8SImode:
43181 cmode = V4SImode;
43182 break;
43183 case E_V8SFmode:
43184 cmode = V4SFmode;
43185 break;
43186 case E_V4DImode:
43187 cmode = V2DImode;
43188 break;
43189 case E_V4DFmode:
43190 cmode = V2DFmode;
43191 break;
43192 case E_V4SImode:
43193 cmode = V2SImode;
43194 break;
43195 case E_V4SFmode:
43196 cmode = V2SFmode;
43197 break;
43198 case E_V2DImode:
43199 cmode = DImode;
43200 break;
43201 case E_V2SImode:
43202 cmode = SImode;
43203 break;
43204 case E_V2DFmode:
43205 cmode = DFmode;
43206 break;
43207 case E_V2SFmode:
43208 cmode = SFmode;
43209 break;
43210 default:
43211 gcc_unreachable ();
43214 if (!register_operand (ops[1], cmode))
43215 ops[1] = force_reg (cmode, ops[1]);
43216 if (!register_operand (ops[0], cmode))
43217 ops[0] = force_reg (cmode, ops[0]);
43218 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
43219 ops[1])));
43220 break;
43222 case 4:
43223 switch (mode)
43225 case E_V4DImode:
43226 cmode = V2DImode;
43227 break;
43228 case E_V4DFmode:
43229 cmode = V2DFmode;
43230 break;
43231 case E_V4SImode:
43232 cmode = V2SImode;
43233 break;
43234 case E_V4SFmode:
43235 cmode = V2SFmode;
43236 break;
43237 default:
43238 gcc_unreachable ();
43240 goto half;
43242 case 8:
43243 switch (mode)
43245 case E_V8DImode:
43246 cmode = V2DImode;
43247 hmode = V4DImode;
43248 break;
43249 case E_V8DFmode:
43250 cmode = V2DFmode;
43251 hmode = V4DFmode;
43252 break;
43253 case E_V8SImode:
43254 cmode = V2SImode;
43255 hmode = V4SImode;
43256 break;
43257 case E_V8SFmode:
43258 cmode = V2SFmode;
43259 hmode = V4SFmode;
43260 break;
43261 default:
43262 gcc_unreachable ();
43264 goto half;
43266 case 16:
43267 switch (mode)
43269 case E_V16SImode:
43270 cmode = V2SImode;
43271 hmode = V4SImode;
43272 gmode = V8SImode;
43273 break;
43274 case E_V16SFmode:
43275 cmode = V2SFmode;
43276 hmode = V4SFmode;
43277 gmode = V8SFmode;
43278 break;
43279 default:
43280 gcc_unreachable ();
43282 goto half;
43284 half:
43285 /* FIXME: We process inputs backward to help RA. PR 36222. */
43286 i = n - 1;
43287 j = (n >> 1) - 1;
43288 for (; i > 0; i -= 2, j--)
43290 first[j] = gen_reg_rtx (cmode);
43291 v = gen_rtvec (2, ops[i - 1], ops[i]);
43292 ix86_expand_vector_init (false, first[j],
43293 gen_rtx_PARALLEL (cmode, v));
43296 n >>= 1;
43297 if (n > 4)
43299 gcc_assert (hmode != VOIDmode);
43300 gcc_assert (gmode != VOIDmode);
43301 for (i = j = 0; i < n; i += 2, j++)
43303 second[j] = gen_reg_rtx (hmode);
43304 ix86_expand_vector_init_concat (hmode, second [j],
43305 &first [i], 2);
43307 n >>= 1;
43308 for (i = j = 0; i < n; i += 2, j++)
43310 third[j] = gen_reg_rtx (gmode);
43311 ix86_expand_vector_init_concat (gmode, third[j],
43312 &second[i], 2);
43314 n >>= 1;
43315 ix86_expand_vector_init_concat (mode, target, third, n);
43317 else if (n > 2)
43319 gcc_assert (hmode != VOIDmode);
43320 for (i = j = 0; i < n; i += 2, j++)
43322 second[j] = gen_reg_rtx (hmode);
43323 ix86_expand_vector_init_concat (hmode, second [j],
43324 &first [i], 2);
43326 n >>= 1;
43327 ix86_expand_vector_init_concat (mode, target, second, n);
43329 else
43330 ix86_expand_vector_init_concat (mode, target, first, n);
43331 break;
43333 default:
43334 gcc_unreachable ();
43338 /* A subroutine of ix86_expand_vector_init_general. Use vector
43339 interleave to handle the most general case: all values variable,
43340 and none identical. */
43342 static void
43343 ix86_expand_vector_init_interleave (machine_mode mode,
43344 rtx target, rtx *ops, int n)
43346 machine_mode first_imode, second_imode, third_imode, inner_mode;
43347 int i, j;
43348 rtx op0, op1;
43349 rtx (*gen_load_even) (rtx, rtx, rtx);
43350 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
43351 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
43353 switch (mode)
43355 case E_V8HImode:
43356 gen_load_even = gen_vec_setv8hi;
43357 gen_interleave_first_low = gen_vec_interleave_lowv4si;
43358 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43359 inner_mode = HImode;
43360 first_imode = V4SImode;
43361 second_imode = V2DImode;
43362 third_imode = VOIDmode;
43363 break;
43364 case E_V16QImode:
43365 gen_load_even = gen_vec_setv16qi;
43366 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
43367 gen_interleave_second_low = gen_vec_interleave_lowv4si;
43368 inner_mode = QImode;
43369 first_imode = V8HImode;
43370 second_imode = V4SImode;
43371 third_imode = V2DImode;
43372 break;
43373 default:
43374 gcc_unreachable ();
43377 for (i = 0; i < n; i++)
43379       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
43380 op0 = gen_reg_rtx (SImode);
43381 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
43383 /* Insert the SImode value as low element of V4SImode vector. */
43384 op1 = gen_reg_rtx (V4SImode);
43385 op0 = gen_rtx_VEC_MERGE (V4SImode,
43386 gen_rtx_VEC_DUPLICATE (V4SImode,
43387 op0),
43388 CONST0_RTX (V4SImode),
43389 const1_rtx);
43390 emit_insn (gen_rtx_SET (op1, op0));
43392       /* Cast the V4SImode vector back to a vector in the original mode.  */
43393 op0 = gen_reg_rtx (mode);
43394 emit_move_insn (op0, gen_lowpart (mode, op1));
43396 /* Load even elements into the second position. */
43397 emit_insn (gen_load_even (op0,
43398 force_reg (inner_mode,
43399 ops [i + i + 1]),
43400 const1_rtx));
43402 /* Cast vector to FIRST_IMODE vector. */
43403 ops[i] = gen_reg_rtx (first_imode);
43404 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
43407 /* Interleave low FIRST_IMODE vectors. */
43408 for (i = j = 0; i < n; i += 2, j++)
43410 op0 = gen_reg_rtx (first_imode);
43411 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
43413 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
43414 ops[j] = gen_reg_rtx (second_imode);
43415 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
43418 /* Interleave low SECOND_IMODE vectors. */
43419 switch (second_imode)
43421 case E_V4SImode:
43422 for (i = j = 0; i < n / 2; i += 2, j++)
43424 op0 = gen_reg_rtx (second_imode);
43425 emit_insn (gen_interleave_second_low (op0, ops[i],
43426 ops[i + 1]));
43428 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
43429 vector. */
43430 ops[j] = gen_reg_rtx (third_imode);
43431 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
43433 second_imode = V2DImode;
43434 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43435 /* FALLTHRU */
43437 case E_V2DImode:
43438 op0 = gen_reg_rtx (second_imode);
43439 emit_insn (gen_interleave_second_low (op0, ops[0],
43440 ops[1]));
43442       /* Cast the SECOND_IMODE vector back to a vector in the original
43443 	 mode.  */
43444 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
43445 break;
43447 default:
43448 gcc_unreachable ();
43452 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
43453 all values variable, and none identical. */
43455 static void
43456 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
43457 rtx target, rtx vals)
43459 rtx ops[64], op0, op1, op2, op3, op4, op5;
43460 machine_mode half_mode = VOIDmode;
43461 machine_mode quarter_mode = VOIDmode;
43462 int n, i;
43464 switch (mode)
43466 case E_V2SFmode:
43467 case E_V2SImode:
43468 if (!mmx_ok && !TARGET_SSE)
43469 break;
43470 /* FALLTHRU */
43472 case E_V16SImode:
43473 case E_V16SFmode:
43474 case E_V8DFmode:
43475 case E_V8DImode:
43476 case E_V8SFmode:
43477 case E_V8SImode:
43478 case E_V4DFmode:
43479 case E_V4DImode:
43480 case E_V4SFmode:
43481 case E_V4SImode:
43482 case E_V2DFmode:
43483 case E_V2DImode:
43484 n = GET_MODE_NUNITS (mode);
43485 for (i = 0; i < n; i++)
43486 ops[i] = XVECEXP (vals, 0, i);
43487 ix86_expand_vector_init_concat (mode, target, ops, n);
43488 return;
43490 case E_V2TImode:
43491 for (i = 0; i < 2; i++)
43492 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43493 op0 = gen_reg_rtx (V4DImode);
43494 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
43495 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43496 return;
43498 case E_V4TImode:
43499 for (i = 0; i < 4; i++)
43500 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43501 ops[4] = gen_reg_rtx (V4DImode);
43502 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
43503 ops[5] = gen_reg_rtx (V4DImode);
43504 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
43505 op0 = gen_reg_rtx (V8DImode);
43506 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
43507 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43508 return;
43510 case E_V32QImode:
43511 half_mode = V16QImode;
43512 goto half;
43514 case E_V16HImode:
43515 half_mode = V8HImode;
43516 goto half;
43518 half:
43519 n = GET_MODE_NUNITS (mode);
43520 for (i = 0; i < n; i++)
43521 ops[i] = XVECEXP (vals, 0, i);
43522 op0 = gen_reg_rtx (half_mode);
43523 op1 = gen_reg_rtx (half_mode);
43524 ix86_expand_vector_init_interleave (half_mode, op0, ops,
43525 n >> 2);
43526 ix86_expand_vector_init_interleave (half_mode, op1,
43527 &ops [n >> 1], n >> 2);
43528 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
43529 return;
43531 case E_V64QImode:
43532 quarter_mode = V16QImode;
43533 half_mode = V32QImode;
43534 goto quarter;
43536 case E_V32HImode:
43537 quarter_mode = V8HImode;
43538 half_mode = V16HImode;
43539 goto quarter;
43541 quarter:
43542 n = GET_MODE_NUNITS (mode);
43543 for (i = 0; i < n; i++)
43544 ops[i] = XVECEXP (vals, 0, i);
43545 op0 = gen_reg_rtx (quarter_mode);
43546 op1 = gen_reg_rtx (quarter_mode);
43547 op2 = gen_reg_rtx (quarter_mode);
43548 op3 = gen_reg_rtx (quarter_mode);
43549 op4 = gen_reg_rtx (half_mode);
43550 op5 = gen_reg_rtx (half_mode);
43551 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
43552 n >> 3);
43553 ix86_expand_vector_init_interleave (quarter_mode, op1,
43554 &ops [n >> 2], n >> 3);
43555 ix86_expand_vector_init_interleave (quarter_mode, op2,
43556 &ops [n >> 1], n >> 3);
43557 ix86_expand_vector_init_interleave (quarter_mode, op3,
43558 &ops [(n >> 1) | (n >> 2)], n >> 3);
43559 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
43560 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
43561 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
43562 return;
43564 case E_V16QImode:
43565 if (!TARGET_SSE4_1)
43566 break;
43567 /* FALLTHRU */
43569 case E_V8HImode:
43570 if (!TARGET_SSE2)
43571 break;
43573 /* Don't use ix86_expand_vector_init_interleave if we can't
43574 move from GPR to SSE register directly. */
43575 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
43576 break;
43578 n = GET_MODE_NUNITS (mode);
43579 for (i = 0; i < n; i++)
43580 ops[i] = XVECEXP (vals, 0, i);
43581 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
43582 return;
43584 case E_V4HImode:
43585 case E_V8QImode:
43586 break;
43588 default:
43589 gcc_unreachable ();
43593 int i, j, n_elts, n_words, n_elt_per_word;
43594 machine_mode inner_mode;
43595 rtx words[4], shift;
43597 inner_mode = GET_MODE_INNER (mode);
43598 n_elts = GET_MODE_NUNITS (mode);
43599 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
43600 n_elt_per_word = n_elts / n_words;
43601 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
43603 for (i = 0; i < n_words; ++i)
43605 rtx word = NULL_RTX;
43607 for (j = 0; j < n_elt_per_word; ++j)
43609 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
43610 elt = convert_modes (word_mode, inner_mode, elt, true);
43612 if (j == 0)
43613 word = elt;
43614 else
43616 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
43617 word, 1, OPTAB_LIB_WIDEN);
43618 word = expand_simple_binop (word_mode, IOR, word, elt,
43619 word, 1, OPTAB_LIB_WIDEN);
43623 words[i] = word;
43626 if (n_words == 1)
43627 emit_move_insn (target, gen_lowpart (mode, words[0]));
43628 else if (n_words == 2)
43630 rtx tmp = gen_reg_rtx (mode);
43631 emit_clobber (tmp);
43632 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
43633 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
43634 emit_move_insn (target, tmp);
43636 else if (n_words == 4)
43638 rtx tmp = gen_reg_rtx (V4SImode);
43639 gcc_assert (word_mode == SImode);
43640 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
43641 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
43642 emit_move_insn (target, gen_lowpart (mode, tmp));
43644 else
43645 gcc_unreachable ();
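/* Illustrative sketch, not part of GCC: the fallback above builds each
   word-sized chunk of the vector by shifting the accumulated elements left
   and OR-ing in the next one, starting from the highest-indexed element so
   that element 0 ends up in the least significant bits.  Shown here for four
   hypothetical 8 bit elements packed into one 32 bit word.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned char elts[4] = { 0x11, 0x22, 0x33, 0x44 };
  unsigned int word = 0;
  int j;

  /* Mirror the inner loop above: take elements from the top index down.  */
  for (j = 0; j < 4; j++)
    word = (word << 8) | elts[4 - j - 1];

  printf ("0x%08x\n", word);	/* 0x44332211: element 0 in the low byte */
  return 0;
}
#endif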
43649 /* Initialize vector TARGET via VALS. Suppress the use of MMX
43650 instructions unless MMX_OK is true. */
43652 void
43653 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
43655 machine_mode mode = GET_MODE (target);
43656 machine_mode inner_mode = GET_MODE_INNER (mode);
43657 int n_elts = GET_MODE_NUNITS (mode);
43658 int n_var = 0, one_var = -1;
43659 bool all_same = true, all_const_zero = true;
43660 int i;
43661 rtx x;
43663   /* Handle initialization from vector elements (a pair of half-width
43663      vectors) first.  */
43664 if (n_elts != XVECLEN (vals, 0))
43666 rtx subtarget = target;
43667 x = XVECEXP (vals, 0, 0);
43668 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
43669 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
43671 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
43672 if (inner_mode == QImode || inner_mode == HImode)
43674 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
43675 mode = mode_for_vector (SImode, n_bits / 4).require ();
43676 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
43677 ops[0] = gen_lowpart (inner_mode, ops[0]);
43678 ops[1] = gen_lowpart (inner_mode, ops[1]);
43679 subtarget = gen_reg_rtx (mode);
43681 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
43682 if (subtarget != target)
43683 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
43684 return;
43686 gcc_unreachable ();
43689 for (i = 0; i < n_elts; ++i)
43691 x = XVECEXP (vals, 0, i);
43692 if (!(CONST_SCALAR_INT_P (x)
43693 || CONST_DOUBLE_P (x)
43694 || CONST_FIXED_P (x)))
43695 n_var++, one_var = i;
43696 else if (x != CONST0_RTX (inner_mode))
43697 all_const_zero = false;
43698 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
43699 all_same = false;
43702 /* Constants are best loaded from the constant pool. */
43703 if (n_var == 0)
43705 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
43706 return;
43709 /* If all values are identical, broadcast the value. */
43710 if (all_same
43711 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
43712 XVECEXP (vals, 0, 0)))
43713 return;
43715 /* Values where only one field is non-constant are best loaded from
43716 the pool and overwritten via move later. */
43717 if (n_var == 1)
43719 if (all_const_zero
43720 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
43721 XVECEXP (vals, 0, one_var),
43722 one_var))
43723 return;
43725 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
43726 return;
43729 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
43732 void
43733 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
43735 machine_mode mode = GET_MODE (target);
43736 machine_mode inner_mode = GET_MODE_INNER (mode);
43737 machine_mode half_mode;
43738 bool use_vec_merge = false;
43739 rtx tmp;
43740 static rtx (*gen_extract[6][2]) (rtx, rtx)
43742 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
43743 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
43744 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
43745 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
43746 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
43747 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
43749 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
43751 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
43752 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
43753 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
43754 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
43755 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
43756 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
43758 int i, j, n;
43759 machine_mode mmode = VOIDmode;
43760 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
43762 switch (mode)
43764 case E_V2SFmode:
43765 case E_V2SImode:
43766 if (mmx_ok)
43768 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43769 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
43770 if (elt == 0)
43771 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43772 else
43773 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43774 emit_insn (gen_rtx_SET (target, tmp));
43775 return;
43777 break;
43779 case E_V2DImode:
43780 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
43781 if (use_vec_merge)
43782 break;
43784 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43785 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
43786 if (elt == 0)
43787 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43788 else
43789 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43790 emit_insn (gen_rtx_SET (target, tmp));
43791 return;
43793 case E_V2DFmode:
43795 rtx op0, op1;
43797 /* For the two element vectors, we implement a VEC_CONCAT with
43798 the extraction of the other element. */
43800 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
43801 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
43803 if (elt == 0)
43804 op0 = val, op1 = tmp;
43805 else
43806 op0 = tmp, op1 = val;
43808 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
43809 emit_insn (gen_rtx_SET (target, tmp));
43811 return;
43813 case E_V4SFmode:
43814 use_vec_merge = TARGET_SSE4_1;
43815 if (use_vec_merge)
43816 break;
43818 switch (elt)
43820 case 0:
43821 use_vec_merge = true;
43822 break;
43824 case 1:
43825 /* tmp = target = A B C D */
43826 tmp = copy_to_reg (target);
43827 /* target = A A B B */
43828 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
43829 /* target = X A B B */
43830 ix86_expand_vector_set (false, target, val, 0);
43831 /* target = A X C D */
43832 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43833 const1_rtx, const0_rtx,
43834 GEN_INT (2+4), GEN_INT (3+4)));
43835 return;
43837 case 2:
43838 /* tmp = target = A B C D */
43839 tmp = copy_to_reg (target);
43840 /* tmp = X B C D */
43841 ix86_expand_vector_set (false, tmp, val, 0);
43842 /* target = A B X D */
43843 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43844 const0_rtx, const1_rtx,
43845 GEN_INT (0+4), GEN_INT (3+4)));
43846 return;
43848 case 3:
43849 /* tmp = target = A B C D */
43850 tmp = copy_to_reg (target);
43851 /* tmp = X B C D */
43852 ix86_expand_vector_set (false, tmp, val, 0);
43853 /* target = A B X D */
43854 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43855 const0_rtx, const1_rtx,
43856 GEN_INT (2+4), GEN_INT (0+4)));
43857 return;
43859 default:
43860 gcc_unreachable ();
43862 break;
43864 case E_V4SImode:
43865 use_vec_merge = TARGET_SSE4_1;
43866 if (use_vec_merge)
43867 break;
43869 /* Element 0 handled by vec_merge below. */
43870 if (elt == 0)
43872 use_vec_merge = true;
43873 break;
43876 if (TARGET_SSE2)
43878 /* With SSE2, use integer shuffles to swap element 0 and ELT,
43879 store into element 0, then shuffle them back. */
43881 rtx order[4];
43883 order[0] = GEN_INT (elt);
43884 order[1] = const1_rtx;
43885 order[2] = const2_rtx;
43886 order[3] = GEN_INT (3);
43887 order[elt] = const0_rtx;
43889 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43890 order[1], order[2], order[3]));
43892 ix86_expand_vector_set (false, target, val, 0);
43894 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43895 order[1], order[2], order[3]));
43897 else
43899 /* For SSE1, we have to reuse the V4SF code. */
43900 rtx t = gen_reg_rtx (V4SFmode);
43901 emit_move_insn (t, gen_lowpart (V4SFmode, target));
43902 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
43903 emit_move_insn (target, gen_lowpart (mode, t));
43905 return;
43907 case E_V8HImode:
43908 use_vec_merge = TARGET_SSE2;
43909 break;
43910 case E_V4HImode:
43911 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43912 break;
43914 case E_V16QImode:
43915 use_vec_merge = TARGET_SSE4_1;
43916 break;
43918 case E_V8QImode:
43919 break;
43921 case E_V32QImode:
43922 half_mode = V16QImode;
43923 j = 0;
43924 n = 16;
43925 goto half;
43927 case E_V16HImode:
43928 half_mode = V8HImode;
43929 j = 1;
43930 n = 8;
43931 goto half;
43933 case E_V8SImode:
43934 half_mode = V4SImode;
43935 j = 2;
43936 n = 4;
43937 goto half;
43939 case E_V4DImode:
43940 half_mode = V2DImode;
43941 j = 3;
43942 n = 2;
43943 goto half;
43945 case E_V8SFmode:
43946 half_mode = V4SFmode;
43947 j = 4;
43948 n = 4;
43949 goto half;
43951 case E_V4DFmode:
43952 half_mode = V2DFmode;
43953 j = 5;
43954 n = 2;
43955 goto half;
43957 half:
43958 /* Compute offset. */
43959 i = elt / n;
43960 elt %= n;
43962 gcc_assert (i <= 1);
43964 /* Extract the half. */
43965 tmp = gen_reg_rtx (half_mode);
43966 emit_insn (gen_extract[j][i] (tmp, target));
43968 /* Put val in tmp at elt. */
43969 ix86_expand_vector_set (false, tmp, val, elt);
43971 /* Put it back. */
43972 emit_insn (gen_insert[j][i] (target, target, tmp));
43973 return;
43975 case E_V8DFmode:
43976 if (TARGET_AVX512F)
43978 mmode = QImode;
43979 gen_blendm = gen_avx512f_blendmv8df;
43981 break;
43983 case E_V8DImode:
43984 if (TARGET_AVX512F)
43986 mmode = QImode;
43987 gen_blendm = gen_avx512f_blendmv8di;
43989 break;
43991 case E_V16SFmode:
43992 if (TARGET_AVX512F)
43994 mmode = HImode;
43995 gen_blendm = gen_avx512f_blendmv16sf;
43997 break;
43999 case E_V16SImode:
44000 if (TARGET_AVX512F)
44002 mmode = HImode;
44003 gen_blendm = gen_avx512f_blendmv16si;
44005 break;
44007 case E_V32HImode:
44008 if (TARGET_AVX512F && TARGET_AVX512BW)
44010 mmode = SImode;
44011 gen_blendm = gen_avx512bw_blendmv32hi;
44013 break;
44015 case E_V64QImode:
44016 if (TARGET_AVX512F && TARGET_AVX512BW)
44018 mmode = DImode;
44019 gen_blendm = gen_avx512bw_blendmv64qi;
44021 break;
44023 default:
44024 break;
44027 if (mmode != VOIDmode)
44029 tmp = gen_reg_rtx (mode);
44030 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44031 /* The avx512*_blendm<mode> expanders have a different operand order
44032 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
44033 elements where the mask is set and the second input operand otherwise;
44034 in {sse,avx}*_*blend* the first input operand is used for elements
44035 where the mask is clear and the second input operand otherwise. */
44036 emit_insn (gen_blendm (target, target, tmp,
44037 force_reg (mmode,
44038 gen_int_mode (1 << elt, mmode))));
44040 else if (use_vec_merge)
44042 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44043 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44044 emit_insn (gen_rtx_SET (target, tmp));
44046 else
44048 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44050 emit_move_insn (mem, target);
44052 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44053 emit_move_insn (tmp, val);
44055 emit_move_insn (target, mem);
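/* An illustrative, purely hypothetical C sketch of the memory fallback
   emitted just above when neither an AVX-512 mask blend nor VEC_MERGE
   applies (the union type is invented for the example):

     union { v4si v; int e[4]; } u;
     u.v = target;        // spill the whole vector to the stack slot
     u.e[elt] = val;      // overwrite the selected element in memory
     target = u.v;        // reload the updated vector

   The AVX-512 path instead broadcasts VAL and merges it under the
   one-bit mask 1 << ELT, and the SSE4.1 path emits VEC_MERGE with the
   same mask.  */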
44059 void
44060 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44062 machine_mode mode = GET_MODE (vec);
44063 machine_mode inner_mode = GET_MODE_INNER (mode);
44064 bool use_vec_extr = false;
44065 rtx tmp;
44067 switch (mode)
44069 case E_V2SImode:
44070 case E_V2SFmode:
44071 if (!mmx_ok)
44072 break;
44073 /* FALLTHRU */
44075 case E_V2DFmode:
44076 case E_V2DImode:
44077 case E_V2TImode:
44078 case E_V4TImode:
44079 use_vec_extr = true;
44080 break;
44082 case E_V4SFmode:
44083 use_vec_extr = TARGET_SSE4_1;
44084 if (use_vec_extr)
44085 break;
44087 switch (elt)
44089 case 0:
44090 tmp = vec;
44091 break;
44093 case 1:
44094 case 3:
44095 tmp = gen_reg_rtx (mode);
44096 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44097 GEN_INT (elt), GEN_INT (elt),
44098 GEN_INT (elt+4), GEN_INT (elt+4)));
44099 break;
44101 case 2:
44102 tmp = gen_reg_rtx (mode);
44103 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44104 break;
44106 default:
44107 gcc_unreachable ();
44109 vec = tmp;
44110 use_vec_extr = true;
44111 elt = 0;
44112 break;
44114 case E_V4SImode:
44115 use_vec_extr = TARGET_SSE4_1;
44116 if (use_vec_extr)
44117 break;
44119 if (TARGET_SSE2)
44121 switch (elt)
44123 case 0:
44124 tmp = vec;
44125 break;
44127 case 1:
44128 case 3:
44129 tmp = gen_reg_rtx (mode);
44130 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44131 GEN_INT (elt), GEN_INT (elt),
44132 GEN_INT (elt), GEN_INT (elt)));
44133 break;
44135 case 2:
44136 tmp = gen_reg_rtx (mode);
44137 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44138 break;
44140 default:
44141 gcc_unreachable ();
44143 vec = tmp;
44144 use_vec_extr = true;
44145 elt = 0;
44147 else
44149 /* For SSE1, we have to reuse the V4SF code. */
44150 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44151 gen_lowpart (V4SFmode, vec), elt);
44152 return;
44154 break;
44156 case E_V8HImode:
44157 use_vec_extr = TARGET_SSE2;
44158 break;
44159 case E_V4HImode:
44160 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44161 break;
44163 case E_V16QImode:
44164 use_vec_extr = TARGET_SSE4_1;
44165 break;
44167 case E_V8SFmode:
44168 if (TARGET_AVX)
44170 tmp = gen_reg_rtx (V4SFmode);
44171 if (elt < 4)
44172 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44173 else
44174 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44175 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44176 return;
44178 break;
44180 case E_V4DFmode:
44181 if (TARGET_AVX)
44183 tmp = gen_reg_rtx (V2DFmode);
44184 if (elt < 2)
44185 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44186 else
44187 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44188 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44189 return;
44191 break;
44193 case E_V32QImode:
44194 if (TARGET_AVX)
44196 tmp = gen_reg_rtx (V16QImode);
44197 if (elt < 16)
44198 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44199 else
44200 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44201 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44202 return;
44204 break;
44206 case E_V16HImode:
44207 if (TARGET_AVX)
44209 tmp = gen_reg_rtx (V8HImode);
44210 if (elt < 8)
44211 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44212 else
44213 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44214 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44215 return;
44217 break;
44219 case E_V8SImode:
44220 if (TARGET_AVX)
44222 tmp = gen_reg_rtx (V4SImode);
44223 if (elt < 4)
44224 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44225 else
44226 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44227 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44228 return;
44230 break;
44232 case E_V4DImode:
44233 if (TARGET_AVX)
44235 tmp = gen_reg_rtx (V2DImode);
44236 if (elt < 2)
44237 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
44238 else
44239 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
44240 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44241 return;
44243 break;
44245 case E_V32HImode:
44246 if (TARGET_AVX512BW)
44248 tmp = gen_reg_rtx (V16HImode);
44249 if (elt < 16)
44250 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
44251 else
44252 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
44253 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44254 return;
44256 break;
44258 case E_V64QImode:
44259 if (TARGET_AVX512BW)
44261 tmp = gen_reg_rtx (V32QImode);
44262 if (elt < 32)
44263 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
44264 else
44265 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
44266 ix86_expand_vector_extract (false, target, tmp, elt & 31);
44267 return;
44269 break;
44271 case E_V16SFmode:
44272 tmp = gen_reg_rtx (V8SFmode);
44273 if (elt < 8)
44274 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
44275 else
44276 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
44277 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44278 return;
44280 case E_V8DFmode:
44281 tmp = gen_reg_rtx (V4DFmode);
44282 if (elt < 4)
44283 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
44284 else
44285 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
44286 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44287 return;
44289 case E_V16SImode:
44290 tmp = gen_reg_rtx (V8SImode);
44291 if (elt < 8)
44292 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
44293 else
44294 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
44295 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44296 return;
44298 case E_V8DImode:
44299 tmp = gen_reg_rtx (V4DImode);
44300 if (elt < 4)
44301 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
44302 else
44303 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
44304 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44305 return;
44307 case E_V8QImode:
44308 /* ??? Could extract the appropriate HImode element and shift. */
44309 default:
44310 break;
44313 if (use_vec_extr)
44315 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
44316 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
44318 /* Let the rtl optimizers know about the zero extension performed. */
44319 if (inner_mode == QImode || inner_mode == HImode)
44321 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
44322 target = gen_lowpart (SImode, target);
44325 emit_insn (gen_rtx_SET (target, tmp));
44327 else
44329 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44331 emit_move_insn (mem, vec);
44333 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44334 emit_move_insn (target, tmp);
44338 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
44339 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
44340 The upper bits of DEST are undefined, though they shouldn't cause
44341 exceptions (some bits from src or all zeros are ok). */
44343 static void
44344 emit_reduc_half (rtx dest, rtx src, int i)
44346 rtx tem, d = dest;
44347 switch (GET_MODE (src))
44349 case E_V4SFmode:
44350 if (i == 128)
44351 tem = gen_sse_movhlps (dest, src, src);
44352 else
44353 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
44354 GEN_INT (1 + 4), GEN_INT (1 + 4));
44355 break;
44356 case E_V2DFmode:
44357 tem = gen_vec_interleave_highv2df (dest, src, src);
44358 break;
44359 case E_V16QImode:
44360 case E_V8HImode:
44361 case E_V4SImode:
44362 case E_V2DImode:
44363 d = gen_reg_rtx (V1TImode);
44364 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
44365 GEN_INT (i / 2));
44366 break;
44367 case E_V8SFmode:
44368 if (i == 256)
44369 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
44370 else
44371 tem = gen_avx_shufps256 (dest, src, src,
44372 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
44373 break;
44374 case E_V4DFmode:
44375 if (i == 256)
44376 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
44377 else
44378 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
44379 break;
44380 case E_V32QImode:
44381 case E_V16HImode:
44382 case E_V8SImode:
44383 case E_V4DImode:
44384 if (i == 256)
44386 if (GET_MODE (dest) != V4DImode)
44387 d = gen_reg_rtx (V4DImode);
44388 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
44389 gen_lowpart (V4DImode, src),
44390 const1_rtx);
44392 else
44394 d = gen_reg_rtx (V2TImode);
44395 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
44396 GEN_INT (i / 2));
44398 break;
44399 case E_V64QImode:
44400 case E_V32HImode:
44401 case E_V16SImode:
44402 case E_V16SFmode:
44403 case E_V8DImode:
44404 case E_V8DFmode:
44405 if (i > 128)
44406 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
44407 gen_lowpart (V16SImode, src),
44408 gen_lowpart (V16SImode, src),
44409 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
44410 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
44411 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
44412 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
44413 GEN_INT (0xC), GEN_INT (0xD),
44414 GEN_INT (0xE), GEN_INT (0xF),
44415 GEN_INT (0x10), GEN_INT (0x11),
44416 GEN_INT (0x12), GEN_INT (0x13),
44417 GEN_INT (0x14), GEN_INT (0x15),
44418 GEN_INT (0x16), GEN_INT (0x17));
44419 else
44420 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
44421 gen_lowpart (V16SImode, src),
44422 GEN_INT (i == 128 ? 0x2 : 0x1),
44423 GEN_INT (0x3),
44424 GEN_INT (0x3),
44425 GEN_INT (0x3),
44426 GEN_INT (i == 128 ? 0x6 : 0x5),
44427 GEN_INT (0x7),
44428 GEN_INT (0x7),
44429 GEN_INT (0x7),
44430 GEN_INT (i == 128 ? 0xA : 0x9),
44431 GEN_INT (0xB),
44432 GEN_INT (0xB),
44433 GEN_INT (0xB),
44434 GEN_INT (i == 128 ? 0xE : 0xD),
44435 GEN_INT (0xF),
44436 GEN_INT (0xF),
44437 GEN_INT (0xF));
44438 break;
44439 default:
44440 gcc_unreachable ();
44442 emit_insn (tem);
44443 if (d != dest)
44444 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
44447 /* Expand a vector reduction. FN is the binary pattern to reduce;
44448 DEST is the destination; IN is the input vector. */
44450 void
44451 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
44453 rtx half, dst, vec = in;
44454 machine_mode mode = GET_MODE (in);
44455 int i;
44457 /* SSE4.1 has a special instruction (phminposuw) for V8HImode UMIN reduction. */
44458 if (TARGET_SSE4_1
44459 && mode == V8HImode
44460 && fn == gen_uminv8hi3)
44462 emit_insn (gen_sse4_1_phminposuw (dest, in));
44463 return;
44466 for (i = GET_MODE_BITSIZE (mode);
44467 i > GET_MODE_UNIT_BITSIZE (mode);
44468 i >>= 1)
44470 half = gen_reg_rtx (mode);
44471 emit_reduc_half (half, vec, i);
44472 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
44473 dst = dest;
44474 else
44475 dst = gen_reg_rtx (mode);
44476 emit_insn (fn (dst, half, vec));
44477 vec = dst;
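/* A rough scalar sketch of the halving reduction above, shown for a
   hypothetical 8-element MAX reduction (illustrative only; the real
   code works on whole vectors and only element 0 of DEST matters):

     for (width = 8; width > 1; width /= 2)
       for (k = 0; k < width / 2; k++)
         v[k] = MAX (v[k], v[k + width / 2]);

   emit_reduc_half provides the "move the upper half down" shuffle and
   FN provides the elementwise binary operation.  */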
44481 /* Target hook for scalar_mode_supported_p. */
44482 static bool
44483 ix86_scalar_mode_supported_p (scalar_mode mode)
44485 if (DECIMAL_FLOAT_MODE_P (mode))
44486 return default_decimal_float_supported_p ();
44487 else if (mode == TFmode)
44488 return true;
44489 else
44490 return default_scalar_mode_supported_p (mode);
44493 /* Implements target hook vector_mode_supported_p. */
44494 static bool
44495 ix86_vector_mode_supported_p (machine_mode mode)
44497 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44498 return true;
44499 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44500 return true;
44501 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44502 return true;
44503 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44504 return true;
44505 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
44506 return true;
44507 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
44508 return true;
44509 return false;
44512 /* Target hook for c_mode_for_suffix. */
44513 static machine_mode
44514 ix86_c_mode_for_suffix (char suffix)
44516 if (suffix == 'q')
44517 return TFmode;
44518 if (suffix == 'w')
44519 return XFmode;
44521 return VOIDmode;
44524 /* Worker function for TARGET_MD_ASM_ADJUST.
44526 We implement asm flag outputs, and maintain source compatibility
44527 with the old cc0-based compiler. */
44529 static rtx_insn *
44530 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
44531 vec<const char *> &constraints,
44532 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
44534 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
44535 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
44537 bool saw_asm_flag = false;
44539 start_sequence ();
44540 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
44542 const char *con = constraints[i];
44543 if (strncmp (con, "=@cc", 4) != 0)
44544 continue;
44545 con += 4;
44546 if (strchr (con, ',') != NULL)
44548 error ("alternatives not allowed in asm flag output");
44549 continue;
44552 bool invert = false;
44553 if (con[0] == 'n')
44554 invert = true, con++;
44556 machine_mode mode = CCmode;
44557 rtx_code code = UNKNOWN;
44559 switch (con[0])
44561 case 'a':
44562 if (con[1] == 0)
44563 mode = CCAmode, code = EQ;
44564 else if (con[1] == 'e' && con[2] == 0)
44565 mode = CCCmode, code = NE;
44566 break;
44567 case 'b':
44568 if (con[1] == 0)
44569 mode = CCCmode, code = EQ;
44570 else if (con[1] == 'e' && con[2] == 0)
44571 mode = CCAmode, code = NE;
44572 break;
44573 case 'c':
44574 if (con[1] == 0)
44575 mode = CCCmode, code = EQ;
44576 break;
44577 case 'e':
44578 if (con[1] == 0)
44579 mode = CCZmode, code = EQ;
44580 break;
44581 case 'g':
44582 if (con[1] == 0)
44583 mode = CCGCmode, code = GT;
44584 else if (con[1] == 'e' && con[2] == 0)
44585 mode = CCGCmode, code = GE;
44586 break;
44587 case 'l':
44588 if (con[1] == 0)
44589 mode = CCGCmode, code = LT;
44590 else if (con[1] == 'e' && con[2] == 0)
44591 mode = CCGCmode, code = LE;
44592 break;
44593 case 'o':
44594 if (con[1] == 0)
44595 mode = CCOmode, code = EQ;
44596 break;
44597 case 'p':
44598 if (con[1] == 0)
44599 mode = CCPmode, code = EQ;
44600 break;
44601 case 's':
44602 if (con[1] == 0)
44603 mode = CCSmode, code = EQ;
44604 break;
44605 case 'z':
44606 if (con[1] == 0)
44607 mode = CCZmode, code = EQ;
44608 break;
44610 if (code == UNKNOWN)
44612 error ("unknown asm flag output %qs", constraints[i]);
44613 continue;
44615 if (invert)
44616 code = reverse_condition (code);
44618 rtx dest = outputs[i];
44619 if (!saw_asm_flag)
44621 /* This is the first asm flag output. Here we put the flags
44622 register in as the real output and adjust the condition to
44623 allow it. */
44624 constraints[i] = "=Bf";
44625 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
44626 saw_asm_flag = true;
44628 else
44630 /* We don't need the flags register as output twice. */
44631 constraints[i] = "=X";
44632 outputs[i] = gen_rtx_SCRATCH (SImode);
44635 rtx x = gen_rtx_REG (mode, FLAGS_REG);
44636 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
44638 machine_mode dest_mode = GET_MODE (dest);
44639 if (!SCALAR_INT_MODE_P (dest_mode))
44641 error ("invalid type for asm flag output");
44642 continue;
44645 if (dest_mode == DImode && !TARGET_64BIT)
44646 dest_mode = SImode;
44648 if (dest_mode != QImode)
44650 rtx destqi = gen_reg_rtx (QImode);
44651 emit_insn (gen_rtx_SET (destqi, x));
44653 if (TARGET_ZERO_EXTEND_WITH_AND
44654 && optimize_function_for_speed_p (cfun))
44656 x = force_reg (dest_mode, const0_rtx);
44658 emit_insn (gen_movstrictqi
44659 (gen_lowpart (QImode, x), destqi));
44661 else
44662 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
44665 if (dest_mode != GET_MODE (dest))
44667 rtx tmp = gen_reg_rtx (SImode);
44669 emit_insn (gen_rtx_SET (tmp, x));
44670 emit_insn (gen_zero_extendsidi2 (dest, tmp));
44672 else
44673 emit_insn (gen_rtx_SET (dest, x));
44675 rtx_insn *seq = get_insns ();
44676 end_sequence ();
44678 if (saw_asm_flag)
44679 return seq;
44680 else
44682 /* If we had no asm flag outputs, clobber the flags. */
44683 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
44684 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
44685 return NULL;
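/* A hypothetical user-level example of the asm flag outputs handled
   above (only the "=@cc..." constraint is what this hook parses; the
   surrounding statement is illustration):

     int equal;
     asm ("cmpl %2, %1" : "=@ccz" (equal) : "r" (a), "r" (b));

   The "=@ccz" output requests the Z flag; the loop above rewrites it
   into a FLAGS_REG output plus a setcc/zero-extend sequence into the
   user's variable, reversing the condition when an 'n' follows "cc".  */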
44689 /* Implements target vector targetm.asm.encode_section_info. */
44691 static void ATTRIBUTE_UNUSED
44692 ix86_encode_section_info (tree decl, rtx rtl, int first)
44694 default_encode_section_info (decl, rtl, first);
44696 if (ix86_in_large_data_p (decl))
44697 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
44700 /* Worker function for REVERSE_CONDITION. */
44702 enum rtx_code
44703 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
44705 return (mode == CCFPmode
44706 ? reverse_condition_maybe_unordered (code)
44707 : reverse_condition (code));
44710 /* Output code to perform an x87 FP register move, from OPERANDS[1]
44711 to OPERANDS[0]. */
44713 const char *
44714 output_387_reg_move (rtx_insn *insn, rtx *operands)
44716 if (REG_P (operands[0]))
44718 if (REG_P (operands[1])
44719 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44721 if (REGNO (operands[0]) == FIRST_STACK_REG)
44722 return output_387_ffreep (operands, 0);
44723 return "fstp\t%y0";
44725 if (STACK_TOP_P (operands[0]))
44726 return "fld%Z1\t%y1";
44727 return "fst\t%y0";
44729 else if (MEM_P (operands[0]))
44731 gcc_assert (REG_P (operands[1]));
44732 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44733 return "fstp%Z0\t%y0";
44734 else
44736 /* There is no non-popping store to memory for XFmode.
44737 So if we need one, follow the store with a load. */
44738 if (GET_MODE (operands[0]) == XFmode)
44739 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
44740 else
44741 return "fst%Z0\t%y0";
44744 else
44745 gcc_unreachable ();
44748 /* Output code to perform a conditional jump to LABEL, if C2 flag in
44749 FP status register is set. */
44751 void
44752 ix86_emit_fp_unordered_jump (rtx label)
44754 rtx reg = gen_reg_rtx (HImode);
44755 rtx temp;
44757 emit_insn (gen_x86_fnstsw_1 (reg));
44759 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
44761 emit_insn (gen_x86_sahf_1 (reg));
44763 temp = gen_rtx_REG (CCmode, FLAGS_REG);
44764 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
44766 else
44768 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
44770 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
44771 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
44774 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
44775 gen_rtx_LABEL_REF (VOIDmode, label),
44776 pc_rtx);
44777 temp = gen_rtx_SET (pc_rtx, temp);
44779 emit_jump_insn (temp);
44780 predict_jump (REG_BR_PROB_BASE * 10 / 100);
44783 /* Output code to perform a log1p XFmode calculation. */
44785 void ix86_emit_i387_log1p (rtx op0, rtx op1)
44787 rtx_code_label *label1 = gen_label_rtx ();
44788 rtx_code_label *label2 = gen_label_rtx ();
44790 rtx tmp = gen_reg_rtx (XFmode);
44791 rtx tmp2 = gen_reg_rtx (XFmode);
44792 rtx test;
44794 emit_insn (gen_absxf2 (tmp, op1));
44795 test = gen_rtx_GE (VOIDmode, tmp,
44796 const_double_from_real_value (
44797 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
44798 XFmode));
44799 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
44801 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44802 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
44803 emit_jump (label2);
44805 emit_label (label1);
44806 emit_move_insn (tmp, CONST1_RTX (XFmode));
44807 emit_insn (gen_addxf3 (tmp, op1, tmp));
44808 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44809 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
44811 emit_label (label2);
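/* C sketch of the expansion above, where ln2 is the fldln2 constant
   loaded via standard_80387_constant_rtx (4):

     if (fabs (x) < 0.29289321881345247561810596348408353)  // 1 - sqrt(2)/2
       op0 = fyl2xp1 (ln2, x);        // ln2 * log2 (1 + x), accurate near 0
     else
       op0 = fyl2x (ln2, 1.0 + x);    // ln2 * log2 (1 + x)

   fyl2xp1 is only specified for |x| below about 1 - sqrt(2)/2, hence
   the branch.  */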
44814 /* Output code to compute round (OP1), i.e. round to nearest with halfway cases away from zero, storing the result in OP0. */
44815 void ix86_emit_i387_round (rtx op0, rtx op1)
44817 machine_mode inmode = GET_MODE (op1);
44818 machine_mode outmode = GET_MODE (op0);
44819 rtx e1, e2, res, tmp, tmp1, half;
44820 rtx scratch = gen_reg_rtx (HImode);
44821 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44822 rtx_code_label *jump_label = gen_label_rtx ();
44823 rtx insn;
44824 rtx (*gen_abs) (rtx, rtx);
44825 rtx (*gen_neg) (rtx, rtx);
44827 switch (inmode)
44829 case E_SFmode:
44830 gen_abs = gen_abssf2;
44831 break;
44832 case E_DFmode:
44833 gen_abs = gen_absdf2;
44834 break;
44835 case E_XFmode:
44836 gen_abs = gen_absxf2;
44837 break;
44838 default:
44839 gcc_unreachable ();
44842 switch (outmode)
44844 case E_SFmode:
44845 gen_neg = gen_negsf2;
44846 break;
44847 case E_DFmode:
44848 gen_neg = gen_negdf2;
44849 break;
44850 case E_XFmode:
44851 gen_neg = gen_negxf2;
44852 break;
44853 case E_HImode:
44854 gen_neg = gen_neghi2;
44855 break;
44856 case E_SImode:
44857 gen_neg = gen_negsi2;
44858 break;
44859 case E_DImode:
44860 gen_neg = gen_negdi2;
44861 break;
44862 default:
44863 gcc_unreachable ();
44866 e1 = gen_reg_rtx (inmode);
44867 e2 = gen_reg_rtx (inmode);
44868 res = gen_reg_rtx (outmode);
44870 half = const_double_from_real_value (dconsthalf, inmode);
44872 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
44874 /* scratch = fxam(op1) */
44875 emit_insn (gen_rtx_SET (scratch,
44876 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
44877 UNSPEC_FXAM)));
44878 /* e1 = fabs(op1) */
44879 emit_insn (gen_abs (e1, op1));
44881 /* e2 = e1 + 0.5 */
44882 half = force_reg (inmode, half);
44883 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
44885 /* res = floor(e2) */
44886 if (inmode != XFmode)
44888 tmp1 = gen_reg_rtx (XFmode);
44890 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
44892 else
44893 tmp1 = e2;
44895 switch (outmode)
44897 case E_SFmode:
44898 case E_DFmode:
44900 rtx tmp0 = gen_reg_rtx (XFmode);
44902 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
44904 emit_insn (gen_rtx_SET (res,
44905 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
44906 UNSPEC_TRUNC_NOOP)));
44908 break;
44909 case E_XFmode:
44910 emit_insn (gen_frndintxf2_floor (res, tmp1));
44911 break;
44912 case E_HImode:
44913 emit_insn (gen_lfloorxfhi2 (res, tmp1));
44914 break;
44915 case E_SImode:
44916 emit_insn (gen_lfloorxfsi2 (res, tmp1));
44917 break;
44918 case E_DImode:
44919 emit_insn (gen_lfloorxfdi2 (res, tmp1));
44920 break;
44921 default:
44922 gcc_unreachable ();
44925 /* flags = signbit(a) */
44926 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44928 /* if (flags) then res = -res */
44929 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44930 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44931 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44932 pc_rtx);
44933 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44934 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44935 JUMP_LABEL (insn) = jump_label;
44937 emit_insn (gen_neg (res, res));
44939 emit_label (jump_label);
44940 LABEL_NUSES (jump_label) = 1;
44942 emit_move_insn (op0, res);
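/* Scalar C sketch of the sequence above:

     res = floor (fabs (a) + 0.5);
     if (signbit (a))            // read from the fxam status word
       res = -res;

   i.e. round to nearest, with halfway cases rounded away from zero.  */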
44945 /* Output code to perform a Newton-Raphson approximation of a single precision
44946 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
44948 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
44950 rtx x0, x1, e0, e1;
44952 x0 = gen_reg_rtx (mode);
44953 e0 = gen_reg_rtx (mode);
44954 e1 = gen_reg_rtx (mode);
44955 x1 = gen_reg_rtx (mode);
44957 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
44959 b = force_reg (mode, b);
44961 /* x0 = rcp(b) estimate */
44962 if (mode == V16SFmode || mode == V8DFmode)
44964 if (TARGET_AVX512ER)
44966 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44967 UNSPEC_RCP28)));
44968 /* res = a * x0 */
44969 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
44970 return;
44972 else
44973 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44974 UNSPEC_RCP14)));
44976 else
44977 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44978 UNSPEC_RCP)));
44980 /* e0 = x0 * b */
44981 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
44983 /* e0 = x0 * e0 */
44984 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
44986 /* e1 = x0 + x0 */
44987 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
44989 /* x1 = e1 - e0 */
44990 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
44992 /* res = a * x1 */
44993 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
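/* Scalar sketch of the single Newton-Raphson step used above, where
   RCP stands for the hardware reciprocal estimate (roughly 12 correct
   bits; the names here are illustrative, not real helpers):

     float x0 = RCP (b);
     float x1 = (x0 + x0) - (b * x0) * x0;   // == x0 * (2 - b * x0)
     return a * x1;

   One step roughly doubles the number of correct bits, so a single
   iteration is enough for SFmode; the result is close to, but not
   correctly rounded, a / b.  */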
44996 /* Output code to perform a Newton-Raphson approximation of a
44997 single precision floating point [reciprocal] square root. */
44999 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
45001 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45002 REAL_VALUE_TYPE r;
45003 int unspec;
45005 x0 = gen_reg_rtx (mode);
45006 e0 = gen_reg_rtx (mode);
45007 e1 = gen_reg_rtx (mode);
45008 e2 = gen_reg_rtx (mode);
45009 e3 = gen_reg_rtx (mode);
45011 if (TARGET_AVX512ER && mode == V16SFmode)
45013 if (recip)
45014 /* res = rsqrt28(a) estimate */
45015 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45016 UNSPEC_RSQRT28)));
45017 else
45019 /* x0 = rsqrt28(a) estimate */
45020 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45021 UNSPEC_RSQRT28)));
45022 /* res = rcp28(x0) estimate */
45023 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
45024 UNSPEC_RCP28)));
45026 return;
45029 real_from_integer (&r, VOIDmode, -3, SIGNED);
45030 mthree = const_double_from_real_value (r, SFmode);
45032 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45033 mhalf = const_double_from_real_value (r, SFmode);
45034 unspec = UNSPEC_RSQRT;
45036 if (VECTOR_MODE_P (mode))
45038 mthree = ix86_build_const_vector (mode, true, mthree);
45039 mhalf = ix86_build_const_vector (mode, true, mhalf);
45040 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45041 if (GET_MODE_SIZE (mode) == 64)
45042 unspec = UNSPEC_RSQRT14;
45045 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45046 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45048 a = force_reg (mode, a);
45050 /* x0 = rsqrt(a) estimate */
45051 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45052 unspec)));
45054 /* If a == 0.0, zero the infinite rsqrt estimate so that sqrt(0.0) does not become 0.0 * inf = NaN. */
45055 if (!recip)
45057 rtx zero = force_reg (mode, CONST0_RTX(mode));
45058 rtx mask;
45060 /* Handle masked compare. */
45061 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45063 mask = gen_reg_rtx (HImode);
45064 /* Imm value 0x4 corresponds to not-equal comparison. */
45065 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45066 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45068 else
45070 mask = gen_reg_rtx (mode);
45071 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45072 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45076 /* e0 = x0 * a */
45077 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45078 /* e1 = e0 * x0 */
45079 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45081 /* e2 = e1 - 3. */
45082 mthree = force_reg (mode, mthree);
45083 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45085 mhalf = force_reg (mode, mhalf);
45086 if (recip)
45087 /* e3 = -.5 * x0 */
45088 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45089 else
45090 /* e3 = -.5 * e0 */
45091 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45092 /* ret = e2 * e3 */
45093 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
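/* Scalar sketch of the Newton-Raphson step used above, where RSQRT is
   the hardware reciprocal square root estimate (names illustrative):

     float x0 = RSQRT (a);
     float e  = a * x0 * x0 - 3.0f;
     rsqrt(a) ~= -0.5f * x0       * e;    // recip case
     sqrt(a)  ~= -0.5f * (a * x0) * e;    // !recip case

   For the sqrt case x0 is zeroed beforehand where a == 0.0, so that
   a * x0 cannot become 0.0 * inf.  */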
45096 #ifdef TARGET_SOLARIS
45097 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45099 static void
45100 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45101 tree decl)
45103 /* With Binutils 2.15, the "@unwind" marker must be specified on
45104 every occurrence of the ".eh_frame" section, not just the first
45105 one. */
45106 if (TARGET_64BIT
45107 && strcmp (name, ".eh_frame") == 0)
45109 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45110 flags & SECTION_WRITE ? "aw" : "a");
45111 return;
45114 #ifndef USE_GAS
45115 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45117 solaris_elf_asm_comdat_section (name, flags, decl);
45118 return;
45120 #endif
45122 default_elf_asm_named_section (name, flags, decl);
45124 #endif /* TARGET_SOLARIS */
45126 /* Return the mangling of TYPE if it is an extended fundamental type. */
45128 static const char *
45129 ix86_mangle_type (const_tree type)
45131 type = TYPE_MAIN_VARIANT (type);
45133 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45134 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45135 return NULL;
45137 switch (TYPE_MODE (type))
45139 case E_TFmode:
45140 /* __float128 is "g". */
45141 return "g";
45142 case E_XFmode:
45143 /* "long double" or __float80 is "e". */
45144 return "e";
45145 default:
45146 return NULL;
45150 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
45152 static tree
45153 ix86_stack_protect_guard (void)
45155 if (TARGET_SSP_TLS_GUARD)
45157 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
45158 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
45159 tree type = build_qualified_type (type_node, qual);
45160 tree t;
45162 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
45164 t = ix86_tls_stack_chk_guard_decl;
45166 if (t == NULL)
45168 rtx x;
45170 t = build_decl
45171 (UNKNOWN_LOCATION, VAR_DECL,
45172 get_identifier (ix86_stack_protector_guard_symbol_str),
45173 type);
45174 TREE_STATIC (t) = 1;
45175 TREE_PUBLIC (t) = 1;
45176 DECL_EXTERNAL (t) = 1;
45177 TREE_USED (t) = 1;
45178 TREE_THIS_VOLATILE (t) = 1;
45179 DECL_ARTIFICIAL (t) = 1;
45180 DECL_IGNORED_P (t) = 1;
45182 /* Do not share RTL as the declaration is visible outside of
45183 current function. */
45184 x = DECL_RTL (t);
45185 RTX_FLAG (x, used) = 1;
45187 ix86_tls_stack_chk_guard_decl = t;
45190 else
45192 tree asptrtype = build_pointer_type (type);
45194 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
45195 t = build2 (MEM_REF, asptrtype, t,
45196 build_int_cst (asptrtype, 0));
45199 return t;
45202 return default_stack_protect_guard ();
45205 /* For 32-bit code we can save PIC register setup by using
45206 __stack_chk_fail_local hidden function instead of calling
45207 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45208 register, so it is better to call __stack_chk_fail directly. */
45210 static tree ATTRIBUTE_UNUSED
45211 ix86_stack_protect_fail (void)
45213 return TARGET_64BIT
45214 ? default_external_stack_protect_fail ()
45215 : default_hidden_stack_protect_fail ();
45218 /* Select a format to encode pointers in exception handling data. CODE
45219 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45220 true if the symbol may be affected by dynamic relocations.
45222 ??? All x86 object file formats are capable of representing this.
45223 After all, the relocation needed is the same as for the call insn.
45224 Whether or not a particular assembler allows us to enter such, I
45225 guess we'll have to see. */
45227 asm_preferred_eh_data_format (int code, int global)
45229 if (flag_pic)
45231 int type = DW_EH_PE_sdata8;
45232 if (!TARGET_64BIT
45233 || ix86_cmodel == CM_SMALL_PIC
45234 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45235 type = DW_EH_PE_sdata4;
45236 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45238 if (ix86_cmodel == CM_SMALL
45239 || (ix86_cmodel == CM_MEDIUM && code))
45240 return DW_EH_PE_udata4;
45241 return DW_EH_PE_absptr;
45244 /* Expand copysign from SIGN to the positive value ABS_VALUE
45245 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45246 the sign-bit. */
45247 static void
45248 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45250 machine_mode mode = GET_MODE (sign);
45251 rtx sgn = gen_reg_rtx (mode);
45252 if (mask == NULL_RTX)
45254 machine_mode vmode;
45256 if (mode == SFmode)
45257 vmode = V4SFmode;
45258 else if (mode == DFmode)
45259 vmode = V2DFmode;
45260 else
45261 vmode = mode;
45263 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45264 if (!VECTOR_MODE_P (mode))
45266 /* We need to generate a scalar mode mask in this case. */
45267 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45268 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45269 mask = gen_reg_rtx (mode);
45270 emit_insn (gen_rtx_SET (mask, tmp));
45273 else
45274 mask = gen_rtx_NOT (mode, mask);
45275 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
45276 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
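/* Bitwise sketch of the above, with SIGNBIT the IEEE sign-bit mask:

     result = abs_value | (sign & SIGNBIT);

   ABS_VALUE is assumed to already have its sign bit clear, so the IOR
   simply transplants the sign of SIGN onto it.  */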
45279 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45280 mask for masking out the sign-bit is stored in *SMASK, if that is
45281 non-null. */
45282 static rtx
45283 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45285 machine_mode vmode, mode = GET_MODE (op0);
45286 rtx xa, mask;
45288 xa = gen_reg_rtx (mode);
45289 if (mode == SFmode)
45290 vmode = V4SFmode;
45291 else if (mode == DFmode)
45292 vmode = V2DFmode;
45293 else
45294 vmode = mode;
45295 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45296 if (!VECTOR_MODE_P (mode))
45298 /* We need to generate a scalar mode mask in this case. */
45299 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45300 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45301 mask = gen_reg_rtx (mode);
45302 emit_insn (gen_rtx_SET (mask, tmp));
45304 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
45306 if (smask)
45307 *smask = mask;
45309 return xa;
45312 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45313 swapping the operands if SWAP_OPERANDS is true. The expanded
45314 code is a forward jump to a newly created label in case the
45315 comparison is true. The generated label rtx is returned. */
45316 static rtx_code_label *
45317 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45318 bool swap_operands)
45320 bool unordered_compare = ix86_unordered_fp_compare (code);
45321 rtx_code_label *label;
45322 rtx tmp, reg;
45324 if (swap_operands)
45325 std::swap (op0, op1);
45327 label = gen_label_rtx ();
45328 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
45329 if (unordered_compare)
45330 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
45331 reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
45332 emit_insn (gen_rtx_SET (reg, tmp));
45333 tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
45334 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45335 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45336 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45337 JUMP_LABEL (tmp) = label;
45339 return label;
45342 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45343 using comparison code CODE. Operands are swapped for the comparison if
45344 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45345 static rtx
45346 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45347 bool swap_operands)
45349 rtx (*insn)(rtx, rtx, rtx, rtx);
45350 machine_mode mode = GET_MODE (op0);
45351 rtx mask = gen_reg_rtx (mode);
45353 if (swap_operands)
45354 std::swap (op0, op1);
45356 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45358 emit_insn (insn (mask, op0, op1,
45359 gen_rtx_fmt_ee (code, mode, op0, op1)));
45360 return mask;
45363 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45364 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45365 static rtx
45366 ix86_gen_TWO52 (machine_mode mode)
45368 REAL_VALUE_TYPE TWO52r;
45369 rtx TWO52;
45371 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45372 TWO52 = const_double_from_real_value (TWO52r, mode);
45373 TWO52 = force_reg (mode, TWO52);
45375 return TWO52;
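/* Why 2**52 (2**23 for SFmode): for 0 <= x < 2**52 the sum x + 2**52
   lies in [2**52, 2**53), where DFmode has an ulp of exactly 1.0, so
   the addition itself rounds x to an integer in the current rounding
   mode and subtracting 2**52 recovers that integer exactly.  For
   example, under round-to-nearest-even:

     (3.7 + 0x1p52) - 0x1p52 == 4.0
     (2.5 + 0x1p52) - 0x1p52 == 2.0

   The expanders below apply this to fabs (x) and fix up the sign
   afterwards.  */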
45378 /* Expand SSE sequence for computing lround from OP1 storing
45379 into OP0. */
45380 void
45381 ix86_expand_lround (rtx op0, rtx op1)
45383 /* C code for the stuff we're doing below:
45384 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45385 return (long)tmp;
45387 machine_mode mode = GET_MODE (op1);
45388 const struct real_format *fmt;
45389 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45390 rtx adj;
45392 /* load nextafter (0.5, 0.0) */
45393 fmt = REAL_MODE_FORMAT (mode);
45394 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45395 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45397 /* adj = copysign (0.5, op1) */
45398 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45399 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45401 /* adj = op1 + adj */
45402 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45404 /* op0 = (imode)adj */
45405 expand_fix (op0, adj, 0);
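/* Why nextafter (0.5, 0.0) instead of 0.5: for the largest value just
   below 0.5, adding an exact 0.5 would round up to 1.0 and lround
   would wrongly return 1; adding the predecessor of 0.5 keeps such
   inputs below 1.0 while 0.5 itself still reaches 1.0.  (Sketch,
   assuming round-to-nearest.)  */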
45408 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
45409 storing into OPERAND0. */
45410 void
45411 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45413 /* C code for the stuff we're doing below (for do_floor):
45414 xi = (long)op1;
45415 xi -= (double)xi > op1 ? 1 : 0;
45416 return xi;
45418 machine_mode fmode = GET_MODE (op1);
45419 machine_mode imode = GET_MODE (op0);
45420 rtx ireg, freg, tmp;
45421 rtx_code_label *label;
45423 /* reg = (long)op1 */
45424 ireg = gen_reg_rtx (imode);
45425 expand_fix (ireg, op1, 0);
45427 /* freg = (double)reg */
45428 freg = gen_reg_rtx (fmode);
45429 expand_float (freg, ireg, 0);
45431 /* ireg = (freg > op1) ? ireg - 1 : ireg */
45432 label = ix86_expand_sse_compare_and_jump (UNLE,
45433 freg, op1, !do_floor);
45434 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
45435 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
45436 emit_move_insn (ireg, tmp);
45438 emit_label (label);
45439 LABEL_NUSES (label) = 1;
45441 emit_move_insn (op0, ireg);
45444 /* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */
45445 void
45446 ix86_expand_rint (rtx operand0, rtx operand1)
45448 /* C code for the stuff we're doing below:
45449 xa = fabs (operand1);
45450 if (!isless (xa, 2**52))
45451 return operand1;
45452 two52 = 2**52;
45453 if (flag_rounding_math)
45455 two52 = copysign (two52, operand1);
45456 xa = operand1;
45458 xa = xa + two52 - two52;
45459 return copysign (xa, operand1);
45461 machine_mode mode = GET_MODE (operand0);
45462 rtx res, xa, TWO52, two52, mask;
45463 rtx_code_label *label;
45465 res = gen_reg_rtx (mode);
45466 emit_move_insn (res, operand1);
45468 /* xa = abs (operand1) */
45469 xa = ix86_expand_sse_fabs (res, &mask);
45471 /* if (!isless (xa, TWO52)) goto label; */
45472 TWO52 = ix86_gen_TWO52 (mode);
45473 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45475 two52 = TWO52;
45476 if (flag_rounding_math)
45478 two52 = gen_reg_rtx (mode);
45479 ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
45480 xa = res;
45483 xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
45484 xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
45486 ix86_sse_copysign_to_positive (res, xa, res, mask);
45488 emit_label (label);
45489 LABEL_NUSES (label) = 1;
45491 emit_move_insn (operand0, res);
45494 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45495 into OPERAND0. */
45496 void
45497 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
45499 /* C code for the stuff we expand below.
45500 double xa = fabs (x), x2;
45501 if (!isless (xa, TWO52))
45502 return x;
45503 xa = xa + TWO52 - TWO52;
45504 x2 = copysign (xa, x);
45505 Compensate. Floor:
45506 if (x2 > x)
45507 x2 -= 1;
45508 Compensate. Ceil:
45509 if (x2 < x)
45510 x2 -= -1;
45511 return x2;
45513 machine_mode mode = GET_MODE (operand0);
45514 rtx xa, TWO52, tmp, one, res, mask;
45515 rtx_code_label *label;
45517 TWO52 = ix86_gen_TWO52 (mode);
45519 /* Temporary for holding the result, initialized to the input
45520 operand to ease control flow. */
45521 res = gen_reg_rtx (mode);
45522 emit_move_insn (res, operand1);
45524 /* xa = abs (operand1) */
45525 xa = ix86_expand_sse_fabs (res, &mask);
45527 /* if (!isless (xa, TWO52)) goto label; */
45528 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45530 /* xa = xa + TWO52 - TWO52; */
45531 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45532 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45534 /* xa = copysign (xa, operand1) */
45535 ix86_sse_copysign_to_positive (xa, xa, res, mask);
45537 /* generate 1.0 or -1.0 */
45538 one = force_reg (mode,
45539 const_double_from_real_value (do_floor
45540 ? dconst1 : dconstm1, mode));
45542 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45543 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45544 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45545 /* We always need to subtract here to preserve signed zero. */
45546 tmp = expand_simple_binop (mode, MINUS,
45547 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45548 emit_move_insn (res, tmp);
45550 emit_label (label);
45551 LABEL_NUSES (label) = 1;
45553 emit_move_insn (operand0, res);
45556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45557 into OPERAND0. */
45558 void
45559 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
45561 /* C code for the stuff we expand below.
45562 double xa = fabs (x), x2;
45563 if (!isless (xa, TWO52))
45564 return x;
45565 x2 = (double)(long)x;
45566 Compensate. Floor:
45567 if (x2 > x)
45568 x2 -= 1;
45569 Compensate. Ceil:
45570 if (x2 < x)
45571 x2 += 1;
45572 if (HONOR_SIGNED_ZEROS (mode))
45573 return copysign (x2, x);
45574 return x2;
45576 machine_mode mode = GET_MODE (operand0);
45577 rtx xa, xi, TWO52, tmp, one, res, mask;
45578 rtx_code_label *label;
45580 TWO52 = ix86_gen_TWO52 (mode);
45582 /* Temporary for holding the result, initialized to the input
45583 operand to ease control flow. */
45584 res = gen_reg_rtx (mode);
45585 emit_move_insn (res, operand1);
45587 /* xa = abs (operand1) */
45588 xa = ix86_expand_sse_fabs (res, &mask);
45590 /* if (!isless (xa, TWO52)) goto label; */
45591 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45593 /* xa = (double)(long)x */
45594 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45595 expand_fix (xi, res, 0);
45596 expand_float (xa, xi, 0);
45598 /* generate 1.0 */
45599 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45601 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45602 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45603 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45604 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
45605 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45606 emit_move_insn (res, tmp);
45608 if (HONOR_SIGNED_ZEROS (mode))
45609 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45611 emit_label (label);
45612 LABEL_NUSES (label) = 1;
45614 emit_move_insn (operand0, res);
45617 /* Expand SSE sequence for computing round from OPERAND1 storing
45618 into OPERAND0. Sequence that works without relying on DImode truncation
45619 via cvttsd2siq, which is only available on 64-bit targets. */
45620 void
45621 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
45623 /* C code for the stuff we expand below.
45624 double xa = fabs (x), xa2, x2;
45625 if (!isless (xa, TWO52))
45626 return x;
45627 Using the absolute value and copying back sign makes
45628 -0.0 -> -0.0 correct.
45629 xa2 = xa + TWO52 - TWO52;
45630 Compensate.
45631 dxa = xa2 - xa;
45632 if (dxa <= -0.5)
45633 xa2 += 1;
45634 else if (dxa > 0.5)
45635 xa2 -= 1;
45636 x2 = copysign (xa2, x);
45637 return x2;
45639 machine_mode mode = GET_MODE (operand0);
45640 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
45641 rtx_code_label *label;
45643 TWO52 = ix86_gen_TWO52 (mode);
45645 /* Temporary for holding the result, initialized to the input
45646 operand to ease control flow. */
45647 res = gen_reg_rtx (mode);
45648 emit_move_insn (res, operand1);
45650 /* xa = abs (operand1) */
45651 xa = ix86_expand_sse_fabs (res, &mask);
45653 /* if (!isless (xa, TWO52)) goto label; */
45654 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45656 /* xa2 = xa + TWO52 - TWO52; */
45657 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45658 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
45660 /* dxa = xa2 - xa; */
45661 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
45663 /* generate 0.5, 1.0 and -0.5 */
45664 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
45665 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
45666 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
45667 0, OPTAB_DIRECT);
45669 /* Compensate. */
45670 tmp = gen_reg_rtx (mode);
45671 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
45672 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
45673 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45674 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45675 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
45676 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
45677 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45678 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45680 /* res = copysign (xa2, operand1) */
45681 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
45683 emit_label (label);
45684 LABEL_NUSES (label) = 1;
45686 emit_move_insn (operand0, res);
45689 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45690 into OPERAND0. */
45691 void
45692 ix86_expand_trunc (rtx operand0, rtx operand1)
45694 /* C code for SSE variant we expand below.
45695 double xa = fabs (x), x2;
45696 if (!isless (xa, TWO52))
45697 return x;
45698 x2 = (double)(long)x;
45699 if (HONOR_SIGNED_ZEROS (mode))
45700 return copysign (x2, x);
45701 return x2;
45703 machine_mode mode = GET_MODE (operand0);
45704 rtx xa, xi, TWO52, res, mask;
45705 rtx_code_label *label;
45707 TWO52 = ix86_gen_TWO52 (mode);
45709 /* Temporary for holding the result, initialized to the input
45710 operand to ease control flow. */
45711 res = gen_reg_rtx (mode);
45712 emit_move_insn (res, operand1);
45714 /* xa = abs (operand1) */
45715 xa = ix86_expand_sse_fabs (res, &mask);
45717 /* if (!isless (xa, TWO52)) goto label; */
45718 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45720 /* x = (double)(long)x */
45721 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45722 expand_fix (xi, res, 0);
45723 expand_float (res, xi, 0);
45725 if (HONOR_SIGNED_ZEROS (mode))
45726 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45728 emit_label (label);
45729 LABEL_NUSES (label) = 1;
45731 emit_move_insn (operand0, res);
45734 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45735 into OPERAND0. */
45736 void
45737 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
45739 machine_mode mode = GET_MODE (operand0);
45740 rtx xa, mask, TWO52, one, res, smask, tmp;
45741 rtx_code_label *label;
45743 /* C code for SSE variant we expand below.
45744 double xa = fabs (x), x2;
45745 if (!isless (xa, TWO52))
45746 return x;
45747 xa2 = xa + TWO52 - TWO52;
45748 Compensate:
45749 if (xa2 > xa)
45750 xa2 -= 1.0;
45751 x2 = copysign (xa2, x);
45752 return x2;
45755 TWO52 = ix86_gen_TWO52 (mode);
45757 /* Temporary for holding the result, initialized to the input
45758 operand to ease control flow. */
45759 res = gen_reg_rtx (mode);
45760 emit_move_insn (res, operand1);
45762 /* xa = abs (operand1) */
45763 xa = ix86_expand_sse_fabs (res, &smask);
45765 /* if (!isless (xa, TWO52)) goto label; */
45766 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45768 /* res = xa + TWO52 - TWO52; */
45769 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45770 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
45771 emit_move_insn (res, tmp);
45773 /* generate 1.0 */
45774 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45776 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
45777 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
45778 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
45779 tmp = expand_simple_binop (mode, MINUS,
45780 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
45781 emit_move_insn (res, tmp);
45783 /* res = copysign (res, operand1) */
45784 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
45786 emit_label (label);
45787 LABEL_NUSES (label) = 1;
45789 emit_move_insn (operand0, res);
45792 /* Expand SSE sequence for computing round from OPERAND1 storing
45793 into OPERAND0. */
45794 void
45795 ix86_expand_round (rtx operand0, rtx operand1)
45797 /* C code for the stuff we're doing below:
45798 double xa = fabs (x);
45799 if (!isless (xa, TWO52))
45800 return x;
45801 xa = (double)(long)(xa + nextafter (0.5, 0.0));
45802 return copysign (xa, x);
45804 machine_mode mode = GET_MODE (operand0);
45805 rtx res, TWO52, xa, xi, half, mask;
45806 rtx_code_label *label;
45807 const struct real_format *fmt;
45808 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45810 /* Temporary for holding the result, initialized to the input
45811 operand to ease control flow. */
45812 res = gen_reg_rtx (mode);
45813 emit_move_insn (res, operand1);
45815 TWO52 = ix86_gen_TWO52 (mode);
45816 xa = ix86_expand_sse_fabs (res, &mask);
45817 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45819 /* load nextafter (0.5, 0.0) */
45820 fmt = REAL_MODE_FORMAT (mode);
45821 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45822 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45824 /* xa = xa + 0.5 */
45825 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
45826 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
45828 /* xa = (double)(int64_t)xa */
45829 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45830 expand_fix (xi, xa, 0);
45831 expand_float (xa, xi, 0);
45833 /* res = copysign (xa, operand1) */
45834 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
45836 emit_label (label);
45837 LABEL_NUSES (label) = 1;
45839 emit_move_insn (operand0, res);
45842 /* Expand SSE sequence for computing round
45843 from OP1 storing into OP0 using sse4 round insn. */
45844 void
45845 ix86_expand_round_sse4 (rtx op0, rtx op1)
45847 machine_mode mode = GET_MODE (op0);
45848 rtx e1, e2, res, half;
45849 const struct real_format *fmt;
45850 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45851 rtx (*gen_copysign) (rtx, rtx, rtx);
45852 rtx (*gen_round) (rtx, rtx, rtx);
45854 switch (mode)
45856 case E_SFmode:
45857 gen_copysign = gen_copysignsf3;
45858 gen_round = gen_sse4_1_roundsf2;
45859 break;
45860 case E_DFmode:
45861 gen_copysign = gen_copysigndf3;
45862 gen_round = gen_sse4_1_rounddf2;
45863 break;
45864 default:
45865 gcc_unreachable ();
45868 /* round (a) = trunc (a + copysign (0.5, a)) */
45870 /* load nextafter (0.5, 0.0) */
45871 fmt = REAL_MODE_FORMAT (mode);
45872 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45873 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45874 half = const_double_from_real_value (pred_half, mode);
45876 /* e1 = copysign (0.5, op1) */
45877 e1 = gen_reg_rtx (mode);
45878 emit_insn (gen_copysign (e1, half, op1));
45880 /* e2 = op1 + e1 */
45881 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
45883 /* res = trunc (e2) */
45884 res = gen_reg_rtx (mode);
45885 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
45887 emit_move_insn (op0, res);
45891 /* Table of valid machine attributes. */
45892 static const struct attribute_spec ix86_attribute_table[] =
45894 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
45895 affects_type_identity, handler, exclude } */
45896 /* Stdcall attribute says callee is responsible for popping arguments
45897 if they are not variable. */
45898 { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45899 NULL },
45900 /* Fastcall attribute says callee is responsible for popping arguments
45901 if they are not variable. */
45902 { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45903 NULL },
45904 /* Thiscall attribute says callee is responsible for popping arguments
45905 if they are not variable. */
45906 { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45907 NULL },
45908 /* Cdecl attribute says the callee is a normal C declaration */
45909 { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45910 NULL },
45911 /* Regparm attribute specifies how many integer arguments are to be
45912 passed in registers. */
45913 { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute,
45914 NULL },
45915 /* Sseregparm attribute says we are using x86_64 calling conventions
45916 for FP arguments. */
45917 { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45918 NULL },
45919 /* The transactional memory builtins are implicitly regparm or fastcall
45920 depending on the ABI. Override the generic do-nothing attribute that
45921 these builtins were declared with. */
45922 { "*tm regparm", 0, 0, false, true, true, true,
45923 ix86_handle_tm_regparm_attribute, NULL },
45924 /* force_align_arg_pointer says this function realigns the stack at entry. */
45925 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
45926 false, true, true, false, ix86_handle_force_align_arg_pointer_attribute,
45927 NULL },
45928 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
45929 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
45930 NULL },
45931 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
45932 NULL },
45933 { "shared", 0, 0, true, false, false, false,
45934 ix86_handle_shared_attribute, NULL },
45935 #endif
45936 { "ms_struct", 0, 0, false, false, false, false,
45937 ix86_handle_struct_attribute, NULL },
45938 { "gcc_struct", 0, 0, false, false, false, false,
45939 ix86_handle_struct_attribute, NULL },
45940 #ifdef SUBTARGET_ATTRIBUTE_TABLE
45941 SUBTARGET_ATTRIBUTE_TABLE,
45942 #endif
45943 /* ms_abi and sysv_abi calling convention function attributes. */
45944 { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL },
45945 { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute,
45946 NULL },
45947 { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
45948 { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
45949 { "ms_hook_prologue", 0, 0, true, false, false, false,
45950 ix86_handle_fndecl_attribute, NULL },
45951 { "callee_pop_aggregate_return", 1, 1, false, true, true, true,
45952 ix86_handle_callee_pop_aggregate_return, NULL },
45953 { "interrupt", 0, 0, false, true, true, false,
45954 ix86_handle_interrupt_attribute, NULL },
45955 { "no_caller_saved_registers", 0, 0, false, true, true, false,
45956 ix86_handle_no_caller_saved_registers_attribute, NULL },
45957 { "naked", 0, 0, true, false, false, false,
45958 ix86_handle_fndecl_attribute, NULL },
45959 { "indirect_branch", 1, 1, true, false, false, false,
45960 ix86_handle_fndecl_attribute, NULL },
45961 { "function_return", 1, 1, true, false, false, false,
45962 ix86_handle_fndecl_attribute, NULL },
45964 /* End element. */
45965 { NULL, 0, 0, false, false, false, false, NULL, NULL }
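/* Illustration only (not used by the compiler itself): a few of the
   attributes registered above as they appear in user code; the function
   names are hypothetical.

     __attribute__((regparm(3))) int f (int a, int b, int c);
     __attribute__((indirect_branch("thunk"))) void g (void);
     __attribute__((function_return("thunk-extern"))) int h (void);

   Both "indirect_branch" and "function_return" accept the values "keep",
   "thunk", "thunk-inline" and "thunk-extern".  */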
45968 /* Implement targetm.vectorize.builtin_vectorization_cost. */
45969 static int
45970 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
45971 tree vectype, int)
45973 bool fp = false;
45974 machine_mode mode = TImode;
45975 int index;
45976 if (vectype != NULL)
45978 fp = FLOAT_TYPE_P (vectype);
45979 mode = TYPE_MODE (vectype);
45982 switch (type_of_cost)
45984 case scalar_stmt:
45985 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
45987 case scalar_load:
45988 /* Load/store costs are relative to a register move, which costs 2.
45989 Recompute them in COSTS_N_INSNS units so everything has the same base. */
45990 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
45991 : ix86_cost->int_load [2]) / 2;
45993 case scalar_store:
45994 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
45995 : ix86_cost->int_store [2]) / 2;
45997 case vector_stmt:
45998 return ix86_vec_cost (mode,
45999 fp ? ix86_cost->addss : ix86_cost->sse_op,
46000 true);
46002 case vector_load:
46003 index = sse_store_index (mode);
46004 /* See PR82713 - we may end up being called on non-vector type. */
46005 if (index < 0)
46006 index = 2;
46007 return ix86_vec_cost (mode,
46008 COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
46009 true);
46011 case vector_store:
46012 index = sse_store_index (mode);
46013 /* See PR82713 - we may end up being called on non-vector type. */
46014 if (index < 0)
46015 index = 2;
46016 return ix86_vec_cost (mode,
46017 COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
46018 true);
46020 case vec_to_scalar:
46021 case scalar_to_vec:
46022 return ix86_vec_cost (mode, ix86_cost->sse_op, true);
46024 /* We should have separate costs for unaligned loads and gather/scatter.
46025 Do that incrementally. */
46026 case unaligned_load:
46027 index = sse_store_index (mode);
46028 /* See PR82713 - we may end up being called on non-vector type. */
46029 if (index < 0)
46030 index = 2;
46031 return ix86_vec_cost (mode,
46032 COSTS_N_INSNS
46033 (ix86_cost->sse_unaligned_load[index]) / 2,
46034 true);
46036 case unaligned_store:
46037 index = sse_store_index (mode);
46038 /* See PR82713 - we may end up being called on non-vector type. */
46039 if (index < 0)
46040 index = 2;
46041 return ix86_vec_cost (mode,
46042 COSTS_N_INSNS
46043 (ix86_cost->sse_unaligned_store[index]) / 2,
46044 true);
46046 case vector_gather_load:
46047 return ix86_vec_cost (mode,
46048 COSTS_N_INSNS
46049 (ix86_cost->gather_static
46050 + ix86_cost->gather_per_elt
46051 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
46052 true);
46054 case vector_scatter_store:
46055 return ix86_vec_cost (mode,
46056 COSTS_N_INSNS
46057 (ix86_cost->scatter_static
46058 + ix86_cost->scatter_per_elt
46059 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
46060 true);
46062 case cond_branch_taken:
46063 return ix86_cost->cond_taken_branch_cost;
46065 case cond_branch_not_taken:
46066 return ix86_cost->cond_not_taken_branch_cost;
46068 case vec_perm:
46069 case vec_promote_demote:
46070 return ix86_vec_cost (mode,
46071 ix86_cost->sse_op, true);
46073 case vec_construct:
46075 /* N element inserts. */
46076 int cost = ix86_vec_cost (mode, ix86_cost->sse_op, false);
46077 /* One vinserti128 for combining two SSE vectors for AVX256. */
46078 if (GET_MODE_BITSIZE (mode) == 256)
46079 cost += ix86_vec_cost (mode, ix86_cost->addss, true);
46080 /* One vinserti64x4 and two vinserti128 for combining SSE
46081 and AVX256 vectors to AVX512. */
46082 else if (GET_MODE_BITSIZE (mode) == 512)
46083 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss, true);
46084 return cost;
46087 default:
46088 gcc_unreachable ();
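/* Worked example (the cost number is hypothetical): with
   ix86_cost->int_load[2] == 4, scalar_load returns
   COSTS_N_INSNS (4) / 2 == 16 / 2 == 8, i.e. the equivalent of two
   simple insns, since the load/store tables are relative to a register
   move of cost 2 while COSTS_N_INSNS (1) == 4.  */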
46092 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46093 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46094 insn every time. */
46096 static GTY(()) rtx_insn *vselect_insn;
46098 /* Initialize vselect_insn. */
46100 static void
46101 init_vselect_insn (void)
46103 unsigned i;
46104 rtx x;
46106 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46107 for (i = 0; i < MAX_VECT_LEN; ++i)
46108 XVECEXP (x, 0, i) = const0_rtx;
46109 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46110 const0_rtx), x);
46111 x = gen_rtx_SET (const0_rtx, x);
46112 start_sequence ();
46113 vselect_insn = emit_insn (x);
46114 end_sequence ();
46117 /* Construct (set target (vec_select op0 (parallel perm))) and
46118 return true if that's a valid instruction in the active ISA. */
46120 static bool
46121 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46122 unsigned nelt, bool testing_p)
46124 unsigned int i;
46125 rtx x, save_vconcat;
46126 int icode;
46128 if (vselect_insn == NULL_RTX)
46129 init_vselect_insn ();
46131 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46132 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46133 for (i = 0; i < nelt; ++i)
46134 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46135 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46136 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46137 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46138 SET_DEST (PATTERN (vselect_insn)) = target;
46139 icode = recog_memoized (vselect_insn);
46141 if (icode >= 0 && !testing_p)
46142 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46144 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46145 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46146 INSN_CODE (vselect_insn) = -1;
46148 return icode >= 0;
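/* Illustrative example of what expand_vselect builds: for a V4SImode
   TARGET and OP0 with perm == {2, 3, 0, 1}, the insn tested (and, if
   !TESTING_P, emitted) is

     (set (reg:V4SI target)
          (vec_select:V4SI (reg:V4SI op0)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which recog can match e.g. as pshufd with immediate 0x4e.  */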
46151 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46153 static bool
46154 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46155 const unsigned char *perm, unsigned nelt,
46156 bool testing_p)
46158 machine_mode v2mode;
46159 rtx x;
46160 bool ok;
46162 if (vselect_insn == NULL_RTX)
46163 init_vselect_insn ();
46165 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
46166 return false;
46167 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46168 PUT_MODE (x, v2mode);
46169 XEXP (x, 0) = op0;
46170 XEXP (x, 1) = op1;
46171 ok = expand_vselect (target, x, perm, nelt, testing_p);
46172 XEXP (x, 0) = const0_rtx;
46173 XEXP (x, 1) = const0_rtx;
46174 return ok;
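/* Illustrative example: for two V2DImode operands and perm == {0, 2},
   the source becomes

     (vec_select:V2DI (vec_concat:V4DI (reg:V2DI op0) (reg:V2DI op1))
                      (parallel [(const_int 0) (const_int 2)]))

   i.e. element 0 of each operand, which matches the interleave-low
   (punpcklqdq) pattern in sse.md.  */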
46177 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46178 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46180 static bool
46181 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46183 machine_mode mmode, vmode = d->vmode;
46184 unsigned i, mask, nelt = d->nelt;
46185 rtx target, op0, op1, maskop, x;
46186 rtx rperm[32], vperm;
46188 if (d->one_operand_p)
46189 return false;
46190 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46191 && (TARGET_AVX512BW
46192 || GET_MODE_UNIT_SIZE (vmode) >= 4))
46194 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46196 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46198 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46200 else
46201 return false;
46203 /* This is a blend, not a permute. Elements must stay in their
46204 respective lanes. */
46205 for (i = 0; i < nelt; ++i)
46207 unsigned e = d->perm[i];
46208 if (!(e == i || e == i + nelt))
46209 return false;
46212 if (d->testing_p)
46213 return true;
46215 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46216 decision should be extracted elsewhere, so that we only try that
46217 sequence once all budget==3 options have been tried. */
46218 target = d->target;
46219 op0 = d->op0;
46220 op1 = d->op1;
46221 mask = 0;
46223 switch (vmode)
46225 case E_V8DFmode:
46226 case E_V16SFmode:
46227 case E_V4DFmode:
46228 case E_V8SFmode:
46229 case E_V2DFmode:
46230 case E_V4SFmode:
46231 case E_V8HImode:
46232 case E_V8SImode:
46233 case E_V32HImode:
46234 case E_V64QImode:
46235 case E_V16SImode:
46236 case E_V8DImode:
46237 for (i = 0; i < nelt; ++i)
46238 mask |= (d->perm[i] >= nelt) << i;
46239 break;
46241 case E_V2DImode:
46242 for (i = 0; i < 2; ++i)
46243 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46244 vmode = V8HImode;
46245 goto do_subreg;
46247 case E_V4SImode:
46248 for (i = 0; i < 4; ++i)
46249 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46250 vmode = V8HImode;
46251 goto do_subreg;
46253 case E_V16QImode:
46254 /* See if bytes move in pairs so we can use pblendw with
46255 an immediate argument, rather than pblendvb with a vector
46256 argument. */
46257 for (i = 0; i < 16; i += 2)
46258 if (d->perm[i] + 1 != d->perm[i + 1])
46260 use_pblendvb:
46261 for (i = 0; i < nelt; ++i)
46262 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46264 finish_pblendvb:
46265 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46266 vperm = force_reg (vmode, vperm);
46268 if (GET_MODE_SIZE (vmode) == 16)
46269 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46270 else
46271 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46272 if (target != d->target)
46273 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46274 return true;
46277 for (i = 0; i < 8; ++i)
46278 mask |= (d->perm[i * 2] >= 16) << i;
46279 vmode = V8HImode;
46280 /* FALLTHRU */
46282 do_subreg:
46283 target = gen_reg_rtx (vmode);
46284 op0 = gen_lowpart (vmode, op0);
46285 op1 = gen_lowpart (vmode, op1);
46286 break;
46288 case E_V32QImode:
46289 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46290 for (i = 0; i < 32; i += 2)
46291 if (d->perm[i] + 1 != d->perm[i + 1])
46292 goto use_pblendvb;
46293 /* See if bytes move in quadruplets. If yes, vpblendd
46294 with immediate can be used. */
46295 for (i = 0; i < 32; i += 4)
46296 if (d->perm[i] + 2 != d->perm[i + 2])
46297 break;
46298 if (i < 32)
46300 /* See if bytes move the same in both lanes. If yes,
46301 vpblendw with immediate can be used. */
46302 for (i = 0; i < 16; i += 2)
46303 if (d->perm[i] + 16 != d->perm[i + 16])
46304 goto use_pblendvb;
46306 /* Use vpblendw. */
46307 for (i = 0; i < 16; ++i)
46308 mask |= (d->perm[i * 2] >= 32) << i;
46309 vmode = V16HImode;
46310 goto do_subreg;
46313 /* Use vpblendd. */
46314 for (i = 0; i < 8; ++i)
46315 mask |= (d->perm[i * 4] >= 32) << i;
46316 vmode = V8SImode;
46317 goto do_subreg;
46319 case E_V16HImode:
46320 /* See if words move in pairs. If yes, vpblendd can be used. */
46321 for (i = 0; i < 16; i += 2)
46322 if (d->perm[i] + 1 != d->perm[i + 1])
46323 break;
46324 if (i < 16)
46326 /* See if words move the same in both lanes. If not,
46327 vpblendvb must be used. */
46328 for (i = 0; i < 8; i++)
46329 if (d->perm[i] + 8 != d->perm[i + 8])
46331 /* Use vpblendvb. */
46332 for (i = 0; i < 32; ++i)
46333 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46335 vmode = V32QImode;
46336 nelt = 32;
46337 target = gen_reg_rtx (vmode);
46338 op0 = gen_lowpart (vmode, op0);
46339 op1 = gen_lowpart (vmode, op1);
46340 goto finish_pblendvb;
46343 /* Use vpblendw. */
46344 for (i = 0; i < 16; ++i)
46345 mask |= (d->perm[i] >= 16) << i;
46346 break;
46349 /* Use vpblendd. */
46350 for (i = 0; i < 8; ++i)
46351 mask |= (d->perm[i * 2] >= 16) << i;
46352 vmode = V8SImode;
46353 goto do_subreg;
46355 case E_V4DImode:
46356 /* Use vpblendd. */
46357 for (i = 0; i < 4; ++i)
46358 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46359 vmode = V8SImode;
46360 goto do_subreg;
46362 default:
46363 gcc_unreachable ();
46366 switch (vmode)
46368 case E_V8DFmode:
46369 case E_V8DImode:
46370 mmode = QImode;
46371 break;
46372 case E_V16SFmode:
46373 case E_V16SImode:
46374 mmode = HImode;
46375 break;
46376 case E_V32HImode:
46377 mmode = SImode;
46378 break;
46379 case E_V64QImode:
46380 mmode = DImode;
46381 break;
46382 default:
46383 mmode = VOIDmode;
46386 if (mmode != VOIDmode)
46387 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
46388 else
46389 maskop = GEN_INT (mask);
46391 /* This matches five different patterns with the different modes. */
46392 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
46393 x = gen_rtx_SET (target, x);
46394 emit_insn (x);
46395 if (target != d->target)
46396 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46398 return true;
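/* Illustrative example: for V8HImode with perm == {0, 9, 2, 11, 4, 13,
   6, 15} each element i comes from position i of either op0 or op1, so
   this is a blend; the immediate is accumulated as
   mask |= (perm[i] >= 8) << i, giving 0xaa, and the resulting vec_merge
   matches pblendw.  */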
46401 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46402 in terms of the variable form of vpermilps.
46404 Note that we will have already failed the immediate input vpermilps,
46405 which requires that the high and low part shuffle be identical; the
46406 variable form doesn't require that. */
46408 static bool
46409 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46411 rtx rperm[8], vperm;
46412 unsigned i;
46414 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46415 return false;
46417 /* We can only permute within the 128-bit lane. */
46418 for (i = 0; i < 8; ++i)
46420 unsigned e = d->perm[i];
46421 if (i < 4 ? e >= 4 : e < 4)
46422 return false;
46425 if (d->testing_p)
46426 return true;
46428 for (i = 0; i < 8; ++i)
46430 unsigned e = d->perm[i];
46432 /* Within each 128-bit lane, the elements of op0 are numbered
46433 from 0 and the elements of op1 are numbered from 4. */
46434 if (e >= 8 + 4)
46435 e -= 8;
46436 else if (e >= 4)
46437 e -= 4;
46439 rperm[i] = GEN_INT (e);
46442 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46443 vperm = force_reg (V8SImode, vperm);
46444 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46446 return true;
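/* Illustrative example: the one-operand V8SFmode permutation
   {3, 2, 1, 0, 4, 5, 6, 7} shuffles its two 128-bit lanes differently,
   so the immediate form of vpermilps (which needs identical lane
   shuffles) has already failed; here it is emitted as the variable form
   with the control vector {3, 2, 1, 0, 0, 1, 2, 3}.  */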
46449 /* Return true if permutation D can be performed as VMODE permutation
46450 instead. */
46452 static bool
46453 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46455 unsigned int i, j, chunk;
46457 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46458 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46459 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46460 return false;
46462 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46463 return true;
46465 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46466 for (i = 0; i < d->nelt; i += chunk)
46467 if (d->perm[i] & (chunk - 1))
46468 return false;
46469 else
46470 for (j = 1; j < chunk; ++j)
46471 if (d->perm[i] + j != d->perm[i + j])
46472 return false;
46474 return true;
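/* Illustrative example: the V16QImode permutation
   {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13} moves bytes in
   aligned pairs, so valid_perm_using_mode_p (V8HImode, d) is true and
   the permutation can equally be performed as the V8HImode permutation
   {1, 0, 3, 2, 5, 4, 7, 6}.  */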
46477 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46478 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46480 static bool
46481 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46483 unsigned i, nelt, eltsz, mask;
46484 unsigned char perm[64];
46485 machine_mode vmode = V16QImode;
46486 rtx rperm[64], vperm, target, op0, op1;
46488 nelt = d->nelt;
46490 if (!d->one_operand_p)
46492 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46494 if (TARGET_AVX2
46495 && valid_perm_using_mode_p (V2TImode, d))
46497 if (d->testing_p)
46498 return true;
46500 /* Use vperm2i128 insn. The pattern uses
46501 V4DImode instead of V2TImode. */
46502 target = d->target;
46503 if (d->vmode != V4DImode)
46504 target = gen_reg_rtx (V4DImode);
46505 op0 = gen_lowpart (V4DImode, d->op0);
46506 op1 = gen_lowpart (V4DImode, d->op1);
46507 rperm[0]
46508 = GEN_INT ((d->perm[0] / (nelt / 2))
46509 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
46510 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46511 if (target != d->target)
46512 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46513 return true;
46515 return false;
46518 else
46520 if (GET_MODE_SIZE (d->vmode) == 16)
46522 if (!TARGET_SSSE3)
46523 return false;
46525 else if (GET_MODE_SIZE (d->vmode) == 32)
46527 if (!TARGET_AVX2)
46528 return false;
46530 /* V4DImode should already be handled through
46531 expand_vselect by the vpermq instruction. */
46532 gcc_assert (d->vmode != V4DImode);
46534 vmode = V32QImode;
46535 if (d->vmode == V8SImode
46536 || d->vmode == V16HImode
46537 || d->vmode == V32QImode)
46539 /* First see if vpermq can be used for
46540 V8SImode/V16HImode/V32QImode. */
46541 if (valid_perm_using_mode_p (V4DImode, d))
46543 for (i = 0; i < 4; i++)
46544 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
46545 if (d->testing_p)
46546 return true;
46547 target = gen_reg_rtx (V4DImode);
46548 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
46549 perm, 4, false))
46551 emit_move_insn (d->target,
46552 gen_lowpart (d->vmode, target));
46553 return true;
46555 return false;
46558 /* Next see if vpermd can be used. */
46559 if (valid_perm_using_mode_p (V8SImode, d))
46560 vmode = V8SImode;
46562 /* Or if vpermps can be used. */
46563 else if (d->vmode == V8SFmode)
46564 vmode = V8SImode;
46566 if (vmode == V32QImode)
46568 /* vpshufb only works within lanes; it cannot
46569 shuffle bytes between the lanes. */
46570 for (i = 0; i < nelt; ++i)
46571 if ((d->perm[i] ^ i) & (nelt / 2))
46572 return false;
46575 else if (GET_MODE_SIZE (d->vmode) == 64)
46577 if (!TARGET_AVX512BW)
46578 return false;
46580 /* If vpermq didn't work, vpshufb won't work either. */
46581 if (d->vmode == V8DFmode || d->vmode == V8DImode)
46582 return false;
46584 vmode = V64QImode;
46585 if (d->vmode == V16SImode
46586 || d->vmode == V32HImode
46587 || d->vmode == V64QImode)
46589 /* First see if vpermq can be used for
46590 V16SImode/V32HImode/V64QImode. */
46591 if (valid_perm_using_mode_p (V8DImode, d))
46593 for (i = 0; i < 8; i++)
46594 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
46595 if (d->testing_p)
46596 return true;
46597 target = gen_reg_rtx (V8DImode);
46598 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
46599 perm, 8, false))
46601 emit_move_insn (d->target,
46602 gen_lowpart (d->vmode, target));
46603 return true;
46605 return false;
46608 /* Next see if vpermd can be used. */
46609 if (valid_perm_using_mode_p (V16SImode, d))
46610 vmode = V16SImode;
46612 /* Or if vpermps can be used. */
46613 else if (d->vmode == V16SFmode)
46614 vmode = V16SImode;
46615 if (vmode == V64QImode)
46617 /* vpshufb only works within lanes; it cannot
46618 shuffle bytes between the lanes. */
46619 for (i = 0; i < nelt; ++i)
46620 if ((d->perm[i] ^ i) & (nelt / 4))
46621 return false;
46624 else
46625 return false;
46628 if (d->testing_p)
46629 return true;
46631 if (vmode == V8SImode)
46632 for (i = 0; i < 8; ++i)
46633 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
46634 else if (vmode == V16SImode)
46635 for (i = 0; i < 16; ++i)
46636 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
46637 else
46639 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46640 if (!d->one_operand_p)
46641 mask = 2 * nelt - 1;
46642 else if (vmode == V16QImode)
46643 mask = nelt - 1;
46644 else if (vmode == V64QImode)
46645 mask = nelt / 4 - 1;
46646 else
46647 mask = nelt / 2 - 1;
46649 for (i = 0; i < nelt; ++i)
46651 unsigned j, e = d->perm[i] & mask;
46652 for (j = 0; j < eltsz; ++j)
46653 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
46657 vperm = gen_rtx_CONST_VECTOR (vmode,
46658 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
46659 vperm = force_reg (vmode, vperm);
46661 target = d->target;
46662 if (d->vmode != vmode)
46663 target = gen_reg_rtx (vmode);
46664 op0 = gen_lowpart (vmode, d->op0);
46665 if (d->one_operand_p)
46667 if (vmode == V16QImode)
46668 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
46669 else if (vmode == V32QImode)
46670 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
46671 else if (vmode == V64QImode)
46672 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
46673 else if (vmode == V8SFmode)
46674 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
46675 else if (vmode == V8SImode)
46676 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
46677 else if (vmode == V16SFmode)
46678 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
46679 else if (vmode == V16SImode)
46680 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
46681 else
46682 gcc_unreachable ();
46684 else
46686 op1 = gen_lowpart (vmode, d->op1);
46687 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
46689 if (target != d->target)
46690 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46692 return true;
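/* Illustrative example of the byte mask built above: if a one-operand
   V8HImode permutation {1, 0, 3, 2, 5, 4, 7, 6} reaches this point, it
   is handled as a V16QImode pshufb with eltsz == 2 and mask == 7, so
   the control vector holds the bytes
   {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}.  */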
46695 /* For V*[QHS]Imode permutations, check whether the same permutation
46696 can be performed in a 2x, 4x or 8x wider inner mode. */
46698 static bool
46699 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
46700 struct expand_vec_perm_d *nd)
46702 int i;
46703 machine_mode mode = VOIDmode;
46705 switch (d->vmode)
46707 case E_V16QImode: mode = V8HImode; break;
46708 case E_V32QImode: mode = V16HImode; break;
46709 case E_V64QImode: mode = V32HImode; break;
46710 case E_V8HImode: mode = V4SImode; break;
46711 case E_V16HImode: mode = V8SImode; break;
46712 case E_V32HImode: mode = V16SImode; break;
46713 case E_V4SImode: mode = V2DImode; break;
46714 case E_V8SImode: mode = V4DImode; break;
46715 case E_V16SImode: mode = V8DImode; break;
46716 default: return false;
46718 for (i = 0; i < d->nelt; i += 2)
46719 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
46720 return false;
46721 nd->vmode = mode;
46722 nd->nelt = d->nelt / 2;
46723 for (i = 0; i < nd->nelt; i++)
46724 nd->perm[i] = d->perm[2 * i] / 2;
46725 if (GET_MODE_INNER (mode) != DImode)
46726 canonicalize_vector_int_perm (nd, nd);
46727 if (nd != d)
46729 nd->one_operand_p = d->one_operand_p;
46730 nd->testing_p = d->testing_p;
46731 if (d->op0 == d->op1)
46732 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
46733 else
46735 nd->op0 = gen_lowpart (nd->vmode, d->op0);
46736 nd->op1 = gen_lowpart (nd->vmode, d->op1);
46738 if (d->testing_p)
46739 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
46740 else
46741 nd->target = gen_reg_rtx (nd->vmode);
46743 return true;
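/* Illustrative example: the V16QImode permutation
   {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} narrows first
   to the V8HImode permutation {2, 3, 0, 1, 6, 7, 4, 5} and then, via the
   recursive call, to the V4SImode permutation {1, 0, 3, 2}; it cannot be
   narrowed further because the V4SImode indices no longer move in even
   pairs.  */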
46746 /* Try to expand one-operand permutation with constant mask. */
46748 static bool
46749 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
46751 machine_mode mode = GET_MODE (d->op0);
46752 machine_mode maskmode = mode;
46753 rtx (*gen) (rtx, rtx, rtx) = NULL;
46754 rtx target, op0, mask;
46755 rtx vec[64];
46757 if (!rtx_equal_p (d->op0, d->op1))
46758 return false;
46760 if (!TARGET_AVX512F)
46761 return false;
46763 switch (mode)
46765 case E_V16SImode:
46766 gen = gen_avx512f_permvarv16si;
46767 break;
46768 case E_V16SFmode:
46769 gen = gen_avx512f_permvarv16sf;
46770 maskmode = V16SImode;
46771 break;
46772 case E_V8DImode:
46773 gen = gen_avx512f_permvarv8di;
46774 break;
46775 case E_V8DFmode:
46776 gen = gen_avx512f_permvarv8df;
46777 maskmode = V8DImode;
46778 break;
46779 default:
46780 return false;
46783 target = d->target;
46784 op0 = d->op0;
46785 for (int i = 0; i < d->nelt; ++i)
46786 vec[i] = GEN_INT (d->perm[i]);
46787 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
46788 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
46789 return true;
46792 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
46793 in a single instruction. */
46795 static bool
46796 expand_vec_perm_1 (struct expand_vec_perm_d *d)
46798 unsigned i, nelt = d->nelt;
46799 struct expand_vec_perm_d nd;
46801 /* Check plain VEC_SELECT first, because AVX has instructions that could
46802 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
46803 input where SEL+CONCAT may not. */
46804 if (d->one_operand_p)
46806 int mask = nelt - 1;
46807 bool identity_perm = true;
46808 bool broadcast_perm = true;
46810 for (i = 0; i < nelt; i++)
46812 nd.perm[i] = d->perm[i] & mask;
46813 if (nd.perm[i] != i)
46814 identity_perm = false;
46815 if (nd.perm[i])
46816 broadcast_perm = false;
46819 if (identity_perm)
46821 if (!d->testing_p)
46822 emit_move_insn (d->target, d->op0);
46823 return true;
46825 else if (broadcast_perm && TARGET_AVX2)
46827 /* Use vpbroadcast{b,w,d}. */
46828 rtx (*gen) (rtx, rtx) = NULL;
46829 switch (d->vmode)
46831 case E_V64QImode:
46832 if (TARGET_AVX512BW)
46833 gen = gen_avx512bw_vec_dupv64qi_1;
46834 break;
46835 case E_V32QImode:
46836 gen = gen_avx2_pbroadcastv32qi_1;
46837 break;
46838 case E_V32HImode:
46839 if (TARGET_AVX512BW)
46840 gen = gen_avx512bw_vec_dupv32hi_1;
46841 break;
46842 case E_V16HImode:
46843 gen = gen_avx2_pbroadcastv16hi_1;
46844 break;
46845 case E_V16SImode:
46846 if (TARGET_AVX512F)
46847 gen = gen_avx512f_vec_dupv16si_1;
46848 break;
46849 case E_V8SImode:
46850 gen = gen_avx2_pbroadcastv8si_1;
46851 break;
46852 case E_V16QImode:
46853 gen = gen_avx2_pbroadcastv16qi;
46854 break;
46855 case E_V8HImode:
46856 gen = gen_avx2_pbroadcastv8hi;
46857 break;
46858 case E_V16SFmode:
46859 if (TARGET_AVX512F)
46860 gen = gen_avx512f_vec_dupv16sf_1;
46861 break;
46862 case E_V8SFmode:
46863 gen = gen_avx2_vec_dupv8sf_1;
46864 break;
46865 case E_V8DFmode:
46866 if (TARGET_AVX512F)
46867 gen = gen_avx512f_vec_dupv8df_1;
46868 break;
46869 case E_V8DImode:
46870 if (TARGET_AVX512F)
46871 gen = gen_avx512f_vec_dupv8di_1;
46872 break;
46873 /* For other modes, prefer the other shuffles this function creates. */
46874 default: break;
46876 if (gen != NULL)
46878 if (!d->testing_p)
46879 emit_insn (gen (d->target, d->op0));
46880 return true;
46884 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
46885 return true;
46887 /* There are plenty of patterns in sse.md that are written for
46888 SEL+CONCAT and are not replicated for a single op. Perhaps
46889 that should be changed, to avoid the nastiness here. */
46891 /* Recognize interleave style patterns, which means incrementing
46892 every other permutation operand. */
46893 for (i = 0; i < nelt; i += 2)
46895 nd.perm[i] = d->perm[i] & mask;
46896 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
46898 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
46899 d->testing_p))
46900 return true;
46902 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
46903 if (nelt >= 4)
46905 for (i = 0; i < nelt; i += 4)
46907 nd.perm[i + 0] = d->perm[i + 0] & mask;
46908 nd.perm[i + 1] = d->perm[i + 1] & mask;
46909 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
46910 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
46913 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
46914 d->testing_p))
46915 return true;
46919 /* Finally, try the fully general two operand permute. */
46920 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
46921 d->testing_p))
46922 return true;
46924 /* Recognize interleave style patterns with reversed operands. */
46925 if (!d->one_operand_p)
46927 for (i = 0; i < nelt; ++i)
46929 unsigned e = d->perm[i];
46930 if (e >= nelt)
46931 e -= nelt;
46932 else
46933 e += nelt;
46934 nd.perm[i] = e;
46937 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
46938 d->testing_p))
46939 return true;
46942 /* Try the SSE4.1 blend variable merge instructions. */
46943 if (expand_vec_perm_blend (d))
46944 return true;
46946 /* Try one of the AVX vpermil variable permutations. */
46947 if (expand_vec_perm_vpermil (d))
46948 return true;
46950 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
46951 vpshufb, vpermd, vpermps or vpermq variable permutation. */
46952 if (expand_vec_perm_pshufb (d))
46953 return true;
46955 /* Try the AVX2 vpalignr instruction. */
46956 if (expand_vec_perm_palignr (d, true))
46957 return true;
46959 /* Try the AVX512F vperm{s,d} instructions. */
46960 if (ix86_expand_vec_one_operand_perm_avx512 (d))
46961 return true;
46963 /* Try the AVX512F vpermt2/vpermi2 instructions. */
46964 if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
46965 return true;
46967 /* See if we can get the same permutation in different vector integer
46968 mode. */
46969 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
46971 if (!d->testing_p)
46972 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
46973 return true;
46975 return false;
46978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46979 in terms of a pair of pshuflw + pshufhw instructions. */
46981 static bool
46982 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
46984 unsigned char perm2[MAX_VECT_LEN];
46985 unsigned i;
46986 bool ok;
46988 if (d->vmode != V8HImode || !d->one_operand_p)
46989 return false;
46991 /* The two permutations only operate in 64-bit lanes. */
46992 for (i = 0; i < 4; ++i)
46993 if (d->perm[i] >= 4)
46994 return false;
46995 for (i = 4; i < 8; ++i)
46996 if (d->perm[i] < 4)
46997 return false;
46999 if (d->testing_p)
47000 return true;
47002 /* Emit the pshuflw. */
47003 memcpy (perm2, d->perm, 4);
47004 for (i = 4; i < 8; ++i)
47005 perm2[i] = i;
47006 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47007 gcc_assert (ok);
47009 /* Emit the pshufhw. */
47010 memcpy (perm2 + 4, d->perm + 4, 4);
47011 for (i = 0; i < 4; ++i)
47012 perm2[i] = i;
47013 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47014 gcc_assert (ok);
47016 return true;
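/* Illustrative example: the one-operand V8HImode permutation
   {3, 2, 1, 0, 7, 6, 5, 4} keeps the low four and the high four words
   within their own 64-bit halves, so it is expanded as
   pshuflw {3, 2, 1, 0, 4, 5, 6, 7} followed by
   pshufhw {0, 1, 2, 3, 7, 6, 5, 4}.  */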
47019 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47020 the permutation using the SSSE3 palignr instruction. This succeeds
47021 when all of the elements in PERM fit within one vector and we merely
47022 need to shift them down so that a single vector permutation has a
47023 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47024 the vpalignr instruction itself can perform the requested permutation. */
47026 static bool
47027 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47029 unsigned i, nelt = d->nelt;
47030 unsigned min, max, minswap, maxswap;
47031 bool in_order, ok, swap = false;
47032 rtx shift, target;
47033 struct expand_vec_perm_d dcopy;
47035 /* Even with AVX, palignr only operates on 128-bit vectors;
47036 with AVX2, palignr operates on both 128-bit lanes. */
47037 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47038 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47039 return false;
47041 min = 2 * nelt;
47042 max = 0;
47043 minswap = 2 * nelt;
47044 maxswap = 0;
47045 for (i = 0; i < nelt; ++i)
47047 unsigned e = d->perm[i];
47048 unsigned eswap = d->perm[i] ^ nelt;
47049 if (GET_MODE_SIZE (d->vmode) == 32)
47051 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47052 eswap = e ^ (nelt / 2);
47054 if (e < min)
47055 min = e;
47056 if (e > max)
47057 max = e;
47058 if (eswap < minswap)
47059 minswap = eswap;
47060 if (eswap > maxswap)
47061 maxswap = eswap;
47063 if (min == 0
47064 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47066 if (d->one_operand_p
47067 || minswap == 0
47068 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47069 ? nelt / 2 : nelt))
47070 return false;
47071 swap = true;
47072 min = minswap;
47073 max = maxswap;
47076 /* Given that we have SSSE3, we know we'll be able to implement the
47077 single operand permutation after the palignr with pshufb for
47078 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47079 first. */
47080 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47081 return true;
47083 dcopy = *d;
47084 if (swap)
47086 dcopy.op0 = d->op1;
47087 dcopy.op1 = d->op0;
47088 for (i = 0; i < nelt; ++i)
47089 dcopy.perm[i] ^= nelt;
47092 in_order = true;
47093 for (i = 0; i < nelt; ++i)
47095 unsigned e = dcopy.perm[i];
47096 if (GET_MODE_SIZE (d->vmode) == 32
47097 && e >= nelt
47098 && (e & (nelt / 2 - 1)) < min)
47099 e = e - min - (nelt / 2);
47100 else
47101 e = e - min;
47102 if (e != i)
47103 in_order = false;
47104 dcopy.perm[i] = e;
47106 dcopy.one_operand_p = true;
47108 if (single_insn_only_p && !in_order)
47109 return false;
47111 /* For AVX2, test whether we can permute the result in one instruction. */
47112 if (d->testing_p)
47114 if (in_order)
47115 return true;
47116 dcopy.op1 = dcopy.op0;
47117 return expand_vec_perm_1 (&dcopy);
47120 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
47121 if (GET_MODE_SIZE (d->vmode) == 16)
47123 target = gen_reg_rtx (TImode);
47124 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47125 gen_lowpart (TImode, dcopy.op0), shift));
47127 else
47129 target = gen_reg_rtx (V2TImode);
47130 emit_insn (gen_avx2_palignrv2ti (target,
47131 gen_lowpart (V2TImode, dcopy.op1),
47132 gen_lowpart (V2TImode, dcopy.op0),
47133 shift));
47136 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47138 /* Test for the degenerate case where the alignment by itself
47139 produces the desired permutation. */
47140 if (in_order)
47142 emit_move_insn (d->target, dcopy.op0);
47143 return true;
47146 ok = expand_vec_perm_1 (&dcopy);
47147 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47149 return ok;
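/* Illustrative example: the two-operand V16QImode permutation
   {1, 2, ..., 16} selects a contiguous 16-byte window starting at byte 1
   of the op1:op0 concatenation, so min == 1 and the palignr by one byte
   (8 bits) already leaves the result in order; no follow-up one-operand
   permutation is needed.  */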
47152 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47153 the permutation using the SSE4_1 pblendv instruction. This can
47154 reduce a permutation from 2 pshufb plus an or to 1 pshufb plus a pblendv. */
47156 static bool
47157 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47159 unsigned i, which, nelt = d->nelt;
47160 struct expand_vec_perm_d dcopy, dcopy1;
47161 machine_mode vmode = d->vmode;
47162 bool ok;
47164 /* Use the same checks as in expand_vec_perm_blend. */
47165 if (d->one_operand_p)
47166 return false;
47167 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47169 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47171 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47173 else
47174 return false;
47176 /* Figure out which permutation elements do not stay in their
47177 respective lanes. */
47178 for (i = 0, which = 0; i < nelt; ++i)
47180 unsigned e = d->perm[i];
47181 if (e != i)
47182 which |= (e < nelt ? 1 : 2);
47184 /* We can pblend the part whose elements do not stay in their
47185 respective lanes only when these elements all come from one
47186 operand of the permutation.
47187 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47188 lanes, but both 8 and 9 are >= 8.
47189 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47190 respective lanes, and while 8 >= 8, 2 is not. */
47191 if (which != 1 && which != 2)
47192 return false;
47193 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47194 return true;
47196 /* First apply a one-operand permutation to the elements that
47197 do not stay in their respective lanes. */
47198 dcopy = *d;
47199 if (which == 2)
47200 dcopy.op0 = dcopy.op1 = d->op1;
47201 else
47202 dcopy.op0 = dcopy.op1 = d->op0;
47203 if (!d->testing_p)
47204 dcopy.target = gen_reg_rtx (vmode);
47205 dcopy.one_operand_p = true;
47207 for (i = 0; i < nelt; ++i)
47208 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47210 ok = expand_vec_perm_1 (&dcopy);
47211 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47212 return false;
47213 else
47214 gcc_assert (ok);
47215 if (d->testing_p)
47216 return true;
47218 /* Next we put permuted elements into their positions. */
47219 dcopy1 = *d;
47220 if (which == 2)
47221 dcopy1.op1 = dcopy.target;
47222 else
47223 dcopy1.op0 = dcopy.target;
47225 for (i = 0; i < nelt; ++i)
47226 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47228 ok = expand_vec_perm_blend (&dcopy1);
47229 gcc_assert (ok);
47231 return true;
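/* Illustrative walk-through of the {0 1 8 3 4 5 9 7} example from the
   comment above, for an 8-element mode: the out-of-place elements 8 and
   9 both come from op1, so which == 2; the first, one-operand step
   permutes op1 with {0, 1, 0, 3, 4, 5, 1, 7} (perm[i] & 7), and the
   second step blends that result into op0 with
   {0, 1, 10, 3, 4, 5, 14, 7}, i.e. taking the permuted op1 at
   positions 2 and 6.  */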
47234 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47236 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47237 a two vector permutation into a single vector permutation by using
47238 an interleave operation to merge the vectors. */
47240 static bool
47241 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47243 struct expand_vec_perm_d dremap, dfinal;
47244 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47245 unsigned HOST_WIDE_INT contents;
47246 unsigned char remap[2 * MAX_VECT_LEN];
47247 rtx_insn *seq;
47248 bool ok, same_halves = false;
47250 if (GET_MODE_SIZE (d->vmode) == 16)
47252 if (d->one_operand_p)
47253 return false;
47255 else if (GET_MODE_SIZE (d->vmode) == 32)
47257 if (!TARGET_AVX)
47258 return false;
47259 /* For 32-byte modes allow even d->one_operand_p.
47260 The lack of cross-lane shuffling in some instructions
47261 might prevent a single insn shuffle. */
47262 dfinal = *d;
47263 dfinal.testing_p = true;
47264 /* If expand_vec_perm_interleave3 can expand this into
47265 a 3 insn sequence, give up and let it be expanded as
47266 a 3 insn sequence. While that is one insn longer, it
47267 doesn't need a memory operand, and in the common case
47268 where both the interleave low and the interleave high
47269 permutation with the same operands are adjacent, it
47270 needs only 4 insns for both after CSE. */
47271 if (expand_vec_perm_interleave3 (&dfinal))
47272 return false;
47274 else
47275 return false;
47277 /* Examine from whence the elements come. */
47278 contents = 0;
47279 for (i = 0; i < nelt; ++i)
47280 contents |= HOST_WIDE_INT_1U << d->perm[i];
47282 memset (remap, 0xff, sizeof (remap));
47283 dremap = *d;
47285 if (GET_MODE_SIZE (d->vmode) == 16)
47287 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47289 /* Split the two input vectors into 4 halves. */
47290 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47291 h2 = h1 << nelt2;
47292 h3 = h2 << nelt2;
47293 h4 = h3 << nelt2;
47295 /* If the elements are all from the low halves, use interleave low;
47296 similarly for interleave high. If the elements are from mis-matched
47297 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47298 if ((contents & (h1 | h3)) == contents)
47300 /* punpckl* */
47301 for (i = 0; i < nelt2; ++i)
47303 remap[i] = i * 2;
47304 remap[i + nelt] = i * 2 + 1;
47305 dremap.perm[i * 2] = i;
47306 dremap.perm[i * 2 + 1] = i + nelt;
47308 if (!TARGET_SSE2 && d->vmode == V4SImode)
47309 dremap.vmode = V4SFmode;
47311 else if ((contents & (h2 | h4)) == contents)
47313 /* punpckh* */
47314 for (i = 0; i < nelt2; ++i)
47316 remap[i + nelt2] = i * 2;
47317 remap[i + nelt + nelt2] = i * 2 + 1;
47318 dremap.perm[i * 2] = i + nelt2;
47319 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47321 if (!TARGET_SSE2 && d->vmode == V4SImode)
47322 dremap.vmode = V4SFmode;
47324 else if ((contents & (h1 | h4)) == contents)
47326 /* shufps */
47327 for (i = 0; i < nelt2; ++i)
47329 remap[i] = i;
47330 remap[i + nelt + nelt2] = i + nelt2;
47331 dremap.perm[i] = i;
47332 dremap.perm[i + nelt2] = i + nelt + nelt2;
47334 if (nelt != 4)
47336 /* shufpd */
47337 dremap.vmode = V2DImode;
47338 dremap.nelt = 2;
47339 dremap.perm[0] = 0;
47340 dremap.perm[1] = 3;
47343 else if ((contents & (h2 | h3)) == contents)
47345 /* shufps */
47346 for (i = 0; i < nelt2; ++i)
47348 remap[i + nelt2] = i;
47349 remap[i + nelt] = i + nelt2;
47350 dremap.perm[i] = i + nelt2;
47351 dremap.perm[i + nelt2] = i + nelt;
47353 if (nelt != 4)
47355 /* shufpd */
47356 dremap.vmode = V2DImode;
47357 dremap.nelt = 2;
47358 dremap.perm[0] = 1;
47359 dremap.perm[1] = 2;
47362 else
47363 return false;
47365 else
47367 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47368 unsigned HOST_WIDE_INT q[8];
47369 unsigned int nonzero_halves[4];
47371 /* Split the two input vectors into 8 quarters. */
47372 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47373 for (i = 1; i < 8; ++i)
47374 q[i] = q[0] << (nelt4 * i);
47375 for (i = 0; i < 4; ++i)
47376 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47378 nonzero_halves[nzcnt] = i;
47379 ++nzcnt;
47382 if (nzcnt == 1)
47384 gcc_assert (d->one_operand_p);
47385 nonzero_halves[1] = nonzero_halves[0];
47386 same_halves = true;
47388 else if (d->one_operand_p)
47390 gcc_assert (nonzero_halves[0] == 0);
47391 gcc_assert (nonzero_halves[1] == 1);
47394 if (nzcnt <= 2)
47396 if (d->perm[0] / nelt2 == nonzero_halves[1])
47398 /* Attempt to increase the likelihood that dfinal
47399 shuffle will be intra-lane. */
47400 std::swap (nonzero_halves[0], nonzero_halves[1]);
47403 /* vperm2f128 or vperm2i128. */
47404 for (i = 0; i < nelt2; ++i)
47406 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47407 remap[i + nonzero_halves[0] * nelt2] = i;
47408 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47409 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47412 if (d->vmode != V8SFmode
47413 && d->vmode != V4DFmode
47414 && d->vmode != V8SImode)
47416 dremap.vmode = V8SImode;
47417 dremap.nelt = 8;
47418 for (i = 0; i < 4; ++i)
47420 dremap.perm[i] = i + nonzero_halves[0] * 4;
47421 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47425 else if (d->one_operand_p)
47426 return false;
47427 else if (TARGET_AVX2
47428 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47430 /* vpunpckl* */
47431 for (i = 0; i < nelt4; ++i)
47433 remap[i] = i * 2;
47434 remap[i + nelt] = i * 2 + 1;
47435 remap[i + nelt2] = i * 2 + nelt2;
47436 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47437 dremap.perm[i * 2] = i;
47438 dremap.perm[i * 2 + 1] = i + nelt;
47439 dremap.perm[i * 2 + nelt2] = i + nelt2;
47440 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47443 else if (TARGET_AVX2
47444 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47446 /* vpunpckh* */
47447 for (i = 0; i < nelt4; ++i)
47449 remap[i + nelt4] = i * 2;
47450 remap[i + nelt + nelt4] = i * 2 + 1;
47451 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47452 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47453 dremap.perm[i * 2] = i + nelt4;
47454 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47455 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47456 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47459 else
47460 return false;
47463 /* Use the remapping array set up above to move the elements from their
47464 swizzled locations into their final destinations. */
47465 dfinal = *d;
47466 for (i = 0; i < nelt; ++i)
47468 unsigned e = remap[d->perm[i]];
47469 gcc_assert (e < nelt);
47470 /* If same_halves is true, both halves of the remapped vector are the
47471 same. Avoid cross-lane accesses if possible. */
47472 if (same_halves && i >= nelt2)
47474 gcc_assert (e < nelt2);
47475 dfinal.perm[i] = e + nelt2;
47477 else
47478 dfinal.perm[i] = e;
47480 if (!d->testing_p)
47482 dremap.target = gen_reg_rtx (dremap.vmode);
47483 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47485 dfinal.op1 = dfinal.op0;
47486 dfinal.one_operand_p = true;
47488 /* Test if the final remap can be done with a single insn. For V4SFmode or
47489 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47490 start_sequence ();
47491 ok = expand_vec_perm_1 (&dfinal);
47492 seq = get_insns ();
47493 end_sequence ();
47495 if (!ok)
47496 return false;
47498 if (d->testing_p)
47499 return true;
47501 if (dremap.vmode != dfinal.vmode)
47503 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47504 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47507 ok = expand_vec_perm_1 (&dremap);
47508 gcc_assert (ok);
47510 emit_insn (seq);
47511 return true;
47514 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47515 a single vector cross-lane permutation into vpermq followed
47516 by any of the single insn permutations. */
47518 static bool
47519 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47521 struct expand_vec_perm_d dremap, dfinal;
47522 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47523 unsigned contents[2];
47524 bool ok;
47526 if (!(TARGET_AVX2
47527 && (d->vmode == V32QImode || d->vmode == V16HImode)
47528 && d->one_operand_p))
47529 return false;
47531 contents[0] = 0;
47532 contents[1] = 0;
47533 for (i = 0; i < nelt2; ++i)
47535 contents[0] |= 1u << (d->perm[i] / nelt4);
47536 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47539 for (i = 0; i < 2; ++i)
47541 unsigned int cnt = 0;
47542 for (j = 0; j < 4; ++j)
47543 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47544 return false;
47547 if (d->testing_p)
47548 return true;
47550 dremap = *d;
47551 dremap.vmode = V4DImode;
47552 dremap.nelt = 4;
47553 dremap.target = gen_reg_rtx (V4DImode);
47554 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47555 dremap.op1 = dremap.op0;
47556 dremap.one_operand_p = true;
47557 for (i = 0; i < 2; ++i)
47559 unsigned int cnt = 0;
47560 for (j = 0; j < 4; ++j)
47561 if ((contents[i] & (1u << j)) != 0)
47562 dremap.perm[2 * i + cnt++] = j;
47563 for (; cnt < 2; ++cnt)
47564 dremap.perm[2 * i + cnt] = 0;
47567 dfinal = *d;
47568 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47569 dfinal.op1 = dfinal.op0;
47570 dfinal.one_operand_p = true;
47571 for (i = 0, j = 0; i < nelt; ++i)
47573 if (i == nelt2)
47574 j = 2;
47575 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47576 if ((d->perm[i] / nelt4) == dremap.perm[j])
47578 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47579 dfinal.perm[i] |= nelt4;
47580 else
47581 gcc_unreachable ();
47584 ok = expand_vec_perm_1 (&dremap);
47585 gcc_assert (ok);
47587 ok = expand_vec_perm_1 (&dfinal);
47588 gcc_assert (ok);
47590 return true;
47593 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47594 a vector permutation using two instructions: vperm2f128 or
47595 vperm2i128 followed by any single in-lane permutation. */
47597 static bool
47598 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47600 struct expand_vec_perm_d dfirst, dsecond;
47601 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47602 bool ok;
47604 if (!TARGET_AVX
47605 || GET_MODE_SIZE (d->vmode) != 32
47606 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47607 return false;
47609 dsecond = *d;
47610 dsecond.one_operand_p = false;
47611 dsecond.testing_p = true;
47613 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47614 immediate. For perm < 16 the second permutation uses
47615 d->op0 as first operand, for perm >= 16 it uses d->op1
47616 as first operand. The second operand is the result of
47617 vperm2[fi]128. */
47618 for (perm = 0; perm < 32; perm++)
47620 /* Ignore permutations which do not move anything cross-lane. */
47621 if (perm < 16)
47623 /* The second shuffle for e.g. V4DFmode has
47624 0123 and ABCD operands.
47625 Ignore AB23, as 23 is already in the second lane
47626 of the first operand. */
47627 if ((perm & 0xc) == (1 << 2)) continue;
47628 /* And 01CD, as 01 is in the first lane of the first
47629 operand. */
47630 if ((perm & 3) == 0) continue;
47631 /* And 4567, as then the vperm2[fi]128 doesn't change
47632 anything on the original 4567 second operand. */
47633 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47635 else
47637 /* The second shuffle for e.g. V4DFmode has
47638 4567 and ABCD operands.
47639 Ignore AB67, as 67 is already in the second lane
47640 of the first operand. */
47641 if ((perm & 0xc) == (3 << 2)) continue;
47642 /* And 45CD, as 45 is in the first lane of the first
47643 operand. */
47644 if ((perm & 3) == 2) continue;
47645 /* And 0123, as then the vperm2[fi]128 doesn't change
47646 anything on the original 0123 first operand. */
47647 if ((perm & 0xf) == (1 << 2)) continue;
47650 for (i = 0; i < nelt; i++)
47652 j = d->perm[i] / nelt2;
47653 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
47654 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
47655 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
47656 dsecond.perm[i] = d->perm[i] & (nelt - 1);
47657 else
47658 break;
47661 if (i == nelt)
47663 start_sequence ();
47664 ok = expand_vec_perm_1 (&dsecond);
47665 end_sequence ();
47667 else
47668 ok = false;
47670 if (ok)
47672 if (d->testing_p)
47673 return true;
47675 /* Found a usable second shuffle. dfirst will be
47676 vperm2f128 on d->op0 and d->op1. */
47677 dsecond.testing_p = false;
47678 dfirst = *d;
47679 dfirst.target = gen_reg_rtx (d->vmode);
47680 for (i = 0; i < nelt; i++)
47681 dfirst.perm[i] = (i & (nelt2 - 1))
47682 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
47684 canonicalize_perm (&dfirst);
47685 ok = expand_vec_perm_1 (&dfirst);
47686 gcc_assert (ok);
47688 /* And dsecond is some single insn shuffle, taking
47689 d->op0 and result of vperm2f128 (if perm < 16) or
47690 d->op1 and result of vperm2f128 (otherwise). */
47691 if (perm >= 16)
47692 dsecond.op0 = dsecond.op1;
47693 dsecond.op1 = dfirst.target;
47695 ok = expand_vec_perm_1 (&dsecond);
47696 gcc_assert (ok);
47698 return true;
47701 /* For one operand, the only useful vperm2f128 permutation is 0x01
47702 aka lanes swap. */
47703 if (d->one_operand_p)
47704 return false;
47707 return false;
47710 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47711 a two vector permutation using 2 intra-lane interleave insns
47712 and cross-lane shuffle for 32-byte vectors. */
47714 static bool
47715 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
47717 unsigned i, nelt;
47718 rtx (*gen) (rtx, rtx, rtx);
47720 if (d->one_operand_p)
47721 return false;
47722 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
47724 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
47726 else
47727 return false;
47729 nelt = d->nelt;
47730 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
47731 return false;
47732 for (i = 0; i < nelt; i += 2)
47733 if (d->perm[i] != d->perm[0] + i / 2
47734 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
47735 return false;
47737 if (d->testing_p)
47738 return true;
47740 switch (d->vmode)
47742 case E_V32QImode:
47743 if (d->perm[0])
47744 gen = gen_vec_interleave_highv32qi;
47745 else
47746 gen = gen_vec_interleave_lowv32qi;
47747 break;
47748 case E_V16HImode:
47749 if (d->perm[0])
47750 gen = gen_vec_interleave_highv16hi;
47751 else
47752 gen = gen_vec_interleave_lowv16hi;
47753 break;
47754 case E_V8SImode:
47755 if (d->perm[0])
47756 gen = gen_vec_interleave_highv8si;
47757 else
47758 gen = gen_vec_interleave_lowv8si;
47759 break;
47760 case E_V4DImode:
47761 if (d->perm[0])
47762 gen = gen_vec_interleave_highv4di;
47763 else
47764 gen = gen_vec_interleave_lowv4di;
47765 break;
47766 case E_V8SFmode:
47767 if (d->perm[0])
47768 gen = gen_vec_interleave_highv8sf;
47769 else
47770 gen = gen_vec_interleave_lowv8sf;
47771 break;
47772 case E_V4DFmode:
47773 if (d->perm[0])
47774 gen = gen_vec_interleave_highv4df;
47775 else
47776 gen = gen_vec_interleave_lowv4df;
47777 break;
47778 default:
47779 gcc_unreachable ();
47782 emit_insn (gen (d->target, d->op0, d->op1));
47783 return true;
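/* Illustrative example: the two-operand V8SImode permutation
   {0, 8, 1, 9, 2, 10, 3, 11} satisfies the checks above with
   perm[0] == 0, so it is expanded via gen_vec_interleave_lowv8si;
   with perm[0] == nelt / 2, e.g. {4, 12, 5, 13, 6, 14, 7, 15}, the
   interleave-high form is used instead.  */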
47786 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
47787 a single vector permutation using a single intra-lane vector
47788 permutation, vperm2f128 swapping the lanes and vblend* insn blending
47789 the non-swapped and swapped vectors together. */
47791 static bool
47792 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
47794 struct expand_vec_perm_d dfirst, dsecond;
47795 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
47796 rtx_insn *seq;
47797 bool ok;
47798 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
47800 if (!TARGET_AVX
47801 || TARGET_AVX2
47802 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
47803 || !d->one_operand_p)
47804 return false;
47806 dfirst = *d;
47807 for (i = 0; i < nelt; i++)
47808 dfirst.perm[i] = 0xff;
47809 for (i = 0, msk = 0; i < nelt; i++)
47811 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
47812 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
47813 return false;
47814 dfirst.perm[j] = d->perm[i];
47815 if (j != i)
47816 msk |= (1 << i);
47818 for (i = 0; i < nelt; i++)
47819 if (dfirst.perm[i] == 0xff)
47820 dfirst.perm[i] = i;
47822 if (!d->testing_p)
47823 dfirst.target = gen_reg_rtx (dfirst.vmode);
47825 start_sequence ();
47826 ok = expand_vec_perm_1 (&dfirst);
47827 seq = get_insns ();
47828 end_sequence ();
47830 if (!ok)
47831 return false;
47833 if (d->testing_p)
47834 return true;
47836 emit_insn (seq);
47838 dsecond = *d;
47839 dsecond.op0 = dfirst.target;
47840 dsecond.op1 = dfirst.target;
47841 dsecond.one_operand_p = true;
47842 dsecond.target = gen_reg_rtx (dsecond.vmode);
47843 for (i = 0; i < nelt; i++)
47844 dsecond.perm[i] = i ^ nelt2;
47846 ok = expand_vec_perm_1 (&dsecond);
47847 gcc_assert (ok);
47849 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
47850 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
47851 return true;
47854 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
47855 permutation using two vperm2f128, followed by a vshufpd insn blending
47856 the two vectors together. */
47858 static bool
47859 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
47861 struct expand_vec_perm_d dfirst, dsecond, dthird;
47862 bool ok;
47864 if (!TARGET_AVX || (d->vmode != V4DFmode))
47865 return false;
47867 if (d->testing_p)
47868 return true;
47870 dfirst = *d;
47871 dsecond = *d;
47872 dthird = *d;
47874 dfirst.perm[0] = (d->perm[0] & ~1);
47875 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
47876 dfirst.perm[2] = (d->perm[2] & ~1);
47877 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
47878 dsecond.perm[0] = (d->perm[1] & ~1);
47879 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
47880 dsecond.perm[2] = (d->perm[3] & ~1);
47881 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
47882 dthird.perm[0] = (d->perm[0] % 2);
47883 dthird.perm[1] = (d->perm[1] % 2) + 4;
47884 dthird.perm[2] = (d->perm[2] % 2) + 2;
47885 dthird.perm[3] = (d->perm[3] % 2) + 6;
47887 dfirst.target = gen_reg_rtx (dfirst.vmode);
47888 dsecond.target = gen_reg_rtx (dsecond.vmode);
47889 dthird.op0 = dfirst.target;
47890 dthird.op1 = dsecond.target;
47891 dthird.one_operand_p = false;
47893 canonicalize_perm (&dfirst);
47894 canonicalize_perm (&dsecond);
47896 ok = expand_vec_perm_1 (&dfirst)
47897 && expand_vec_perm_1 (&dsecond)
47898 && expand_vec_perm_1 (&dthird);
47900 gcc_assert (ok);
47902 return true;
47905 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
47906 permutation with two pshufb insns and an ior. We should have already
47907 failed all two instruction sequences. */
47909 static bool
47910 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
47912 rtx rperm[2][16], vperm, l, h, op, m128;
47913 unsigned int i, nelt, eltsz;
47915 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47916 return false;
47917 gcc_assert (!d->one_operand_p);
47919 if (d->testing_p)
47920 return true;
47922 nelt = d->nelt;
47923 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47925 /* Generate two permutation masks. If the required element is within
47926 the given vector it is shuffled into the proper lane. If the required
47927 element is in the other vector, force a zero into the lane by setting
47928 bit 7 in the permutation mask. */
47929 m128 = GEN_INT (-128);
47930 for (i = 0; i < nelt; ++i)
47932 unsigned j, e = d->perm[i];
47933 unsigned which = (e >= nelt);
47934 if (e >= nelt)
47935 e -= nelt;
47937 for (j = 0; j < eltsz; ++j)
47939 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
47940 rperm[1-which][i*eltsz + j] = m128;
47944 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
47945 vperm = force_reg (V16QImode, vperm);
47947 l = gen_reg_rtx (V16QImode);
47948 op = gen_lowpart (V16QImode, d->op0);
47949 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
47951 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
47952 vperm = force_reg (V16QImode, vperm);
47954 h = gen_reg_rtx (V16QImode);
47955 op = gen_lowpart (V16QImode, d->op1);
47956 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
47958 op = d->target;
47959 if (d->vmode != V16QImode)
47960 op = gen_reg_rtx (V16QImode);
47961 emit_insn (gen_iorv16qi3 (op, l, h));
47962 if (op != d->target)
47963 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47965 return true;
47968 /* Implement arbitrary permutation of a single V32QImode or V16HImode operand
47969 with two vpshufb insns, vpermq and vpor. We should have already failed
47970 all two or three instruction sequences. */
47972 static bool
47973 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
47975 rtx rperm[2][32], vperm, l, h, hp, op, m128;
47976 unsigned int i, nelt, eltsz;
47978 if (!TARGET_AVX2
47979 || !d->one_operand_p
47980 || (d->vmode != V32QImode && d->vmode != V16HImode))
47981 return false;
47983 if (d->testing_p)
47984 return true;
47986 nelt = d->nelt;
47987 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47989 /* Generate two permutation masks. If the required element is within
47990 the same lane, it is shuffled in. If the required element is from the
47991 other lane, force a zero by setting bit 7 in the permutation mask.
47992 The other mask has a non-negative element wherever an element is
47993 requested from the other lane, but that entry is also moved to the
47994 other lane, so that after the vpshufb the two V2TImode halves of the
47995 result can be swapped. */
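/* E.g. for V32QImode with d->perm[0] == 20, the wanted byte lives in
   the high lane while position 0 is in the low lane, so rperm[1][16] == 4
   selects it within its own lane, the vpermq below swaps the two lanes
   so that it lands at position 0, and rperm[0][0] == -128 zeroes that
   byte in the other vpshufb result before the ior. */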
47996 m128 = GEN_INT (-128);
47997 for (i = 0; i < nelt; ++i)
47999 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48000 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48002 for (j = 0; j < eltsz; ++j)
48004 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48005 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48009 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48010 vperm = force_reg (V32QImode, vperm);
48012 h = gen_reg_rtx (V32QImode);
48013 op = gen_lowpart (V32QImode, d->op0);
48014 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48016 /* Swap the 128-bit lanes of h into hp. */
48017 hp = gen_reg_rtx (V4DImode);
48018 op = gen_lowpart (V4DImode, h);
48019 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48020 const1_rtx));
48022 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48023 vperm = force_reg (V32QImode, vperm);
48025 l = gen_reg_rtx (V32QImode);
48026 op = gen_lowpart (V32QImode, d->op0);
48027 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48029 op = d->target;
48030 if (d->vmode != V32QImode)
48031 op = gen_reg_rtx (V32QImode);
48032 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48033 if (op != d->target)
48034 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48036 return true;
48039 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48040 and extract-odd permutations of two V32QImode or V16HImode operands
48041 with two vpshufb insns, vpor and vpermq. We should have already
48042 failed all two or three instruction sequences. */
48044 static bool
48045 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48047 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48048 unsigned int i, nelt, eltsz;
48050 if (!TARGET_AVX2
48051 || d->one_operand_p
48052 || (d->vmode != V32QImode && d->vmode != V16HImode))
48053 return false;
48055 for (i = 0; i < d->nelt; ++i)
48056 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48057 return false;
48059 if (d->testing_p)
48060 return true;
48062 nelt = d->nelt;
48063 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48065 /* Generate two permutation masks. In the first permutation mask
48066 the first quarter will contain indexes for the first half
48067 of the op0, the second quarter will contain bit 7 set, third quarter
48068 will contain indexes for the second half of the op0 and the
48069 last quarter bit 7 set. In the second permutation mask
48070 the first quarter will contain bit 7 set, the second quarter
48071 indexes for the first half of the op1, the third quarter bit 7 set
48072 and last quarter indexes for the second half of the op1.
48073 I.e. the first mask e.g. for V32QImode extract even will be:
48074 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48075 (all values masked with 0xf except for -128) and second mask
48076 for extract even will be
48077 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48078 m128 = GEN_INT (-128);
48079 for (i = 0; i < nelt; ++i)
48081 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48082 unsigned which = d->perm[i] >= nelt;
48083 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48085 for (j = 0; j < eltsz; ++j)
48087 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48088 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48092 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48093 vperm = force_reg (V32QImode, vperm);
48095 l = gen_reg_rtx (V32QImode);
48096 op = gen_lowpart (V32QImode, d->op0);
48097 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48099 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48100 vperm = force_reg (V32QImode, vperm);
48102 h = gen_reg_rtx (V32QImode);
48103 op = gen_lowpart (V32QImode, d->op1);
48104 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48106 ior = gen_reg_rtx (V32QImode);
48107 emit_insn (gen_iorv32qi3 (ior, l, h));
48109 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48110 op = gen_reg_rtx (V4DImode);
48111 ior = gen_lowpart (V4DImode, ior);
48112 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48113 const1_rtx, GEN_INT (3)));
48114 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48116 return true;
48119 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48120 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48121 with two "and" and "pack" or two "shift" and "pack" insns. We should
48122 have already failed all two instruction sequences. */
48124 static bool
48125 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48127 rtx op, dop0, dop1, t;
48128 unsigned i, odd, c, s, nelt = d->nelt;
48129 bool end_perm = false;
48130 machine_mode half_mode;
48131 rtx (*gen_and) (rtx, rtx, rtx);
48132 rtx (*gen_pack) (rtx, rtx, rtx);
48133 rtx (*gen_shift) (rtx, rtx, rtx);
48135 if (d->one_operand_p)
48136 return false;
48138 switch (d->vmode)
48140 case E_V8HImode:
48141 /* Required for "pack". */
48142 if (!TARGET_SSE4_1)
48143 return false;
48144 c = 0xffff;
48145 s = 16;
48146 half_mode = V4SImode;
48147 gen_and = gen_andv4si3;
48148 gen_pack = gen_sse4_1_packusdw;
48149 gen_shift = gen_lshrv4si3;
48150 break;
48151 case E_V16QImode:
48152 /* No check as all instructions are SSE2. */
48153 c = 0xff;
48154 s = 8;
48155 half_mode = V8HImode;
48156 gen_and = gen_andv8hi3;
48157 gen_pack = gen_sse2_packuswb;
48158 gen_shift = gen_lshrv8hi3;
48159 break;
48160 case E_V16HImode:
48161 if (!TARGET_AVX2)
48162 return false;
48163 c = 0xffff;
48164 s = 16;
48165 half_mode = V8SImode;
48166 gen_and = gen_andv8si3;
48167 gen_pack = gen_avx2_packusdw;
48168 gen_shift = gen_lshrv8si3;
48169 end_perm = true;
48170 break;
48171 case E_V32QImode:
48172 if (!TARGET_AVX2)
48173 return false;
48174 c = 0xff;
48175 s = 8;
48176 half_mode = V16HImode;
48177 gen_and = gen_andv16hi3;
48178 gen_pack = gen_avx2_packuswb;
48179 gen_shift = gen_lshrv16hi3;
48180 end_perm = true;
48181 break;
48182 default:
48183 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48184 general shuffles. */
48185 return false;
48188 /* Check that permutation is even or odd. */
48189 odd = d->perm[0];
48190 if (odd > 1)
48191 return false;
48193 for (i = 1; i < nelt; ++i)
48194 if (d->perm[i] != 2 * i + odd)
48195 return false;
48197 if (d->testing_p)
48198 return true;
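/* E.g. for V16QImode extract-even both operands are viewed as V8HImode,
   ANDed with 0x00ff so that each word keeps only its even byte, and
   packuswb narrows both and concatenates the results; for extract-odd
   the AND is replaced by a logical right shift by 8 bits. */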
48200 dop0 = gen_reg_rtx (half_mode);
48201 dop1 = gen_reg_rtx (half_mode);
48202 if (odd == 0)
48204 t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
48205 t = force_reg (half_mode, t);
48206 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48207 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48209 else
48211 emit_insn (gen_shift (dop0,
48212 gen_lowpart (half_mode, d->op0),
48213 GEN_INT (s)));
48214 emit_insn (gen_shift (dop1,
48215 gen_lowpart (half_mode, d->op1),
48216 GEN_INT (s)));
48218 /* In the AVX2 256-bit case we need to permute the pack result. */
48219 if (TARGET_AVX2 && end_perm)
48221 op = gen_reg_rtx (d->vmode);
48222 t = gen_reg_rtx (V4DImode);
48223 emit_insn (gen_pack (op, dop0, dop1));
48224 emit_insn (gen_avx2_permv4di_1 (t,
48225 gen_lowpart (V4DImode, op),
48226 const0_rtx,
48227 const2_rtx,
48228 const1_rtx,
48229 GEN_INT (3)));
48230 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48232 else
48233 emit_insn (gen_pack (d->target, dop0, dop1));
48235 return true;
48238 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48239 and extract-odd permutations of two V64QI operands
48240 with two "shifts", two "truncs" and one "concat" insns for "odd"
48241 and two "truncs" and one concat insn for "even."
48242 We should have already failed all two instruction sequences. */
48244 static bool
48245 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
48247 rtx t1, t2, t3, t4;
48248 unsigned i, odd, nelt = d->nelt;
48250 if (!TARGET_AVX512BW
48251 || d->one_operand_p
48252 || d->vmode != V64QImode)
48253 return false;
48255 /* Check that permutation is even or odd. */
48256 odd = d->perm[0];
48257 if (odd > 1)
48258 return false;
48260 for (i = 1; i < nelt; ++i)
48261 if (d->perm[i] != 2 * i + odd)
48262 return false;
48264 if (d->testing_p)
48265 return true;
48268 if (odd)
48270 t1 = gen_reg_rtx (V32HImode);
48271 t2 = gen_reg_rtx (V32HImode);
48272 emit_insn (gen_lshrv32hi3 (t1,
48273 gen_lowpart (V32HImode, d->op0),
48274 GEN_INT (8)));
48275 emit_insn (gen_lshrv32hi3 (t2,
48276 gen_lowpart (V32HImode, d->op1),
48277 GEN_INT (8)));
48279 else
48281 t1 = gen_lowpart (V32HImode, d->op0);
48282 t2 = gen_lowpart (V32HImode, d->op1);
48285 t3 = gen_reg_rtx (V32QImode);
48286 t4 = gen_reg_rtx (V32QImode);
48287 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
48288 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
48289 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
48291 return true;
48294 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
48295 and extract-odd permutations. */
48297 static bool
48298 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48300 rtx t1, t2, t3, t4, t5;
48302 switch (d->vmode)
48304 case E_V4DFmode:
48305 if (d->testing_p)
48306 break;
48307 t1 = gen_reg_rtx (V4DFmode);
48308 t2 = gen_reg_rtx (V4DFmode);
48310 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48311 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48312 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48314 /* Now an unpck[lh]pd will produce the result required. */
48315 if (odd)
48316 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48317 else
48318 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48319 emit_insn (t3);
48320 break;
48322 case E_V8SFmode:
48324 int mask = odd ? 0xdd : 0x88;
48326 if (d->testing_p)
48327 break;
48328 t1 = gen_reg_rtx (V8SFmode);
48329 t2 = gen_reg_rtx (V8SFmode);
48330 t3 = gen_reg_rtx (V8SFmode);
48332 /* Shuffle within the 128-bit lanes to produce:
48333 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48334 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48335 GEN_INT (mask)));
48337 /* Shuffle the lanes around to produce:
48338 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48339 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48340 GEN_INT (0x3)));
48342 /* Shuffle within the 128-bit lanes to produce:
48343 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48344 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48346 /* Shuffle within the 128-bit lanes to produce:
48347 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48348 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48350 /* Shuffle the lanes around to produce:
48351 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48352 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48353 GEN_INT (0x20)));
48355 break;
48357 case E_V2DFmode:
48358 case E_V4SFmode:
48359 case E_V2DImode:
48360 case E_V4SImode:
48361 /* These are always directly implementable by expand_vec_perm_1. */
48362 gcc_unreachable ();
48364 case E_V8HImode:
48365 if (TARGET_SSE4_1)
48366 return expand_vec_perm_even_odd_pack (d);
48367 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48368 return expand_vec_perm_pshufb2 (d);
48369 else
48371 if (d->testing_p)
48372 break;
48373 /* We need 2*log2(N)-1 operations to achieve odd/even
48374 with interleave. */
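/* For V8HImode (N == 8) that is the five interleave insns emitted
   below. */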
48375 t1 = gen_reg_rtx (V8HImode);
48376 t2 = gen_reg_rtx (V8HImode);
48377 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48378 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48379 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48380 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48381 if (odd)
48382 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48383 else
48384 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48385 emit_insn (t3);
48387 break;
48389 case E_V16QImode:
48390 return expand_vec_perm_even_odd_pack (d);
48392 case E_V16HImode:
48393 case E_V32QImode:
48394 return expand_vec_perm_even_odd_pack (d);
48396 case E_V64QImode:
48397 return expand_vec_perm_even_odd_trunc (d);
48399 case E_V4DImode:
48400 if (!TARGET_AVX2)
48402 struct expand_vec_perm_d d_copy = *d;
48403 d_copy.vmode = V4DFmode;
48404 if (d->testing_p)
48405 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
48406 else
48407 d_copy.target = gen_reg_rtx (V4DFmode);
48408 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48409 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48410 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48412 if (!d->testing_p)
48413 emit_move_insn (d->target,
48414 gen_lowpart (V4DImode, d_copy.target));
48415 return true;
48417 return false;
48420 if (d->testing_p)
48421 break;
48423 t1 = gen_reg_rtx (V4DImode);
48424 t2 = gen_reg_rtx (V4DImode);
48426 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48427 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48428 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48430 /* Now a vpunpck[lh]qdq will produce the result required. */
48431 if (odd)
48432 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48433 else
48434 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48435 emit_insn (t3);
48436 break;
48438 case E_V8SImode:
48439 if (!TARGET_AVX2)
48441 struct expand_vec_perm_d d_copy = *d;
48442 d_copy.vmode = V8SFmode;
48443 if (d->testing_p)
48444 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
48445 else
48446 d_copy.target = gen_reg_rtx (V8SFmode);
48447 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48448 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48449 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48451 if (!d->testing_p)
48452 emit_move_insn (d->target,
48453 gen_lowpart (V8SImode, d_copy.target));
48454 return true;
48456 return false;
48459 if (d->testing_p)
48460 break;
48462 t1 = gen_reg_rtx (V8SImode);
48463 t2 = gen_reg_rtx (V8SImode);
48464 t3 = gen_reg_rtx (V4DImode);
48465 t4 = gen_reg_rtx (V4DImode);
48466 t5 = gen_reg_rtx (V4DImode);
48468 /* Shuffle the lanes around into
48469 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48470 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48471 gen_lowpart (V4DImode, d->op1),
48472 GEN_INT (0x20)));
48473 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48474 gen_lowpart (V4DImode, d->op1),
48475 GEN_INT (0x31)));
48477 /* Swap the 2nd and 3rd position in each lane into
48478 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48479 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48480 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48481 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48482 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48484 /* Now a vpunpck[lh]qdq will produce
48485 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48486 if (odd)
48487 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48488 gen_lowpart (V4DImode, t2));
48489 else
48490 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48491 gen_lowpart (V4DImode, t2));
48492 emit_insn (t3);
48493 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48494 break;
48496 default:
48497 gcc_unreachable ();
48500 return true;
48503 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48504 extract-even and extract-odd permutations. */
48506 static bool
48507 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48509 unsigned i, odd, nelt = d->nelt;
48511 odd = d->perm[0];
48512 if (odd != 0 && odd != 1)
48513 return false;
48515 for (i = 1; i < nelt; ++i)
48516 if (d->perm[i] != 2 * i + odd)
48517 return false;
48519 return expand_vec_perm_even_odd_1 (d, odd);
48522 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
48523 permutations. We assume that expand_vec_perm_1 has already failed. */
48525 static bool
48526 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48528 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48529 machine_mode vmode = d->vmode;
48530 unsigned char perm2[4];
48531 rtx op0 = d->op0, dest;
48532 bool ok;
48534 switch (vmode)
48536 case E_V4DFmode:
48537 case E_V8SFmode:
48538 /* These are special-cased in sse.md so that we can optionally
48539 use the vbroadcast instruction. They expand to two insns
48540 if the input happens to be in a register. */
48541 gcc_unreachable ();
48543 case E_V2DFmode:
48544 case E_V2DImode:
48545 case E_V4SFmode:
48546 case E_V4SImode:
48547 /* These are always implementable using standard shuffle patterns. */
48548 gcc_unreachable ();
48550 case E_V8HImode:
48551 case E_V16QImode:
48552 /* These can be implemented via interleave. We save one insn by
48553 stopping once we have promoted to V4SImode and then use pshufd. */
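/* E.g. broadcasting element 5 of a V8HImode vector: a single
   interleave-high leaves { 4 4 5 5 6 6 7 7 }, and viewed as V4SImode
   the wanted pair is element 1, which the final pshufd replicates. */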
48554 if (d->testing_p)
48555 return true;
48558 rtx dest;
48559 rtx (*gen) (rtx, rtx, rtx)
48560 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48561 : gen_vec_interleave_lowv8hi;
48563 if (elt >= nelt2)
48565 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48566 : gen_vec_interleave_highv8hi;
48567 elt -= nelt2;
48569 nelt2 /= 2;
48571 dest = gen_reg_rtx (vmode);
48572 emit_insn (gen (dest, op0, op0));
48573 vmode = get_mode_wider_vector (vmode);
48574 op0 = gen_lowpart (vmode, dest);
48576 while (vmode != V4SImode);
48578 memset (perm2, elt, 4);
48579 dest = gen_reg_rtx (V4SImode);
48580 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48581 gcc_assert (ok);
48582 if (!d->testing_p)
48583 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48584 return true;
48586 case E_V64QImode:
48587 case E_V32QImode:
48588 case E_V16HImode:
48589 case E_V8SImode:
48590 case E_V4DImode:
48591 /* For AVX2 broadcasts of the first element vpbroadcast* or
48592 vpermq should be used by expand_vec_perm_1. */
48593 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48594 return false;
48596 default:
48597 gcc_unreachable ();
48601 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48602 broadcast permutations. */
48604 static bool
48605 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48607 unsigned i, elt, nelt = d->nelt;
48609 if (!d->one_operand_p)
48610 return false;
48612 elt = d->perm[0];
48613 for (i = 1; i < nelt; ++i)
48614 if (d->perm[i] != elt)
48615 return false;
48617 return expand_vec_perm_broadcast_1 (d);
48620 /* Implement arbitrary permutations of two V64QImode operands
48621 with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */
48622 static bool
48623 expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
48625 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48626 return false;
48628 if (d->testing_p)
48629 return true;
48631 struct expand_vec_perm_d ds[2];
48632 rtx rperm[128], vperm, target0, target1;
48633 unsigned int i, nelt;
48634 machine_mode vmode;
48636 nelt = d->nelt;
48637 vmode = V64QImode;
48639 for (i = 0; i < 2; i++)
48641 ds[i] = *d;
48642 ds[i].vmode = V32HImode;
48643 ds[i].nelt = 32;
48644 ds[i].target = gen_reg_rtx (V32HImode);
48645 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
48646 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
48649 /* Prepare permutations such that the first one takes care of
48650 putting the even bytes into the right positions or one position
48651 higher (ds[0]) and the second one takes care of
48652 putting the odd bytes into the right positions or one position
48653 lower (ds[1]). */
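/* E.g. for d->perm[5] == 27, ds[1].perm[2] == 13, so the vperm[it]2w for
   ds[1] puts input word 13 into word 2 of its result (bytes 4 and 5),
   rperm[5 + 64] == 5 then picks the high byte of that word, and
   rperm[5] == -1 zeroes byte 5 in the other vpshufb result. */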
48655 for (i = 0; i < nelt; i++)
48657 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
48658 if (i & 1)
48660 rperm[i] = constm1_rtx;
48661 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48663 else
48665 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48666 rperm[i + 64] = constm1_rtx;
48670 bool ok = expand_vec_perm_1 (&ds[0]);
48671 gcc_assert (ok);
48672 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
48674 ok = expand_vec_perm_1 (&ds[1]);
48675 gcc_assert (ok);
48676 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
48678 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
48679 vperm = force_reg (vmode, vperm);
48680 target0 = gen_reg_rtx (V64QImode);
48681 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
48683 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
48684 vperm = force_reg (vmode, vperm);
48685 target1 = gen_reg_rtx (V64QImode);
48686 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
48688 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
48689 return true;
48692 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
48693 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48694 all the shorter instruction sequences. */
48696 static bool
48697 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48699 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48700 unsigned int i, nelt, eltsz;
48701 bool used[4];
48703 if (!TARGET_AVX2
48704 || d->one_operand_p
48705 || (d->vmode != V32QImode && d->vmode != V16HImode))
48706 return false;
48708 if (d->testing_p)
48709 return true;
48711 nelt = d->nelt;
48712 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48714 /* Generate 4 permutation masks. If the required element is within
48715 the same lane, it is shuffled in. If the required element is from the
48716 other lane, force a zero by setting bit 7 in the permutation mask.
48717 The cross-lane masks have a non-negative element wherever an element
48718 is requested from the other lane, but that entry is also moved to the
48719 other lane, so that after the vpshufb the two V2TImode halves of the
48720 result can be swapped. */
48721 m128 = GEN_INT (-128);
48722 for (i = 0; i < 32; ++i)
48724 rperm[0][i] = m128;
48725 rperm[1][i] = m128;
48726 rperm[2][i] = m128;
48727 rperm[3][i] = m128;
48729 used[0] = false;
48730 used[1] = false;
48731 used[2] = false;
48732 used[3] = false;
48733 for (i = 0; i < nelt; ++i)
48735 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48736 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48737 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48739 for (j = 0; j < eltsz; ++j)
48740 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48741 used[which] = true;
48744 for (i = 0; i < 2; ++i)
48746 if (!used[2 * i + 1])
48748 h[i] = NULL_RTX;
48749 continue;
48751 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48752 gen_rtvec_v (32, rperm[2 * i + 1]));
48753 vperm = force_reg (V32QImode, vperm);
48754 h[i] = gen_reg_rtx (V32QImode);
48755 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48756 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48759 /* Swap the 128-bit lanes of h[X]. */
48760 for (i = 0; i < 2; ++i)
48762 if (h[i] == NULL_RTX)
48763 continue;
48764 op = gen_reg_rtx (V4DImode);
48765 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48766 const2_rtx, GEN_INT (3), const0_rtx,
48767 const1_rtx));
48768 h[i] = gen_lowpart (V32QImode, op);
48771 for (i = 0; i < 2; ++i)
48773 if (!used[2 * i])
48775 l[i] = NULL_RTX;
48776 continue;
48778 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
48779 vperm = force_reg (V32QImode, vperm);
48780 l[i] = gen_reg_rtx (V32QImode);
48781 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48782 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
48785 for (i = 0; i < 2; ++i)
48787 if (h[i] && l[i])
48789 op = gen_reg_rtx (V32QImode);
48790 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
48791 l[i] = op;
48793 else if (h[i])
48794 l[i] = h[i];
48797 gcc_assert (l[0] && l[1]);
48798 op = d->target;
48799 if (d->vmode != V32QImode)
48800 op = gen_reg_rtx (V32QImode);
48801 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
48802 if (op != d->target)
48803 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48804 return true;
48807 /* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits
48808 taken care of, perform the expansion in D and return true on success. */
48810 static bool
48811 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
48813 /* Try a single instruction expansion. */
48814 if (expand_vec_perm_1 (d))
48815 return true;
48817 /* Try sequences of two instructions. */
48819 if (expand_vec_perm_pshuflw_pshufhw (d))
48820 return true;
48822 if (expand_vec_perm_palignr (d, false))
48823 return true;
48825 if (expand_vec_perm_interleave2 (d))
48826 return true;
48828 if (expand_vec_perm_broadcast (d))
48829 return true;
48831 if (expand_vec_perm_vpermq_perm_1 (d))
48832 return true;
48834 if (expand_vec_perm_vperm2f128 (d))
48835 return true;
48837 if (expand_vec_perm_pblendv (d))
48838 return true;
48840 /* Try sequences of three instructions. */
48842 if (expand_vec_perm_even_odd_pack (d))
48843 return true;
48845 if (expand_vec_perm_2vperm2f128_vshuf (d))
48846 return true;
48848 if (expand_vec_perm_pshufb2 (d))
48849 return true;
48851 if (expand_vec_perm_interleave3 (d))
48852 return true;
48854 if (expand_vec_perm_vperm2f128_vblend (d))
48855 return true;
48857 /* Try sequences of four instructions. */
48859 if (expand_vec_perm_even_odd_trunc (d))
48860 return true;
48861 if (expand_vec_perm_vpshufb2_vpermq (d))
48862 return true;
48864 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
48865 return true;
48867 if (expand_vec_perm_vpermt2_vpshub2 (d))
48868 return true;
48870 /* ??? Look for narrow permutations whose element orderings would
48871 allow the promotion to a wider mode. */
48873 /* ??? Look for sequences of interleave or a wider permute that place
48874 the data into the correct lanes for a half-vector shuffle like
48875 pshuf[lh]w or vpermilps. */
48877 /* ??? Look for sequences of interleave that produce the desired results.
48878 The combinatorics of punpck[lh] get pretty ugly... */
48880 if (expand_vec_perm_even_odd (d))
48881 return true;
48883 /* Even longer sequences. */
48884 if (expand_vec_perm_vpshufb4_vpermq2 (d))
48885 return true;
48887 /* See if we can get the same permutation in different vector integer
48888 mode. */
48889 struct expand_vec_perm_d nd;
48890 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
48892 if (!d->testing_p)
48893 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
48894 return true;
48897 return false;
48900 /* If a permutation only uses one operand, make it clear. Returns true
48901 if the permutation references both operands. */
48903 static bool
48904 canonicalize_perm (struct expand_vec_perm_d *d)
48906 int i, which, nelt = d->nelt;
48908 for (i = which = 0; i < nelt; ++i)
48909 which |= (d->perm[i] < nelt ? 1 : 2);
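/* WHICH ends up as 1 if only op0 is referenced, 2 if only op1 is
   referenced, and 3 if both are. */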
48911 d->one_operand_p = true;
48912 switch (which)
48914 default:
48915 gcc_unreachable();
48917 case 3:
48918 if (!rtx_equal_p (d->op0, d->op1))
48920 d->one_operand_p = false;
48921 break;
48923 /* The elements of PERM do not suggest that only the first operand
48924 is used, but both operands are identical. Allow easier matching
48925 of the permutation by folding the permutation into the single
48926 input vector. */
48927 /* FALLTHRU */
48929 case 2:
48930 for (i = 0; i < nelt; ++i)
48931 d->perm[i] &= nelt - 1;
48932 d->op0 = d->op1;
48933 break;
48935 case 1:
48936 d->op1 = d->op0;
48937 break;
48940 return (which == 3);
48943 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
48945 static bool
48946 ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
48947 rtx op1, const vec_perm_indices &sel)
48949 struct expand_vec_perm_d d;
48950 unsigned char perm[MAX_VECT_LEN];
48951 unsigned int i, nelt, which;
48952 bool two_args;
48954 d.target = target;
48955 d.op0 = op0;
48956 d.op1 = op1;
48958 d.vmode = vmode;
48959 gcc_assert (VECTOR_MODE_P (d.vmode));
48960 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
48961 d.testing_p = !target;
48963 gcc_assert (sel.length () == nelt);
48964 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
48966 /* Given sufficient ISA support we can just return true here
48967 for selected vector modes. */
48968 switch (d.vmode)
48970 case E_V16SFmode:
48971 case E_V16SImode:
48972 case E_V8DImode:
48973 case E_V8DFmode:
48974 if (!TARGET_AVX512F)
48975 return false;
48976 /* All implementable with a single vperm[it]2 insn. */
48977 if (d.testing_p)
48978 return true;
48979 break;
48980 case E_V32HImode:
48981 if (!TARGET_AVX512BW)
48982 return false;
48983 if (d.testing_p)
48984 /* All implementable with a single vperm[it]2 insn. */
48985 return true;
48986 break;
48987 case E_V64QImode:
48988 if (!TARGET_AVX512BW)
48989 return false;
48990 if (d.testing_p)
48991 /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
48992 return true;
48993 break;
48994 case E_V8SImode:
48995 case E_V8SFmode:
48996 case E_V4DFmode:
48997 case E_V4DImode:
48998 if (!TARGET_AVX)
48999 return false;
49000 if (d.testing_p && TARGET_AVX512VL)
49001 /* All implementable with a single vperm[it]2 insn. */
49002 return true;
49003 break;
49004 case E_V16HImode:
49005 if (!TARGET_SSE2)
49006 return false;
49007 if (d.testing_p && TARGET_AVX2)
49008 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49009 return true;
49010 break;
49011 case E_V32QImode:
49012 if (!TARGET_SSE2)
49013 return false;
49014 if (d.testing_p && TARGET_AVX2)
49015 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49016 return true;
49017 break;
49018 case E_V8HImode:
49019 case E_V16QImode:
49020 if (!TARGET_SSE2)
49021 return false;
49022 /* Fall through. */
49023 case E_V4SImode:
49024 case E_V4SFmode:
49025 if (!TARGET_SSE)
49026 return false;
49027 /* All implementable with a single vpperm insn. */
49028 if (d.testing_p && TARGET_XOP)
49029 return true;
49030 /* All implementable with 2 pshufb + 1 ior. */
49031 if (d.testing_p && TARGET_SSSE3)
49032 return true;
49033 break;
49034 case E_V2DImode:
49035 case E_V2DFmode:
49036 if (!TARGET_SSE)
49037 return false;
49038 /* All implementable with shufpd or unpck[lh]pd. */
49039 if (d.testing_p)
49040 return true;
49041 break;
49042 default:
49043 return false;
49046 for (i = which = 0; i < nelt; ++i)
49048 unsigned char e = sel[i];
49049 gcc_assert (e < 2 * nelt);
49050 d.perm[i] = e;
49051 perm[i] = e;
49052 which |= (e < nelt ? 1 : 2);
49055 if (d.testing_p)
49057 /* For all elements from the second vector, fold them to the first. */
49058 if (which == 2)
49059 for (i = 0; i < nelt; ++i)
49060 d.perm[i] -= nelt;
49062 /* Check whether the mask can be applied to the vector type. */
49063 d.one_operand_p = (which != 3);
49065 /* Implementable with shufps or pshufd. */
49066 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49067 return true;
49069 /* Otherwise we have to go through the motions and see if we can
49070 figure out how to generate the requested permutation. */
49071 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49072 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49073 if (!d.one_operand_p)
49074 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
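/* Run the expansion attempt inside a sequence that is discarded, so
   that no insns leak into the instruction stream while merely
   testing. */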
49076 start_sequence ();
49077 bool ret = ix86_expand_vec_perm_const_1 (&d);
49078 end_sequence ();
49080 return ret;
49083 two_args = canonicalize_perm (&d);
49085 if (ix86_expand_vec_perm_const_1 (&d))
49086 return true;
49088 /* If the selector says both arguments are needed, but the operands are the
49089 same, the above tried to expand with one_operand_p and flattened selector.
49090 If that didn't work, retry without one_operand_p; we succeeded with that
49091 during testing. */
49092 if (two_args && d.one_operand_p)
49094 d.one_operand_p = false;
49095 memcpy (d.perm, perm, sizeof (perm));
49096 return ix86_expand_vec_perm_const_1 (&d);
49099 return false;
49102 void
49103 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49105 struct expand_vec_perm_d d;
49106 unsigned i, nelt;
49108 d.target = targ;
49109 d.op0 = op0;
49110 d.op1 = op1;
49111 d.vmode = GET_MODE (targ);
49112 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49113 d.one_operand_p = false;
49114 d.testing_p = false;
49116 for (i = 0; i < nelt; ++i)
49117 d.perm[i] = i * 2 + odd;
49119 /* We'll either be able to implement the permutation directly... */
49120 if (expand_vec_perm_1 (&d))
49121 return;
49123 /* ... or we use the special-case patterns. */
49124 expand_vec_perm_even_odd_1 (&d, odd);
49127 static void
49128 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49130 struct expand_vec_perm_d d;
49131 unsigned i, nelt, base;
49132 bool ok;
49134 d.target = targ;
49135 d.op0 = op0;
49136 d.op1 = op1;
49137 d.vmode = GET_MODE (targ);
49138 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49139 d.one_operand_p = false;
49140 d.testing_p = false;
49142 base = high_p ? nelt / 2 : 0;
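/* E.g. for nelt == 4 and high_p the permutation built below is
   { 2, 6, 3, 7 }. */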
49143 for (i = 0; i < nelt / 2; ++i)
49145 d.perm[i * 2] = i + base;
49146 d.perm[i * 2 + 1] = i + base + nelt;
49149 /* Note that for AVX this isn't one instruction. */
49150 ok = ix86_expand_vec_perm_const_1 (&d);
49151 gcc_assert (ok);
49155 /* Expand a vector operation CODE for a V*QImode in terms of the
49156 same operation on V*HImode. */
49158 void
49159 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49161 machine_mode qimode = GET_MODE (dest);
49162 machine_mode himode;
49163 rtx (*gen_il) (rtx, rtx, rtx);
49164 rtx (*gen_ih) (rtx, rtx, rtx);
49165 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49166 struct expand_vec_perm_d d;
49167 bool ok, full_interleave;
49168 bool uns_p = false;
49169 int i;
49171 switch (qimode)
49173 case E_V16QImode:
49174 himode = V8HImode;
49175 gen_il = gen_vec_interleave_lowv16qi;
49176 gen_ih = gen_vec_interleave_highv16qi;
49177 break;
49178 case E_V32QImode:
49179 himode = V16HImode;
49180 gen_il = gen_avx2_interleave_lowv32qi;
49181 gen_ih = gen_avx2_interleave_highv32qi;
49182 break;
49183 case E_V64QImode:
49184 himode = V32HImode;
49185 gen_il = gen_avx512bw_interleave_lowv64qi;
49186 gen_ih = gen_avx512bw_interleave_highv64qi;
49187 break;
49188 default:
49189 gcc_unreachable ();
49192 op2_l = op2_h = op2;
49193 switch (code)
49195 case MULT:
49196 /* Unpack data such that we've got a source byte in each low byte of
49197 each word. We don't care what goes into the high byte of each word.
49198 Rather than trying to get zero in there, most convenient is to let
49199 it be a copy of the low byte. */
49200 op2_l = gen_reg_rtx (qimode);
49201 op2_h = gen_reg_rtx (qimode);
49202 emit_insn (gen_il (op2_l, op2, op2));
49203 emit_insn (gen_ih (op2_h, op2, op2));
49205 op1_l = gen_reg_rtx (qimode);
49206 op1_h = gen_reg_rtx (qimode);
49207 emit_insn (gen_il (op1_l, op1, op1));
49208 emit_insn (gen_ih (op1_h, op1, op1));
49209 full_interleave = qimode == V16QImode;
49210 break;
49212 case ASHIFT:
49213 case LSHIFTRT:
49214 uns_p = true;
49215 /* FALLTHRU */
49216 case ASHIFTRT:
49217 op1_l = gen_reg_rtx (himode);
49218 op1_h = gen_reg_rtx (himode);
49219 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49220 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49221 full_interleave = true;
49222 break;
49223 default:
49224 gcc_unreachable ();
49227 /* Perform the operation. */
49228 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49229 1, OPTAB_DIRECT);
49230 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49231 1, OPTAB_DIRECT);
49232 gcc_assert (res_l && res_h);
49234 /* Merge the data back into the right place. */
49235 d.target = dest;
49236 d.op0 = gen_lowpart (qimode, res_l);
49237 d.op1 = gen_lowpart (qimode, res_h);
49238 d.vmode = qimode;
49239 d.nelt = GET_MODE_NUNITS (qimode);
49240 d.one_operand_p = false;
49241 d.testing_p = false;
49243 if (full_interleave)
49245 /* For SSE2, we used a full interleave, so the desired
49246 results are in the even elements. */
49247 for (i = 0; i < d.nelt; ++i)
49248 d.perm[i] = i * 2;
49250 else
49252 /* For AVX, the interleave used above was not cross-lane. So the
49253 extraction is evens but with the second and third quarter swapped.
49254 Happily, that is even one insn shorter than even extraction.
49255 For AVX512BW we have 4 lanes. We extract evens from within a lane,
49256 always first from the first and then from the second source operand,
49257 the index bits above the low 4 bits remain the same.
49258 Thus, for d.nelt == 32 we want permutation
49259 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
49260 and for d.nelt == 64 we want permutation
49261 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
49262 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
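/* E.g. for d.nelt == 32 and i == 8 this yields (16 & 14) + 32 + 0 == 32,
   matching the list above. */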
49263 for (i = 0; i < d.nelt; ++i)
49264 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
49267 ok = ix86_expand_vec_perm_const_1 (&d);
49268 gcc_assert (ok);
49270 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49271 gen_rtx_fmt_ee (code, qimode, op1, op2));
49274 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49275 if op is CONST_VECTOR with all odd elements equal to their
49276 preceding element. */
49278 static bool
49279 const_vector_equal_evenodd_p (rtx op)
49281 machine_mode mode = GET_MODE (op);
49282 int i, nunits = GET_MODE_NUNITS (mode);
49283 if (GET_CODE (op) != CONST_VECTOR
49284 || nunits != CONST_VECTOR_NUNITS (op))
49285 return false;
49286 for (i = 0; i < nunits; i += 2)
49287 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49288 return false;
49289 return true;
49292 void
49293 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49294 bool uns_p, bool odd_p)
49296 machine_mode mode = GET_MODE (op1);
49297 machine_mode wmode = GET_MODE (dest);
49298 rtx x;
49299 rtx orig_op1 = op1, orig_op2 = op2;
49301 if (!nonimmediate_operand (op1, mode))
49302 op1 = force_reg (mode, op1);
49303 if (!nonimmediate_operand (op2, mode))
49304 op2 = force_reg (mode, op2);
49306 /* We only play even/odd games with vectors of SImode. */
49307 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49309 /* If we're looking for the odd results, shift those members down to
49310 the even slots. For some cpus this is faster than a PSHUFD. */
49311 if (odd_p)
49313 /* For XOP use vpmacsdqh, but only for smult, as it is only
49314 signed. */
49315 if (TARGET_XOP && mode == V4SImode && !uns_p)
49317 x = force_reg (wmode, CONST0_RTX (wmode));
49318 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49319 return;
49322 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49323 if (!const_vector_equal_evenodd_p (orig_op1))
49324 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49325 x, NULL, 1, OPTAB_DIRECT);
49326 if (!const_vector_equal_evenodd_p (orig_op2))
49327 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49328 x, NULL, 1, OPTAB_DIRECT);
49329 op1 = gen_lowpart (mode, op1);
49330 op2 = gen_lowpart (mode, op2);
49333 if (mode == V16SImode)
49335 if (uns_p)
49336 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49337 else
49338 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49340 else if (mode == V8SImode)
49342 if (uns_p)
49343 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49344 else
49345 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49347 else if (uns_p)
49348 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49349 else if (TARGET_SSE4_1)
49350 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49351 else
49353 rtx s1, s2, t0, t1, t2;
49355 /* The easiest way to implement this without PMULDQ is to go through
49356 the motions as if we are performing a full 64-bit multiply, except
49357 that we need to do less shuffling of the elements. */
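/* Concretely, with s1/s2 being 0 or all-ones according to the signs of
   op1/op2, the signed 64-bit product of each element pair equals the
   unsigned product op1 * op2 plus ((s1 * op2 + s2 * op1) << 32)
   modulo 2^64, which is what is computed below. */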
49359 /* Compute the sign-extension, aka highparts, of the two operands. */
49360 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49361 op1, pc_rtx, pc_rtx);
49362 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49363 op2, pc_rtx, pc_rtx);
49365 /* Multiply LO(A) * HI(B), and vice-versa. */
49366 t1 = gen_reg_rtx (wmode);
49367 t2 = gen_reg_rtx (wmode);
49368 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49369 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49371 /* Multiply LO(A) * LO(B). */
49372 t0 = gen_reg_rtx (wmode);
49373 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49375 /* Combine and shift the highparts into place. */
49376 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49377 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49378 1, OPTAB_DIRECT);
49380 /* Combine high and low parts. */
49381 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49382 return;
49384 emit_insn (x);
49387 void
49388 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49389 bool uns_p, bool high_p)
49391 machine_mode wmode = GET_MODE (dest);
49392 machine_mode mode = GET_MODE (op1);
49393 rtx t1, t2, t3, t4, mask;
49395 switch (mode)
49397 case E_V4SImode:
49398 t1 = gen_reg_rtx (mode);
49399 t2 = gen_reg_rtx (mode);
49400 if (TARGET_XOP && !uns_p)
49402 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49403 shuffle the elements once so that all elements are in the right
49404 place for immediate use: { A C B D }. */
49405 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49406 const1_rtx, GEN_INT (3)));
49407 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49408 const1_rtx, GEN_INT (3)));
49410 else
49412 /* Put the elements into place for the multiply. */
49413 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49414 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49415 high_p = false;
49417 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49418 break;
49420 case E_V8SImode:
49421 /* Shuffle the elements between the lanes. After this we
49422 have { A B E F | C D G H } for each operand. */
49423 t1 = gen_reg_rtx (V4DImode);
49424 t2 = gen_reg_rtx (V4DImode);
49425 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49426 const0_rtx, const2_rtx,
49427 const1_rtx, GEN_INT (3)));
49428 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49429 const0_rtx, const2_rtx,
49430 const1_rtx, GEN_INT (3)));
49432 /* Shuffle the elements within the lanes. After this we
49433 have { A A B B | C C D D } or { E E F F | G G H H }. */
49434 t3 = gen_reg_rtx (V8SImode);
49435 t4 = gen_reg_rtx (V8SImode);
49436 mask = GEN_INT (high_p
49437 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49438 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49439 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49440 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49442 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49443 break;
49445 case E_V8HImode:
49446 case E_V16HImode:
49447 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49448 uns_p, OPTAB_DIRECT);
49449 t2 = expand_binop (mode,
49450 uns_p ? umul_highpart_optab : smul_highpart_optab,
49451 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49452 gcc_assert (t1 && t2);
49454 t3 = gen_reg_rtx (mode);
49455 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49456 emit_move_insn (dest, gen_lowpart (wmode, t3));
49457 break;
49459 case E_V16QImode:
49460 case E_V32QImode:
49461 case E_V32HImode:
49462 case E_V16SImode:
49463 case E_V64QImode:
49464 t1 = gen_reg_rtx (wmode);
49465 t2 = gen_reg_rtx (wmode);
49466 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49467 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49469 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49470 break;
49472 default:
49473 gcc_unreachable ();
49477 void
49478 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49480 rtx res_1, res_2, res_3, res_4;
49482 res_1 = gen_reg_rtx (V4SImode);
49483 res_2 = gen_reg_rtx (V4SImode);
49484 res_3 = gen_reg_rtx (V2DImode);
49485 res_4 = gen_reg_rtx (V2DImode);
49486 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49487 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49489 /* Move the results in element 2 down to element 1; we don't care
49490 what goes in elements 2 and 3. Then we can merge the parts
49491 back together with an interleave.
49493 Note that two other sequences were tried:
49494 (1) Use interleaves at the start instead of psrldq, which allows
49495 us to use a single shufps to merge things back at the end.
49496 (2) Use shufps here to combine the two vectors, then pshufd to
49497 put the elements in the correct order.
49498 In both cases the cost of the reformatting stall was too high
49499 and the overall sequence slower. */
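/* Concretely res_3 holds the 64-bit products of elements 0 and 2 and
   res_4 those of elements 1 and 3; each pshufd below packs the two low
   32-bit halves into elements 0 and 1, and the interleave then yields
   the four SImode products { p0, p1, p2, p3 }. */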
49501 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49502 const0_rtx, const2_rtx,
49503 const0_rtx, const0_rtx));
49504 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49505 const0_rtx, const2_rtx,
49506 const0_rtx, const0_rtx));
49507 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49509 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49512 void
49513 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49515 machine_mode mode = GET_MODE (op0);
49516 rtx t1, t2, t3, t4, t5, t6;
49518 if (TARGET_AVX512DQ && mode == V8DImode)
49519 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49520 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49521 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49522 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49523 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49524 else if (TARGET_XOP && mode == V2DImode)
49526 /* op1: A,B,C,D, op2: E,F,G,H */
49527 op1 = gen_lowpart (V4SImode, op1);
49528 op2 = gen_lowpart (V4SImode, op2);
49530 t1 = gen_reg_rtx (V4SImode);
49531 t2 = gen_reg_rtx (V4SImode);
49532 t3 = gen_reg_rtx (V2DImode);
49533 t4 = gen_reg_rtx (V2DImode);
49535 /* t1: B,A,D,C */
49536 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49537 GEN_INT (1),
49538 GEN_INT (0),
49539 GEN_INT (3),
49540 GEN_INT (2)));
49542 /* t2: (B*E),(A*F),(D*G),(C*H) */
49543 emit_insn (gen_mulv4si3 (t2, t1, op2));
49545 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49546 emit_insn (gen_xop_phadddq (t3, t2));
49548 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49549 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49551 /* Multiply lower parts and add all */
49552 t5 = gen_reg_rtx (V2DImode);
49553 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49554 gen_lowpart (V4SImode, op1),
49555 gen_lowpart (V4SImode, op2)));
49556 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49559 else
49561 machine_mode nmode;
49562 rtx (*umul) (rtx, rtx, rtx);
49564 if (mode == V2DImode)
49566 umul = gen_vec_widen_umult_even_v4si;
49567 nmode = V4SImode;
49569 else if (mode == V4DImode)
49571 umul = gen_vec_widen_umult_even_v8si;
49572 nmode = V8SImode;
49574 else if (mode == V8DImode)
49576 umul = gen_vec_widen_umult_even_v16si;
49577 nmode = V16SImode;
49579 else
49580 gcc_unreachable ();
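/* Compute op1 * op2 modulo 2^64 per element as
   lo(op1) * lo(op2) + ((hi(op1) * lo(op2) + hi(op2) * lo(op1)) << 32),
   with each 32x32->64 product done by the even-lane widening unsigned
   multiply. */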
49583 /* Multiply low parts. */
49584 t1 = gen_reg_rtx (mode);
49585 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49587 /* Shift input vectors right 32 bits so we can multiply high parts. */
49588 t6 = GEN_INT (32);
49589 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49590 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49592 /* Multiply high parts by low parts. */
49593 t4 = gen_reg_rtx (mode);
49594 t5 = gen_reg_rtx (mode);
49595 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49596 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49598 /* Combine and shift the highparts back. */
49599 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49600 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49602 /* Combine high and low parts. */
49603 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49606 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49607 gen_rtx_MULT (mode, op1, op2));
49610 /* Return 1 if control transfer instruction INSN
49611 should be encoded with bnd prefix.
49612 If insn is NULL then return 1 when control
49613 transfer instructions should be prefixed with
49614 bnd by default for current function. */
49616 bool
49617 ix86_bnd_prefixed_insn_p (rtx insn)
49619 /* For call insns check special flag. */
49620 if (insn && CALL_P (insn))
49622 rtx call = get_call_rtx_from (insn);
49623 if (call)
49624 return CALL_EXPR_WITH_BOUNDS_P (call);
49627 /* All other insns are prefixed only if function is instrumented. */
49628 return chkp_function_instrumented_p (current_function_decl);
49631 /* Return 1 if control transfer instruction INSN
49632 should be encoded with notrack prefix. */
49634 static bool
49635 ix86_notrack_prefixed_insn_p (rtx insn)
49637 if (!insn || !((flag_cf_protection & CF_BRANCH) && TARGET_IBT))
49638 return false;
49640 if (CALL_P (insn))
49642 rtx call = get_call_rtx_from (insn);
49643 gcc_assert (call != NULL_RTX);
49644 rtx addr = XEXP (call, 0);
49646 /* Do not emit 'notrack' if it's not an indirect call. */
49647 if (MEM_P (addr)
49648 && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49649 return false;
49650 else
49651 return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
49654 if (JUMP_P (insn) && !flag_cet_switch)
49656 rtx target = JUMP_LABEL (insn);
49657 if (target == NULL_RTX || ANY_RETURN_P (target))
49658 return false;
49660 /* Check whether the jump is a switch table jump. */
49661 rtx_insn *label = as_a<rtx_insn *> (target);
49662 rtx_insn *table = next_insn (label);
49663 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
49664 return false;
49665 else
49666 return true;
49668 return false;
49671 /* Calculate integer abs() using only SSE2 instructions. */
49673 void
49674 ix86_expand_sse2_abs (rtx target, rtx input)
49676 machine_mode mode = GET_MODE (target);
49677 rtx tmp0, tmp1, x;
49679 switch (mode)
49681 /* For 32-bit signed integer X, the best way to calculate the absolute
49682 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
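/* E.g. for X == -5: X >> 31 == -1, -1 ^ -5 == 4, and 4 - (-1) == 5. */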
49683 case E_V4SImode:
49684 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49685 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
49686 NULL, 0, OPTAB_DIRECT);
49687 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49688 NULL, 0, OPTAB_DIRECT);
49689 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49690 target, 0, OPTAB_DIRECT);
49691 break;
49693 /* For 16-bit signed integer X, the best way to calculate the absolute
49694 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49695 case E_V8HImode:
49696 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49698 x = expand_simple_binop (mode, SMAX, tmp0, input,
49699 target, 0, OPTAB_DIRECT);
49700 break;
49702 /* For 8-bit signed integer X, the best way to calculate the absolute
49703 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49704 as SSE2 provides the PMINUB insn. */
49705 case E_V16QImode:
49706 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49708 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49709 target, 0, OPTAB_DIRECT);
49710 break;
49712 default:
49713 gcc_unreachable ();
49716 if (x != target)
49717 emit_move_insn (target, x);
49720 /* Expand an extract from a vector register through pextr insn.
49721 Return true if successful. */
49723 bool
49724 ix86_expand_pextr (rtx *operands)
49726 rtx dst = operands[0];
49727 rtx src = operands[1];
49729 unsigned int size = INTVAL (operands[2]);
49730 unsigned int pos = INTVAL (operands[3]);
49732 if (SUBREG_P (dst))
49734 /* Reject non-lowpart subregs. */
49735 if (SUBREG_BYTE (dst) > 0)
49736 return false;
49737 dst = SUBREG_REG (dst);
49740 if (SUBREG_P (src))
49742 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
49743 src = SUBREG_REG (src);
49746 switch (GET_MODE (src))
49748 case E_V16QImode:
49749 case E_V8HImode:
49750 case E_V4SImode:
49751 case E_V2DImode:
49752 case E_V1TImode:
49753 case E_TImode:
49755 machine_mode srcmode, dstmode;
49756 rtx d, pat;
49758 if (!int_mode_for_size (size, 0).exists (&dstmode))
49759 return false;
49761 switch (dstmode)
49763 case E_QImode:
49764 if (!TARGET_SSE4_1)
49765 return false;
49766 srcmode = V16QImode;
49767 break;
49769 case E_HImode:
49770 if (!TARGET_SSE2)
49771 return false;
49772 srcmode = V8HImode;
49773 break;
49775 case E_SImode:
49776 if (!TARGET_SSE4_1)
49777 return false;
49778 srcmode = V4SImode;
49779 break;
49781 case E_DImode:
49782 gcc_assert (TARGET_64BIT);
49783 if (!TARGET_SSE4_1)
49784 return false;
49785 srcmode = V2DImode;
49786 break;
49788 default:
49789 return false;
49792 /* Reject extractions from misaligned positions. */
49793 if (pos & (size-1))
49794 return false;
49796 if (GET_MODE (dst) == dstmode)
49797 d = dst;
49798 else
49799 d = gen_reg_rtx (dstmode);
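/* E.g. size == 16 and pos == 32 extract the third word; the element
   index used below is pos / size == 2. */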
49801 /* Construct insn pattern. */
49802 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
49803 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
49805 /* Let the rtl optimizers know about the zero extension performed. */
49806 if (dstmode == QImode || dstmode == HImode)
49808 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
49809 d = gen_lowpart (SImode, d);
49812 emit_insn (gen_rtx_SET (d, pat));
49814 if (d != dst)
49815 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49816 return true;
49819 default:
49820 return false;
49824 /* Expand an insert into a vector register through pinsr insn.
49825 Return true if successful. */
49827 bool
49828 ix86_expand_pinsr (rtx *operands)
49830 rtx dst = operands[0];
49831 rtx src = operands[3];
49833 unsigned int size = INTVAL (operands[1]);
49834 unsigned int pos = INTVAL (operands[2]);
49836 if (SUBREG_P (dst))
49838 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49839 dst = SUBREG_REG (dst);
49842 switch (GET_MODE (dst))
49844 case E_V16QImode:
49845 case E_V8HImode:
49846 case E_V4SImode:
49847 case E_V2DImode:
49848 case E_V1TImode:
49849 case E_TImode:
49851 machine_mode srcmode, dstmode;
49852 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49853 rtx d;
49855 if (!int_mode_for_size (size, 0).exists (&srcmode))
49856 return false;
49858 switch (srcmode)
49860 case E_QImode:
49861 if (!TARGET_SSE4_1)
49862 return false;
49863 dstmode = V16QImode;
49864 pinsr = gen_sse4_1_pinsrb;
49865 break;
49867 case E_HImode:
49868 if (!TARGET_SSE2)
49869 return false;
49870 dstmode = V8HImode;
49871 pinsr = gen_sse2_pinsrw;
49872 break;
49874 case E_SImode:
49875 if (!TARGET_SSE4_1)
49876 return false;
49877 dstmode = V4SImode;
49878 pinsr = gen_sse4_1_pinsrd;
49879 break;
49881 case E_DImode:
49882 gcc_assert (TARGET_64BIT);
49883 if (!TARGET_SSE4_1)
49884 return false;
49885 dstmode = V2DImode;
49886 pinsr = gen_sse4_1_pinsrq;
49887 break;
49889 default:
49890 return false;
49893 /* Reject insertions to misaligned positions. */
49894 if (pos & (size-1))
49895 return false;
49897 if (SUBREG_P (src))
49899 unsigned int srcpos = SUBREG_BYTE (src);
49901 if (srcpos > 0)
49903 rtx extr_ops[4];
49905 extr_ops[0] = gen_reg_rtx (srcmode);
49906 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
49907 extr_ops[2] = GEN_INT (size);
49908 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
49910 if (!ix86_expand_pextr (extr_ops))
49911 return false;
49913 src = extr_ops[0];
49915 else
49916 src = gen_lowpart (srcmode, SUBREG_REG (src));
49919 if (GET_MODE (dst) == dstmode)
49920 d = dst;
49921 else
49922 d = gen_reg_rtx (dstmode);
49924 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
49925 gen_lowpart (srcmode, src),
49926 GEN_INT (1 << (pos / size))));
49927 if (d != dst)
49928 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49929 return true;
49932 default:
49933 return false;
49937 /* This function returns the calling-ABI-specific va_list type node.
49938 It returns the FNDECL-specific va_list type. */
49940 static tree
49941 ix86_fn_abi_va_list (tree fndecl)
49943 if (!TARGET_64BIT)
49944 return va_list_type_node;
49945 gcc_assert (fndecl != NULL_TREE);
49947 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49948 return ms_va_list_type_node;
49949 else
49950 return sysv_va_list_type_node;
49953 /* Returns the canonical va_list type specified by TYPE. If there
49954 is no valid TYPE provided, it returns NULL_TREE. */
49956 static tree
49957 ix86_canonical_va_list_type (tree type)
49959 if (TARGET_64BIT)
49961 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
49962 return ms_va_list_type_node;
49964 if ((TREE_CODE (type) == ARRAY_TYPE
49965 && integer_zerop (array_type_nelts (type)))
49966 || POINTER_TYPE_P (type))
49968 tree elem_type = TREE_TYPE (type);
49969 if (TREE_CODE (elem_type) == RECORD_TYPE
49970 && lookup_attribute ("sysv_abi va_list",
49971 TYPE_ATTRIBUTES (elem_type)))
49972 return sysv_va_list_type_node;
49975 return NULL_TREE;
49978 return std_canonical_va_list_type (type);
49981 /* Iterate through the target-specific builtin types for va_list.
49982 IDX denotes the iterator, *PTREE is set to the type node of
49983 the va_list builtin, and *PNAME to the builtin's name.
49984 Returns zero if there is no element for this index, otherwise
49985 IDX should be increased upon the next call.
49986 Note, do not iterate a base builtin's name like __builtin_va_list.
49987 Used from c_common_nodes_and_builtins. */
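/* For example, iterating this hook on a 64-bit target yields
   ms_va_list_type_node as "__builtin_ms_va_list" for idx 0,
   sysv_va_list_type_node as "__builtin_sysv_va_list" for idx 1,
   and 0 for any larger index (or for any index on 32-bit targets).  */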
49989 static int
49990 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
49992 if (TARGET_64BIT)
49994 switch (idx)
49996 default:
49997 break;
49999 case 0:
50000 *ptree = ms_va_list_type_node;
50001 *pname = "__builtin_ms_va_list";
50002 return 1;
50004 case 1:
50005 *ptree = sysv_va_list_type_node;
50006 *pname = "__builtin_sysv_va_list";
50007 return 1;
50011 return 0;
50014 #undef TARGET_SCHED_DISPATCH
50015 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
50016 #undef TARGET_SCHED_DISPATCH_DO
50017 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
50018 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50019 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50020 #undef TARGET_SCHED_REORDER
50021 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
50022 #undef TARGET_SCHED_ADJUST_PRIORITY
50023 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50024 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50025 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50026 ix86_dependencies_evaluation_hook
50029 /* Implementation of reassociation_width target hook used by
50030 reassoc phase to identify parallelism level in reassociated
50031 tree. The statement's tree_code is passed in OPC. The argument's
50032 type is passed in MODE. */
50034 static int
50035 ix86_reassociation_width (unsigned int op, machine_mode mode)
50037 int width = 1;
50038 /* Vector part. */
50039 if (VECTOR_MODE_P (mode))
50041 int div = 1;
50042 if (INTEGRAL_MODE_P (mode))
50043 width = ix86_cost->reassoc_vec_int;
50044 else if (FLOAT_MODE_P (mode))
50045 width = ix86_cost->reassoc_vec_fp;
50047 if (width == 1)
50048 return 1;
50050 /* Integer vector instructions execute in FP unit
50051 and can execute 3 additions and one multiplication per cycle. */
50052 if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
50053 && op != PLUS && op != MINUS)
50054 return 1;
50056 /* Account for targets that split wide vectors into multiple parts. */
50057 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
50058 div = GET_MODE_BITSIZE (mode) / 128;
50059 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
50060 div = GET_MODE_BITSIZE (mode) / 64;
50061 width = (width + div - 1) / div;
50063 /* Scalar part. */
50064 else if (INTEGRAL_MODE_P (mode))
50065 width = ix86_cost->reassoc_int;
50066 else if (FLOAT_MODE_P (mode))
50067 width = ix86_cost->reassoc_fp;
50069 /* Avoid using too many registers in 32bit mode. */
50070 if (!TARGET_64BIT && width > 2)
50071 width = 2;
50072 return width;
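/* Illustrative example with hypothetical cost-table values: if
   reassoc_vec_fp is 4 and a 256-bit V8SFmode operation is split into two
   128-bit halves because TARGET_AVX128_OPTIMAL is set, div is 2 and the
   returned width is (4 + 2 - 1) / 2 == 2 parallel chains.  */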
50075 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50076 place emms and femms instructions. */
50078 static machine_mode
50079 ix86_preferred_simd_mode (scalar_mode mode)
50081 if (!TARGET_SSE)
50082 return word_mode;
50084 switch (mode)
50086 case E_QImode:
50087 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50088 return V64QImode;
50089 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50090 return V32QImode;
50091 else
50092 return V16QImode;
50094 case E_HImode:
50095 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50096 return V32HImode;
50097 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50098 return V16HImode;
50099 else
50100 return V8HImode;
50102 case E_SImode:
50103 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50104 return V16SImode;
50105 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50106 return V8SImode;
50107 else
50108 return V4SImode;
50110 case E_DImode:
50111 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50112 return V8DImode;
50113 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50114 return V4DImode;
50115 else
50116 return V2DImode;
50118 case E_SFmode:
50119 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50120 return V16SFmode;
50121 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50122 return V8SFmode;
50123 else
50124 return V4SFmode;
50126 case E_DFmode:
50127 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50128 return V8DFmode;
50129 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50130 return V4DFmode;
50131 else if (TARGET_SSE2)
50132 return V2DFmode;
50133 /* FALLTHRU */
50135 default:
50136 return word_mode;
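/* As an illustration of the selection above: with AVX512BW enabled (and
   no prefer-256 tuning) QImode data is vectorized in V64QImode, with
   plain AVX2 in V32QImode, with only SSE2 in V16QImode, and without SSE
   the scalar word_mode is returned.  */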
50140 /* All CPUs prefer to avoid cross-lane operations, so perform reductions
50141 of upper against lower halves down to SSE reg size. */
50143 static machine_mode
50144 ix86_split_reduction (machine_mode mode)
50146 /* Reduce lowpart against highpart until we reach SSE reg width to
50147 avoid cross-lane operations. */
50148 switch (mode)
50150 case E_V8DImode:
50151 case E_V4DImode:
50152 return V2DImode;
50153 case E_V16SImode:
50154 case E_V8SImode:
50155 return V4SImode;
50156 case E_V32HImode:
50157 case E_V16HImode:
50158 return V8HImode;
50159 case E_V64QImode:
50160 case E_V32QImode:
50161 return V16QImode;
50162 case E_V16SFmode:
50163 case E_V8SFmode:
50164 return V4SFmode;
50165 case E_V8DFmode:
50166 case E_V4DFmode:
50167 return V2DFmode;
50168 default:
50169 return mode;
50173 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50174 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50175 256bit and 128bit vectors. */
50177 static void
50178 ix86_autovectorize_vector_sizes (vector_sizes *sizes)
50180 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50182 sizes->safe_push (64);
50183 sizes->safe_push (32);
50184 sizes->safe_push (16);
50186 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50188 sizes->safe_push (32);
50189 sizes->safe_push (16);
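/* E.g. with -mavx512f (and no prefer-256 tuning) the vectorizer is
   offered the sizes 64, 32 and 16 bytes, in that order; with -mavx2 only
   32 and 16; otherwise the vector stays empty and only the preferred
   SIMD mode is tried.  */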
50193 /* Implementation of targetm.vectorize.get_mask_mode. */
50195 static opt_machine_mode
50196 ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
50198 unsigned elem_size = vector_size / nunits;
50200 /* Scalar mask case. */
50201 if ((TARGET_AVX512F && vector_size == 64)
50202 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
50204 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
50205 return smallest_int_mode_for_size (nunits);
50208 scalar_int_mode elem_mode
50209 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
50211 gcc_assert (elem_size * nunits == vector_size);
50213 return mode_for_vector (elem_mode, nunits);
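/* Two illustrative cases of the above: a 64-byte V16SImode vector under
   AVX512F gets a scalar HImode mask (one bit per lane), while a 16-byte
   V4SImode vector without AVX512VL falls through to a V4SImode vector
   mask built from SImode elements.  */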
50218 /* Return class of registers which could be used for pseudo of MODE
50219 and of class RCLASS for spilling instead of memory. Return NO_REGS
50220 if it is not possible or non-profitable. */
50222 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
50224 static reg_class_t
50225 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50227 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
50228 && TARGET_SSE2
50229 && TARGET_INTER_UNIT_MOVES_TO_VEC
50230 && TARGET_INTER_UNIT_MOVES_FROM_VEC
50231 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50232 && INTEGER_CLASS_P (rclass))
50233 return ALL_SSE_REGS;
50234 return NO_REGS;
50237 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
50238 but returns a lower bound. */
50240 static unsigned int
50241 ix86_max_noce_ifcvt_seq_cost (edge e)
50243 bool predictable_p = predictable_edge_p (e);
50245 enum compiler_param param
50246 = (predictable_p
50247 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
50248 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
50250 /* If we have a parameter set, use that, otherwise take a guess using
50251 BRANCH_COST. */
50252 if (global_options_set.x_param_values[param])
50253 return PARAM_VALUE (param);
50254 else
50255 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
50258 /* Return true if SEQ is a good candidate as a replacement for the
50259 if-convertible sequence described in IF_INFO. */
50261 static bool
50262 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
50264 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
50266 int cmov_cnt = 0;
50267 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
50268 Maybe we should allow even more conditional moves as long as they
50269 are used far enough not to stall the CPU, or also consider
50270 IF_INFO->TEST_BB succ edge probabilities. */
50271 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
50273 rtx set = single_set (insn);
50274 if (!set)
50275 continue;
50276 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
50277 continue;
50278 rtx src = SET_SRC (set);
50279 machine_mode mode = GET_MODE (src);
50280 if (GET_MODE_CLASS (mode) != MODE_INT
50281 && GET_MODE_CLASS (mode) != MODE_FLOAT)
50282 continue;
50283 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
50284 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
50285 continue;
50286 /* insn is CMOV or FCMOV. */
50287 if (++cmov_cnt > 1)
50288 return false;
50291 return default_noce_conversion_profitable_p (seq, if_info);
50294 /* Implement targetm.vectorize.init_cost. */
50296 static void *
50297 ix86_init_cost (struct loop *)
50299 unsigned *cost = XNEWVEC (unsigned, 3);
50300 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
50301 return cost;
50304 /* Implement targetm.vectorize.add_stmt_cost. */
50306 static unsigned
50307 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
50308 struct _stmt_vec_info *stmt_info, int misalign,
50309 enum vect_cost_model_location where)
50311 unsigned *cost = (unsigned *) data;
50312 unsigned retval = 0;
50314 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
50315 int stmt_cost = -1;
50317 if ((kind == vector_stmt || kind == scalar_stmt)
50318 && stmt_info
50319 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
50321 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
50322 bool fp = false;
50323 machine_mode mode = TImode;
50325 if (vectype != NULL)
50327 fp = FLOAT_TYPE_P (vectype);
50328 mode = TYPE_MODE (vectype);
50330 /*machine_mode inner_mode = mode;
50331 if (VECTOR_MODE_P (mode))
50332 inner_mode = GET_MODE_INNER (mode);*/
50334 switch (subcode)
50336 case PLUS_EXPR:
50337 case POINTER_PLUS_EXPR:
50338 case MINUS_EXPR:
50339 if (kind == scalar_stmt)
50341 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50342 stmt_cost = ix86_cost->addss;
50343 else if (X87_FLOAT_MODE_P (mode))
50344 stmt_cost = ix86_cost->fadd;
50345 else
50346 stmt_cost = ix86_cost->add;
50348 else
50349 stmt_cost = ix86_vec_cost (mode,
50350 fp ? ix86_cost->addss
50351 : ix86_cost->sse_op,
50352 true);
50353 break;
50355 case MULT_EXPR:
50356 case WIDEN_MULT_EXPR:
50357 case MULT_HIGHPART_EXPR:
50358 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
50359 break;
50360 case FMA_EXPR:
50361 stmt_cost = ix86_vec_cost (mode,
50362 mode == SFmode ? ix86_cost->fmass
50363 : ix86_cost->fmasd,
50364 true);
50365 break;
50366 case NEGATE_EXPR:
50367 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50368 stmt_cost = ix86_cost->sse_op;
50369 else if (X87_FLOAT_MODE_P (mode))
50370 stmt_cost = ix86_cost->fchs;
50371 else if (VECTOR_MODE_P (mode))
50372 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
50373 else
50374 stmt_cost = ix86_cost->add;
50375 break;
50376 case TRUNC_DIV_EXPR:
50377 case CEIL_DIV_EXPR:
50378 case FLOOR_DIV_EXPR:
50379 case ROUND_DIV_EXPR:
50380 case TRUNC_MOD_EXPR:
50381 case CEIL_MOD_EXPR:
50382 case FLOOR_MOD_EXPR:
50383 case RDIV_EXPR:
50384 case ROUND_MOD_EXPR:
50385 case EXACT_DIV_EXPR:
50386 stmt_cost = ix86_division_cost (ix86_cost, mode);
50387 break;
50389 case RSHIFT_EXPR:
50390 case LSHIFT_EXPR:
50391 case LROTATE_EXPR:
50392 case RROTATE_EXPR:
50394 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
50395 stmt_cost = ix86_shift_rotate_cost
50396 (ix86_cost, mode,
50397 TREE_CODE (op2) == INTEGER_CST,
50398 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
50399 true, false, false, NULL, NULL);
50401 break;
50402 case NOP_EXPR:
50403 stmt_cost = 0;
50404 break;
50406 case BIT_IOR_EXPR:
50407 case ABS_EXPR:
50408 case MIN_EXPR:
50409 case MAX_EXPR:
50410 case BIT_XOR_EXPR:
50411 case BIT_AND_EXPR:
50412 case BIT_NOT_EXPR:
50413 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50414 stmt_cost = ix86_cost->sse_op;
50415 else if (VECTOR_MODE_P (mode))
50416 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
50417 else
50418 stmt_cost = ix86_cost->add;
50419 break;
50420 default:
50421 break;
50424 /* If we do elementwise loads into a vector then we are bound by
50425 latency and execution resources for the many scalar loads
50426 (AGU and load ports). Try to account for this by scaling the
50427 construction cost by the number of elements involved. */
50428 if (kind == vec_construct
50429 && stmt_info
50430 && stmt_info->type == load_vec_info_type
50431 && stmt_info->memory_access_type == VMAT_ELEMENTWISE)
50433 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50434 stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype);
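/* E.g. a V8SFmode vector assembled from eight elementwise loads is
   charged eight times the base vec_construct cost instead of once.  */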
50436 if (stmt_cost == -1)
50437 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50439 /* Penalize DFmode vector operations for Bonnell. */
50440 if (TARGET_BONNELL && kind == vector_stmt
50441 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
50442 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
50444 /* Statements in an inner loop relative to the loop being
50445 vectorized are weighted more heavily. The value here is
50446 arbitrary and could potentially be improved with analysis. */
50447 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
50448 count *= 50; /* FIXME. */
50450 retval = (unsigned) (count * stmt_cost);
50452 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
50453 for Silvermont, as it has an out-of-order integer pipeline that can
50454 execute 2 scalar instructions per tick, but an in-order SIMD pipeline. */
50455 if ((TARGET_SILVERMONT || TARGET_INTEL)
50456 && stmt_info && stmt_info->stmt)
50458 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
50459 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
50460 retval = (retval * 17) / 10;
50463 cost[where] += retval;
50465 return retval;
50468 /* Implement targetm.vectorize.finish_cost. */
50470 static void
50471 ix86_finish_cost (void *data, unsigned *prologue_cost,
50472 unsigned *body_cost, unsigned *epilogue_cost)
50474 unsigned *cost = (unsigned *) data;
50475 *prologue_cost = cost[vect_prologue];
50476 *body_cost = cost[vect_body];
50477 *epilogue_cost = cost[vect_epilogue];
50480 /* Implement targetm.vectorize.destroy_cost_data. */
50482 static void
50483 ix86_destroy_cost_data (void *data)
50485 free (data);
50488 /* Validate target specific memory model bits in VAL. */
50490 static unsigned HOST_WIDE_INT
50491 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
50493 enum memmodel model = memmodel_from_int (val);
50494 bool strong;
50496 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
50497 |MEMMODEL_MASK)
50498 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
50500 warning (OPT_Winvalid_memory_model,
50501 "unknown architecture specific memory model");
50502 return MEMMODEL_SEQ_CST;
50504 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
50505 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
50507 warning (OPT_Winvalid_memory_model,
50508 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
50509 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
50511 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
50513 warning (OPT_Winvalid_memory_model,
50514 "HLE_RELEASE not used with RELEASE or stronger memory model");
50515 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
50517 return val;
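/* A typical (hypothetical) use that reaches this hook:
     __atomic_fetch_add (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
   combines a standard memory model with the target-specific HLE bit;
   pairing HLE_ACQUIRE with a model weaker than ACQUIRE is diagnosed
   above.  */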
50520 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
50521 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
50522 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
50523 or number of vecsize_mangle variants that should be emitted. */
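/* For instance, an exported (TREE_PUBLIC) "#pragma omp declare simd"
   function gets all four variants 'b', 'c', 'd' and 'e' (SSE2, AVX, AVX2
   and AVX512F mangling), while a non-exported one is cloned only for the
   best ISA enabled at compile time.  */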
50525 static int
50526 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
50527 struct cgraph_simd_clone *clonei,
50528 tree base_type, int num)
50530 int ret = 1;
50532 if (clonei->simdlen
50533 && (clonei->simdlen < 2
50534 || clonei->simdlen > 1024
50535 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
50537 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50538 "unsupported simdlen %d", clonei->simdlen);
50539 return 0;
50542 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
50543 if (TREE_CODE (ret_type) != VOID_TYPE)
50544 switch (TYPE_MODE (ret_type))
50546 case E_QImode:
50547 case E_HImode:
50548 case E_SImode:
50549 case E_DImode:
50550 case E_SFmode:
50551 case E_DFmode:
50552 /* case E_SCmode: */
50553 /* case E_DCmode: */
50554 break;
50555 default:
50556 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50557 "unsupported return type %qT for simd", ret_type);
50558 return 0;
50561 tree t;
50562 int i;
50564 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
50565 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
50566 switch (TYPE_MODE (TREE_TYPE (t)))
50568 case E_QImode:
50569 case E_HImode:
50570 case E_SImode:
50571 case E_DImode:
50572 case E_SFmode:
50573 case E_DFmode:
50574 /* case E_SCmode: */
50575 /* case E_DCmode: */
50576 break;
50577 default:
50578 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50579 "unsupported argument type %qT for simd", TREE_TYPE (t));
50580 return 0;
50583 if (!TREE_PUBLIC (node->decl))
50585 /* If the function isn't exported, we can pick up just one ISA
50586 for the clones. */
50587 if (TARGET_AVX512F)
50588 clonei->vecsize_mangle = 'e';
50589 else if (TARGET_AVX2)
50590 clonei->vecsize_mangle = 'd';
50591 else if (TARGET_AVX)
50592 clonei->vecsize_mangle = 'c';
50593 else
50594 clonei->vecsize_mangle = 'b';
50595 ret = 1;
50597 else
50599 clonei->vecsize_mangle = "bcde"[num];
50600 ret = 4;
50602 clonei->mask_mode = VOIDmode;
50603 switch (clonei->vecsize_mangle)
50605 case 'b':
50606 clonei->vecsize_int = 128;
50607 clonei->vecsize_float = 128;
50608 break;
50609 case 'c':
50610 clonei->vecsize_int = 128;
50611 clonei->vecsize_float = 256;
50612 break;
50613 case 'd':
50614 clonei->vecsize_int = 256;
50615 clonei->vecsize_float = 256;
50616 break;
50617 case 'e':
50618 clonei->vecsize_int = 512;
50619 clonei->vecsize_float = 512;
50620 if (TYPE_MODE (base_type) == QImode)
50621 clonei->mask_mode = DImode;
50622 else
50623 clonei->mask_mode = SImode;
50624 break;
50626 if (clonei->simdlen == 0)
50628 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
50629 clonei->simdlen = clonei->vecsize_int;
50630 else
50631 clonei->simdlen = clonei->vecsize_float;
50632 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
50634 else if (clonei->simdlen > 16)
50636 /* For compatibility with ICC, use the same upper bounds
50637 for simdlen. In particular, for CTYPE below, use the return type,
50638 unless the function returns void, in which case use the characteristic
50639 type. If it is possible for the given SIMDLEN to pass CTYPE values
50640 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
50641 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
50642 emit the corresponding clone. */
50643 tree ctype = ret_type;
50644 if (TREE_CODE (ret_type) == VOID_TYPE)
50645 ctype = base_type;
50646 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
50647 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
50648 cnt /= clonei->vecsize_int;
50649 else
50650 cnt /= clonei->vecsize_float;
50651 if (cnt > (TARGET_64BIT ? 16 : 8))
50653 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50654 "unsupported simdlen %d", clonei->simdlen);
50655 return 0;
50658 return ret;
50661 /* Add target attribute to SIMD clone NODE if needed. */
50663 static void
50664 ix86_simd_clone_adjust (struct cgraph_node *node)
50666 const char *str = NULL;
50667 gcc_assert (node->decl == cfun->decl);
50668 switch (node->simdclone->vecsize_mangle)
50670 case 'b':
50671 if (!TARGET_SSE2)
50672 str = "sse2";
50673 break;
50674 case 'c':
50675 if (!TARGET_AVX)
50676 str = "avx";
50677 break;
50678 case 'd':
50679 if (!TARGET_AVX2)
50680 str = "avx2";
50681 break;
50682 case 'e':
50683 if (!TARGET_AVX512F)
50684 str = "avx512f";
50685 break;
50686 default:
50687 gcc_unreachable ();
50689 if (str == NULL)
50690 return;
50691 push_cfun (NULL);
50692 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
50693 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
50694 gcc_assert (ok);
50695 pop_cfun ();
50696 ix86_reset_previous_fndecl ();
50697 ix86_set_current_function (node->decl);
50700 /* If SIMD clone NODE can't be used in a vectorized loop
50701 in the current function, return -1, otherwise return the badness of using it
50702 (0 if it is most desirable from the vecsize_mangle point of view, 1 if
50703 slightly less desirable, etc.). */
50705 static int
50706 ix86_simd_clone_usable (struct cgraph_node *node)
50708 switch (node->simdclone->vecsize_mangle)
50710 case 'b':
50711 if (!TARGET_SSE2)
50712 return -1;
50713 if (!TARGET_AVX)
50714 return 0;
50715 return TARGET_AVX2 ? 2 : 1;
50716 case 'c':
50717 if (!TARGET_AVX)
50718 return -1;
50719 return TARGET_AVX2 ? 1 : 0;
50720 case 'd':
50721 if (!TARGET_AVX2)
50722 return -1;
50723 return 0;
50724 case 'e':
50725 if (!TARGET_AVX512F)
50726 return -1;
50727 return 0;
50728 default:
50729 gcc_unreachable ();
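/* For example, when the caller is compiled with -mavx2 the 'd' (AVX2)
   clone is preferred (badness 0), the 'c' (AVX) clone is usable but
   worse (1), the 'b' (SSE2) clone worse still (2), and the 'e' (AVX512F)
   clone is rejected with -1.  */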
50733 /* This function adjusts the unroll factor based on
50734 the hardware capabilities. For example, bdver3 has
50735 a loop buffer which makes unrolling of smaller
50736 loops less important. This function decides the
50737 unroll factor using the number of memory references
50738 (the value 32 is used) as a heuristic. */
50740 static unsigned
50741 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
50743 basic_block *bbs;
50744 rtx_insn *insn;
50745 unsigned i;
50746 unsigned mem_count = 0;
50748 if (!TARGET_ADJUST_UNROLL)
50749 return nunroll;
50751 /* Count the number of memory references within the loop body.
50752 This value determines the unrolling factor for bdver3 and bdver4
50753 architectures. */
50754 subrtx_iterator::array_type array;
50755 bbs = get_loop_body (loop);
50756 for (i = 0; i < loop->num_nodes; i++)
50757 FOR_BB_INSNS (bbs[i], insn)
50758 if (NONDEBUG_INSN_P (insn))
50759 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
50760 if (const_rtx x = *iter)
50761 if (MEM_P (x))
50763 machine_mode mode = GET_MODE (x);
50764 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
50765 if (n_words > 4)
50766 mem_count += 2;
50767 else
50768 mem_count += 1;
50770 free (bbs);
50772 if (mem_count && mem_count <= 32)
50773 return MIN (nunroll, 32 / mem_count);
50775 return nunroll;
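/* E.g. a loop body with four word-sized (or smaller) memory references
   is limited to an unroll factor of 32 / 4 == 8, assuming the requested
   NUNROLL was not already smaller.  */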
50779 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
50781 static bool
50782 ix86_float_exceptions_rounding_supported_p (void)
50784 /* For x87 floating point with standard excess precision handling,
50785 there is no adddf3 pattern (since x87 floating point only has
50786 XFmode operations) so the default hook implementation gets this
50787 wrong. */
50788 return TARGET_80387 || TARGET_SSE_MATH;
50791 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
50793 static void
50794 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
50796 if (!TARGET_80387 && !TARGET_SSE_MATH)
50797 return;
50798 tree exceptions_var = create_tmp_var_raw (integer_type_node);
50799 if (TARGET_80387)
50801 tree fenv_index_type = build_index_type (size_int (6));
50802 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
50803 tree fenv_var = create_tmp_var_raw (fenv_type);
50804 TREE_ADDRESSABLE (fenv_var) = 1;
50805 tree fenv_ptr = build_pointer_type (fenv_type);
50806 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
50807 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
50808 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
50809 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
50810 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
50811 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
50812 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
50813 tree hold_fnclex = build_call_expr (fnclex, 0);
50814 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
50815 NULL_TREE, NULL_TREE);
50816 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
50817 hold_fnclex);
50818 *clear = build_call_expr (fnclex, 0);
50819 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
50820 tree fnstsw_call = build_call_expr (fnstsw, 0);
50821 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
50822 sw_var, fnstsw_call);
50823 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
50824 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
50825 exceptions_var, exceptions_x87);
50826 *update = build2 (COMPOUND_EXPR, integer_type_node,
50827 sw_mod, update_mod);
50828 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
50829 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
50831 if (TARGET_SSE_MATH)
50833 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
50834 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
50835 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
50836 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
50837 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
50838 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
50839 mxcsr_orig_var, stmxcsr_hold_call);
50840 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
50841 mxcsr_orig_var,
50842 build_int_cst (unsigned_type_node, 0x1f80));
50843 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
50844 build_int_cst (unsigned_type_node, 0xffffffc0));
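/* The constants above follow the MXCSR layout: OR-ing in 0x1f80 sets all
   six exception mask bits (bits 7-12), and AND-ing with 0xffffffc0 clears
   the sticky exception flag bits (bits 0-5), so the held environment
   traps nothing and starts with no exceptions recorded.  */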
50845 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
50846 mxcsr_mod_var, hold_mod_val);
50847 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50848 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
50849 hold_assign_orig, hold_assign_mod);
50850 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
50851 ldmxcsr_hold_call);
50852 if (*hold)
50853 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
50854 else
50855 *hold = hold_all;
50856 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50857 if (*clear)
50858 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
50859 ldmxcsr_clear_call);
50860 else
50861 *clear = ldmxcsr_clear_call;
50862 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
50863 tree exceptions_sse = fold_convert (integer_type_node,
50864 stxmcsr_update_call);
50865 if (*update)
50867 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
50868 exceptions_var, exceptions_sse);
50869 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
50870 exceptions_var, exceptions_mod);
50871 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
50872 exceptions_assign);
50874 else
50875 *update = build2 (MODIFY_EXPR, integer_type_node,
50876 exceptions_var, exceptions_sse);
50877 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
50878 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50879 ldmxcsr_update_call);
50881 tree atomic_feraiseexcept
50882 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
50883 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
50884 1, exceptions_var);
50885 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50886 atomic_feraiseexcept_call);
50889 /* Return mode to be used for bounds or VOIDmode
50890 if bounds are not supported. */
50892 static machine_mode
50893 ix86_mpx_bound_mode ()
50895 /* Do not support pointer checker if MPX
50896 is not enabled. */
50897 if (!TARGET_MPX)
50899 if (flag_check_pointer_bounds)
50900 warning (0, "Pointer Checker requires MPX support on this target."
50901 " Use -mmpx options to enable MPX.");
50902 return VOIDmode;
50905 return BNDmode;
50908 /* Return constant used to statically initialize constant bounds.
50910 This function is used to create special bound values. For now
50911 only INIT bounds and NONE bounds are expected. More special
50912 values may be added later. */
50914 static tree
50915 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
50917 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
50918 : build_zero_cst (pointer_sized_int_node);
50919 tree high = ub ? build_zero_cst (pointer_sized_int_node)
50920 : build_minus_one_cst (pointer_sized_int_node);
50922 /* This function is supposed to be used to create INIT and
50923 NONE bounds only. */
50924 gcc_assert ((lb == 0 && ub == -1)
50925 || (lb == -1 && ub == 0));
50927 return build_complex (NULL, low, high);
50930 /* Generate a list of statements STMTS to initialize pointer bounds
50931 variable VAR with bounds LB and UB. Return the number of generated
50932 statements. */
50934 static int
50935 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
50937 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
50938 tree lhs, modify, var_p;
50940 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
50941 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
50943 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
50944 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
50945 append_to_statement_list (modify, stmts);
50947 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
50948 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
50949 TYPE_SIZE_UNIT (pointer_sized_int_node)));
50950 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
50951 append_to_statement_list (modify, stmts);
50953 return 2;
50956 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
50957 /* For i386, a common symbol is local only for non-PIE binaries. For
50958 x86-64, a common symbol is local only for non-PIE binaries or when the
50959 linker supports copy relocs in PIE binaries. */
50961 static bool
50962 ix86_binds_local_p (const_tree exp)
50964 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
50965 (!flag_pic
50966 || (TARGET_64BIT
50967 && HAVE_LD_PIE_COPYRELOC != 0)));
50969 #endif
50971 /* If MEM is in the form of [base+offset], extract the two parts
50972 of the address into BASE and OFFSET, otherwise return false. */
50974 static bool
50975 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
50977 rtx addr;
50979 gcc_assert (MEM_P (mem));
50981 addr = XEXP (mem, 0);
50983 if (GET_CODE (addr) == CONST)
50984 addr = XEXP (addr, 0);
50986 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
50988 *base = addr;
50989 *offset = const0_rtx;
50990 return true;
50993 if (GET_CODE (addr) == PLUS
50994 && (REG_P (XEXP (addr, 0))
50995 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
50996 && CONST_INT_P (XEXP (addr, 1)))
50998 *base = XEXP (addr, 0);
50999 *offset = XEXP (addr, 1);
51000 return true;
51003 return false;
51006 /* Given OPERANDS of consecutive load/store, check if we can merge
51007 them into move multiple. LOAD is true if they are load instructions.
51008 MODE is the mode of memory operands. */
51010 bool
51011 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51012 machine_mode mode)
51014 HOST_WIDE_INT offval_1, offval_2, msize;
51015 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51017 if (load)
51019 mem_1 = operands[1];
51020 mem_2 = operands[3];
51021 reg_1 = operands[0];
51022 reg_2 = operands[2];
51024 else
51026 mem_1 = operands[0];
51027 mem_2 = operands[2];
51028 reg_1 = operands[1];
51029 reg_2 = operands[3];
51032 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51034 if (REGNO (reg_1) != REGNO (reg_2))
51035 return false;
51037 /* Check if the addresses are in the form of [base+offset]. */
51038 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51039 return false;
51040 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51041 return false;
51043 /* Check if the bases are the same. */
51044 if (!rtx_equal_p (base_1, base_2))
51045 return false;
51047 offval_1 = INTVAL (offset_1);
51048 offval_2 = INTVAL (offset_2);
51049 msize = GET_MODE_SIZE (mode);
51050 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51051 if (offval_1 + msize != offval_2)
51052 return false;
51054 return true;
51057 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
51059 static bool
51060 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
51061 optimization_type opt_type)
51063 switch (op)
51065 case asin_optab:
51066 case acos_optab:
51067 case log1p_optab:
51068 case exp_optab:
51069 case exp10_optab:
51070 case exp2_optab:
51071 case expm1_optab:
51072 case ldexp_optab:
51073 case scalb_optab:
51074 case round_optab:
51075 return opt_type == OPTIMIZE_FOR_SPEED;
51077 case rint_optab:
51078 if (SSE_FLOAT_MODE_P (mode1)
51079 && TARGET_SSE_MATH
51080 && !flag_trapping_math
51081 && !TARGET_SSE4_1)
51082 return opt_type == OPTIMIZE_FOR_SPEED;
51083 return true;
51085 case floor_optab:
51086 case ceil_optab:
51087 case btrunc_optab:
51088 if (SSE_FLOAT_MODE_P (mode1)
51089 && TARGET_SSE_MATH
51090 && !flag_trapping_math
51091 && TARGET_SSE4_1)
51092 return true;
51093 return opt_type == OPTIMIZE_FOR_SPEED;
51095 case rsqrt_optab:
51096 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
51098 default:
51099 return true;
51103 /* Address space support.
51105 This is not "far pointers" in the 16-bit sense, but an easy way
51106 to use %fs and %gs segment prefixes. Therefore:
51108 (a) All address spaces have the same modes,
51109 (b) All address spaces have the same address forms,
51110 (c) While %fs and %gs are technically subsets of the generic
51111 address space, they are probably not subsets of each other.
51112 (d) Since we have no access to the segment base register values
51113 without resorting to a system call, we cannot convert a
51114 non-default address space to a default address space.
51115 Therefore we do not claim %fs or %gs are subsets of generic.
51117 Therefore we can (mostly) use the default hooks. */
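/* A small usage sketch, assuming the __seg_gs named address space
   extension:

     int __seg_gs *p = (int __seg_gs *) 0x10;
     int v = *p;

   loads from %gs:0x10, which is why address 0 (and other small offsets)
   must be treated as valid below.  */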
51119 /* All use of segmentation is assumed to make address 0 valid. */
51121 static bool
51122 ix86_addr_space_zero_address_valid (addr_space_t as)
51124 return as != ADDR_SPACE_GENERIC;
51127 static void
51128 ix86_init_libfuncs (void)
51130 if (TARGET_64BIT)
51132 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
51133 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
51135 else
51137 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
51138 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
51141 #if TARGET_MACHO
51142 darwin_rename_builtins ();
51143 #endif
51146 /* Generate a call to the divmod libfunc LIBFUNC (e.g. __divmoddi4). */
51148 static void
51149 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
51150 rtx op0, rtx op1,
51151 rtx *quot_p, rtx *rem_p)
51153 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
51155 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
51156 mode,
51157 op0, GET_MODE (op0),
51158 op1, GET_MODE (op1),
51159 XEXP (rem, 0), Pmode);
51160 *quot_p = quot;
51161 *rem_p = rem;
51164 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
51165 FPU, assume that the fpcw is set to extended precision; when using
51166 only SSE, rounding is correct; when using both SSE and the FPU,
51167 the rounding precision is indeterminate, since either may be chosen
51168 apparently at random. */
51170 static enum flt_eval_method
51171 ix86_excess_precision (enum excess_precision_type type)
51173 switch (type)
51175 case EXCESS_PRECISION_TYPE_FAST:
51176 /* The fastest type to promote to will always be the native type,
51177 whether that occurs with implicit excess precision or
51178 otherwise. */
51179 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51180 case EXCESS_PRECISION_TYPE_STANDARD:
51181 case EXCESS_PRECISION_TYPE_IMPLICIT:
51182 /* Otherwise, the excess precision we want when we are
51183 in a standards compliant mode, and the implicit precision we
51184 provide would be identical were it not for the unpredictable
51185 cases. */
51186 if (!TARGET_80387)
51187 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51188 else if (!TARGET_MIX_SSE_I387)
51190 if (!TARGET_SSE_MATH)
51191 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
51192 else if (TARGET_SSE2)
51193 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51196 /* If we are in standards compliant mode, but we know we will
51197 calculate in unpredictable precision, return
51198 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
51199 excess precision if the target can't guarantee it will honor
51200 it. */
51201 return (type == EXCESS_PRECISION_TYPE_STANDARD
51202 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
51203 : FLT_EVAL_METHOD_UNPREDICTABLE);
51204 default:
51205 gcc_unreachable ();
51208 return FLT_EVAL_METHOD_UNPREDICTABLE;
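/* Concretely: -mfpmath=387 yields promotion to long double (x87 excess
   precision), -mfpmath=sse with SSE2 yields no promotion beyond float,
   and -mfpmath=both in a standards-compliant mode falls back to
   promoting to float rather than reporting an unpredictable method.  */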
51211 /* Implement PUSH_ROUNDING. On the 386, we have a pushw instruction that
51212 decrements by exactly 2 no matter what the position was; there is no pushb.
51214 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
51215 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
51216 are in multiples of 4 for 32-bit targets and 8 for 64-bit targets. */
51218 poly_int64
51219 ix86_push_rounding (poly_int64 bytes)
51221 return ROUND_UP (bytes, UNITS_PER_WORD);
51224 /* Target-specific selftests. */
51226 #if CHECKING_P
51228 namespace selftest {
51230 /* Verify that hard regs are dumped as expected (in compact mode). */
51232 static void
51233 ix86_test_dumping_hard_regs ()
51235 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
51236 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
51239 /* Test dumping an insn with repeated references to the same SCRATCH,
51240 to verify the rtx_reuse code. */
51242 static void
51243 ix86_test_dumping_memory_blockage ()
51245 set_new_first_and_last_insn (NULL, NULL);
51247 rtx pat = gen_memory_blockage ();
51248 rtx_reuse_manager r;
51249 r.preprocess (pat);
51251 /* Verify that the repeated references to the SCRATCH show use of
51252 reuse IDs. The first should be prefixed with a reuse ID,
51253 and the second should be dumped as a "reuse_rtx" of that ID.
51254 The expected string assumes Pmode == DImode. */
51255 if (Pmode == DImode)
51256 ASSERT_RTL_DUMP_EQ_WITH_REUSE
51257 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
51258 " (unspec:BLK [\n"
51259 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
51260 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
51263 /* Verify loading an RTL dump; specifically a dump of copying
51264 a param on x86_64 from a hard reg into the frame.
51265 This test is target-specific since the dump contains target-specific
51266 hard reg names. */
51268 static void
51269 ix86_test_loading_dump_fragment_1 ()
51271 rtl_dump_test t (SELFTEST_LOCATION,
51272 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
51274 rtx_insn *insn = get_insn_by_uid (1);
51276 /* The block structure and indentation here is purely for
51277 readability; it mirrors the structure of the rtx. */
51278 tree mem_expr;
51280 rtx pat = PATTERN (insn);
51281 ASSERT_EQ (SET, GET_CODE (pat));
51283 rtx dest = SET_DEST (pat);
51284 ASSERT_EQ (MEM, GET_CODE (dest));
51285 /* Verify the "/c" was parsed. */
51286 ASSERT_TRUE (RTX_FLAG (dest, call));
51287 ASSERT_EQ (SImode, GET_MODE (dest));
51289 rtx addr = XEXP (dest, 0);
51290 ASSERT_EQ (PLUS, GET_CODE (addr));
51291 ASSERT_EQ (DImode, GET_MODE (addr));
51293 rtx lhs = XEXP (addr, 0);
51294 /* Verify that the "frame" REG was consolidated. */
51295 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
51298 rtx rhs = XEXP (addr, 1);
51299 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
51300 ASSERT_EQ (-4, INTVAL (rhs));
51303 /* Verify the "[1 i+0 S4 A32]" was parsed. */
51304 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
51305 /* "i" should have been handled by synthesizing a global int
51306 variable named "i". */
51307 mem_expr = MEM_EXPR (dest);
51308 ASSERT_NE (mem_expr, NULL);
51309 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
51310 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
51311 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
51312 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
51313 /* "+0". */
51314 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
51315 ASSERT_EQ (0, MEM_OFFSET (dest));
51316 /* "S4". */
51317 ASSERT_EQ (4, MEM_SIZE (dest));
51318 /* "A32. */
51319 ASSERT_EQ (32, MEM_ALIGN (dest));
51322 rtx src = SET_SRC (pat);
51323 ASSERT_EQ (REG, GET_CODE (src));
51324 ASSERT_EQ (SImode, GET_MODE (src));
51325 ASSERT_EQ (5, REGNO (src));
51326 tree reg_expr = REG_EXPR (src);
51327 /* "i" here should point to the same var as for the MEM_EXPR. */
51328 ASSERT_EQ (reg_expr, mem_expr);
51333 /* Verify that the RTL loader copes with a call_insn dump.
51334 This test is target-specific since the dump contains a target-specific
51335 hard reg name. */
51337 static void
51338 ix86_test_loading_call_insn ()
51340 /* The test dump includes register "xmm0", which requires TARGET_SSE
51341 to exist. */
51342 if (!TARGET_SSE)
51343 return;
51345 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
51347 rtx_insn *insn = get_insns ();
51348 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
51350 /* "/j". */
51351 ASSERT_TRUE (RTX_FLAG (insn, jump));
51353 rtx pat = PATTERN (insn);
51354 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
51356 /* Verify REG_NOTES. */
51358 /* "(expr_list:REG_CALL_DECL". */
51359 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
51360 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
51361 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
51363 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
51364 rtx_expr_list *note1 = note0->next ();
51365 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
51367 ASSERT_EQ (NULL, note1->next ());
51370 /* Verify CALL_INSN_FUNCTION_USAGE. */
51372 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
51373 rtx_expr_list *usage
51374 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
51375 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
51376 ASSERT_EQ (DFmode, GET_MODE (usage));
51377 ASSERT_EQ (USE, GET_CODE (usage->element ()));
51378 ASSERT_EQ (NULL, usage->next ());
51382 /* Verify that the RTL loader copes with a dump from print_rtx_function.
51383 This test is target-specific since the dump contains target-specific
51384 hard reg names. */
51386 static void
51387 ix86_test_loading_full_dump ()
51389 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
51391 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
51393 rtx_insn *insn_1 = get_insn_by_uid (1);
51394 ASSERT_EQ (NOTE, GET_CODE (insn_1));
51396 rtx_insn *insn_7 = get_insn_by_uid (7);
51397 ASSERT_EQ (INSN, GET_CODE (insn_7));
51398 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
51400 rtx_insn *insn_15 = get_insn_by_uid (15);
51401 ASSERT_EQ (INSN, GET_CODE (insn_15));
51402 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
51404 /* Verify crtl->return_rtx. */
51405 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
51406 ASSERT_EQ (0, REGNO (crtl->return_rtx));
51407 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
51410 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
51411 In particular, verify that it correctly loads the 2nd operand.
51412 This test is target-specific since these are machine-specific
51413 operands (and enums). */
51415 static void
51416 ix86_test_loading_unspec ()
51418 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
51420 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
51422 ASSERT_TRUE (cfun);
51424 /* Test of an UNSPEC. */
51425 rtx_insn *insn = get_insns ();
51426 ASSERT_EQ (INSN, GET_CODE (insn));
51427 rtx set = single_set (insn);
51428 ASSERT_NE (NULL, set);
51429 rtx dst = SET_DEST (set);
51430 ASSERT_EQ (MEM, GET_CODE (dst));
51431 rtx src = SET_SRC (set);
51432 ASSERT_EQ (UNSPEC, GET_CODE (src));
51433 ASSERT_EQ (BLKmode, GET_MODE (src));
51434 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
51436 rtx v0 = XVECEXP (src, 0, 0);
51438 /* Verify that the two uses of the first SCRATCH have pointer
51439 equality. */
51440 rtx scratch_a = XEXP (dst, 0);
51441 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
51443 rtx scratch_b = XEXP (v0, 0);
51444 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
51446 ASSERT_EQ (scratch_a, scratch_b);
51448 /* Verify that the two mems are thus treated as equal. */
51449 ASSERT_TRUE (rtx_equal_p (dst, v0));
51451 /* Verify that the insn is recognized. */
51452 ASSERT_NE (-1, recog_memoized (insn));
51454 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
51455 insn = NEXT_INSN (insn);
51456 ASSERT_EQ (INSN, GET_CODE (insn));
51458 set = single_set (insn);
51459 ASSERT_NE (NULL, set);
51461 src = SET_SRC (set);
51462 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
51463 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
51466 /* Run all target-specific selftests. */
51468 static void
51469 ix86_run_selftests (void)
51471 ix86_test_dumping_hard_regs ();
51472 ix86_test_dumping_memory_blockage ();
51474 /* Various tests of loading RTL dumps, here because they contain
51475 ix86-isms (e.g. names of hard regs). */
51476 ix86_test_loading_dump_fragment_1 ();
51477 ix86_test_loading_call_insn ();
51478 ix86_test_loading_full_dump ();
51479 ix86_test_loading_unspec ();
51482 } // namespace selftest
51484 #endif /* CHECKING_P */
51486 /* Initialize the GCC target structure. */
51487 #undef TARGET_RETURN_IN_MEMORY
51488 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51490 #undef TARGET_LEGITIMIZE_ADDRESS
51491 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51493 #undef TARGET_ATTRIBUTE_TABLE
51494 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51495 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51496 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51497 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51498 # undef TARGET_MERGE_DECL_ATTRIBUTES
51499 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51500 #endif
51502 #undef TARGET_COMP_TYPE_ATTRIBUTES
51503 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51505 #undef TARGET_INIT_BUILTINS
51506 #define TARGET_INIT_BUILTINS ix86_init_builtins
51507 #undef TARGET_BUILTIN_DECL
51508 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51509 #undef TARGET_EXPAND_BUILTIN
51510 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51512 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51513 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51514 ix86_builtin_vectorized_function
51516 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51517 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51519 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
51520 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
51522 #undef TARGET_BUILTIN_RECIPROCAL
51523 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51525 #undef TARGET_ASM_FUNCTION_EPILOGUE
51526 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51528 #undef TARGET_ENCODE_SECTION_INFO
51529 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51530 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51531 #else
51532 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51533 #endif
51535 #undef TARGET_ASM_OPEN_PAREN
51536 #define TARGET_ASM_OPEN_PAREN ""
51537 #undef TARGET_ASM_CLOSE_PAREN
51538 #define TARGET_ASM_CLOSE_PAREN ""
51540 #undef TARGET_ASM_BYTE_OP
51541 #define TARGET_ASM_BYTE_OP ASM_BYTE
51543 #undef TARGET_ASM_ALIGNED_HI_OP
51544 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51545 #undef TARGET_ASM_ALIGNED_SI_OP
51546 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51547 #ifdef ASM_QUAD
51548 #undef TARGET_ASM_ALIGNED_DI_OP
51549 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51550 #endif
51552 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51553 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51555 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51556 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51558 #undef TARGET_ASM_UNALIGNED_HI_OP
51559 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51560 #undef TARGET_ASM_UNALIGNED_SI_OP
51561 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51562 #undef TARGET_ASM_UNALIGNED_DI_OP
51563 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51565 #undef TARGET_PRINT_OPERAND
51566 #define TARGET_PRINT_OPERAND ix86_print_operand
51567 #undef TARGET_PRINT_OPERAND_ADDRESS
51568 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51569 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51570 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51571 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51572 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51574 #undef TARGET_SCHED_INIT_GLOBAL
51575 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51576 #undef TARGET_SCHED_ADJUST_COST
51577 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51578 #undef TARGET_SCHED_ISSUE_RATE
51579 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51580 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51581 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51582 ia32_multipass_dfa_lookahead
51583 #undef TARGET_SCHED_MACRO_FUSION_P
51584 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51585 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51586 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51588 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51589 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51591 #undef TARGET_MEMMODEL_CHECK
51592 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51594 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51595 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51597 #ifdef HAVE_AS_TLS
51598 #undef TARGET_HAVE_TLS
51599 #define TARGET_HAVE_TLS true
51600 #endif
51601 #undef TARGET_CANNOT_FORCE_CONST_MEM
51602 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51603 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51604 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51606 #undef TARGET_DELEGITIMIZE_ADDRESS
51607 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51609 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
51610 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
51612 #undef TARGET_MS_BITFIELD_LAYOUT_P
51613 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51615 #if TARGET_MACHO
51616 #undef TARGET_BINDS_LOCAL_P
51617 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51618 #else
51619 #undef TARGET_BINDS_LOCAL_P
51620 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51621 #endif
51622 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51623 #undef TARGET_BINDS_LOCAL_P
51624 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51625 #endif
51627 #undef TARGET_ASM_OUTPUT_MI_THUNK
51628 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51629 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51630 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51632 #undef TARGET_ASM_FILE_START
51633 #define TARGET_ASM_FILE_START x86_file_start
51635 #undef TARGET_OPTION_OVERRIDE
51636 #define TARGET_OPTION_OVERRIDE ix86_option_override
51638 #undef TARGET_REGISTER_MOVE_COST
51639 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51640 #undef TARGET_MEMORY_MOVE_COST
51641 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51642 #undef TARGET_RTX_COSTS
51643 #define TARGET_RTX_COSTS ix86_rtx_costs
51644 #undef TARGET_ADDRESS_COST
51645 #define TARGET_ADDRESS_COST ix86_address_cost
51647 #undef TARGET_FLAGS_REGNUM
51648 #define TARGET_FLAGS_REGNUM FLAGS_REG
51649 #undef TARGET_FIXED_CONDITION_CODE_REGS
51650 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51651 #undef TARGET_CC_MODES_COMPATIBLE
51652 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51654 #undef TARGET_MACHINE_DEPENDENT_REORG
51655 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51657 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51658 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51660 #undef TARGET_BUILD_BUILTIN_VA_LIST
51661 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51663 #undef TARGET_FOLD_BUILTIN
51664 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51666 #undef TARGET_GIMPLE_FOLD_BUILTIN
51667 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
51669 #undef TARGET_COMPARE_VERSION_PRIORITY
51670 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51672 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51673 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51674 ix86_generate_version_dispatcher_body
51676 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51677 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51678 ix86_get_function_versions_dispatcher
51680 #undef TARGET_ENUM_VA_LIST_P
51681 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51683 #undef TARGET_FN_ABI_VA_LIST
51684 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51686 #undef TARGET_CANONICAL_VA_LIST_TYPE
51687 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51689 #undef TARGET_EXPAND_BUILTIN_VA_START
51690 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51692 #undef TARGET_MD_ASM_ADJUST
51693 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
51695 #undef TARGET_C_EXCESS_PRECISION
51696 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
51697 #undef TARGET_PROMOTE_PROTOTYPES
51698 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51699 #undef TARGET_SETUP_INCOMING_VARARGS
51700 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51701 #undef TARGET_MUST_PASS_IN_STACK
51702 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51703 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
51704 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
51705 #undef TARGET_FUNCTION_ARG_ADVANCE
51706 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51707 #undef TARGET_FUNCTION_ARG
51708 #define TARGET_FUNCTION_ARG ix86_function_arg
51709 #undef TARGET_INIT_PIC_REG
51710 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51711 #undef TARGET_USE_PSEUDO_PIC_REG
51712 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51713 #undef TARGET_FUNCTION_ARG_BOUNDARY
51714 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51715 #undef TARGET_PASS_BY_REFERENCE
51716 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51717 #undef TARGET_INTERNAL_ARG_POINTER
51718 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51719 #undef TARGET_UPDATE_STACK_BOUNDARY
51720 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51721 #undef TARGET_GET_DRAP_RTX
51722 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51723 #undef TARGET_STRICT_ARGUMENT_NAMING
51724 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51725 #undef TARGET_STATIC_CHAIN
51726 #define TARGET_STATIC_CHAIN ix86_static_chain
51727 #undef TARGET_TRAMPOLINE_INIT
51728 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51729 #undef TARGET_RETURN_POPS_ARGS
51730 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51732 #undef TARGET_WARN_FUNC_RETURN
51733 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
51735 #undef TARGET_LEGITIMATE_COMBINED_INSN
51736 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51738 #undef TARGET_ASAN_SHADOW_OFFSET
51739 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51741 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51742 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51744 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51745 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51747 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51748 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51750 #undef TARGET_C_MODE_FOR_SUFFIX
51751 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51753 #ifdef HAVE_AS_TLS
51754 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51755 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51756 #endif
51758 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51759 #undef TARGET_INSERT_ATTRIBUTES
51760 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51761 #endif
51763 #undef TARGET_MANGLE_TYPE
51764 #define TARGET_MANGLE_TYPE ix86_mangle_type
51766 #undef TARGET_STACK_PROTECT_GUARD
51767 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
51769 #if !TARGET_MACHO
51770 #undef TARGET_STACK_PROTECT_FAIL
51771 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51772 #endif
51774 #undef TARGET_FUNCTION_VALUE
51775 #define TARGET_FUNCTION_VALUE ix86_function_value
51777 #undef TARGET_FUNCTION_VALUE_REGNO_P
51778 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51780 #undef TARGET_PROMOTE_FUNCTION_MODE
51781 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51783 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
51784 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
51786 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51787 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51789 #undef TARGET_INSTANTIATE_DECLS
51790 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51792 #undef TARGET_SECONDARY_RELOAD
51793 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51794 #undef TARGET_SECONDARY_MEMORY_NEEDED
51795 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
51796 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
51797 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
51799 #undef TARGET_CLASS_MAX_NREGS
51800 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51802 #undef TARGET_PREFERRED_RELOAD_CLASS
51803 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51804 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51805 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51806 #undef TARGET_CLASS_LIKELY_SPILLED_P
51807 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
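/* Vectorizer hooks: cost model, constant-permutation support, and
   preferred SIMD modes/sizes for the x86 auto-vectorizer.  */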
51809 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51810 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51811 ix86_builtin_vectorization_cost
51812 #undef TARGET_VECTORIZE_VEC_PERM_CONST
51813 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
51814 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51815 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51816 ix86_preferred_simd_mode
51817 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
51818 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
51819 ix86_split_reduction
51820 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51821 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51822 ix86_autovectorize_vector_sizes
51823 #undef TARGET_VECTORIZE_GET_MASK_MODE
51824 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
51825 #undef TARGET_VECTORIZE_INIT_COST
51826 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51827 #undef TARGET_VECTORIZE_ADD_STMT_COST
51828 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51829 #undef TARGET_VECTORIZE_FINISH_COST
51830 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51831 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51832 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51834 #undef TARGET_SET_CURRENT_FUNCTION
51835 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51837 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51838 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51840 #undef TARGET_OPTION_SAVE
51841 #define TARGET_OPTION_SAVE ix86_function_specific_save
51843 #undef TARGET_OPTION_RESTORE
51844 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51846 #undef TARGET_OPTION_POST_STREAM_IN
51847 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
51849 #undef TARGET_OPTION_PRINT
51850 #define TARGET_OPTION_PRINT ix86_function_specific_print
51852 #undef TARGET_OPTION_FUNCTION_VERSIONS
51853 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
51855 #undef TARGET_CAN_INLINE_P
51856 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51858 #undef TARGET_LEGITIMATE_ADDRESS_P
51859 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51861 #undef TARGET_REGISTER_PRIORITY
51862 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51864 #undef TARGET_REGISTER_USAGE_LEVELING_P
51865 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51867 #undef TARGET_LEGITIMATE_CONSTANT_P
51868 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51870 #undef TARGET_COMPUTE_FRAME_LAYOUT
51871 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
51873 #undef TARGET_FRAME_POINTER_REQUIRED
51874 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51876 #undef TARGET_CAN_ELIMINATE
51877 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51879 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51880 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51882 #undef TARGET_ASM_CODE_END
51883 #define TARGET_ASM_CODE_END ix86_code_end
51885 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51886 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51888 #undef TARGET_CANONICALIZE_COMPARISON
51889 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
51891 #undef TARGET_LOOP_UNROLL_ADJUST
51892 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51894 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
51895 #undef TARGET_SPILL_CLASS
51896 #define TARGET_SPILL_CLASS ix86_spill_class
51898 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51899 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51900 ix86_simd_clone_compute_vecsize_and_simdlen
51902 #undef TARGET_SIMD_CLONE_ADJUST
51903 #define TARGET_SIMD_CLONE_ADJUST \
51904 ix86_simd_clone_adjust
51906 #undef TARGET_SIMD_CLONE_USABLE
51907 #define TARGET_SIMD_CLONE_USABLE \
51908 ix86_simd_clone_usable
51910 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51911 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51912 ix86_float_exceptions_rounding_supported_p
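/* Hooks for the optimize-mode-switching pass.  */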
51914 #undef TARGET_MODE_EMIT
51915 #define TARGET_MODE_EMIT ix86_emit_mode_set
51917 #undef TARGET_MODE_NEEDED
51918 #define TARGET_MODE_NEEDED ix86_mode_needed
51920 #undef TARGET_MODE_AFTER
51921 #define TARGET_MODE_AFTER ix86_mode_after
51923 #undef TARGET_MODE_ENTRY
51924 #define TARGET_MODE_ENTRY ix86_mode_entry
51926 #undef TARGET_MODE_EXIT
51927 #define TARGET_MODE_EXIT ix86_mode_exit
51929 #undef TARGET_MODE_PRIORITY
51930 #define TARGET_MODE_PRIORITY ix86_mode_priority
51932 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51933 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
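/* Pointer Bounds Checker (Intel MPX) hooks.  */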
51935 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51936 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51938 #undef TARGET_STORE_BOUNDS_FOR_ARG
51939 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51941 #undef TARGET_LOAD_RETURNED_BOUNDS
51942 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51944 #undef TARGET_STORE_RETURNED_BOUNDS
51945 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51947 #undef TARGET_CHKP_BOUND_MODE
51948 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51950 #undef TARGET_BUILTIN_CHKP_FUNCTION
51951 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
51953 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
51954 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
51956 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
51957 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
51959 #undef TARGET_CHKP_INITIALIZE_BOUNDS
51960 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
51962 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
51963 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
51965 #undef TARGET_OFFLOAD_OPTIONS
51966 #define TARGET_OFFLOAD_OPTIONS \
51967 ix86_offload_options
51969 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
51970 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
51972 #undef TARGET_OPTAB_SUPPORTED_P
51973 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
51975 #undef TARGET_HARD_REGNO_SCRATCH_OK
51976 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
51978 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
51979 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
51981 #undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
51982 #define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
51984 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
51985 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
51987 #undef TARGET_INIT_LIBFUNCS
51988 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
51990 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
51991 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
51993 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
51994 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
51996 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
51997 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
51999 #undef TARGET_HARD_REGNO_NREGS
52000 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
52001 #undef TARGET_HARD_REGNO_MODE_OK
52002 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
52004 #undef TARGET_MODES_TIEABLE_P
52005 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
52007 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
52008 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
52009 ix86_hard_regno_call_part_clobbered
52011 #undef TARGET_CAN_CHANGE_MODE_CLASS
52012 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
52014 #undef TARGET_STATIC_RTX_ALIGNMENT
52015 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
52016 #undef TARGET_CONSTANT_ALIGNMENT
52017 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
52019 #undef TARGET_EMPTY_RECORD_P
52020 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
52022 #undef TARGET_WARN_PARAMETER_PASSING_ABI
52023 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
52025 #if CHECKING_P
52026 #undef TARGET_RUN_TARGET_SELFTESTS
52027 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
52028 #endif /* #if CHECKING_P */
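/* Build the i386 target hook vector.  TARGET_INITIALIZER (from
   target-def.h) expands here, so each TARGET_* macro defined above
   overrides the corresponding default hook.  */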
52030 struct gcc_target targetm = TARGET_INITIALIZER;
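/* Garbage-collector root tables for this file, generated by gengtype.  */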
52032 #include "gt-i386.h"