IA MCU psABI support: GCC changes
[official-gcc.git] / gcc / config / i386 / i386.c
blob: 7d26e8c2bd00df18fc916619615d605ec85e59e2
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "alias.h"
26 #include "symtab.h"
27 #include "tree.h"
28 #include "fold-const.h"
29 #include "stringpool.h"
30 #include "attribs.h"
31 #include "calls.h"
32 #include "stor-layout.h"
33 #include "varasm.h"
34 #include "tm_p.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-codes.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "except.h"
44 #include "function.h"
45 #include "recog.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "optabs.h"
53 #include "diagnostic-core.h"
54 #include "toplev.h"
55 #include "predict.h"
56 #include "dominance.h"
57 #include "cfg.h"
58 #include "cfgrtl.h"
59 #include "cfganal.h"
60 #include "lcm.h"
61 #include "cfgbuild.h"
62 #include "cfgcleanup.h"
63 #include "basic-block.h"
64 #include "target.h"
65 #include "common/common-target.h"
66 #include "langhooks.h"
67 #include "reload.h"
68 #include "cgraph.h"
69 #include "tree-ssa-alias.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
72 #include "tree-eh.h"
73 #include "gimple-expr.h"
74 #include "gimple.h"
75 #include "gimplify.h"
76 #include "cfgloop.h"
77 #include "dwarf2.h"
78 #include "df.h"
79 #include "tm-constrs.h"
80 #include "params.h"
81 #include "cselib.h"
82 #include "debug.h"
83 #include "sched-int.h"
84 #include "sbitmap.h"
85 #include "fibheap.h"
86 #include "opts.h"
87 #include "diagnostic.h"
88 #include "dumpfile.h"
89 #include "tree-pass.h"
90 #include "context.h"
91 #include "pass_manager.h"
92 #include "target-globals.h"
93 #include "tree-vectorizer.h"
94 #include "shrink-wrap.h"
95 #include "builtins.h"
96 #include "rtl-iter.h"
97 #include "tree-iterator.h"
98 #include "tree-chkp.h"
99 #include "rtl-chkp.h"
101 /* This file should be included last. */
102 #include "target-def.h"
104 static rtx legitimize_dllimport_symbol (rtx, bool);
105 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
106 static rtx legitimize_pe_coff_symbol (rtx, bool);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
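/* Illustrative sketch (added commentary, not part of the original file):
   MODE_INDEX is how the per-mode arrays in the processor_costs tables
   below are subscripted.  Assuming the usual ix86_cost pointer and the
   mult_init member of struct processor_costs, a hypothetical lookup of
   the SImode multiply-start cost would be

     int si_mult_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   i.e. QImode, HImode, SImode and DImode select slots 0-3, and any other
   mode falls through to the "other" slot 4.  */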
120 /* Processor costs (relative to an add) */
121 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
122 #define COSTS_N_BYTES(N) ((N) * 2)
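/* Worked example (added for clarity, not in the original file): with
   COSTS_N_INSNS (N) assumed, per the comment above, to expand to (N) * 4
   and an add taken to be 2 bytes, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so in ix86_size_cost below a two-byte instruction is priced the same as
   one average-cost instruction is in the speed-oriented tables.  */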
124 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
126 static stringop_algs ix86_size_memcpy[2] = {
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
129 static stringop_algs ix86_size_memset[2] = {
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
133 const
134 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
135 COSTS_N_BYTES (2), /* cost of an add instruction */
136 COSTS_N_BYTES (3), /* cost of a lea instruction */
137 COSTS_N_BYTES (2), /* variable shift costs */
138 COSTS_N_BYTES (3), /* constant shift costs */
139 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
140 COSTS_N_BYTES (3), /* HI */
141 COSTS_N_BYTES (3), /* SI */
142 COSTS_N_BYTES (3), /* DI */
143 COSTS_N_BYTES (5)}, /* other */
144 0, /* cost of multiply per each bit set */
145 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
146 COSTS_N_BYTES (3), /* HI */
147 COSTS_N_BYTES (3), /* SI */
148 COSTS_N_BYTES (3), /* DI */
149 COSTS_N_BYTES (5)}, /* other */
150 COSTS_N_BYTES (3), /* cost of movsx */
151 COSTS_N_BYTES (3), /* cost of movzx */
152 0, /* "large" insn */
153 2, /* MOVE_RATIO */
154 2, /* cost for loading QImode using movzbl */
155 {2, 2, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 2, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {2, 2, 2}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {2, 2, 2}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 3, /* cost of moving MMX register */
165 {3, 3}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {3, 3}, /* cost of storing MMX registers
168 in SImode and DImode */
169 3, /* cost of moving SSE register */
170 {3, 3, 3}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {3, 3, 3}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
179 2, /* Branch cost */
180 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
181 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
182 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
183 COSTS_N_BYTES (2), /* cost of FABS instruction. */
184 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
185 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
186 ix86_size_memcpy,
187 ix86_size_memset,
188 1, /* scalar_stmt_cost. */
189 1, /* scalar load_cost. */
190 1, /* scalar_store_cost. */
191 1, /* vec_stmt_cost. */
192 1, /* vec_to_scalar_cost. */
193 1, /* scalar_to_vec_cost. */
194 1, /* vec_align_load_cost. */
195 1, /* vec_unalign_load_cost. */
196 1, /* vec_store_cost. */
197 1, /* cond_taken_branch_cost. */
198 1, /* cond_not_taken_branch_cost. */
199 };
201 /* Processor costs (relative to an add) */
202 static stringop_algs i386_memcpy[2] = {
203 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
204 DUMMY_STRINGOP_ALGS};
205 static stringop_algs i386_memset[2] = {
206 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
207 DUMMY_STRINGOP_ALGS};
209 static const
210 struct processor_costs i386_cost = { /* 386 specific costs */
211 COSTS_N_INSNS (1), /* cost of an add instruction */
212 COSTS_N_INSNS (1), /* cost of a lea instruction */
213 COSTS_N_INSNS (3), /* variable shift costs */
214 COSTS_N_INSNS (2), /* constant shift costs */
215 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
216 COSTS_N_INSNS (6), /* HI */
217 COSTS_N_INSNS (6), /* SI */
218 COSTS_N_INSNS (6), /* DI */
219 COSTS_N_INSNS (6)}, /* other */
220 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
221 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
222 COSTS_N_INSNS (23), /* HI */
223 COSTS_N_INSNS (23), /* SI */
224 COSTS_N_INSNS (23), /* DI */
225 COSTS_N_INSNS (23)}, /* other */
226 COSTS_N_INSNS (3), /* cost of movsx */
227 COSTS_N_INSNS (2), /* cost of movzx */
228 15, /* "large" insn */
229 3, /* MOVE_RATIO */
230 4, /* cost for loading QImode using movzbl */
231 {2, 4, 2}, /* cost of loading integer registers
232 in QImode, HImode and SImode.
233 Relative to reg-reg move (2). */
234 {2, 4, 2}, /* cost of storing integer registers */
235 2, /* cost of reg,reg fld/fst */
236 {8, 8, 8}, /* cost of loading fp registers
237 in SFmode, DFmode and XFmode */
238 {8, 8, 8}, /* cost of storing fp registers
239 in SFmode, DFmode and XFmode */
240 2, /* cost of moving MMX register */
241 {4, 8}, /* cost of loading MMX registers
242 in SImode and DImode */
243 {4, 8}, /* cost of storing MMX registers
244 in SImode and DImode */
245 2, /* cost of moving SSE register */
246 {4, 8, 16}, /* cost of loading SSE registers
247 in SImode, DImode and TImode */
248 {4, 8, 16}, /* cost of storing SSE registers
249 in SImode, DImode and TImode */
250 3, /* MMX or SSE register to integer */
251 0, /* size of l1 cache */
252 0, /* size of l2 cache */
253 0, /* size of prefetch block */
254 0, /* number of parallel prefetches */
255 1, /* Branch cost */
256 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
257 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
258 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
259 COSTS_N_INSNS (22), /* cost of FABS instruction. */
260 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
261 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
262 i386_memcpy,
263 i386_memset,
264 1, /* scalar_stmt_cost. */
265 1, /* scalar load_cost. */
266 1, /* scalar_store_cost. */
267 1, /* vec_stmt_cost. */
268 1, /* vec_to_scalar_cost. */
269 1, /* scalar_to_vec_cost. */
270 1, /* vec_align_load_cost. */
271 2, /* vec_unalign_load_cost. */
272 1, /* vec_store_cost. */
273 3, /* cond_taken_branch_cost. */
274 1, /* cond_not_taken_branch_cost. */
275 };
277 static stringop_algs i486_memcpy[2] = {
278 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
279 DUMMY_STRINGOP_ALGS};
280 static stringop_algs i486_memset[2] = {
281 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
282 DUMMY_STRINGOP_ALGS};
284 static const
285 struct processor_costs i486_cost = { /* 486 specific costs */
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (3), /* variable shift costs */
289 COSTS_N_INSNS (2), /* constant shift costs */
290 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (12), /* HI */
292 COSTS_N_INSNS (12), /* SI */
293 COSTS_N_INSNS (12), /* DI */
294 COSTS_N_INSNS (12)}, /* other */
295 1, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (40), /* HI */
298 COSTS_N_INSNS (40), /* SI */
299 COSTS_N_INSNS (40), /* DI */
300 COSTS_N_INSNS (40)}, /* other */
301 COSTS_N_INSNS (3), /* cost of movsx */
302 COSTS_N_INSNS (2), /* cost of movzx */
303 15, /* "large" insn */
304 3, /* MOVE_RATIO */
305 4, /* cost for loading QImode using movzbl */
306 {2, 4, 2}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 4, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {8, 8, 8}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {8, 8, 8}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {4, 8}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {4, 8}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {4, 8, 16}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {4, 8, 16}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 4, /* size of l1 cache. 486 has 8kB cache
327 shared for code and data, so 4kB is
328 not really precise. */
329 4, /* size of l2 cache */
330 0, /* size of prefetch block */
331 0, /* number of parallel prefetches */
332 1, /* Branch cost */
333 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
334 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
335 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
336 COSTS_N_INSNS (3), /* cost of FABS instruction. */
337 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
338 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
339 i486_memcpy,
340 i486_memset,
341 1, /* scalar_stmt_cost. */
342 1, /* scalar load_cost. */
343 1, /* scalar_store_cost. */
344 1, /* vec_stmt_cost. */
345 1, /* vec_to_scalar_cost. */
346 1, /* scalar_to_vec_cost. */
347 1, /* vec_align_load_cost. */
348 2, /* vec_unalign_load_cost. */
349 1, /* vec_store_cost. */
350 3, /* cond_taken_branch_cost. */
351 1, /* cond_not_taken_branch_cost. */
352 };
354 static stringop_algs pentium_memcpy[2] = {
355 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
356 DUMMY_STRINGOP_ALGS};
357 static stringop_algs pentium_memset[2] = {
358 {libcall, {{-1, rep_prefix_4_byte, false}}},
359 DUMMY_STRINGOP_ALGS};
361 static const
362 struct processor_costs pentium_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (4), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (11), /* HI */
369 COSTS_N_INSNS (11), /* SI */
370 COSTS_N_INSNS (11), /* DI */
371 COSTS_N_INSNS (11)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (25), /* HI */
375 COSTS_N_INSNS (25), /* SI */
376 COSTS_N_INSNS (25), /* DI */
377 COSTS_N_INSNS (25)}, /* other */
378 COSTS_N_INSNS (3), /* cost of movsx */
379 COSTS_N_INSNS (2), /* cost of movzx */
380 8, /* "large" insn */
381 6, /* MOVE_RATIO */
382 6, /* cost for loading QImode using movzbl */
383 {2, 4, 2}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 4, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 8, /* cost of moving MMX register */
393 {8, 8}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {8, 8}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {4, 8, 16}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {4, 8, 16}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 8, /* size of l2 cache */
405 0, /* size of prefetch block */
406 0, /* number of parallel prefetches */
407 2, /* Branch cost */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (1), /* cost of FABS instruction. */
412 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
414 pentium_memcpy,
415 pentium_memset,
416 1, /* scalar_stmt_cost. */
417 1, /* scalar load_cost. */
418 1, /* scalar_store_cost. */
419 1, /* vec_stmt_cost. */
420 1, /* vec_to_scalar_cost. */
421 1, /* scalar_to_vec_cost. */
422 1, /* vec_align_load_cost. */
423 2, /* vec_unalign_load_cost. */
424 1, /* vec_store_cost. */
425 3, /* cond_taken_branch_cost. */
426 1, /* cond_not_taken_branch_cost. */
427 };
429 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
430 (we ensure the alignment). For small blocks an inline loop is still a
431 noticeable win; for bigger blocks either rep movsl or rep movsb is the
432 way to go. Rep movsb apparently has a more expensive startup time in the CPU,
433 but after 4K the difference is down in the noise. */
434 static stringop_algs pentiumpro_memcpy[2] = {
435 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
436 {8192, rep_prefix_4_byte, false},
437 {-1, rep_prefix_1_byte, false}}},
438 DUMMY_STRINGOP_ALGS};
439 static stringop_algs pentiumpro_memset[2] = {
440 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
441 {8192, rep_prefix_4_byte, false},
442 {-1, libcall, false}}},
443 DUMMY_STRINGOP_ALGS};
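/* How to read these tables (explanatory sketch, not part of the original
   file): each stringop_algs value pairs a default algorithm for blocks of
   unknown size with a list of {max, alg, noalign} thresholds, and the two
   array elements presumably hold the 32-bit and 64-bit tunings (with
   DUMMY_STRINGOP_ALGS filling the 64-bit slot for 32-bit-only CPUs).
   Under that reading, pentiumpro_memcpy expands a copy of at most 128
   bytes as a loop, up to 1024 bytes as an unrolled loop, up to 8192 bytes
   with rep movsl, and anything larger (the -1 sentinel) with rep movsb.  */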
444 static const
445 struct processor_costs pentiumpro_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (1), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (4), /* HI */
452 COSTS_N_INSNS (4), /* SI */
453 COSTS_N_INSNS (4), /* DI */
454 COSTS_N_INSNS (4)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (17), /* HI */
458 COSTS_N_INSNS (17), /* SI */
459 COSTS_N_INSNS (17), /* DI */
460 COSTS_N_INSNS (17)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
464 6, /* MOVE_RATIO */
465 2, /* cost for loading QImode using movzbl */
466 {4, 4, 4}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {2, 2, 2}, /* cost of storing integer registers */
470 2, /* cost of reg,reg fld/fst */
471 {2, 2, 6}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {4, 4, 6}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {2, 2}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {2, 2}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {2, 2, 8}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {2, 2, 8}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 3, /* MMX or SSE register to integer */
486 8, /* size of l1 cache. */
487 256, /* size of l2 cache */
488 32, /* size of prefetch block */
489 6, /* number of parallel prefetches */
490 2, /* Branch cost */
491 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
492 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
493 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
494 COSTS_N_INSNS (2), /* cost of FABS instruction. */
495 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
496 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
497 pentiumpro_memcpy,
498 pentiumpro_memset,
499 1, /* scalar_stmt_cost. */
500 1, /* scalar load_cost. */
501 1, /* scalar_store_cost. */
502 1, /* vec_stmt_cost. */
503 1, /* vec_to_scalar_cost. */
504 1, /* scalar_to_vec_cost. */
505 1, /* vec_align_load_cost. */
506 2, /* vec_unalign_load_cost. */
507 1, /* vec_store_cost. */
508 3, /* cond_taken_branch_cost. */
509 1, /* cond_not_taken_branch_cost. */
510 };
512 static stringop_algs geode_memcpy[2] = {
513 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
514 DUMMY_STRINGOP_ALGS};
515 static stringop_algs geode_memset[2] = {
516 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
517 DUMMY_STRINGOP_ALGS};
518 static const
519 struct processor_costs geode_cost = {
520 COSTS_N_INSNS (1), /* cost of an add instruction */
521 COSTS_N_INSNS (1), /* cost of a lea instruction */
522 COSTS_N_INSNS (2), /* variable shift costs */
523 COSTS_N_INSNS (1), /* constant shift costs */
524 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
525 COSTS_N_INSNS (4), /* HI */
526 COSTS_N_INSNS (7), /* SI */
527 COSTS_N_INSNS (7), /* DI */
528 COSTS_N_INSNS (7)}, /* other */
529 0, /* cost of multiply per each bit set */
530 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
531 COSTS_N_INSNS (23), /* HI */
532 COSTS_N_INSNS (39), /* SI */
533 COSTS_N_INSNS (39), /* DI */
534 COSTS_N_INSNS (39)}, /* other */
535 COSTS_N_INSNS (1), /* cost of movsx */
536 COSTS_N_INSNS (1), /* cost of movzx */
537 8, /* "large" insn */
538 4, /* MOVE_RATIO */
539 1, /* cost for loading QImode using movzbl */
540 {1, 1, 1}, /* cost of loading integer registers
541 in QImode, HImode and SImode.
542 Relative to reg-reg move (2). */
543 {1, 1, 1}, /* cost of storing integer registers */
544 1, /* cost of reg,reg fld/fst */
545 {1, 1, 1}, /* cost of loading fp registers
546 in SFmode, DFmode and XFmode */
547 {4, 6, 6}, /* cost of storing fp registers
548 in SFmode, DFmode and XFmode */
550 1, /* cost of moving MMX register */
551 {1, 1}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {1, 1}, /* cost of storing MMX registers
554 in SImode and DImode */
555 1, /* cost of moving SSE register */
556 {1, 1, 1}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {1, 1, 1}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 1, /* MMX or SSE register to integer */
561 64, /* size of l1 cache. */
562 128, /* size of l2 cache. */
563 32, /* size of prefetch block */
564 1, /* number of parallel prefetches */
565 1, /* Branch cost */
566 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
567 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
568 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
569 COSTS_N_INSNS (1), /* cost of FABS instruction. */
570 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
571 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
572 geode_memcpy,
573 geode_memset,
574 1, /* scalar_stmt_cost. */
575 1, /* scalar load_cost. */
576 1, /* scalar_store_cost. */
577 1, /* vec_stmt_cost. */
578 1, /* vec_to_scalar_cost. */
579 1, /* scalar_to_vec_cost. */
580 1, /* vec_align_load_cost. */
581 2, /* vec_unalign_load_cost. */
582 1, /* vec_store_cost. */
583 3, /* cond_taken_branch_cost. */
584 1, /* cond_not_taken_branch_cost. */
585 };
587 static stringop_algs k6_memcpy[2] = {
588 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
589 DUMMY_STRINGOP_ALGS};
590 static stringop_algs k6_memset[2] = {
591 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
592 DUMMY_STRINGOP_ALGS};
593 static const
594 struct processor_costs k6_cost = {
595 COSTS_N_INSNS (1), /* cost of an add instruction */
596 COSTS_N_INSNS (2), /* cost of a lea instruction */
597 COSTS_N_INSNS (1), /* variable shift costs */
598 COSTS_N_INSNS (1), /* constant shift costs */
599 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
600 COSTS_N_INSNS (3), /* HI */
601 COSTS_N_INSNS (3), /* SI */
602 COSTS_N_INSNS (3), /* DI */
603 COSTS_N_INSNS (3)}, /* other */
604 0, /* cost of multiply per each bit set */
605 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
606 COSTS_N_INSNS (18), /* HI */
607 COSTS_N_INSNS (18), /* SI */
608 COSTS_N_INSNS (18), /* DI */
609 COSTS_N_INSNS (18)}, /* other */
610 COSTS_N_INSNS (2), /* cost of movsx */
611 COSTS_N_INSNS (2), /* cost of movzx */
612 8, /* "large" insn */
613 4, /* MOVE_RATIO */
614 3, /* cost for loading QImode using movzbl */
615 {4, 5, 4}, /* cost of loading integer registers
616 in QImode, HImode and SImode.
617 Relative to reg-reg move (2). */
618 {2, 3, 2}, /* cost of storing integer registers */
619 4, /* cost of reg,reg fld/fst */
620 {6, 6, 6}, /* cost of loading fp registers
621 in SFmode, DFmode and XFmode */
622 {4, 4, 4}, /* cost of storing fp registers
623 in SFmode, DFmode and XFmode */
624 2, /* cost of moving MMX register */
625 {2, 2}, /* cost of loading MMX registers
626 in SImode and DImode */
627 {2, 2}, /* cost of storing MMX registers
628 in SImode and DImode */
629 2, /* cost of moving SSE register */
630 {2, 2, 8}, /* cost of loading SSE registers
631 in SImode, DImode and TImode */
632 {2, 2, 8}, /* cost of storing SSE registers
633 in SImode, DImode and TImode */
634 6, /* MMX or SSE register to integer */
635 32, /* size of l1 cache. */
636 32, /* size of l2 cache. Some models
637 have integrated l2 cache, but
638 optimizing for k6 is not important
639 enough to worry about that. */
640 32, /* size of prefetch block */
641 1, /* number of parallel prefetches */
642 1, /* Branch cost */
643 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
644 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
645 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
646 COSTS_N_INSNS (2), /* cost of FABS instruction. */
647 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
648 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
649 k6_memcpy,
650 k6_memset,
651 1, /* scalar_stmt_cost. */
652 1, /* scalar load_cost. */
653 1, /* scalar_store_cost. */
654 1, /* vec_stmt_cost. */
655 1, /* vec_to_scalar_cost. */
656 1, /* scalar_to_vec_cost. */
657 1, /* vec_align_load_cost. */
658 2, /* vec_unalign_load_cost. */
659 1, /* vec_store_cost. */
660 3, /* cond_taken_branch_cost. */
661 1, /* cond_not_taken_branch_cost. */
662 };
664 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
665 compared to K8. Alignment becomes important after 8 bytes for memcpy and
666 128 bytes for memset. */
667 static stringop_algs athlon_memcpy[2] = {
668 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
669 DUMMY_STRINGOP_ALGS};
670 static stringop_algs athlon_memset[2] = {
671 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
672 DUMMY_STRINGOP_ALGS};
673 static const
674 struct processor_costs athlon_cost = {
675 COSTS_N_INSNS (1), /* cost of an add instruction */
676 COSTS_N_INSNS (2), /* cost of a lea instruction */
677 COSTS_N_INSNS (1), /* variable shift costs */
678 COSTS_N_INSNS (1), /* constant shift costs */
679 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
680 COSTS_N_INSNS (5), /* HI */
681 COSTS_N_INSNS (5), /* SI */
682 COSTS_N_INSNS (5), /* DI */
683 COSTS_N_INSNS (5)}, /* other */
684 0, /* cost of multiply per each bit set */
685 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
686 COSTS_N_INSNS (26), /* HI */
687 COSTS_N_INSNS (42), /* SI */
688 COSTS_N_INSNS (74), /* DI */
689 COSTS_N_INSNS (74)}, /* other */
690 COSTS_N_INSNS (1), /* cost of movsx */
691 COSTS_N_INSNS (1), /* cost of movzx */
692 8, /* "large" insn */
693 9, /* MOVE_RATIO */
694 4, /* cost for loading QImode using movzbl */
695 {3, 4, 3}, /* cost of loading integer registers
696 in QImode, HImode and SImode.
697 Relative to reg-reg move (2). */
698 {3, 4, 3}, /* cost of storing integer registers */
699 4, /* cost of reg,reg fld/fst */
700 {4, 4, 12}, /* cost of loading fp registers
701 in SFmode, DFmode and XFmode */
702 {6, 6, 8}, /* cost of storing fp registers
703 in SFmode, DFmode and XFmode */
704 2, /* cost of moving MMX register */
705 {4, 4}, /* cost of loading MMX registers
706 in SImode and DImode */
707 {4, 4}, /* cost of storing MMX registers
708 in SImode and DImode */
709 2, /* cost of moving SSE register */
710 {4, 4, 6}, /* cost of loading SSE registers
711 in SImode, DImode and TImode */
712 {4, 4, 5}, /* cost of storing SSE registers
713 in SImode, DImode and TImode */
714 5, /* MMX or SSE register to integer */
715 64, /* size of l1 cache. */
716 256, /* size of l2 cache. */
717 64, /* size of prefetch block */
718 6, /* number of parallel prefetches */
719 5, /* Branch cost */
720 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
721 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
722 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
723 COSTS_N_INSNS (2), /* cost of FABS instruction. */
724 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
725 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
726 athlon_memcpy,
727 athlon_memset,
728 1, /* scalar_stmt_cost. */
729 1, /* scalar load_cost. */
730 1, /* scalar_store_cost. */
731 1, /* vec_stmt_cost. */
732 1, /* vec_to_scalar_cost. */
733 1, /* scalar_to_vec_cost. */
734 1, /* vec_align_load_cost. */
735 2, /* vec_unalign_load_cost. */
736 1, /* vec_store_cost. */
737 3, /* cond_taken_branch_cost. */
738 1, /* cond_not_taken_branch_cost. */
739 };
741 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
742 small blocks it is better to use a loop. For large blocks, a libcall can
743 do nontemporal accesses and beat the inline expansion considerably. */
744 static stringop_algs k8_memcpy[2] = {
745 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
746 {-1, rep_prefix_4_byte, false}}},
747 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
748 {-1, libcall, false}}}};
749 static stringop_algs k8_memset[2] = {
750 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
751 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
752 {libcall, {{48, unrolled_loop, false},
753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
754 static const
755 struct processor_costs k8_cost = {
756 COSTS_N_INSNS (1), /* cost of an add instruction */
757 COSTS_N_INSNS (2), /* cost of a lea instruction */
758 COSTS_N_INSNS (1), /* variable shift costs */
759 COSTS_N_INSNS (1), /* constant shift costs */
760 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
761 COSTS_N_INSNS (4), /* HI */
762 COSTS_N_INSNS (3), /* SI */
763 COSTS_N_INSNS (4), /* DI */
764 COSTS_N_INSNS (5)}, /* other */
765 0, /* cost of multiply per each bit set */
766 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
767 COSTS_N_INSNS (26), /* HI */
768 COSTS_N_INSNS (42), /* SI */
769 COSTS_N_INSNS (74), /* DI */
770 COSTS_N_INSNS (74)}, /* other */
771 COSTS_N_INSNS (1), /* cost of movsx */
772 COSTS_N_INSNS (1), /* cost of movzx */
773 8, /* "large" insn */
774 9, /* MOVE_RATIO */
775 4, /* cost for loading QImode using movzbl */
776 {3, 4, 3}, /* cost of loading integer registers
777 in QImode, HImode and SImode.
778 Relative to reg-reg move (2). */
779 {3, 4, 3}, /* cost of storing integer registers */
780 4, /* cost of reg,reg fld/fst */
781 {4, 4, 12}, /* cost of loading fp registers
782 in SFmode, DFmode and XFmode */
783 {6, 6, 8}, /* cost of storing fp registers
784 in SFmode, DFmode and XFmode */
785 2, /* cost of moving MMX register */
786 {3, 3}, /* cost of loading MMX registers
787 in SImode and DImode */
788 {4, 4}, /* cost of storing MMX registers
789 in SImode and DImode */
790 2, /* cost of moving SSE register */
791 {4, 3, 6}, /* cost of loading SSE registers
792 in SImode, DImode and TImode */
793 {4, 4, 5}, /* cost of storing SSE registers
794 in SImode, DImode and TImode */
795 5, /* MMX or SSE register to integer */
796 64, /* size of l1 cache. */
797 512, /* size of l2 cache. */
798 64, /* size of prefetch block */
799 /* New AMD processors never drop prefetches; if they cannot be performed
800 immediately, they are queued. We set the number of simultaneous prefetches
801 to a large constant to reflect this (it is probably not a good idea not
802 to limit the number of prefetches at all, as their execution also takes some
803 time). */
804 100, /* number of parallel prefetches */
805 3, /* Branch cost */
806 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
807 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
808 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
809 COSTS_N_INSNS (2), /* cost of FABS instruction. */
810 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
811 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
813 k8_memcpy,
814 k8_memset,
815 4, /* scalar_stmt_cost. */
816 2, /* scalar load_cost. */
817 2, /* scalar_store_cost. */
818 5, /* vec_stmt_cost. */
819 0, /* vec_to_scalar_cost. */
820 2, /* scalar_to_vec_cost. */
821 2, /* vec_align_load_cost. */
822 3, /* vec_unalign_load_cost. */
823 3, /* vec_store_cost. */
824 3, /* cond_taken_branch_cost. */
825 2, /* cond_not_taken_branch_cost. */
826 };
828 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
829 very small blocks it is better to use a loop. For large blocks, a libcall can
830 do nontemporal accesses and beat the inline expansion considerably. */
831 static stringop_algs amdfam10_memcpy[2] = {
832 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
833 {-1, rep_prefix_4_byte, false}}},
834 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
835 {-1, libcall, false}}}};
836 static stringop_algs amdfam10_memset[2] = {
837 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
838 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
839 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
840 {-1, libcall, false}}}};
841 struct processor_costs amdfam10_cost = {
842 COSTS_N_INSNS (1), /* cost of an add instruction */
843 COSTS_N_INSNS (2), /* cost of a lea instruction */
844 COSTS_N_INSNS (1), /* variable shift costs */
845 COSTS_N_INSNS (1), /* constant shift costs */
846 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
847 COSTS_N_INSNS (4), /* HI */
848 COSTS_N_INSNS (3), /* SI */
849 COSTS_N_INSNS (4), /* DI */
850 COSTS_N_INSNS (5)}, /* other */
851 0, /* cost of multiply per each bit set */
852 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
853 COSTS_N_INSNS (35), /* HI */
854 COSTS_N_INSNS (51), /* SI */
855 COSTS_N_INSNS (83), /* DI */
856 COSTS_N_INSNS (83)}, /* other */
857 COSTS_N_INSNS (1), /* cost of movsx */
858 COSTS_N_INSNS (1), /* cost of movzx */
859 8, /* "large" insn */
860 9, /* MOVE_RATIO */
861 4, /* cost for loading QImode using movzbl */
862 {3, 4, 3}, /* cost of loading integer registers
863 in QImode, HImode and SImode.
864 Relative to reg-reg move (2). */
865 {3, 4, 3}, /* cost of storing integer registers */
866 4, /* cost of reg,reg fld/fst */
867 {4, 4, 12}, /* cost of loading fp registers
868 in SFmode, DFmode and XFmode */
869 {6, 6, 8}, /* cost of storing fp registers
870 in SFmode, DFmode and XFmode */
871 2, /* cost of moving MMX register */
872 {3, 3}, /* cost of loading MMX registers
873 in SImode and DImode */
874 {4, 4}, /* cost of storing MMX registers
875 in SImode and DImode */
876 2, /* cost of moving SSE register */
877 {4, 4, 3}, /* cost of loading SSE registers
878 in SImode, DImode and TImode */
879 {4, 4, 5}, /* cost of storing SSE registers
880 in SImode, DImode and TImode */
881 3, /* MMX or SSE register to integer */
882 /* On K8:
883 MOVD reg64, xmmreg Double FSTORE 4
884 MOVD reg32, xmmreg Double FSTORE 4
885 On AMDFAM10:
886 MOVD reg64, xmmreg Double FADD 3
887 1/1 1/1
888 MOVD reg32, xmmreg Double FADD 3
889 1/1 1/1 */
890 64, /* size of l1 cache. */
891 512, /* size of l2 cache. */
892 64, /* size of prefetch block */
893 /* New AMD processors never drop prefetches; if they cannot be performed
894 immediately, they are queued. We set the number of simultaneous prefetches
895 to a large constant to reflect this (it is probably not a good idea not
896 to limit the number of prefetches at all, as their execution also takes some
897 time). */
898 100, /* number of parallel prefetches */
899 2, /* Branch cost */
900 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
901 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
902 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
903 COSTS_N_INSNS (2), /* cost of FABS instruction. */
904 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
905 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
907 amdfam10_memcpy,
908 amdfam10_memset,
909 4, /* scalar_stmt_cost. */
910 2, /* scalar load_cost. */
911 2, /* scalar_store_cost. */
912 6, /* vec_stmt_cost. */
913 0, /* vec_to_scalar_cost. */
914 2, /* scalar_to_vec_cost. */
915 2, /* vec_align_load_cost. */
916 2, /* vec_unalign_load_cost. */
917 2, /* vec_store_cost. */
918 2, /* cond_taken_branch_cost. */
919 1, /* cond_not_taken_branch_cost. */
920 };
922 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
923 very small blocks it is better to use a loop. For large blocks, a libcall
924 can do nontemporal accesses and beat the inline expansion considerably. */
925 static stringop_algs bdver1_memcpy[2] = {
926 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
927 {-1, rep_prefix_4_byte, false}}},
928 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
929 {-1, libcall, false}}}};
930 static stringop_algs bdver1_memset[2] = {
931 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
932 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
933 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
934 {-1, libcall, false}}}};
936 const struct processor_costs bdver1_cost = {
937 COSTS_N_INSNS (1), /* cost of an add instruction */
938 COSTS_N_INSNS (1), /* cost of a lea instruction */
939 COSTS_N_INSNS (1), /* variable shift costs */
940 COSTS_N_INSNS (1), /* constant shift costs */
941 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
942 COSTS_N_INSNS (4), /* HI */
943 COSTS_N_INSNS (4), /* SI */
944 COSTS_N_INSNS (6), /* DI */
945 COSTS_N_INSNS (6)}, /* other */
946 0, /* cost of multiply per each bit set */
947 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
948 COSTS_N_INSNS (35), /* HI */
949 COSTS_N_INSNS (51), /* SI */
950 COSTS_N_INSNS (83), /* DI */
951 COSTS_N_INSNS (83)}, /* other */
952 COSTS_N_INSNS (1), /* cost of movsx */
953 COSTS_N_INSNS (1), /* cost of movzx */
954 8, /* "large" insn */
955 9, /* MOVE_RATIO */
956 4, /* cost for loading QImode using movzbl */
957 {5, 5, 4}, /* cost of loading integer registers
958 in QImode, HImode and SImode.
959 Relative to reg-reg move (2). */
960 {4, 4, 4}, /* cost of storing integer registers */
961 2, /* cost of reg,reg fld/fst */
962 {5, 5, 12}, /* cost of loading fp registers
963 in SFmode, DFmode and XFmode */
964 {4, 4, 8}, /* cost of storing fp registers
965 in SFmode, DFmode and XFmode */
966 2, /* cost of moving MMX register */
967 {4, 4}, /* cost of loading MMX registers
968 in SImode and DImode */
969 {4, 4}, /* cost of storing MMX registers
970 in SImode and DImode */
971 2, /* cost of moving SSE register */
972 {4, 4, 4}, /* cost of loading SSE registers
973 in SImode, DImode and TImode */
974 {4, 4, 4}, /* cost of storing SSE registers
975 in SImode, DImode and TImode */
976 2, /* MMX or SSE register to integer */
977 /* On K8:
978 MOVD reg64, xmmreg Double FSTORE 4
979 MOVD reg32, xmmreg Double FSTORE 4
980 On AMDFAM10:
981 MOVD reg64, xmmreg Double FADD 3
982 1/1 1/1
983 MOVD reg32, xmmreg Double FADD 3
984 1/1 1/1 */
985 16, /* size of l1 cache. */
986 2048, /* size of l2 cache. */
987 64, /* size of prefetch block */
988 /* New AMD processors never drop prefetches; if they cannot be performed
989 immediately, they are queued. We set the number of simultaneous prefetches
990 to a large constant to reflect this (it is probably not a good idea not
991 to limit the number of prefetches at all, as their execution also takes some
992 time). */
993 100, /* number of parallel prefetches */
994 2, /* Branch cost */
995 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
996 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
997 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
998 COSTS_N_INSNS (2), /* cost of FABS instruction. */
999 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1000 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1002 bdver1_memcpy,
1003 bdver1_memset,
1004 6, /* scalar_stmt_cost. */
1005 4, /* scalar load_cost. */
1006 4, /* scalar_store_cost. */
1007 6, /* vec_stmt_cost. */
1008 0, /* vec_to_scalar_cost. */
1009 2, /* scalar_to_vec_cost. */
1010 4, /* vec_align_load_cost. */
1011 4, /* vec_unalign_load_cost. */
1012 4, /* vec_store_cost. */
1013 4, /* cond_taken_branch_cost. */
1014 2, /* cond_not_taken_branch_cost. */
1015 };
1017 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1018 very small blocks it is better to use a loop. For large blocks, a libcall
1019 can do nontemporal accesses and beat the inline expansion considerably. */
1021 static stringop_algs bdver2_memcpy[2] = {
1022 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1023 {-1, rep_prefix_4_byte, false}}},
1024 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1025 {-1, libcall, false}}}};
1026 static stringop_algs bdver2_memset[2] = {
1027 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1028 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1029 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1030 {-1, libcall, false}}}};
1032 const struct processor_costs bdver2_cost = {
1033 COSTS_N_INSNS (1), /* cost of an add instruction */
1034 COSTS_N_INSNS (1), /* cost of a lea instruction */
1035 COSTS_N_INSNS (1), /* variable shift costs */
1036 COSTS_N_INSNS (1), /* constant shift costs */
1037 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1038 COSTS_N_INSNS (4), /* HI */
1039 COSTS_N_INSNS (4), /* SI */
1040 COSTS_N_INSNS (6), /* DI */
1041 COSTS_N_INSNS (6)}, /* other */
1042 0, /* cost of multiply per each bit set */
1043 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1044 COSTS_N_INSNS (35), /* HI */
1045 COSTS_N_INSNS (51), /* SI */
1046 COSTS_N_INSNS (83), /* DI */
1047 COSTS_N_INSNS (83)}, /* other */
1048 COSTS_N_INSNS (1), /* cost of movsx */
1049 COSTS_N_INSNS (1), /* cost of movzx */
1050 8, /* "large" insn */
1051 9, /* MOVE_RATIO */
1052 4, /* cost for loading QImode using movzbl */
1053 {5, 5, 4}, /* cost of loading integer registers
1054 in QImode, HImode and SImode.
1055 Relative to reg-reg move (2). */
1056 {4, 4, 4}, /* cost of storing integer registers */
1057 2, /* cost of reg,reg fld/fst */
1058 {5, 5, 12}, /* cost of loading fp registers
1059 in SFmode, DFmode and XFmode */
1060 {4, 4, 8}, /* cost of storing fp registers
1061 in SFmode, DFmode and XFmode */
1062 2, /* cost of moving MMX register */
1063 {4, 4}, /* cost of loading MMX registers
1064 in SImode and DImode */
1065 {4, 4}, /* cost of storing MMX registers
1066 in SImode and DImode */
1067 2, /* cost of moving SSE register */
1068 {4, 4, 4}, /* cost of loading SSE registers
1069 in SImode, DImode and TImode */
1070 {4, 4, 4}, /* cost of storing SSE registers
1071 in SImode, DImode and TImode */
1072 2, /* MMX or SSE register to integer */
1073 /* On K8:
1074 MOVD reg64, xmmreg Double FSTORE 4
1075 MOVD reg32, xmmreg Double FSTORE 4
1076 On AMDFAM10:
1077 MOVD reg64, xmmreg Double FADD 3
1078 1/1 1/1
1079 MOVD reg32, xmmreg Double FADD 3
1080 1/1 1/1 */
1081 16, /* size of l1 cache. */
1082 2048, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 /* New AMD processors never drop prefetches; if they cannot be performed
1085 immediately, they are queued. We set the number of simultaneous prefetches
1086 to a large constant to reflect this (it is probably not a good idea not
1087 to limit the number of prefetches at all, as their execution also takes some
1088 time). */
1089 100, /* number of parallel prefetches */
1090 2, /* Branch cost */
1091 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1098 bdver2_memcpy,
1099 bdver2_memset,
1100 6, /* scalar_stmt_cost. */
1101 4, /* scalar load_cost. */
1102 4, /* scalar_store_cost. */
1103 6, /* vec_stmt_cost. */
1104 0, /* vec_to_scalar_cost. */
1105 2, /* scalar_to_vec_cost. */
1106 4, /* vec_align_load_cost. */
1107 4, /* vec_unalign_load_cost. */
1108 4, /* vec_store_cost. */
1109 4, /* cond_taken_branch_cost. */
1110 2, /* cond_not_taken_branch_cost. */
1111 };
1114 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1115 very small blocks it is better to use a loop. For large blocks, a libcall
1116 can do nontemporal accesses and beat the inline expansion considerably. */
1117 static stringop_algs bdver3_memcpy[2] = {
1118 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1119 {-1, rep_prefix_4_byte, false}}},
1120 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1121 {-1, libcall, false}}}};
1122 static stringop_algs bdver3_memset[2] = {
1123 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1124 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1125 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1126 {-1, libcall, false}}}};
1127 struct processor_costs bdver3_cost = {
1128 COSTS_N_INSNS (1), /* cost of an add instruction */
1129 COSTS_N_INSNS (1), /* cost of a lea instruction */
1130 COSTS_N_INSNS (1), /* variable shift costs */
1131 COSTS_N_INSNS (1), /* constant shift costs */
1132 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1133 COSTS_N_INSNS (4), /* HI */
1134 COSTS_N_INSNS (4), /* SI */
1135 COSTS_N_INSNS (6), /* DI */
1136 COSTS_N_INSNS (6)}, /* other */
1137 0, /* cost of multiply per each bit set */
1138 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1139 COSTS_N_INSNS (35), /* HI */
1140 COSTS_N_INSNS (51), /* SI */
1141 COSTS_N_INSNS (83), /* DI */
1142 COSTS_N_INSNS (83)}, /* other */
1143 COSTS_N_INSNS (1), /* cost of movsx */
1144 COSTS_N_INSNS (1), /* cost of movzx */
1145 8, /* "large" insn */
1146 9, /* MOVE_RATIO */
1147 4, /* cost for loading QImode using movzbl */
1148 {5, 5, 4}, /* cost of loading integer registers
1149 in QImode, HImode and SImode.
1150 Relative to reg-reg move (2). */
1151 {4, 4, 4}, /* cost of storing integer registers */
1152 2, /* cost of reg,reg fld/fst */
1153 {5, 5, 12}, /* cost of loading fp registers
1154 in SFmode, DFmode and XFmode */
1155 {4, 4, 8}, /* cost of storing fp registers
1156 in SFmode, DFmode and XFmode */
1157 2, /* cost of moving MMX register */
1158 {4, 4}, /* cost of loading MMX registers
1159 in SImode and DImode */
1160 {4, 4}, /* cost of storing MMX registers
1161 in SImode and DImode */
1162 2, /* cost of moving SSE register */
1163 {4, 4, 4}, /* cost of loading SSE registers
1164 in SImode, DImode and TImode */
1165 {4, 4, 4}, /* cost of storing SSE registers
1166 in SImode, DImode and TImode */
1167 2, /* MMX or SSE register to integer */
1168 16, /* size of l1 cache. */
1169 2048, /* size of l2 cache. */
1170 64, /* size of prefetch block */
1171 /* New AMD processors never drop prefetches; if they cannot be performed
1172 immediately, they are queued. We set the number of simultaneous prefetches
1173 to a large constant to reflect this (it is probably not a good idea not
1174 to limit the number of prefetches at all, as their execution also takes some
1175 time). */
1176 100, /* number of parallel prefetches */
1177 2, /* Branch cost */
1178 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1179 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1180 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1181 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1182 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1183 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1185 bdver3_memcpy,
1186 bdver3_memset,
1187 6, /* scalar_stmt_cost. */
1188 4, /* scalar load_cost. */
1189 4, /* scalar_store_cost. */
1190 6, /* vec_stmt_cost. */
1191 0, /* vec_to_scalar_cost. */
1192 2, /* scalar_to_vec_cost. */
1193 4, /* vec_align_load_cost. */
1194 4, /* vec_unalign_load_cost. */
1195 4, /* vec_store_cost. */
1196 4, /* cond_taken_branch_cost. */
1197 2, /* cond_not_taken_branch_cost. */
1198 };
1200 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1201 very small blocks it is better to use a loop. For large blocks, a libcall
1202 can do nontemporal accesses and beat the inline expansion considerably. */
1203 static stringop_algs bdver4_memcpy[2] = {
1204 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1205 {-1, rep_prefix_4_byte, false}}},
1206 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1207 {-1, libcall, false}}}};
1208 static stringop_algs bdver4_memset[2] = {
1209 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1210 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1211 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1212 {-1, libcall, false}}}};
1213 struct processor_costs bdver4_cost = {
1214 COSTS_N_INSNS (1), /* cost of an add instruction */
1215 COSTS_N_INSNS (1), /* cost of a lea instruction */
1216 COSTS_N_INSNS (1), /* variable shift costs */
1217 COSTS_N_INSNS (1), /* constant shift costs */
1218 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1219 COSTS_N_INSNS (4), /* HI */
1220 COSTS_N_INSNS (4), /* SI */
1221 COSTS_N_INSNS (6), /* DI */
1222 COSTS_N_INSNS (6)}, /* other */
1223 0, /* cost of multiply per each bit set */
1224 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1225 COSTS_N_INSNS (35), /* HI */
1226 COSTS_N_INSNS (51), /* SI */
1227 COSTS_N_INSNS (83), /* DI */
1228 COSTS_N_INSNS (83)}, /* other */
1229 COSTS_N_INSNS (1), /* cost of movsx */
1230 COSTS_N_INSNS (1), /* cost of movzx */
1231 8, /* "large" insn */
1232 9, /* MOVE_RATIO */
1233 4, /* cost for loading QImode using movzbl */
1234 {5, 5, 4}, /* cost of loading integer registers
1235 in QImode, HImode and SImode.
1236 Relative to reg-reg move (2). */
1237 {4, 4, 4}, /* cost of storing integer registers */
1238 2, /* cost of reg,reg fld/fst */
1239 {5, 5, 12}, /* cost of loading fp registers
1240 in SFmode, DFmode and XFmode */
1241 {4, 4, 8}, /* cost of storing fp registers
1242 in SFmode, DFmode and XFmode */
1243 2, /* cost of moving MMX register */
1244 {4, 4}, /* cost of loading MMX registers
1245 in SImode and DImode */
1246 {4, 4}, /* cost of storing MMX registers
1247 in SImode and DImode */
1248 2, /* cost of moving SSE register */
1249 {4, 4, 4}, /* cost of loading SSE registers
1250 in SImode, DImode and TImode */
1251 {4, 4, 4}, /* cost of storing SSE registers
1252 in SImode, DImode and TImode */
1253 2, /* MMX or SSE register to integer */
1254 16, /* size of l1 cache. */
1255 2048, /* size of l2 cache. */
1256 64, /* size of prefetch block */
1257 /* New AMD processors never drop prefetches; if they cannot be performed
1258 immediately, they are queued. We set the number of simultaneous prefetches
1259 to a large constant to reflect this (it is probably not a good idea not
1260 to limit the number of prefetches at all, as their execution also takes some
1261 time). */
1262 100, /* number of parallel prefetches */
1263 2, /* Branch cost */
1264 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1265 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1266 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1267 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1268 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1269 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1271 bdver4_memcpy,
1272 bdver4_memset,
1273 6, /* scalar_stmt_cost. */
1274 4, /* scalar load_cost. */
1275 4, /* scalar_store_cost. */
1276 6, /* vec_stmt_cost. */
1277 0, /* vec_to_scalar_cost. */
1278 2, /* scalar_to_vec_cost. */
1279 4, /* vec_align_load_cost. */
1280 4, /* vec_unalign_load_cost. */
1281 4, /* vec_store_cost. */
1282 4, /* cond_taken_branch_cost. */
1283 2, /* cond_not_taken_branch_cost. */
1284 };
1286 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1287 very small blocks it is better to use a loop. For large blocks, a libcall can
1288 do nontemporal accesses and beat the inline expansion considerably. */
1289 static stringop_algs btver1_memcpy[2] = {
1290 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1291 {-1, rep_prefix_4_byte, false}}},
1292 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1293 {-1, libcall, false}}}};
1294 static stringop_algs btver1_memset[2] = {
1295 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1296 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1297 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1298 {-1, libcall, false}}}};
1299 const struct processor_costs btver1_cost = {
1300 COSTS_N_INSNS (1), /* cost of an add instruction */
1301 COSTS_N_INSNS (2), /* cost of a lea instruction */
1302 COSTS_N_INSNS (1), /* variable shift costs */
1303 COSTS_N_INSNS (1), /* constant shift costs */
1304 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1305 COSTS_N_INSNS (4), /* HI */
1306 COSTS_N_INSNS (3), /* SI */
1307 COSTS_N_INSNS (4), /* DI */
1308 COSTS_N_INSNS (5)}, /* other */
1309 0, /* cost of multiply per each bit set */
1310 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1311 COSTS_N_INSNS (35), /* HI */
1312 COSTS_N_INSNS (51), /* SI */
1313 COSTS_N_INSNS (83), /* DI */
1314 COSTS_N_INSNS (83)}, /* other */
1315 COSTS_N_INSNS (1), /* cost of movsx */
1316 COSTS_N_INSNS (1), /* cost of movzx */
1317 8, /* "large" insn */
1318 9, /* MOVE_RATIO */
1319 4, /* cost for loading QImode using movzbl */
1320 {3, 4, 3}, /* cost of loading integer registers
1321 in QImode, HImode and SImode.
1322 Relative to reg-reg move (2). */
1323 {3, 4, 3}, /* cost of storing integer registers */
1324 4, /* cost of reg,reg fld/fst */
1325 {4, 4, 12}, /* cost of loading fp registers
1326 in SFmode, DFmode and XFmode */
1327 {6, 6, 8}, /* cost of storing fp registers
1328 in SFmode, DFmode and XFmode */
1329 2, /* cost of moving MMX register */
1330 {3, 3}, /* cost of loading MMX registers
1331 in SImode and DImode */
1332 {4, 4}, /* cost of storing MMX registers
1333 in SImode and DImode */
1334 2, /* cost of moving SSE register */
1335 {4, 4, 3}, /* cost of loading SSE registers
1336 in SImode, DImode and TImode */
1337 {4, 4, 5}, /* cost of storing SSE registers
1338 in SImode, DImode and TImode */
1339 3, /* MMX or SSE register to integer */
1340 /* On K8:
1341 MOVD reg64, xmmreg Double FSTORE 4
1342 MOVD reg32, xmmreg Double FSTORE 4
1343 On AMDFAM10:
1344 MOVD reg64, xmmreg Double FADD 3
1345 1/1 1/1
1346 MOVD reg32, xmmreg Double FADD 3
1347 1/1 1/1 */
1348 32, /* size of l1 cache. */
1349 512, /* size of l2 cache. */
1350 64, /* size of prefetch block */
1351 100, /* number of parallel prefetches */
1352 2, /* Branch cost */
1353 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1354 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1355 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1356 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1357 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1358 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1360 btver1_memcpy,
1361 btver1_memset,
1362 4, /* scalar_stmt_cost. */
1363 2, /* scalar load_cost. */
1364 2, /* scalar_store_cost. */
1365 6, /* vec_stmt_cost. */
1366 0, /* vec_to_scalar_cost. */
1367 2, /* scalar_to_vec_cost. */
1368 2, /* vec_align_load_cost. */
1369 2, /* vec_unalign_load_cost. */
1370 2, /* vec_store_cost. */
1371 2, /* cond_taken_branch_cost. */
1372 1, /* cond_not_taken_branch_cost. */
1373 };
1375 static stringop_algs btver2_memcpy[2] = {
1376 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1377 {-1, rep_prefix_4_byte, false}}},
1378 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1379 {-1, libcall, false}}}};
1380 static stringop_algs btver2_memset[2] = {
1381 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1382 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1383 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1384 {-1, libcall, false}}}};
1385 const struct processor_costs btver2_cost = {
1386 COSTS_N_INSNS (1), /* cost of an add instruction */
1387 COSTS_N_INSNS (2), /* cost of a lea instruction */
1388 COSTS_N_INSNS (1), /* variable shift costs */
1389 COSTS_N_INSNS (1), /* constant shift costs */
1390 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1391 COSTS_N_INSNS (4), /* HI */
1392 COSTS_N_INSNS (3), /* SI */
1393 COSTS_N_INSNS (4), /* DI */
1394 COSTS_N_INSNS (5)}, /* other */
1395 0, /* cost of multiply per each bit set */
1396 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1397 COSTS_N_INSNS (35), /* HI */
1398 COSTS_N_INSNS (51), /* SI */
1399 COSTS_N_INSNS (83), /* DI */
1400 COSTS_N_INSNS (83)}, /* other */
1401 COSTS_N_INSNS (1), /* cost of movsx */
1402 COSTS_N_INSNS (1), /* cost of movzx */
1403 8, /* "large" insn */
1404 9, /* MOVE_RATIO */
1405 4, /* cost for loading QImode using movzbl */
1406 {3, 4, 3}, /* cost of loading integer registers
1407 in QImode, HImode and SImode.
1408 Relative to reg-reg move (2). */
1409 {3, 4, 3}, /* cost of storing integer registers */
1410 4, /* cost of reg,reg fld/fst */
1411 {4, 4, 12}, /* cost of loading fp registers
1412 in SFmode, DFmode and XFmode */
1413 {6, 6, 8}, /* cost of storing fp registers
1414 in SFmode, DFmode and XFmode */
1415 2, /* cost of moving MMX register */
1416 {3, 3}, /* cost of loading MMX registers
1417 in SImode and DImode */
1418 {4, 4}, /* cost of storing MMX registers
1419 in SImode and DImode */
1420 2, /* cost of moving SSE register */
1421 {4, 4, 3}, /* cost of loading SSE registers
1422 in SImode, DImode and TImode */
1423 {4, 4, 5}, /* cost of storing SSE registers
1424 in SImode, DImode and TImode */
1425 3, /* MMX or SSE register to integer */
1426 /* On K8:
1427 MOVD reg64, xmmreg Double FSTORE 4
1428 MOVD reg32, xmmreg Double FSTORE 4
1429 On AMDFAM10:
1430 MOVD reg64, xmmreg Double FADD 3
1431 1/1 1/1
1432 MOVD reg32, xmmreg Double FADD 3
1433 1/1 1/1 */
1434 32, /* size of l1 cache. */
1435 2048, /* size of l2 cache. */
1436 64, /* size of prefetch block */
1437 100, /* number of parallel prefetches */
1438 2, /* Branch cost */
1439 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1440 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1441 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1442 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1443 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1444 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1445 btver2_memcpy,
1446 btver2_memset,
1447 4, /* scalar_stmt_cost. */
1448 2, /* scalar load_cost. */
1449 2, /* scalar_store_cost. */
1450 6, /* vec_stmt_cost. */
1451 0, /* vec_to_scalar_cost. */
1452 2, /* scalar_to_vec_cost. */
1453 2, /* vec_align_load_cost. */
1454 2, /* vec_unalign_load_cost. */
1455 2, /* vec_store_cost. */
1456 2, /* cond_taken_branch_cost. */
1457 1, /* cond_not_taken_branch_cost. */
1460 static stringop_algs pentium4_memcpy[2] = {
1461 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1462 DUMMY_STRINGOP_ALGS};
1463 static stringop_algs pentium4_memset[2] = {
1464 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1465 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1466 DUMMY_STRINGOP_ALGS};
1468 static const
1469 struct processor_costs pentium4_cost = {
1470 COSTS_N_INSNS (1), /* cost of an add instruction */
1471 COSTS_N_INSNS (3), /* cost of a lea instruction */
1472 COSTS_N_INSNS (4), /* variable shift costs */
1473 COSTS_N_INSNS (4), /* constant shift costs */
1474 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1475 COSTS_N_INSNS (15), /* HI */
1476 COSTS_N_INSNS (15), /* SI */
1477 COSTS_N_INSNS (15), /* DI */
1478 COSTS_N_INSNS (15)}, /* other */
1479 0, /* cost of multiply per each bit set */
1480 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1481 COSTS_N_INSNS (56), /* HI */
1482 COSTS_N_INSNS (56), /* SI */
1483 COSTS_N_INSNS (56), /* DI */
1484 COSTS_N_INSNS (56)}, /* other */
1485 COSTS_N_INSNS (1), /* cost of movsx */
1486 COSTS_N_INSNS (1), /* cost of movzx */
1487 16, /* "large" insn */
1488 6, /* MOVE_RATIO */
1489 2, /* cost for loading QImode using movzbl */
1490 {4, 5, 4}, /* cost of loading integer registers
1491 in QImode, HImode and SImode.
1492 Relative to reg-reg move (2). */
1493 {2, 3, 2}, /* cost of storing integer registers */
1494 2, /* cost of reg,reg fld/fst */
1495 {2, 2, 6}, /* cost of loading fp registers
1496 in SFmode, DFmode and XFmode */
1497 {4, 4, 6}, /* cost of storing fp registers
1498 in SFmode, DFmode and XFmode */
1499 2, /* cost of moving MMX register */
1500 {2, 2}, /* cost of loading MMX registers
1501 in SImode and DImode */
1502 {2, 2}, /* cost of storing MMX registers
1503 in SImode and DImode */
1504 12, /* cost of moving SSE register */
1505 {12, 12, 12}, /* cost of loading SSE registers
1506 in SImode, DImode and TImode */
1507 {2, 2, 8}, /* cost of storing SSE registers
1508 in SImode, DImode and TImode */
1509 10, /* MMX or SSE register to integer */
1510 8, /* size of l1 cache. */
1511 256, /* size of l2 cache. */
1512 64, /* size of prefetch block */
1513 6, /* number of parallel prefetches */
1514 2, /* Branch cost */
1515 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1516 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1517 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1518 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1519 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1520 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1521 pentium4_memcpy,
1522 pentium4_memset,
1523 1, /* scalar_stmt_cost. */
1524 1, /* scalar load_cost. */
1525 1, /* scalar_store_cost. */
1526 1, /* vec_stmt_cost. */
1527 1, /* vec_to_scalar_cost. */
1528 1, /* scalar_to_vec_cost. */
1529 1, /* vec_align_load_cost. */
1530 2, /* vec_unalign_load_cost. */
1531 1, /* vec_store_cost. */
1532 3, /* cond_taken_branch_cost. */
1533 1, /* cond_not_taken_branch_cost. */
1536 static stringop_algs nocona_memcpy[2] = {
1537 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1538 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1539 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1541 static stringop_algs nocona_memset[2] = {
1542 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1543 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1544 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1545 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1547 static const
1548 struct processor_costs nocona_cost = {
1549 COSTS_N_INSNS (1), /* cost of an add instruction */
1550 COSTS_N_INSNS (1), /* cost of a lea instruction */
1551 COSTS_N_INSNS (1), /* variable shift costs */
1552 COSTS_N_INSNS (1), /* constant shift costs */
1553 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1554 COSTS_N_INSNS (10), /* HI */
1555 COSTS_N_INSNS (10), /* SI */
1556 COSTS_N_INSNS (10), /* DI */
1557 COSTS_N_INSNS (10)}, /* other */
1558 0, /* cost of multiply per each bit set */
1559 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1560 COSTS_N_INSNS (66), /* HI */
1561 COSTS_N_INSNS (66), /* SI */
1562 COSTS_N_INSNS (66), /* DI */
1563 COSTS_N_INSNS (66)}, /* other */
1564 COSTS_N_INSNS (1), /* cost of movsx */
1565 COSTS_N_INSNS (1), /* cost of movzx */
1566 16, /* "large" insn */
1567 17, /* MOVE_RATIO */
1568 4, /* cost for loading QImode using movzbl */
1569 {4, 4, 4}, /* cost of loading integer registers
1570 in QImode, HImode and SImode.
1571 Relative to reg-reg move (2). */
1572 {4, 4, 4}, /* cost of storing integer registers */
1573 3, /* cost of reg,reg fld/fst */
1574 {12, 12, 12}, /* cost of loading fp registers
1575 in SFmode, DFmode and XFmode */
1576 {4, 4, 4}, /* cost of storing fp registers
1577 in SFmode, DFmode and XFmode */
1578 6, /* cost of moving MMX register */
1579 {12, 12}, /* cost of loading MMX registers
1580 in SImode and DImode */
1581 {12, 12}, /* cost of storing MMX registers
1582 in SImode and DImode */
1583 6, /* cost of moving SSE register */
1584 {12, 12, 12}, /* cost of loading SSE registers
1585 in SImode, DImode and TImode */
1586 {12, 12, 12}, /* cost of storing SSE registers
1587 in SImode, DImode and TImode */
1588 8, /* MMX or SSE register to integer */
1589 8, /* size of l1 cache. */
1590 1024, /* size of l2 cache. */
1591 64, /* size of prefetch block */
1592 8, /* number of parallel prefetches */
1593 1, /* Branch cost */
1594 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1595 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1596 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1597 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1598 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1599 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1600 nocona_memcpy,
1601 nocona_memset,
1602 1, /* scalar_stmt_cost. */
1603 1, /* scalar load_cost. */
1604 1, /* scalar_store_cost. */
1605 1, /* vec_stmt_cost. */
1606 1, /* vec_to_scalar_cost. */
1607 1, /* scalar_to_vec_cost. */
1608 1, /* vec_align_load_cost. */
1609 2, /* vec_unalign_load_cost. */
1610 1, /* vec_store_cost. */
1611 3, /* cond_taken_branch_cost. */
1612 1, /* cond_not_taken_branch_cost. */
1615 static stringop_algs atom_memcpy[2] = {
1616 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1617 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1618 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1619 static stringop_algs atom_memset[2] = {
1620 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1621 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1622 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1623 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1624 static const
1625 struct processor_costs atom_cost = {
1626 COSTS_N_INSNS (1), /* cost of an add instruction */
1627 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1628 COSTS_N_INSNS (1), /* variable shift costs */
1629 COSTS_N_INSNS (1), /* constant shift costs */
1630 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1631 COSTS_N_INSNS (4), /* HI */
1632 COSTS_N_INSNS (3), /* SI */
1633 COSTS_N_INSNS (4), /* DI */
1634 COSTS_N_INSNS (2)}, /* other */
1635 0, /* cost of multiply per each bit set */
1636 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1637 COSTS_N_INSNS (26), /* HI */
1638 COSTS_N_INSNS (42), /* SI */
1639 COSTS_N_INSNS (74), /* DI */
1640 COSTS_N_INSNS (74)}, /* other */
1641 COSTS_N_INSNS (1), /* cost of movsx */
1642 COSTS_N_INSNS (1), /* cost of movzx */
1643 8, /* "large" insn */
1644 17, /* MOVE_RATIO */
1645 4, /* cost for loading QImode using movzbl */
1646 {4, 4, 4}, /* cost of loading integer registers
1647 in QImode, HImode and SImode.
1648 Relative to reg-reg move (2). */
1649 {4, 4, 4}, /* cost of storing integer registers */
1650 4, /* cost of reg,reg fld/fst */
1651 {12, 12, 12}, /* cost of loading fp registers
1652 in SFmode, DFmode and XFmode */
1653 {6, 6, 8}, /* cost of storing fp registers
1654 in SFmode, DFmode and XFmode */
1655 2, /* cost of moving MMX register */
1656 {8, 8}, /* cost of loading MMX registers
1657 in SImode and DImode */
1658 {8, 8}, /* cost of storing MMX registers
1659 in SImode and DImode */
1660 2, /* cost of moving SSE register */
1661 {8, 8, 8}, /* cost of loading SSE registers
1662 in SImode, DImode and TImode */
1663 {8, 8, 8}, /* cost of storing SSE registers
1664 in SImode, DImode and TImode */
1665 5, /* MMX or SSE register to integer */
1666 32, /* size of l1 cache. */
1667 256, /* size of l2 cache. */
1668 64, /* size of prefetch block */
1669 6, /* number of parallel prefetches */
1670 3, /* Branch cost */
1671 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1672 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1673 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1674 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1675 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1676 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1677 atom_memcpy,
1678 atom_memset,
1679 1, /* scalar_stmt_cost. */
1680 1, /* scalar load_cost. */
1681 1, /* scalar_store_cost. */
1682 1, /* vec_stmt_cost. */
1683 1, /* vec_to_scalar_cost. */
1684 1, /* scalar_to_vec_cost. */
1685 1, /* vec_align_load_cost. */
1686 2, /* vec_unalign_load_cost. */
1687 1, /* vec_store_cost. */
1688 3, /* cond_taken_branch_cost. */
1689 1, /* cond_not_taken_branch_cost. */
1692 static stringop_algs slm_memcpy[2] = {
1693 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1694 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1695 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1696 static stringop_algs slm_memset[2] = {
1697 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1698 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1699 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1700 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1701 static const
1702 struct processor_costs slm_cost = {
1703 COSTS_N_INSNS (1), /* cost of an add instruction */
1704 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1705 COSTS_N_INSNS (1), /* variable shift costs */
1706 COSTS_N_INSNS (1), /* constant shift costs */
1707 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1708 COSTS_N_INSNS (3), /* HI */
1709 COSTS_N_INSNS (3), /* SI */
1710 COSTS_N_INSNS (4), /* DI */
1711 COSTS_N_INSNS (2)}, /* other */
1712 0, /* cost of multiply per each bit set */
1713 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1714 COSTS_N_INSNS (26), /* HI */
1715 COSTS_N_INSNS (42), /* SI */
1716 COSTS_N_INSNS (74), /* DI */
1717 COSTS_N_INSNS (74)}, /* other */
1718 COSTS_N_INSNS (1), /* cost of movsx */
1719 COSTS_N_INSNS (1), /* cost of movzx */
1720 8, /* "large" insn */
1721 17, /* MOVE_RATIO */
1722 4, /* cost for loading QImode using movzbl */
1723 {4, 4, 4}, /* cost of loading integer registers
1724 in QImode, HImode and SImode.
1725 Relative to reg-reg move (2). */
1726 {4, 4, 4}, /* cost of storing integer registers */
1727 4, /* cost of reg,reg fld/fst */
1728 {12, 12, 12}, /* cost of loading fp registers
1729 in SFmode, DFmode and XFmode */
1730 {6, 6, 8}, /* cost of storing fp registers
1731 in SFmode, DFmode and XFmode */
1732 2, /* cost of moving MMX register */
1733 {8, 8}, /* cost of loading MMX registers
1734 in SImode and DImode */
1735 {8, 8}, /* cost of storing MMX registers
1736 in SImode and DImode */
1737 2, /* cost of moving SSE register */
1738 {8, 8, 8}, /* cost of loading SSE registers
1739 in SImode, DImode and TImode */
1740 {8, 8, 8}, /* cost of storing SSE registers
1741 in SImode, DImode and TImode */
1742 5, /* MMX or SSE register to integer */
1743 32, /* size of l1 cache. */
1744 256, /* size of l2 cache. */
1745 64, /* size of prefetch block */
1746 6, /* number of parallel prefetches */
1747 3, /* Branch cost */
1748 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1749 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1750 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1751 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1752 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1753 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1754 slm_memcpy,
1755 slm_memset,
1756 1, /* scalar_stmt_cost. */
1757 1, /* scalar load_cost. */
1758 1, /* scalar_store_cost. */
1759 1, /* vec_stmt_cost. */
1760 4, /* vec_to_scalar_cost. */
1761 1, /* scalar_to_vec_cost. */
1762 1, /* vec_align_load_cost. */
1763 2, /* vec_unalign_load_cost. */
1764 1, /* vec_store_cost. */
1765 3, /* cond_taken_branch_cost. */
1766 1, /* cond_not_taken_branch_cost. */
1769 static stringop_algs intel_memcpy[2] = {
1770 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1771 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1772 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1773 static stringop_algs intel_memset[2] = {
1774 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1775 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1776 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1777 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1778 static const
1779 struct processor_costs intel_cost = {
1780 COSTS_N_INSNS (1), /* cost of an add instruction */
1781 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1782 COSTS_N_INSNS (1), /* variable shift costs */
1783 COSTS_N_INSNS (1), /* constant shift costs */
1784 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1785 COSTS_N_INSNS (3), /* HI */
1786 COSTS_N_INSNS (3), /* SI */
1787 COSTS_N_INSNS (4), /* DI */
1788 COSTS_N_INSNS (2)}, /* other */
1789 0, /* cost of multiply per each bit set */
1790 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1791 COSTS_N_INSNS (26), /* HI */
1792 COSTS_N_INSNS (42), /* SI */
1793 COSTS_N_INSNS (74), /* DI */
1794 COSTS_N_INSNS (74)}, /* other */
1795 COSTS_N_INSNS (1), /* cost of movsx */
1796 COSTS_N_INSNS (1), /* cost of movzx */
1797 8, /* "large" insn */
1798 17, /* MOVE_RATIO */
1799 4, /* cost for loading QImode using movzbl */
1800 {4, 4, 4}, /* cost of loading integer registers
1801 in QImode, HImode and SImode.
1802 Relative to reg-reg move (2). */
1803 {4, 4, 4}, /* cost of storing integer registers */
1804 4, /* cost of reg,reg fld/fst */
1805 {12, 12, 12}, /* cost of loading fp registers
1806 in SFmode, DFmode and XFmode */
1807 {6, 6, 8}, /* cost of storing fp registers
1808 in SFmode, DFmode and XFmode */
1809 2, /* cost of moving MMX register */
1810 {8, 8}, /* cost of loading MMX registers
1811 in SImode and DImode */
1812 {8, 8}, /* cost of storing MMX registers
1813 in SImode and DImode */
1814 2, /* cost of moving SSE register */
1815 {8, 8, 8}, /* cost of loading SSE registers
1816 in SImode, DImode and TImode */
1817 {8, 8, 8}, /* cost of storing SSE registers
1818 in SImode, DImode and TImode */
1819 5, /* MMX or SSE register to integer */
1820 32, /* size of l1 cache. */
1821 256, /* size of l2 cache. */
1822 64, /* size of prefetch block */
1823 6, /* number of parallel prefetches */
1824 3, /* Branch cost */
1825 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1826 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1827 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1828 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1829 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1830 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1831 intel_memcpy,
1832 intel_memset,
1833 1, /* scalar_stmt_cost. */
1834 1, /* scalar load_cost. */
1835 1, /* scalar_store_cost. */
1836 1, /* vec_stmt_cost. */
1837 4, /* vec_to_scalar_cost. */
1838 1, /* scalar_to_vec_cost. */
1839 1, /* vec_align_load_cost. */
1840 2, /* vec_unalign_load_cost. */
1841 1, /* vec_store_cost. */
1842 3, /* cond_taken_branch_cost. */
1843 1, /* cond_not_taken_branch_cost. */
1846 /* Generic should produce code tuned for Core-i7 (and newer chips)
1847 and btver1 (and newer chips). */
1849 static stringop_algs generic_memcpy[2] = {
1850 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1851 {-1, libcall, false}}},
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1853 {-1, libcall, false}}}};
1854 static stringop_algs generic_memset[2] = {
1855 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1856 {-1, libcall, false}}},
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1858 {-1, libcall, false}}}};
1859 static const
1860 struct processor_costs generic_cost = {
1861 COSTS_N_INSNS (1), /* cost of an add instruction */
1862 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1863 this cost, however, our current implementation of synth_mult results in
1864 the use of unnecessary temporary registers, causing regressions on several
1865 SPECfp benchmarks. */
1866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1867 COSTS_N_INSNS (1), /* variable shift costs */
1868 COSTS_N_INSNS (1), /* constant shift costs */
1869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1870 COSTS_N_INSNS (4), /* HI */
1871 COSTS_N_INSNS (3), /* SI */
1872 COSTS_N_INSNS (4), /* DI */
1873 COSTS_N_INSNS (2)}, /* other */
1874 0, /* cost of multiply per each bit set */
1875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1876 COSTS_N_INSNS (26), /* HI */
1877 COSTS_N_INSNS (42), /* SI */
1878 COSTS_N_INSNS (74), /* DI */
1879 COSTS_N_INSNS (74)}, /* other */
1880 COSTS_N_INSNS (1), /* cost of movsx */
1881 COSTS_N_INSNS (1), /* cost of movzx */
1882 8, /* "large" insn */
1883 17, /* MOVE_RATIO */
1884 4, /* cost for loading QImode using movzbl */
1885 {4, 4, 4}, /* cost of loading integer registers
1886 in QImode, HImode and SImode.
1887 Relative to reg-reg move (2). */
1888 {4, 4, 4}, /* cost of storing integer registers */
1889 4, /* cost of reg,reg fld/fst */
1890 {12, 12, 12}, /* cost of loading fp registers
1891 in SFmode, DFmode and XFmode */
1892 {6, 6, 8}, /* cost of storing fp registers
1893 in SFmode, DFmode and XFmode */
1894 2, /* cost of moving MMX register */
1895 {8, 8}, /* cost of loading MMX registers
1896 in SImode and DImode */
1897 {8, 8}, /* cost of storing MMX registers
1898 in SImode and DImode */
1899 2, /* cost of moving SSE register */
1900 {8, 8, 8}, /* cost of loading SSE registers
1901 in SImode, DImode and TImode */
1902 {8, 8, 8}, /* cost of storing SSE registers
1903 in SImode, DImode and TImode */
1904 5, /* MMX or SSE register to integer */
1905 32, /* size of l1 cache. */
1906 512, /* size of l2 cache. */
1907 64, /* size of prefetch block */
1908 6, /* number of parallel prefetches */
1909 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1910 value is increased to the perhaps more appropriate value of 5. */
1911 3, /* Branch cost */
1912 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1913 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1914 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1915 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1916 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1917 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1918 generic_memcpy,
1919 generic_memset,
1920 1, /* scalar_stmt_cost. */
1921 1, /* scalar load_cost. */
1922 1, /* scalar_store_cost. */
1923 1, /* vec_stmt_cost. */
1924 1, /* vec_to_scalar_cost. */
1925 1, /* scalar_to_vec_cost. */
1926 1, /* vec_align_load_cost. */
1927 2, /* vec_unalign_load_cost. */
1928 1, /* vec_store_cost. */
1929 3, /* cond_taken_branch_cost. */
1930 1, /* cond_not_taken_branch_cost. */
1933 /* core_cost should produce code tuned for the Core family of CPUs. */
1934 static stringop_algs core_memcpy[2] = {
1935 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1936 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1937 {-1, libcall, false}}}};
1938 static stringop_algs core_memset[2] = {
1939 {libcall, {{6, loop_1_byte, true},
1940 {24, loop, true},
1941 {8192, rep_prefix_4_byte, true},
1942 {-1, libcall, false}}},
1943 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1944 {-1, libcall, false}}}};
1946 static const
1947 struct processor_costs core_cost = {
1948 COSTS_N_INSNS (1), /* cost of an add instruction */
1949 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1950 this cost, however, our current implementation of synth_mult results in
1951 the use of unnecessary temporary registers, causing regressions on several
1952 SPECfp benchmarks. */
1953 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1954 COSTS_N_INSNS (1), /* variable shift costs */
1955 COSTS_N_INSNS (1), /* constant shift costs */
1956 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1957 COSTS_N_INSNS (4), /* HI */
1958 COSTS_N_INSNS (3), /* SI */
1959 COSTS_N_INSNS (4), /* DI */
1960 COSTS_N_INSNS (2)}, /* other */
1961 0, /* cost of multiply per each bit set */
1962 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1963 COSTS_N_INSNS (26), /* HI */
1964 COSTS_N_INSNS (42), /* SI */
1965 COSTS_N_INSNS (74), /* DI */
1966 COSTS_N_INSNS (74)}, /* other */
1967 COSTS_N_INSNS (1), /* cost of movsx */
1968 COSTS_N_INSNS (1), /* cost of movzx */
1969 8, /* "large" insn */
1970 17, /* MOVE_RATIO */
1971 4, /* cost for loading QImode using movzbl */
1972 {4, 4, 4}, /* cost of loading integer registers
1973 in QImode, HImode and SImode.
1974 Relative to reg-reg move (2). */
1975 {4, 4, 4}, /* cost of storing integer registers */
1976 4, /* cost of reg,reg fld/fst */
1977 {12, 12, 12}, /* cost of loading fp registers
1978 in SFmode, DFmode and XFmode */
1979 {6, 6, 8}, /* cost of storing fp registers
1980 in SFmode, DFmode and XFmode */
1981 2, /* cost of moving MMX register */
1982 {8, 8}, /* cost of loading MMX registers
1983 in SImode and DImode */
1984 {8, 8}, /* cost of storing MMX registers
1985 in SImode and DImode */
1986 2, /* cost of moving SSE register */
1987 {8, 8, 8}, /* cost of loading SSE registers
1988 in SImode, DImode and TImode */
1989 {8, 8, 8}, /* cost of storing SSE registers
1990 in SImode, DImode and TImode */
1991 5, /* MMX or SSE register to integer */
1992 64, /* size of l1 cache. */
1993 512, /* size of l2 cache. */
1994 64, /* size of prefetch block */
1995 6, /* number of parallel prefetches */
1996 /* FIXME: perhaps a more appropriate value is 5. */
1997 3, /* Branch cost */
1998 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1999 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2000 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2001 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2002 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2003 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2004 core_memcpy,
2005 core_memset,
2006 1, /* scalar_stmt_cost. */
2007 1, /* scalar load_cost. */
2008 1, /* scalar_store_cost. */
2009 1, /* vec_stmt_cost. */
2010 1, /* vec_to_scalar_cost. */
2011 1, /* scalar_to_vec_cost. */
2012 1, /* vec_align_load_cost. */
2013 2, /* vec_unalign_load_cost. */
2014 1, /* vec_store_cost. */
2015 3, /* cond_taken_branch_cost. */
2016 1, /* cond_not_taken_branch_cost. */
2020 /* Set by -mtune. */
2021 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2023 /* Set by -mtune or -Os. */
2024 const struct processor_costs *ix86_cost = &pentium_cost;
2026 /* Processor feature/optimization bitmasks. */
2027 #define m_386 (1<<PROCESSOR_I386)
2028 #define m_486 (1<<PROCESSOR_I486)
2029 #define m_PENT (1<<PROCESSOR_PENTIUM)
2030 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2031 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2032 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2033 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2034 #define m_CORE2 (1<<PROCESSOR_CORE2)
2035 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2036 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2037 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2038 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2039 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2040 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2041 #define m_KNL (1<<PROCESSOR_KNL)
2042 #define m_INTEL (1<<PROCESSOR_INTEL)
2044 #define m_GEODE (1<<PROCESSOR_GEODE)
2045 #define m_K6 (1<<PROCESSOR_K6)
2046 #define m_K6_GEODE (m_K6 | m_GEODE)
2047 #define m_K8 (1<<PROCESSOR_K8)
2048 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2049 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2050 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2051 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2052 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2053 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2054 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2055 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2056 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2057 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2058 #define m_BTVER (m_BTVER1 | m_BTVER2)
2059 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2061 #define m_GENERIC (1<<PROCESSOR_GENERIC)
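/* The masks above are what the DEF_TUNE selectors in x86-tune.def are
   built from; a hypothetical entry of roughly this shape (illustrative
   only, not taken from the real table) enables one tuning knob for a set
   of processors:

     DEF_TUNE (X86_TUNE_EXAMPLE, "example",
               m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)

   ix86_tune_feature_names and initial_ix86_tune_features below are then
   generated by redefining DEF_TUNE to pick out, respectively, the name
   string and the selector mask of every entry.  */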
2063 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2064 #undef DEF_TUNE
2065 #define DEF_TUNE(tune, name, selector) name,
2066 #include "x86-tune.def"
2067 #undef DEF_TUNE
2070 /* Feature tests against the various tunings. */
2071 unsigned char ix86_tune_features[X86_TUNE_LAST];
2073 /* Feature tests against the various tunings used to create ix86_tune_features
2074 based on the processor mask. */
2075 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2076 #undef DEF_TUNE
2077 #define DEF_TUNE(tune, name, selector) selector,
2078 #include "x86-tune.def"
2079 #undef DEF_TUNE
2082 /* Feature tests against the various architecture variations. */
2083 unsigned char ix86_arch_features[X86_ARCH_LAST];
2085 /* Feature tests against the various architecture variations, used to create
2086 ix86_arch_features based on the processor mask. */
2087 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2088 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2089 ~(m_386 | m_486 | m_PENT | m_K6),
2091 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2092 ~m_386,
2094 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2095 ~(m_386 | m_486),
2097 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2098 ~m_386,
2100 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2101 ~m_386,
2104 /* In case the average insn count for a single function invocation is
2105 lower than this constant, emit fast (but longer) prologue and
2106 epilogue code. */
2107 #define FAST_PROLOGUE_INSN_COUNT 20
2109 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2110 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2111 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2112 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2114 /* Array of the smallest class containing reg number REGNO, indexed by
2115 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2117 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2119 /* ax, dx, cx, bx */
2120 AREG, DREG, CREG, BREG,
2121 /* si, di, bp, sp */
2122 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2123 /* FP registers */
2124 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2125 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2126 /* arg pointer */
2127 NON_Q_REGS,
2128 /* flags, fpsr, fpcr, frame */
2129 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2130 /* SSE registers */
2131 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2132 SSE_REGS, SSE_REGS,
2133 /* MMX registers */
2134 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2135 MMX_REGS, MMX_REGS,
2136 /* REX registers */
2137 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 /* SSE REX registers */
2140 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2141 SSE_REGS, SSE_REGS,
2142 /* AVX-512 SSE registers */
2143 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 /* Mask registers. */
2148 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2149 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 /* MPX bound registers */
2151 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2154 /* The "default" register map used in 32bit mode. */
2156 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2158 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2159 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2160 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2161 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2162 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2163 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2167 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2168 101, 102, 103, 104, /* bound registers */
2171 /* The "default" register map used in 64bit mode. */
2173 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2175 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2176 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2177 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2178 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2179 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2180 8,9,10,11,12,13,14,15, /* extended integer registers */
2181 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2182 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2183 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2184 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2185 126, 127, 128, 129, /* bound registers */
2188 /* Define the register numbers to be used in Dwarf debugging information.
2189 The SVR4 reference port C compiler uses the following register numbers
2190 in its Dwarf output code:
2191 0 for %eax (gcc regno = 0)
2192 1 for %ecx (gcc regno = 2)
2193 2 for %edx (gcc regno = 1)
2194 3 for %ebx (gcc regno = 3)
2195 4 for %esp (gcc regno = 7)
2196 5 for %ebp (gcc regno = 6)
2197 6 for %esi (gcc regno = 4)
2198 7 for %edi (gcc regno = 5)
2199 The following three DWARF register numbers are never generated by
2200 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2201 believes these numbers have these meanings.
2202 8 for %eip (no gcc equivalent)
2203 9 for %eflags (gcc regno = 17)
2204 10 for %trapno (no gcc equivalent)
2205 It is not at all clear how we should number the FP stack registers
2206 for the x86 architecture. If the version of SDB on x86/svr4 were
2207 a bit less brain dead with respect to floating-point then we would
2208 have a precedent to follow with respect to DWARF register numbers
2209 for x86 FP registers, but the SDB on x86/svr4 is so completely
2210 broken with respect to FP registers that it is hardly worth thinking
2211 of it as something to strive for compatibility with.
2212 The version of x86/svr4 SDB I have at the moment does (partially)
2213 seem to believe that DWARF register number 11 is associated with
2214 the x86 register %st(0), but that's about all. Higher DWARF
2215 register numbers don't seem to be associated with anything in
2216 particular, and even for DWARF regno 11, SDB only seems to under-
2217 stand that it should say that a variable lives in %st(0) (when
2218 asked via an `=' command) if we said it was in DWARF regno 11,
2219 but SDB still prints garbage when asked for the value of the
2220 variable in question (via a `/' command).
2221 (Also note that the labels SDB prints for various FP stack regs
2222 when doing an `x' command are all wrong.)
2223 Note that these problems generally don't affect the native SVR4
2224 C compiler because it doesn't allow the use of -O with -g and
2225 because when it is *not* optimizing, it allocates a memory
2226 location for each floating-point variable, and the memory
2227 location is what gets described in the DWARF AT_location
2228 attribute for the variable in question.
2229 Regardless of the severe mental illness of the x86/svr4 SDB, we
2230 do something sensible here and we use the following DWARF
2231 register numbers. Note that these are all stack-top-relative
2232 numbers.
2233 11 for %st(0) (gcc regno = 8)
2234 12 for %st(1) (gcc regno = 9)
2235 13 for %st(2) (gcc regno = 10)
2236 14 for %st(3) (gcc regno = 11)
2237 15 for %st(4) (gcc regno = 12)
2238 16 for %st(5) (gcc regno = 13)
2239 17 for %st(6) (gcc regno = 14)
2240 18 for %st(7) (gcc regno = 15)
2242 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2244 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2245 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2246 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2247 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2248 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2249 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2253 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2254 101, 102, 103, 104, /* bound registers */
2257 /* Define parameter passing and return registers. */
2259 static int const x86_64_int_parameter_registers[6] =
2261 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2264 static int const x86_64_ms_abi_int_parameter_registers[4] =
2266 CX_REG, DX_REG, R8_REG, R9_REG
2269 static int const x86_64_int_return_registers[4] =
2271 AX_REG, DX_REG, DI_REG, SI_REG
2274 /* Additional registers that are clobbered by SYSV calls. */
2276 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2278 SI_REG, DI_REG,
2279 XMM6_REG, XMM7_REG,
2280 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2281 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2284 /* Define the structure for the machine field in struct function. */
2286 struct GTY(()) stack_local_entry {
2287 unsigned short mode;
2288 unsigned short n;
2289 rtx rtl;
2290 struct stack_local_entry *next;
2293 /* Structure describing stack frame layout.
2294 Stack grows downward:
2296 [arguments]
2297 <- ARG_POINTER
2298 saved pc
2300 saved static chain if ix86_static_chain_on_stack
2302 saved frame pointer if frame_pointer_needed
2303 <- HARD_FRAME_POINTER
2304 [saved regs]
2305 <- regs_save_offset
2306 [padding0]
2308 [saved SSE regs]
2309 <- sse_regs_save_offset
2310 [padding1] |
2311 | <- FRAME_POINTER
2312 [va_arg registers] |
2314 [frame] |
2316 [padding2] | = to_allocate
2317 <- STACK_POINTER
2319 struct ix86_frame
2321 int nsseregs;
2322 int nregs;
2323 int va_arg_size;
2324 int red_zone_size;
2325 int outgoing_arguments_size;
2327 /* The offsets relative to ARG_POINTER. */
2328 HOST_WIDE_INT frame_pointer_offset;
2329 HOST_WIDE_INT hard_frame_pointer_offset;
2330 HOST_WIDE_INT stack_pointer_offset;
2331 HOST_WIDE_INT hfp_save_offset;
2332 HOST_WIDE_INT reg_save_offset;
2333 HOST_WIDE_INT sse_reg_save_offset;
2335 /* When save_regs_using_mov is set, emit prologue using
2336 move instead of push instructions. */
2337 bool save_regs_using_mov;
2340 /* Which cpu are we scheduling for. */
2341 enum attr_cpu ix86_schedule;
2343 /* Which cpu are we optimizing for. */
2344 enum processor_type ix86_tune;
2346 /* Which instruction set architecture to use. */
2347 enum processor_type ix86_arch;
2349 /* True if processor has SSE prefetch instruction. */
2350 unsigned char x86_prefetch_sse;
2352 /* -mstackrealign option */
2353 static const char ix86_force_align_arg_pointer_string[]
2354 = "force_align_arg_pointer";
2356 static rtx (*ix86_gen_leave) (void);
2357 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2358 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2360 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2361 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2362 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits specified at
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* Calling-ABI-specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2394 /* Register class used for passing a given 64bit part of the argument.
2395 These represent classes as documented by the psABI, with the exception
2396 of the SSESF and SSEDF classes, which are basically the SSE class except
2397 that gcc uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
2399 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2400 whenever possible (the upper half then contains only padding). */
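/* For instance, a lone 'double' argument is classified X86_64_SSEDF_CLASS
   so that it is moved in DFmode, while a lone 'int' gets
   X86_64_INTEGERSI_CLASS so an SImode move suffices (an illustrative
   summary; see classify_argument later in this file for the full rules).  */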
2401 enum x86_64_reg_class
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2416 #define MAX_CLASSES 8
2418 /* Table of constants used by fldpi, fldln2, etc.... */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2439 enum ix86_function_specific_strings
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2453 static void ix86_function_specific_print (FILE *, int,
2454 struct cl_target_option *);
2455 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2456 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2457 struct gcc_options *,
2458 struct gcc_options *,
2459 struct gcc_options *);
2460 static bool ix86_can_inline_p (tree, tree);
2461 static void ix86_set_current_function (tree);
2462 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2464 static enum calling_abi ix86_function_abi (const_tree);
2467 #ifndef SUBTARGET32_DEFAULT_CPU
2468 #define SUBTARGET32_DEFAULT_CPU "i386"
2469 #endif
2471 /* Whether -mtune= or -march= were specified */
2472 static int ix86_tune_defaulted;
2473 static int ix86_arch_specified;
2475 /* Vectorization library interface and handlers. */
2476 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2479 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2481 /* Processor target table, indexed by processor number */
2482 struct ptt
2484 const char *const name; /* processor name */
2485 const struct processor_costs *cost; /* Processor costs */
2486 const int align_loop; /* Default alignments. */
2487 const int align_loop_max_skip;
2488 const int align_jump;
2489 const int align_jump_max_skip;
2490 const int align_func;
2493 /* This table must be in sync with enum processor_type in i386.h. */
2494 static const struct ptt processor_target_table[PROCESSOR_max] =
2496 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2497 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2498 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2499 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2500 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2501 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2502 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2503 {"core2", &core_cost, 16, 10, 16, 10, 16},
2504 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2505 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2506 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2507 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2508 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2509 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2510 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2511 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2512 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2513 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2514 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2515 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2516 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2517 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2518 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2519 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2520 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2521 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2524 static unsigned int
2525 rest_of_handle_insert_vzeroupper (void)
2527 int i;
2529 /* vzeroupper instructions are inserted immediately after reload to
2530 account for possible spills from 256bit registers. The pass
2531 reuses the mode switching infrastructure by re-running the mode insertion
2532 pass, so disable entities that have already been processed. */
2533 for (i = 0; i < MAX_386_ENTITIES; i++)
2534 ix86_optimize_mode_switching[i] = 0;
2536 ix86_optimize_mode_switching[AVX_U128] = 1;
2538 /* Call optimize_mode_switching. */
2539 g->get_passes ()->execute_pass_mode_switching ();
2540 return 0;
2543 namespace {
2545 const pass_data pass_data_insert_vzeroupper =
2547 RTL_PASS, /* type */
2548 "vzeroupper", /* name */
2549 OPTGROUP_NONE, /* optinfo_flags */
2550 TV_NONE, /* tv_id */
2551 0, /* properties_required */
2552 0, /* properties_provided */
2553 0, /* properties_destroyed */
2554 0, /* todo_flags_start */
2555 TODO_df_finish, /* todo_flags_finish */
2558 class pass_insert_vzeroupper : public rtl_opt_pass
2560 public:
2561 pass_insert_vzeroupper(gcc::context *ctxt)
2562 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2565 /* opt_pass methods: */
2566 virtual bool gate (function *)
2568 return TARGET_AVX && !TARGET_AVX512F
2569 && TARGET_VZEROUPPER && flag_expensive_optimizations
2570 && !optimize_size;
2573 virtual unsigned int execute (function *)
2575 return rest_of_handle_insert_vzeroupper ();
2578 }; // class pass_insert_vzeroupper
2580 } // anon namespace
2582 rtl_opt_pass *
2583 make_pass_insert_vzeroupper (gcc::context *ctxt)
2585 return new pass_insert_vzeroupper (ctxt);
2588 /* Return true if a red-zone is in use. */
2590 static inline bool
2591 ix86_using_red_zone (void)
2593 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2596 /* Return a string that documents the current -m options. The caller is
2597 responsible for freeing the string. */
2599 static char *
2600 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2601 const char *tune, enum fpmath_unit fpmath,
2602 bool add_nl_p)
2604 struct ix86_target_opts
2606 const char *option; /* option string */
2607 HOST_WIDE_INT mask; /* isa mask options */
2610 /* This table is ordered so that options like -msse4.2 that imply
2611 preceding options are matched first. */
2612 static struct ix86_target_opts isa_opts[] =
2614 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2615 { "-mfma", OPTION_MASK_ISA_FMA },
2616 { "-mxop", OPTION_MASK_ISA_XOP },
2617 { "-mlwp", OPTION_MASK_ISA_LWP },
2618 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2619 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2620 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2621 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2622 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2623 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2624 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2625 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2626 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2627 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2628 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2629 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2630 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2631 { "-msse3", OPTION_MASK_ISA_SSE3 },
2632 { "-msse2", OPTION_MASK_ISA_SSE2 },
2633 { "-msse", OPTION_MASK_ISA_SSE },
2634 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2635 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2636 { "-mmmx", OPTION_MASK_ISA_MMX },
2637 { "-mabm", OPTION_MASK_ISA_ABM },
2638 { "-mbmi", OPTION_MASK_ISA_BMI },
2639 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2640 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2641 { "-mhle", OPTION_MASK_ISA_HLE },
2642 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2643 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2644 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2645 { "-madx", OPTION_MASK_ISA_ADX },
2646 { "-mtbm", OPTION_MASK_ISA_TBM },
2647 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2648 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2649 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2650 { "-maes", OPTION_MASK_ISA_AES },
2651 { "-msha", OPTION_MASK_ISA_SHA },
2652 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2653 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2654 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2655 { "-mf16c", OPTION_MASK_ISA_F16C },
2656 { "-mrtm", OPTION_MASK_ISA_RTM },
2657 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2658 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2659 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2660 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2661 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2662 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2663 { "-mmpx", OPTION_MASK_ISA_MPX },
2664 { "-mclwb", OPTION_MASK_ISA_CLWB },
2665 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2666 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2669 /* Flag options. */
2670 static struct ix86_target_opts flag_opts[] =
2672 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2673 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2674 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2675 { "-m80387", MASK_80387 },
2676 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2677 { "-malign-double", MASK_ALIGN_DOUBLE },
2678 { "-mcld", MASK_CLD },
2679 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2680 { "-mieee-fp", MASK_IEEE_FP },
2681 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2682 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2683 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2684 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2685 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2686 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2687 { "-mno-red-zone", MASK_NO_RED_ZONE },
2688 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2689 { "-mrecip", MASK_RECIP },
2690 { "-mrtd", MASK_RTD },
2691 { "-msseregparm", MASK_SSEREGPARM },
2692 { "-mstack-arg-probe", MASK_STACK_PROBE },
2693 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2694 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2695 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2696 { "-mvzeroupper", MASK_VZEROUPPER },
2697 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2698 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2699 { "-mprefer-avx128", MASK_PREFER_AVX128},
2702 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2704 char isa_other[40];
2705 char target_other[40];
2706 unsigned num = 0;
2707 unsigned i, j;
2708 char *ret;
2709 char *ptr;
2710 size_t len;
2711 size_t line_len;
2712 size_t sep_len;
2713 const char *abi;
2715 memset (opts, '\0', sizeof (opts));
2717 /* Add -march= option. */
2718 if (arch)
2720 opts[num][0] = "-march=";
2721 opts[num++][1] = arch;
2724 /* Add -mtune= option. */
2725 if (tune)
2727 opts[num][0] = "-mtune=";
2728 opts[num++][1] = tune;
2731 /* Add -m32/-m64/-mx32. */
2732 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2734 if ((isa & OPTION_MASK_ABI_64) != 0)
2735 abi = "-m64";
2736 else
2737 abi = "-mx32";
2738 isa &= ~ (OPTION_MASK_ISA_64BIT
2739 | OPTION_MASK_ABI_64
2740 | OPTION_MASK_ABI_X32);
2742 else
2743 abi = "-m32";
2744 opts[num++][0] = abi;
2746 /* Pick out the options in isa options. */
2747 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 if ((isa & isa_opts[i].mask) != 0)
2751 opts[num++][0] = isa_opts[i].option;
2752 isa &= ~ isa_opts[i].mask;
2756 if (isa && add_nl_p)
2758 opts[num++][0] = isa_other;
2759 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2760 isa);
2763 /* Add flag options. */
2764 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 if ((flags & flag_opts[i].mask) != 0)
2768 opts[num++][0] = flag_opts[i].option;
2769 flags &= ~ flag_opts[i].mask;
2773 if (flags && add_nl_p)
2775 opts[num++][0] = target_other;
2776 sprintf (target_other, "(other flags: %#x)", flags);
2779 /* Add -fpmath= option. */
2780 if (fpmath)
2782 opts[num][0] = "-mfpmath=";
2783 switch ((int) fpmath)
2785 case FPMATH_387:
2786 opts[num++][1] = "387";
2787 break;
2789 case FPMATH_SSE:
2790 opts[num++][1] = "sse";
2791 break;
2793 case FPMATH_387 | FPMATH_SSE:
2794 opts[num++][1] = "sse+387";
2795 break;
2797 default:
2798 gcc_unreachable ();
2802 /* Any options? */
2803 if (num == 0)
2804 return NULL;
2806 gcc_assert (num < ARRAY_SIZE (opts));
2808 /* Size the string. */
2809 len = 0;
2810 sep_len = (add_nl_p) ? 3 : 1;
2811 for (i = 0; i < num; i++)
2813 len += sep_len;
2814 for (j = 0; j < 2; j++)
2815 if (opts[i][j])
2816 len += strlen (opts[i][j]);
2819 /* Build the string. */
2820 ret = ptr = (char *) xmalloc (len);
2821 line_len = 0;
2823 for (i = 0; i < num; i++)
2825 size_t len2[2];
2827 for (j = 0; j < 2; j++)
2828 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2830 if (i != 0)
2832 *ptr++ = ' ';
2833 line_len++;
2835 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2837 *ptr++ = '\\';
2838 *ptr++ = '\n';
2839 line_len = 0;
2843 for (j = 0; j < 2; j++)
2844 if (opts[i][j])
2846 memcpy (ptr, opts[i][j], len2[j]);
2847 ptr += len2[j];
2848 line_len += len2[j];
2852 *ptr = '\0';
2853 gcc_assert (ret + len >= ptr);
2855 return ret;
2858 /* Return true if profiling code should be emitted before the
2859 prologue, false otherwise.
2860 Note: on x86 this is the case when "hotfix"-style -mfentry profiling is used. */
2861 static bool
2862 ix86_profile_before_prologue (void)
2864 return flag_fentry != 0;
2867 /* Function that is callable from the debugger to print the current
2868 options. */
2869 void ATTRIBUTE_UNUSED
2870 ix86_debug_options (void)
2872 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2873 ix86_arch_string, ix86_tune_string,
2874 ix86_fpmath, true);
2876 if (opts)
2878 fprintf (stderr, "%s\n\n", opts);
2879 free (opts);
2881 else
2882 fputs ("<no options>\n\n", stderr);
2884 return;
2887 static const char *stringop_alg_names[] = {
2888 #define DEF_ENUM
2889 #define DEF_ALG(alg, name) #name,
2890 #include "stringop.def"
2891 #undef DEF_ENUM
2892 #undef DEF_ALG
2895 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2896 The string is of the following form (or a comma-separated list of such entries):
2898 strategy_alg:max_size:[align|noalign]
2900 where the full size range for the strategy is either [0, max_size] or
2901 [min_size, max_size], in which min_size is the max_size + 1 of the
2902 preceding range. The last size range must have max_size == -1.
2904 Examples:
2907 -mmemcpy-strategy=libcall:-1:noalign
2909 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2913 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2915 This is to tell the compiler to use the following strategy for memset
2916 1) when the expected size is between [1, 16], use rep_8byte strategy;
2917 2) when the size is between [17, 2048], use vector_loop;
2918 3) when the size is > 2048, use libcall. */
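/* A minimal, standalone sketch (not part of GCC) of how such a strategy
   string decomposes into (algorithm, max_size, alignment) triples.  The
   buffer sizes and the use of strtok/sscanf here are illustrative only;
   the real parsing is done by ix86_parse_stringop_strategy_string below.

     #include <stdio.h>
     #include <string.h>

     int
     main (void)
     {
       char str[] = "rep_8byte:16:noalign,vector_loop:2048:align,"
                    "libcall:-1:noalign";

       for (char *range = strtok (str, ","); range != NULL;
            range = strtok (NULL, ","))
         {
           char alg[32], align[16];
           int max_size;

           // Each range has the form "strategy_alg:max_size:[align|noalign]".
           if (sscanf (range, "%31[^:]:%d:%15s", alg, &max_size, align) == 3)
             printf ("up to %d bytes: use %s (%s)\n", max_size, alg, align);
         }
       return 0;
     }
*/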
2920 struct stringop_size_range
2922 int max;
2923 stringop_alg alg;
2924 bool noalign;
2927 static void
2928 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2930 const struct stringop_algs *default_algs;
2931 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2932 char *curr_range_str, *next_range_str;
2933 int i = 0, n = 0;
2935 if (is_memset)
2936 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2937 else
2938 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2940 curr_range_str = strategy_str;
2944 int maxs;
2945 char alg_name[128];
2946 char align[16];
2947 next_range_str = strchr (curr_range_str, ',');
2948 if (next_range_str)
2949 *next_range_str++ = '\0';
2951 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2952 alg_name, &maxs, align))
2954 error ("wrong arg %s to option %s", curr_range_str,
2955 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2956 return;
2959 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2961 error ("size ranges of option %s should be increasing",
2962 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2963 return;
2966 for (i = 0; i < last_alg; i++)
2967 if (!strcmp (alg_name, stringop_alg_names[i]))
2968 break;
2970 if (i == last_alg)
2972 error ("wrong stringop strategy name %s specified for option %s",
2973 alg_name,
2974 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2975 return;
2978 if ((stringop_alg) i == rep_prefix_8_byte
2979 && !TARGET_64BIT)
2981 /* rep; movq isn't available in 32-bit code. */
2982 error ("stringop strategy name %s specified for option %s "
2983 "not supported for 32-bit code",
2984 alg_name,
2985 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2986 return;
2989 input_ranges[n].max = maxs;
2990 input_ranges[n].alg = (stringop_alg) i;
2991 if (!strcmp (align, "align"))
2992 input_ranges[n].noalign = false;
2993 else if (!strcmp (align, "noalign"))
2994 input_ranges[n].noalign = true;
2995 else
2997 error ("unknown alignment %s specified for option %s",
2998 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2999 return;
3001 n++;
3002 curr_range_str = next_range_str;
3004 while (curr_range_str);
3006 if (input_ranges[n - 1].max != -1)
3008 error ("the max value for the last size range should be -1"
3009 " for option %s",
3010 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3011 return;
3014 if (n > MAX_STRINGOP_ALGS)
3016 error ("too many size ranges specified in option %s",
3017 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3018 return;
3021 /* Now override the default algs array. */
3022 for (i = 0; i < n; i++)
3024 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3025 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3026 = input_ranges[i].alg;
3027 *const_cast<int *>(&default_algs->size[i].noalign)
3028 = input_ranges[i].noalign;
3033 /* Parse the -mtune-ctrl= option. When DUMP is true,
3034 print the features that are explicitly set. */
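/* Example (feature names come from x86-tune.def and are shown here only for
illustration): -mtune-ctrl=use_leave,^use_incdec enables the use_leave
feature and explicitly clears use_incdec. */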
3036 static void
3037 parse_mtune_ctrl_str (bool dump)
3039 if (!ix86_tune_ctrl_string)
3040 return;
3042 char *next_feature_string = NULL;
3043 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3044 char *orig = curr_feature_string;
3045 int i;
3048 bool clear = false;
3050 next_feature_string = strchr (curr_feature_string, ',');
3051 if (next_feature_string)
3052 *next_feature_string++ = '\0';
3053 if (*curr_feature_string == '^')
3055 curr_feature_string++;
3056 clear = true;
3058 for (i = 0; i < X86_TUNE_LAST; i++)
3060 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3062 ix86_tune_features[i] = !clear;
3063 if (dump)
3064 fprintf (stderr, "Explicitly %s feature %s\n",
3065 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3066 break;
3069 if (i == X86_TUNE_LAST)
3070 error ("Unknown parameter to option -mtune-ctrl: %s",
3071 clear ? curr_feature_string - 1 : curr_feature_string);
3072 curr_feature_string = next_feature_string;
3074 while (curr_feature_string);
3075 free (orig);
3078 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3079 processor type. */
3081 static void
3082 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3084 unsigned int ix86_tune_mask = 1u << ix86_tune;
3085 int i;
3087 for (i = 0; i < X86_TUNE_LAST; ++i)
3089 if (ix86_tune_no_default)
3090 ix86_tune_features[i] = 0;
3091 else
3092 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3095 if (dump)
3097 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3098 for (i = 0; i < X86_TUNE_LAST; i++)
3099 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3100 ix86_tune_features[i] ? "on" : "off");
3103 parse_mtune_ctrl_str (dump);
3107 /* Default align_* from the processor table. */
3109 static void
3110 ix86_default_align (struct gcc_options *opts)
3112 if (opts->x_align_loops == 0)
3114 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3115 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3117 if (opts->x_align_jumps == 0)
3119 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3120 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3122 if (opts->x_align_functions == 0)
3124 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3128 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3130 static void
3131 ix86_override_options_after_change (void)
3133 ix86_default_align (&global_options);
3136 /* Override various settings based on options. If MAIN_ARGS_P, the
3137 options are from the command line, otherwise they are from
3138 attributes. */
3140 static void
3141 ix86_option_override_internal (bool main_args_p,
3142 struct gcc_options *opts,
3143 struct gcc_options *opts_set)
3145 int i;
3146 unsigned int ix86_arch_mask;
3147 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3148 const char *prefix;
3149 const char *suffix;
3150 const char *sw;
3152 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3153 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3154 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3155 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3156 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3157 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3158 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3159 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3160 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3161 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3162 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3163 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3164 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3165 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3166 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3167 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3168 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3169 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3170 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3171 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3172 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3173 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3174 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3175 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3176 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3177 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3178 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3179 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3180 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3181 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3182 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3183 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3184 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3185 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3186 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3187 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3188 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3189 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3190 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3191 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3192 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3193 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3194 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3195 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3196 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3197 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3198 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3199 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3200 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3201 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3202 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3203 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3204 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3205 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3206 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3207 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3208 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3209 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3211 #define PTA_CORE2 \
3212 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3213 | PTA_CX16 | PTA_FXSR)
3214 #define PTA_NEHALEM \
3215 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3216 #define PTA_WESTMERE \
3217 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3218 #define PTA_SANDYBRIDGE \
3219 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3220 #define PTA_IVYBRIDGE \
3221 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3222 #define PTA_HASWELL \
3223 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3224 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3225 #define PTA_BROADWELL \
3226 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3227 #define PTA_KNL \
3228 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3229 #define PTA_BONNELL \
3230 (PTA_CORE2 | PTA_MOVBE)
3231 #define PTA_SILVERMONT \
3232 (PTA_WESTMERE | PTA_MOVBE)
3234 /* If this ever reaches 64, the flags field of struct pta below needs to be widened. */
3236 static struct pta
3238 const char *const name; /* processor name or nickname. */
3239 const enum processor_type processor;
3240 const enum attr_cpu schedule;
3241 const unsigned HOST_WIDE_INT flags;
3243 const processor_alias_table[] =
3245 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3246 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3247 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3248 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3249 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3250 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3251 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3252 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3253 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3254 PTA_MMX | PTA_SSE | PTA_FXSR},
3255 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3256 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3257 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3258 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3259 PTA_MMX | PTA_SSE | PTA_FXSR},
3260 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3261 PTA_MMX | PTA_SSE | PTA_FXSR},
3262 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3263 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3264 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3265 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3266 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3267 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3268 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3269 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3270 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3273 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3274 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3275 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3276 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3277 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3278 PTA_SANDYBRIDGE},
3279 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3280 PTA_SANDYBRIDGE},
3281 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3282 PTA_IVYBRIDGE},
3283 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3284 PTA_IVYBRIDGE},
3285 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3286 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3287 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3288 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3289 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3290 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3291 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3292 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3293 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3294 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3295 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3296 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3297 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3298 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3299 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3300 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3301 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3302 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3303 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3304 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3305 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3306 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3307 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3308 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3309 {"x86-64", PROCESSOR_K8, CPU_K8,
3310 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3311 {"k8", PROCESSOR_K8, CPU_K8,
3312 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3313 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3314 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3315 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3316 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3317 {"opteron", PROCESSOR_K8, CPU_K8,
3318 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3319 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3320 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3321 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3322 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3323 {"athlon64", PROCESSOR_K8, CPU_K8,
3324 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3325 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3326 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3327 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3328 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3329 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3330 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3331 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3332 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3333 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3334 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3335 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3336 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3337 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3338 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3339 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3340 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3341 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3342 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3343 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3344 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3345 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3346 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3347 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3348 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3349 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3350 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3351 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3352 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3353 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3354 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3355 | PTA_XSAVEOPT | PTA_FSGSBASE},
3356 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3357 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3358 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3359 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3360 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3361 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3362 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3363 | PTA_MOVBE | PTA_MWAITX},
3364 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3365 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3366 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3367 | PTA_FXSR | PTA_XSAVE},
3368 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3369 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3370 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3371 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3372 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3373 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3375 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3376 PTA_64BIT
3377 | PTA_HLE /* flags are only used for -march switch. */ },
3380 /* -mrecip options. */
3381 static struct
3383 const char *string; /* option name */
3384 unsigned int mask; /* mask bits to set */
3386 const recip_options[] =
3388 { "all", RECIP_MASK_ALL },
3389 { "none", RECIP_MASK_NONE },
3390 { "div", RECIP_MASK_DIV },
3391 { "sqrt", RECIP_MASK_SQRT },
3392 { "vec-div", RECIP_MASK_VEC_DIV },
3393 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3396 int const pta_size = ARRAY_SIZE (processor_alias_table);
3398 /* Set up prefix/suffix so the error messages refer to either the command
3399 line argument, or the attribute(target). */
3400 if (main_args_p)
3402 prefix = "-m";
3403 suffix = "";
3404 sw = "switch";
3406 else
3408 prefix = "option(\"";
3409 suffix = "\")";
3410 sw = "attribute";
3413 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3414 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3415 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3416 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3417 #ifdef TARGET_BI_ARCH
3418 else
3420 #if TARGET_BI_ARCH == 1
3421 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3422 is on and OPTION_MASK_ABI_X32 is off. We turn off
3423 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3424 -mx32. */
3425 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3426 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3427 #else
3428 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3429 on and OPTION_MASK_ABI_64 is off. We turn off
3430 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3431 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3432 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3434 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3435 #endif
3436 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3437 && TARGET_IAMCU_P (opts->x_target_flags))
3438 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3439 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3441 #endif
3443 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3445 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3446 OPTION_MASK_ABI_64 for TARGET_X32. */
3447 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3448 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3450 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3451 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3452 | OPTION_MASK_ABI_X32
3453 | OPTION_MASK_ABI_64);
3454 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3456 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3457 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3458 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3459 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
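/* At this point the ISA/ABI selection is consistent:
-m64 leaves OPTION_MASK_ISA_64BIT | OPTION_MASK_ABI_64 set,
-mx32 leaves OPTION_MASK_ISA_64BIT | OPTION_MASK_ABI_X32 set,
and -m32/-m16 leave none of these 64-bit masks set. */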
3462 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3463 SUBTARGET_OVERRIDE_OPTIONS;
3464 #endif
3466 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3467 SUBSUBTARGET_OVERRIDE_OPTIONS;
3468 #endif
3470 /* -fPIC is the default for x86_64. */
3471 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 opts->x_flag_pic = 2;
3474 /* Need to check -mtune=generic first. */
3475 if (opts->x_ix86_tune_string)
3477 /* As special support for cross compilers we read -mtune=native
3478 as -mtune=generic. With native compilers we won't see the
3479 -mtune=native, as it was changed by the driver. */
3480 if (!strcmp (opts->x_ix86_tune_string, "native"))
3482 opts->x_ix86_tune_string = "generic";
3484 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3485 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3486 "%stune=k8%s or %stune=generic%s instead as appropriate",
3487 prefix, suffix, prefix, suffix, prefix, suffix);
3489 else
3491 if (opts->x_ix86_arch_string)
3492 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3493 if (!opts->x_ix86_tune_string)
3495 opts->x_ix86_tune_string
3496 = processor_target_table[TARGET_CPU_DEFAULT].name;
3497 ix86_tune_defaulted = 1;
3500 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3501 or defaulted. We need to use a sensible tune option. */
3502 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3504 opts->x_ix86_tune_string = "generic";
3508 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3509 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 /* rep; movq isn't available in 32-bit code. */
3512 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3513 opts->x_ix86_stringop_alg = no_stringop;
3516 if (!opts->x_ix86_arch_string)
3517 opts->x_ix86_arch_string
3518 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3519 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3520 else
3521 ix86_arch_specified = 1;
3523 if (opts_set->x_ix86_pmode)
3525 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3526 && opts->x_ix86_pmode == PMODE_SI)
3527 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3528 && opts->x_ix86_pmode == PMODE_DI))
3529 error ("address mode %qs not supported in the %s bit mode",
3530 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3531 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3533 else
3534 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3535 ? PMODE_DI : PMODE_SI;
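/* I.e. LP64 code defaults to 64-bit (DImode) addresses while 32-bit and x32
code defaults to SImode addresses, unless overridden with -maddress-mode=. */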
3537 if (!opts_set->x_ix86_abi)
3538 opts->x_ix86_abi = DEFAULT_ABI;
3540 /* For targets using the MS ABI, enable MS extensions if they were not
3541 explicitly turned off. For non-MS ABI targets, we turn this
3542 option off. */
3543 if (!opts_set->x_flag_ms_extensions)
3544 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3546 if (opts_set->x_ix86_cmodel)
3548 switch (opts->x_ix86_cmodel)
3550 case CM_SMALL:
3551 case CM_SMALL_PIC:
3552 if (opts->x_flag_pic)
3553 opts->x_ix86_cmodel = CM_SMALL_PIC;
3554 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 error ("code model %qs not supported in the %s bit mode",
3556 "small", "32");
3557 break;
3559 case CM_MEDIUM:
3560 case CM_MEDIUM_PIC:
3561 if (opts->x_flag_pic)
3562 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3563 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3564 error ("code model %qs not supported in the %s bit mode",
3565 "medium", "32");
3566 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3567 error ("code model %qs not supported in x32 mode",
3568 "medium");
3569 break;
3571 case CM_LARGE:
3572 case CM_LARGE_PIC:
3573 if (opts->x_flag_pic)
3574 opts->x_ix86_cmodel = CM_LARGE_PIC;
3575 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3576 error ("code model %qs not supported in the %s bit mode",
3577 "large", "32");
3578 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3579 error ("code model %qs not supported in x32 mode",
3580 "large");
3581 break;
3583 case CM_32:
3584 if (opts->x_flag_pic)
3585 error ("code model %s does not support PIC mode", "32");
3586 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3587 error ("code model %qs not supported in the %s bit mode",
3588 "32", "64");
3589 break;
3591 case CM_KERNEL:
3592 if (opts->x_flag_pic)
3594 error ("code model %s does not support PIC mode", "kernel");
3595 opts->x_ix86_cmodel = CM_32;
3597 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3598 error ("code model %qs not supported in the %s bit mode",
3599 "kernel", "32");
3600 break;
3602 default:
3603 gcc_unreachable ();
3606 else
3608 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3609 use of rip-relative addressing. This eliminates fixups that
3610 would otherwise be needed if this object is to be placed in a
3611 DLL, and is essentially just as efficient as direct addressing. */
3612 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3613 && (TARGET_RDOS || TARGET_PECOFF))
3614 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3615 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3616 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3617 else
3618 opts->x_ix86_cmodel = CM_32;
3620 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3622 error ("-masm=intel not supported in this configuration");
3623 opts->x_ix86_asm_dialect = ASM_ATT;
3625 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3626 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3627 sorry ("%i-bit mode not compiled in",
3628 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3630 for (i = 0; i < pta_size; i++)
3631 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3633 ix86_schedule = processor_alias_table[i].schedule;
3634 ix86_arch = processor_alias_table[i].processor;
3635 /* Default cpu tuning to the architecture. */
3636 ix86_tune = ix86_arch;
3638 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3639 && !(processor_alias_table[i].flags & PTA_64BIT))
3640 error ("CPU you selected does not support x86-64 "
3641 "instruction set");
3643 if (processor_alias_table[i].flags & PTA_MMX
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3646 if (processor_alias_table[i].flags & PTA_3DNOW
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3649 if (processor_alias_table[i].flags & PTA_3DNOW_A
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3652 if (processor_alias_table[i].flags & PTA_SSE
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3655 if (processor_alias_table[i].flags & PTA_SSE2
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3658 if (processor_alias_table[i].flags & PTA_SSE3
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3661 if (processor_alias_table[i].flags & PTA_SSSE3
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3664 if (processor_alias_table[i].flags & PTA_SSE4_1
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3667 if (processor_alias_table[i].flags & PTA_SSE4_2
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3670 if (processor_alias_table[i].flags & PTA_AVX
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3673 if (processor_alias_table[i].flags & PTA_AVX2
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3676 if (processor_alias_table[i].flags & PTA_FMA
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3679 if (processor_alias_table[i].flags & PTA_SSE4A
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3682 if (processor_alias_table[i].flags & PTA_FMA4
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3685 if (processor_alias_table[i].flags & PTA_XOP
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3688 if (processor_alias_table[i].flags & PTA_LWP
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3691 if (processor_alias_table[i].flags & PTA_ABM
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3694 if (processor_alias_table[i].flags & PTA_BMI
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3697 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3698 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3699 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3700 if (processor_alias_table[i].flags & PTA_TBM
3701 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3702 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3703 if (processor_alias_table[i].flags & PTA_BMI2
3704 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3706 if (processor_alias_table[i].flags & PTA_CX16
3707 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3708 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3709 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3710 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3711 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3712 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3713 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3716 if (processor_alias_table[i].flags & PTA_MOVBE
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3719 if (processor_alias_table[i].flags & PTA_AES
3720 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3721 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3722 if (processor_alias_table[i].flags & PTA_SHA
3723 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3724 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3725 if (processor_alias_table[i].flags & PTA_PCLMUL
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3728 if (processor_alias_table[i].flags & PTA_FSGSBASE
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3731 if (processor_alias_table[i].flags & PTA_RDRND
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3734 if (processor_alias_table[i].flags & PTA_F16C
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3737 if (processor_alias_table[i].flags & PTA_RTM
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3740 if (processor_alias_table[i].flags & PTA_HLE
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3743 if (processor_alias_table[i].flags & PTA_PRFCHW
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3746 if (processor_alias_table[i].flags & PTA_RDSEED
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3749 if (processor_alias_table[i].flags & PTA_ADX
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3752 if (processor_alias_table[i].flags & PTA_FXSR
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3755 if (processor_alias_table[i].flags & PTA_XSAVE
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3758 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3761 if (processor_alias_table[i].flags & PTA_AVX512F
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3764 if (processor_alias_table[i].flags & PTA_AVX512ER
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3767 if (processor_alias_table[i].flags & PTA_AVX512PF
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3770 if (processor_alias_table[i].flags & PTA_AVX512CD
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3773 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3776 if (processor_alias_table[i].flags & PTA_PCOMMIT
3777 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3778 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3779 if (processor_alias_table[i].flags & PTA_CLWB
3780 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3781 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3782 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3783 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3784 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3785 if (processor_alias_table[i].flags & PTA_XSAVEC
3786 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3787 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3788 if (processor_alias_table[i].flags & PTA_XSAVES
3789 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3790 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3791 if (processor_alias_table[i].flags & PTA_AVX512DQ
3792 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3793 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3794 if (processor_alias_table[i].flags & PTA_AVX512BW
3795 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3797 if (processor_alias_table[i].flags & PTA_AVX512VL
3798 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3799 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3800 if (processor_alias_table[i].flags & PTA_MPX
3801 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3802 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3803 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3804 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3805 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3806 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3807 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3808 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3809 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3810 x86_prefetch_sse = true;
3811 if (processor_alias_table[i].flags & PTA_MWAITX
3812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3815 break;
3818 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3819 error ("Intel MPX does not support x32");
3824 if (TARGET_IAMCU_P (opts->x_target_flags))
3826 /* Verify that x87/MMX/SSE/AVX is off for -miamcu. */
3827 if (TARGET_80387_P (opts->x_target_flags))
3828 sorry ("X87 FPU isn%'t supported in Intel MCU psABI");
3829 else if ((opts->x_ix86_isa_flags & (OPTION_MASK_ISA_MMX
3830 | OPTION_MASK_ISA_SSE
3831 | OPTION_MASK_ISA_AVX)))
3832 sorry ("%s isn%'t supported in Intel MCU psABI",
3833 TARGET_MMX_P (opts->x_ix86_isa_flags)
3834 ? "MMX"
3835 : TARGET_SSE_P (opts->x_ix86_isa_flags) ? "SSE" : "AVX");
3838 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3839 error ("generic CPU can be used only for %stune=%s %s",
3840 prefix, suffix, sw);
3841 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3842 error ("intel CPU can be used only for %stune=%s %s",
3843 prefix, suffix, sw);
3844 else if (i == pta_size)
3845 error ("bad value (%s) for %sarch=%s %s",
3846 opts->x_ix86_arch_string, prefix, suffix, sw);
3848 ix86_arch_mask = 1u << ix86_arch;
3849 for (i = 0; i < X86_ARCH_LAST; ++i)
3850 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3852 for (i = 0; i < pta_size; i++)
3853 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3855 ix86_schedule = processor_alias_table[i].schedule;
3856 ix86_tune = processor_alias_table[i].processor;
3857 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3859 if (!(processor_alias_table[i].flags & PTA_64BIT))
3861 if (ix86_tune_defaulted)
3863 opts->x_ix86_tune_string = "x86-64";
3864 for (i = 0; i < pta_size; i++)
3865 if (! strcmp (opts->x_ix86_tune_string,
3866 processor_alias_table[i].name))
3867 break;
3868 ix86_schedule = processor_alias_table[i].schedule;
3869 ix86_tune = processor_alias_table[i].processor;
3871 else
3872 error ("CPU you selected does not support x86-64 "
3873 "instruction set");
3876 /* Intel CPUs have always interpreted SSE prefetch instructions as
3877 NOPs; so, we can enable SSE prefetch instructions even when
3878 -mtune (rather than -march) points us to a processor that has them.
3879 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3880 higher processors. */
3881 if (TARGET_CMOV
3882 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3883 x86_prefetch_sse = true;
3884 break;
3887 if (ix86_tune_specified && i == pta_size)
3888 error ("bad value (%s) for %stune=%s %s",
3889 opts->x_ix86_tune_string, prefix, suffix, sw);
3891 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3893 #ifndef USE_IX86_FRAME_POINTER
3894 #define USE_IX86_FRAME_POINTER 0
3895 #endif
3897 #ifndef USE_X86_64_FRAME_POINTER
3898 #define USE_X86_64_FRAME_POINTER 0
3899 #endif
3901 /* Set the default values for switches whose default depends on TARGET_64BIT
3902 in case they weren't overwritten by command line options. */
3903 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3905 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3906 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3907 if (opts->x_flag_asynchronous_unwind_tables
3908 && !opts_set->x_flag_unwind_tables
3909 && TARGET_64BIT_MS_ABI)
3910 opts->x_flag_unwind_tables = 1;
3911 if (opts->x_flag_asynchronous_unwind_tables == 2)
3912 opts->x_flag_unwind_tables
3913 = opts->x_flag_asynchronous_unwind_tables = 1;
3914 if (opts->x_flag_pcc_struct_return == 2)
3915 opts->x_flag_pcc_struct_return = 0;
3917 else
3919 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3920 opts->x_flag_omit_frame_pointer
3921 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3922 if (opts->x_flag_asynchronous_unwind_tables == 2)
3923 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3924 if (opts->x_flag_pcc_struct_return == 2)
3926 /* Intel MCU psABI specifies that -freg-struct-return should
3927 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
3928 we check -miamcu so that -freg-struct-return is always
3929 turned on if -miamcu is used. */
3930 if (TARGET_IAMCU_P (opts->x_target_flags))
3931 opts->x_flag_pcc_struct_return = 0;
3932 else
3933 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3937 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3938 /* TODO: ix86_cost should be chosen at instruction or function granularity
3939 so that for cold code we use size_cost even in !optimize_size compilation. */
3940 if (opts->x_optimize_size)
3941 ix86_cost = &ix86_size_cost;
3942 else
3943 ix86_cost = ix86_tune_cost;
3945 /* Arrange to set up i386_stack_locals for all functions. */
3946 init_machine_status = ix86_init_machine_status;
3948 /* Validate -mregparm= value. */
3949 if (opts_set->x_ix86_regparm)
3951 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3952 warning (0, "-mregparm is ignored in 64-bit mode");
3953 else if (TARGET_IAMCU_P (opts->x_target_flags))
3954 warning (0, "-mregparm is ignored for Intel MCU psABI");
3955 if (opts->x_ix86_regparm > REGPARM_MAX)
3957 error ("-mregparm=%d is not between 0 and %d",
3958 opts->x_ix86_regparm, REGPARM_MAX);
3959 opts->x_ix86_regparm = 0;
3962 if (TARGET_IAMCU_P (opts->x_target_flags)
3963 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
3964 opts->x_ix86_regparm = REGPARM_MAX;
3966 /* Default align_* from the processor table. */
3967 ix86_default_align (opts);
3969 /* Provide default for -mbranch-cost= value. */
3970 if (!opts_set->x_ix86_branch_cost)
3971 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3973 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3975 opts->x_target_flags
3976 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3978 /* Enable by default the SSE and MMX builtins. Do allow the user to
3979 explicitly disable any of these. In particular, disabling SSE and
3980 MMX for kernel code is extremely useful. */
3981 if (!ix86_arch_specified)
3982 opts->x_ix86_isa_flags
3983 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3984 | TARGET_SUBTARGET64_ISA_DEFAULT)
3985 & ~opts->x_ix86_isa_flags_explicit);
3987 if (TARGET_RTD_P (opts->x_target_flags))
3988 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3990 else
3992 opts->x_target_flags
3993 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3995 if (!ix86_arch_specified)
3996 opts->x_ix86_isa_flags
3997 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3999 /* The i386 ABI does not specify a red zone. It still makes sense to use one
4000 when the programmer takes care to keep the stack from being destroyed. */
4001 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4002 opts->x_target_flags |= MASK_NO_RED_ZONE;
4005 /* Keep nonleaf frame pointers. */
4006 if (opts->x_flag_omit_frame_pointer)
4007 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4008 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4009 opts->x_flag_omit_frame_pointer = 1;
4011 /* If we're doing fast math, we don't care about comparison order
4012 wrt NaNs. This lets us use a shorter comparison sequence. */
4013 if (opts->x_flag_finite_math_only)
4014 opts->x_target_flags &= ~MASK_IEEE_FP;
4016 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4017 since the insns won't need emulation. */
4018 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4019 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4021 /* Likewise, if the target doesn't have a 387, or we've specified
4022 software floating point, don't use 387 inline intrinsics. */
4023 if (!TARGET_80387_P (opts->x_target_flags))
4024 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4026 /* Turn on MMX builtins for -msse. */
4027 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4028 opts->x_ix86_isa_flags
4029 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4031 /* Enable SSE prefetch. */
4032 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4033 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4034 x86_prefetch_sse = true;
4036 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4037 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4038 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4039 opts->x_ix86_isa_flags
4040 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4042 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4043 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4044 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4045 opts->x_ix86_isa_flags
4046 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4048 /* Enable lzcnt instruction for -mabm. */
4049 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4050 opts->x_ix86_isa_flags
4051 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
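/* Thus several ISA options imply others unless the latter were explicitly
disabled: -msse implies MMX, -m3dnow or -mprefetchwt1 imply prefetchw,
-msse4.2 or -mabm imply popcnt, and -mabm also implies lzcnt. */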
4053 /* Validate -mpreferred-stack-boundary= value or default it to
4054 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4055 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4056 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4058 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4059 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4060 int max = (TARGET_SEH ? 4 : 12);
4062 if (opts->x_ix86_preferred_stack_boundary_arg < min
4063 || opts->x_ix86_preferred_stack_boundary_arg > max)
4065 if (min == max)
4066 error ("-mpreferred-stack-boundary is not supported "
4067 "for this target");
4068 else
4069 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4070 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4072 else
4073 ix86_preferred_stack_boundary
4074 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
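/* The argument is a power of two in bytes; e.g. -mpreferred-stack-boundary=4
requests 2**4 = 16-byte (128-bit) stack alignment. */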
4077 /* Set the default value for -mstackrealign. */
4078 if (opts->x_ix86_force_align_arg_pointer == -1)
4079 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4081 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4083 /* Validate -mincoming-stack-boundary= value or default it to
4084 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4085 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4086 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4088 if (opts->x_ix86_incoming_stack_boundary_arg
4089 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4090 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4091 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4092 opts->x_ix86_incoming_stack_boundary_arg,
4093 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4094 else
4096 ix86_user_incoming_stack_boundary
4097 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4098 ix86_incoming_stack_boundary
4099 = ix86_user_incoming_stack_boundary;
4103 #ifndef NO_PROFILE_COUNTERS
4104 if (flag_nop_mcount)
4105 error ("-mnop-mcount is not compatible with this target");
4106 #endif
4107 if (flag_nop_mcount && flag_pic)
4108 error ("-mnop-mcount is not implemented for -fPIC");
4110 /* Accept -msseregparm only if at least SSE support is enabled. */
4111 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4112 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4113 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4115 if (opts_set->x_ix86_fpmath)
4117 if (opts->x_ix86_fpmath & FPMATH_SSE)
4119 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4121 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4122 opts->x_ix86_fpmath = FPMATH_387;
4124 else if ((opts->x_ix86_fpmath & FPMATH_387)
4125 && !TARGET_80387_P (opts->x_target_flags))
4127 warning (0, "387 instruction set disabled, using SSE arithmetics");
4128 opts->x_ix86_fpmath = FPMATH_SSE;
4132 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4133 -mfpmath=387. The latter is however the default on many targets, since the
4134 extra 80-bit precision of temporaries is considered to be part of the ABI.
4135 Overwrite the default at least for -ffast-math.
4136 TODO: -mfpmath=both seems to produce equally performing code with
4137 slightly smaller binaries. It is however not clear whether register
4138 allocation is ready for this setting.
4139 Also, -mfpmath=387 is overall considerably more compact (about 4-5%) than
4140 SSE codegen. We may switch to 387 with -ffast-math for size-optimized
4141 functions. */
4142 else if (fast_math_flags_set_p (&global_options)
4143 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4144 opts->x_ix86_fpmath = FPMATH_SSE;
4145 else
4146 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4148 /* If the i387 is disabled, then do not return values in it. */
4149 if (!TARGET_80387_P (opts->x_target_flags))
4150 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4152 /* Use external vectorized library in vectorizing intrinsics. */
4153 if (opts_set->x_ix86_veclibabi_type)
4154 switch (opts->x_ix86_veclibabi_type)
4156 case ix86_veclibabi_type_svml:
4157 ix86_veclib_handler = ix86_veclibabi_svml;
4158 break;
4160 case ix86_veclibabi_type_acml:
4161 ix86_veclib_handler = ix86_veclibabi_acml;
4162 break;
4164 default:
4165 gcc_unreachable ();
4168 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4169 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4170 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4172 /* If stack probes are required, the space used for large function
4173 arguments on the stack must also be probed, so enable
4174 -maccumulate-outgoing-args so this happens in the prologue. */
4175 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4176 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4178 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4179 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4180 "for correctness", prefix, suffix);
4181 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4184 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4186 char *p;
4187 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4188 p = strchr (internal_label_prefix, 'X');
4189 internal_label_prefix_len = p - internal_label_prefix;
4190 *p = '\0';
4193 /* When the scheduling description is not available, disable the scheduler
4194 pass so it won't slow down compilation and make x87 code slower. */
4195 if (!TARGET_SCHEDULE)
4196 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4198 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4199 ix86_tune_cost->simultaneous_prefetches,
4200 opts->x_param_values,
4201 opts_set->x_param_values);
4202 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4203 ix86_tune_cost->prefetch_block,
4204 opts->x_param_values,
4205 opts_set->x_param_values);
4206 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4207 ix86_tune_cost->l1_cache_size,
4208 opts->x_param_values,
4209 opts_set->x_param_values);
4210 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4211 ix86_tune_cost->l2_cache_size,
4212 opts->x_param_values,
4213 opts_set->x_param_values);
4215 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4216 if (opts->x_flag_prefetch_loop_arrays < 0
4217 && HAVE_prefetch
4218 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4219 && !opts->x_optimize_size
4220 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4221 opts->x_flag_prefetch_loop_arrays = 1;
4223 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4224 can be optimized to ap = __builtin_next_arg (0). */
4225 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4226 targetm.expand_builtin_va_start = NULL;
4228 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4230 ix86_gen_leave = gen_leave_rex64;
4231 if (Pmode == DImode)
4233 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4234 ix86_gen_tls_local_dynamic_base_64
4235 = gen_tls_local_dynamic_base_64_di;
4237 else
4239 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4240 ix86_gen_tls_local_dynamic_base_64
4241 = gen_tls_local_dynamic_base_64_si;
4244 else
4245 ix86_gen_leave = gen_leave;
4247 if (Pmode == DImode)
4249 ix86_gen_add3 = gen_adddi3;
4250 ix86_gen_sub3 = gen_subdi3;
4251 ix86_gen_sub3_carry = gen_subdi3_carry;
4252 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4253 ix86_gen_andsp = gen_anddi3;
4254 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4255 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4256 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4257 ix86_gen_monitor = gen_sse3_monitor_di;
4258 ix86_gen_monitorx = gen_monitorx_di;
4260 else
4262 ix86_gen_add3 = gen_addsi3;
4263 ix86_gen_sub3 = gen_subsi3;
4264 ix86_gen_sub3_carry = gen_subsi3_carry;
4265 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4266 ix86_gen_andsp = gen_andsi3;
4267 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4268 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4269 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4270 ix86_gen_monitor = gen_sse3_monitor_si;
4271 ix86_gen_monitorx = gen_monitorx_si;
4274 #ifdef USE_IX86_CLD
4275 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4276 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4277 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4278 #endif
4280 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4282 if (opts->x_flag_fentry > 0)
4283 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4284 "with -fpic");
4285 opts->x_flag_fentry = 0;
4287 else if (TARGET_SEH)
4289 if (opts->x_flag_fentry == 0)
4290 sorry ("-mno-fentry isn%'t compatible with SEH");
4291 opts->x_flag_fentry = 1;
4293 else if (opts->x_flag_fentry < 0)
4295 #if defined(PROFILE_BEFORE_PROLOGUE)
4296 opts->x_flag_fentry = 1;
4297 #else
4298 opts->x_flag_fentry = 0;
4299 #endif
4302 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4303 opts->x_target_flags |= MASK_VZEROUPPER;
4304 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4305 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4306 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4307 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4308 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4309 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4310 /* Enable 128-bit AVX instruction generation
4311 for the auto-vectorizer. */
4312 if (TARGET_AVX128_OPTIMAL
4313 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4314 opts->x_target_flags |= MASK_PREFER_AVX128;
4316 if (opts->x_ix86_recip_name)
4318 char *p = ASTRDUP (opts->x_ix86_recip_name);
4319 char *q;
4320 unsigned int mask, i;
4321 bool invert;
4323 while ((q = strtok (p, ",")) != NULL)
4325 p = NULL;
4326 if (*q == '!')
4328 invert = true;
4329 q++;
4331 else
4332 invert = false;
4334 if (!strcmp (q, "default"))
4335 mask = RECIP_MASK_ALL;
4336 else
4338 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4339 if (!strcmp (q, recip_options[i].string))
4341 mask = recip_options[i].mask;
4342 break;
4345 if (i == ARRAY_SIZE (recip_options))
4347 error ("unknown option for -mrecip=%s", q);
4348 invert = false;
4349 mask = RECIP_MASK_NONE;
4353 opts->x_recip_mask_explicit |= mask;
4354 if (invert)
4355 opts->x_recip_mask &= ~mask;
4356 else
4357 opts->x_recip_mask |= mask;
4361 if (TARGET_RECIP_P (opts->x_target_flags))
4362 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4363 else if (opts_set->x_target_flags & MASK_RECIP)
4364 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
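/* For example, -mrecip=all,!sqrt ends up with every RECIP_MASK_* bit set
except RECIP_MASK_SQRT, and those explicitly chosen bits are not changed
by a later -mrecip or -mno-recip. */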
4366 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4367 for 64-bit Bionic. Also default long double to 64-bit for Intel
4368 MCU psABI. */
4369 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4370 && !(opts_set->x_target_flags
4371 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4372 opts->x_target_flags |= (TARGET_64BIT
4373 ? MASK_LONG_DOUBLE_128
4374 : MASK_LONG_DOUBLE_64);
4376 /* Only one of them can be active. */
4377 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4378 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4380 /* Save the initial options in case the user does function specific
4381 options. */
4382 if (main_args_p)
4383 target_option_default_node = target_option_current_node
4384 = build_target_option_node (opts);
4386 /* Handle stack protector */
4387 if (!opts_set->x_ix86_stack_protector_guard)
4388 opts->x_ix86_stack_protector_guard
4389 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4391 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4392 if (opts->x_ix86_tune_memcpy_strategy)
4394 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4395 ix86_parse_stringop_strategy_string (str, false);
4396 free (str);
4399 if (opts->x_ix86_tune_memset_strategy)
4401 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4402 ix86_parse_stringop_strategy_string (str, true);
4403 free (str);
4407 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4409 static void
4410 ix86_option_override (void)
4412 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4413 struct register_pass_info insert_vzeroupper_info
4414 = { pass_insert_vzeroupper, "reload",
4415 1, PASS_POS_INSERT_AFTER
4418 ix86_option_override_internal (true, &global_options, &global_options_set);
4421 /* This needs to be done at start up. It's convenient to do it here. */
4422 register_pass (&insert_vzeroupper_info);
4425 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4426 static char *
4427 ix86_offload_options (void)
4429 if (TARGET_LP64)
4430 return xstrdup ("-foffload-abi=lp64");
4431 return xstrdup ("-foffload-abi=ilp32");
4434 /* Update register usage after having seen the compiler flags. */
4436 static void
4437 ix86_conditional_register_usage (void)
4439 int i, c_mask;
4441 /* For 32-bit targets, squash the REX registers. */
4442 if (! TARGET_64BIT)
4444 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4445 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4446 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4447 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4448 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4449 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4452 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4453 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4454 : TARGET_64BIT ? (1 << 2)
4455 : (1 << 1));
4457 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4459 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4461 /* Set/reset conditionally defined registers from
4462 CALL_USED_REGISTERS initializer. */
4463 if (call_used_regs[i] > 1)
4464 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4466 /* Build the CLOBBERED_REGS register set from the call-used
4467 registers of the GENERAL_REGS register set. */
4468 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4469 && call_used_regs[i])
4470 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4473 /* If MMX is disabled, squash the registers. */
4474 if (! TARGET_MMX)
4475 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4476 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4477 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4479 /* If SSE is disabled, squash the registers. */
4480 if (! TARGET_SSE)
4481 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4482 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4483 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4485 /* If the FPU is disabled, squash the registers. */
4486 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4487 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4488 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4489 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4491 /* If AVX512F is disabled, squash the registers. */
4492 if (! TARGET_AVX512F)
4494 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4495 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4497 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4498 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4501 /* If MPX is disabled, squash the registers. */
4502 if (! TARGET_MPX)
4503 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4504 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
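/* Editor's note: an illustrative reading of the c_mask reduction in
   ix86_conditional_register_usage above, assuming the encoding used by
   CALL_USED_REGISTERS in i386.h.  Entries greater than 1 describe per-ABI
   call-usedness as a bit mask: bit 1 is tested for 32-bit targets, bit 2
   for the 64-bit SYSV ABI and bit 3 for the 64-bit MS ABI.  A hypothetical
   entry of 4 (only bit 2 set) therefore reduces to call-used when
   compiling for 64-bit SYSV and to call-saved for the other ABIs:

     64-bit SYSV:  call_used_regs[i] = !!(4 & (1 << 2));   == 1 (call-used)
     32-bit:       call_used_regs[i] = !!(4 & (1 << 1));   == 0 (call-saved)  */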
4508 /* Save the current options */
4510 static void
4511 ix86_function_specific_save (struct cl_target_option *ptr,
4512 struct gcc_options *opts)
4514 ptr->arch = ix86_arch;
4515 ptr->schedule = ix86_schedule;
4516 ptr->prefetch_sse = x86_prefetch_sse;
4517 ptr->tune = ix86_tune;
4518 ptr->branch_cost = ix86_branch_cost;
4519 ptr->tune_defaulted = ix86_tune_defaulted;
4520 ptr->arch_specified = ix86_arch_specified;
4521 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4522 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4523 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4524 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4525 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4526 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4527 ptr->x_ix86_abi = opts->x_ix86_abi;
4528 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4529 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4530 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4531 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4532 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4533 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4534 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4535 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4536 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4537 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4538 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4539 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4540 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4541 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4542 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4543 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4544 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4545 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4546 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4547 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4549 /* The fields are char but the variables are not; make sure the
4550 values fit in the fields. */
4551 gcc_assert (ptr->arch == ix86_arch);
4552 gcc_assert (ptr->schedule == ix86_schedule);
4553 gcc_assert (ptr->tune == ix86_tune);
4554 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4557 /* Restore the current options */
4559 static void
4560 ix86_function_specific_restore (struct gcc_options *opts,
4561 struct cl_target_option *ptr)
4563 enum processor_type old_tune = ix86_tune;
4564 enum processor_type old_arch = ix86_arch;
4565 unsigned int ix86_arch_mask;
4566 int i;
4568 /* We don't change -fPIC. */
4569 opts->x_flag_pic = flag_pic;
4571 ix86_arch = (enum processor_type) ptr->arch;
4572 ix86_schedule = (enum attr_cpu) ptr->schedule;
4573 ix86_tune = (enum processor_type) ptr->tune;
4574 x86_prefetch_sse = ptr->prefetch_sse;
4575 opts->x_ix86_branch_cost = ptr->branch_cost;
4576 ix86_tune_defaulted = ptr->tune_defaulted;
4577 ix86_arch_specified = ptr->arch_specified;
4578 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4579 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4580 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4581 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4582 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4583 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4584 opts->x_ix86_abi = ptr->x_ix86_abi;
4585 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4586 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4587 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4588 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4589 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4590 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4591 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4592 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4593 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4594 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4595 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4596 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4597 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4598 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4599 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4600 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4601 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4602 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4603 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4604 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4605 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4606 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4607 so for cold code we use size_cost even in !optimize_size compilation. */
4608 if (opts->x_optimize_size)
4609 ix86_cost = &ix86_size_cost;
4610 else
4611 ix86_cost = ix86_tune_cost;
4613 /* Recreate the arch feature tests if the arch changed */
4614 if (old_arch != ix86_arch)
4616 ix86_arch_mask = 1u << ix86_arch;
4617 for (i = 0; i < X86_ARCH_LAST; ++i)
4618 ix86_arch_features[i]
4619 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4622 /* Recreate the tune optimization tests */
4623 if (old_tune != ix86_tune)
4624 set_ix86_tune_features (ix86_tune, false);
4627 /* Adjust target options after streaming them in. This is mainly about
4628 reconciling them with global options. */
4630 static void
4631 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4633 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4634 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4635 for PIC, or error out. */
4636 if (flag_pic)
4637 switch (ptr->x_ix86_cmodel)
4639 case CM_SMALL:
4640 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4641 break;
4643 case CM_MEDIUM:
4644 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4645 break;
4647 case CM_LARGE:
4648 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4649 break;
4651 case CM_KERNEL:
4652 error ("code model %s does not support PIC mode", "kernel");
4653 break;
4655 default:
4656 break;
4658 else
4659 switch (ptr->x_ix86_cmodel)
4661 case CM_SMALL_PIC:
4662 ptr->x_ix86_cmodel = CM_SMALL;
4663 break;
4665 case CM_MEDIUM_PIC:
4666 ptr->x_ix86_cmodel = CM_MEDIUM;
4667 break;
4669 case CM_LARGE_PIC:
4670 ptr->x_ix86_cmodel = CM_LARGE;
4671 break;
4673 default:
4674 break;
4678 /* Print the current options */
4680 static void
4681 ix86_function_specific_print (FILE *file, int indent,
4682 struct cl_target_option *ptr)
4684 char *target_string
4685 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4686 NULL, NULL, ptr->x_ix86_fpmath, false);
4688 gcc_assert (ptr->arch < PROCESSOR_max);
4689 fprintf (file, "%*sarch = %d (%s)\n",
4690 indent, "",
4691 ptr->arch, processor_target_table[ptr->arch].name);
4693 gcc_assert (ptr->tune < PROCESSOR_max);
4694 fprintf (file, "%*stune = %d (%s)\n",
4695 indent, "",
4696 ptr->tune, processor_target_table[ptr->tune].name);
4698 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4700 if (target_string)
4702 fprintf (file, "%*s%s\n", indent, "", target_string);
4703 free (target_string);
4708 /* Inner function to process the attribute((target(...))), take an argument and
4709 set the current options from the argument. If we have a list, recursively go
4710 over the list. */
4712 static bool
4713 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4714 struct gcc_options *opts,
4715 struct gcc_options *opts_set,
4716 struct gcc_options *enum_opts_set)
4718 char *next_optstr;
4719 bool ret = true;
4721 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4722 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4723 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4724 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4725 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4727 enum ix86_opt_type
4729 ix86_opt_unknown,
4730 ix86_opt_yes,
4731 ix86_opt_no,
4732 ix86_opt_str,
4733 ix86_opt_enum,
4734 ix86_opt_isa
4737 static const struct
4739 const char *string;
4740 size_t len;
4741 enum ix86_opt_type type;
4742 int opt;
4743 int mask;
4744 } attrs[] = {
4745 /* isa options */
4746 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4747 IX86_ATTR_ISA ("abm", OPT_mabm),
4748 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4749 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4750 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4751 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4752 IX86_ATTR_ISA ("aes", OPT_maes),
4753 IX86_ATTR_ISA ("sha", OPT_msha),
4754 IX86_ATTR_ISA ("avx", OPT_mavx),
4755 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4756 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4757 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4758 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4759 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4760 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4761 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4762 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4763 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4764 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4765 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4766 IX86_ATTR_ISA ("sse", OPT_msse),
4767 IX86_ATTR_ISA ("sse2", OPT_msse2),
4768 IX86_ATTR_ISA ("sse3", OPT_msse3),
4769 IX86_ATTR_ISA ("sse4", OPT_msse4),
4770 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4771 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4772 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4773 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4774 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4775 IX86_ATTR_ISA ("fma", OPT_mfma),
4776 IX86_ATTR_ISA ("xop", OPT_mxop),
4777 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4778 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4779 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4780 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4781 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4782 IX86_ATTR_ISA ("hle", OPT_mhle),
4783 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4784 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4785 IX86_ATTR_ISA ("adx", OPT_madx),
4786 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4787 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4788 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4789 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4790 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4791 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4792 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4793 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4794 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4795 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4796 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4797 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4799 /* enum options */
4800 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4802 /* string options */
4803 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4804 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4806 /* flag options */
4807 IX86_ATTR_YES ("cld",
4808 OPT_mcld,
4809 MASK_CLD),
4811 IX86_ATTR_NO ("fancy-math-387",
4812 OPT_mfancy_math_387,
4813 MASK_NO_FANCY_MATH_387),
4815 IX86_ATTR_YES ("ieee-fp",
4816 OPT_mieee_fp,
4817 MASK_IEEE_FP),
4819 IX86_ATTR_YES ("inline-all-stringops",
4820 OPT_minline_all_stringops,
4821 MASK_INLINE_ALL_STRINGOPS),
4823 IX86_ATTR_YES ("inline-stringops-dynamically",
4824 OPT_minline_stringops_dynamically,
4825 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4827 IX86_ATTR_NO ("align-stringops",
4828 OPT_mno_align_stringops,
4829 MASK_NO_ALIGN_STRINGOPS),
4831 IX86_ATTR_YES ("recip",
4832 OPT_mrecip,
4833 MASK_RECIP),
4837 /* If this is a list, recurse to get the options. */
4838 if (TREE_CODE (args) == TREE_LIST)
4840 bool ret = true;
4842 for (; args; args = TREE_CHAIN (args))
4843 if (TREE_VALUE (args)
4844 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4845 p_strings, opts, opts_set,
4846 enum_opts_set))
4847 ret = false;
4849 return ret;
4852 else if (TREE_CODE (args) != STRING_CST)
4854 error ("attribute %<target%> argument not a string");
4855 return false;
4858 /* Handle multiple arguments separated by commas. */
4859 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4861 while (next_optstr && *next_optstr != '\0')
4863 char *p = next_optstr;
4864 char *orig_p = p;
4865 char *comma = strchr (next_optstr, ',');
4866 const char *opt_string;
4867 size_t len, opt_len;
4868 int opt;
4869 bool opt_set_p;
4870 char ch;
4871 unsigned i;
4872 enum ix86_opt_type type = ix86_opt_unknown;
4873 int mask = 0;
4875 if (comma)
4877 *comma = '\0';
4878 len = comma - next_optstr;
4879 next_optstr = comma + 1;
4881 else
4883 len = strlen (p);
4884 next_optstr = NULL;
4887 /* Recognize no-xxx. */
4888 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4890 opt_set_p = false;
4891 p += 3;
4892 len -= 3;
4894 else
4895 opt_set_p = true;
4897 /* Find the option. */
4898 ch = *p;
4899 opt = N_OPTS;
4900 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4902 type = attrs[i].type;
4903 opt_len = attrs[i].len;
4904 if (ch == attrs[i].string[0]
4905 && ((type != ix86_opt_str && type != ix86_opt_enum)
4906 ? len == opt_len
4907 : len > opt_len)
4908 && memcmp (p, attrs[i].string, opt_len) == 0)
4910 opt = attrs[i].opt;
4911 mask = attrs[i].mask;
4912 opt_string = attrs[i].string;
4913 break;
4917 /* Process the option. */
4918 if (opt == N_OPTS)
4920 error ("attribute(target(\"%s\")) is unknown", orig_p);
4921 ret = false;
4924 else if (type == ix86_opt_isa)
4926 struct cl_decoded_option decoded;
4928 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4929 ix86_handle_option (opts, opts_set,
4930 &decoded, input_location);
4933 else if (type == ix86_opt_yes || type == ix86_opt_no)
4935 if (type == ix86_opt_no)
4936 opt_set_p = !opt_set_p;
4938 if (opt_set_p)
4939 opts->x_target_flags |= mask;
4940 else
4941 opts->x_target_flags &= ~mask;
4944 else if (type == ix86_opt_str)
4946 if (p_strings[opt])
4948 error ("option(\"%s\") was already specified", opt_string);
4949 ret = false;
4951 else
4952 p_strings[opt] = xstrdup (p + opt_len);
4955 else if (type == ix86_opt_enum)
4957 bool arg_ok;
4958 int value;
4960 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4961 if (arg_ok)
4962 set_option (opts, enum_opts_set, opt, value,
4963 p + opt_len, DK_UNSPECIFIED, input_location,
4964 global_dc);
4965 else
4967 error ("attribute(target(\"%s\")) is unknown", orig_p);
4968 ret = false;
4972 else
4973 gcc_unreachable ();
4976 return ret;
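/* Editor's sketch (illustrative only, not part of GCC; the function name
   below is hypothetical): the kind of string the parser above accepts.
   Each comma-separated token is matched against attrs[]; "arch=" and
   "tune=" take a string, "fpmath=" takes an enum value, the rest are ISA
   or flag options, and a leading "no-" clears an option instead of
   setting it.  */
#if 0
__attribute__((target ("arch=core2,sse4.1,no-sse4.2,fpmath=sse")))
static double
example_dot (const double *a, const double *b, int n)
{
  double s = 0.0;
  for (int i = 0; i < n; i++)
    s += a[i] * b[i];
  return s;
}
#endif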
4979 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4981 tree
4982 ix86_valid_target_attribute_tree (tree args,
4983 struct gcc_options *opts,
4984 struct gcc_options *opts_set)
4986 const char *orig_arch_string = opts->x_ix86_arch_string;
4987 const char *orig_tune_string = opts->x_ix86_tune_string;
4988 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4989 int orig_tune_defaulted = ix86_tune_defaulted;
4990 int orig_arch_specified = ix86_arch_specified;
4991 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4992 tree t = NULL_TREE;
4993 int i;
4994 struct cl_target_option *def
4995 = TREE_TARGET_OPTION (target_option_default_node);
4996 struct gcc_options enum_opts_set;
4998 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5000 /* Process each of the options on the chain. */
5001 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5002 opts_set, &enum_opts_set))
5003 return error_mark_node;
5005 /* If the changed options are different from the default, rerun
5006 ix86_option_override_internal, and then save the options away.
5007 The string options are attribute options, and will be undone
5008 when we copy the save structure. */
5009 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5010 || opts->x_target_flags != def->x_target_flags
5011 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5012 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5013 || enum_opts_set.x_ix86_fpmath)
5015 /* If we are using the default tune= or arch=, undo the string assigned,
5016 and use the default. */
5017 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5018 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
5019 else if (!orig_arch_specified)
5020 opts->x_ix86_arch_string = NULL;
5022 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5023 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
5024 else if (orig_tune_defaulted)
5025 opts->x_ix86_tune_string = NULL;
5027 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5028 if (enum_opts_set.x_ix86_fpmath)
5029 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5030 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5031 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5033 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5034 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5037 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5038 ix86_option_override_internal (false, opts, opts_set);
5040 /* Add any builtin functions with the new isa if any. */
5041 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5043 /* Save the current options unless we are validating options for
5044 #pragma. */
5045 t = build_target_option_node (opts);
5047 opts->x_ix86_arch_string = orig_arch_string;
5048 opts->x_ix86_tune_string = orig_tune_string;
5049 opts_set->x_ix86_fpmath = orig_fpmath_set;
5051 /* Free up memory allocated to hold the strings */
5052 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5053 free (option_strings[i]);
5056 return t;
5059 /* Hook to validate attribute((target("string"))). */
5061 static bool
5062 ix86_valid_target_attribute_p (tree fndecl,
5063 tree ARG_UNUSED (name),
5064 tree args,
5065 int ARG_UNUSED (flags))
5067 struct gcc_options func_options;
5068 tree new_target, new_optimize;
5069 bool ret = true;
5071 /* attribute((target("default"))) does nothing, beyond
5072 affecting multi-versioning. */
5073 if (TREE_VALUE (args)
5074 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5075 && TREE_CHAIN (args) == NULL_TREE
5076 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5077 return true;
5079 tree old_optimize = build_optimization_node (&global_options);
5081 /* Get the optimization options of the current function. */
5082 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5084 if (!func_optimize)
5085 func_optimize = old_optimize;
5087 /* Init func_options. */
5088 memset (&func_options, 0, sizeof (func_options));
5089 init_options_struct (&func_options, NULL);
5090 lang_hooks.init_options_struct (&func_options);
5092 cl_optimization_restore (&func_options,
5093 TREE_OPTIMIZATION (func_optimize));
5095 /* Initialize func_options to the default before its target options can
5096 be set. */
5097 cl_target_option_restore (&func_options,
5098 TREE_TARGET_OPTION (target_option_default_node));
5100 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5101 &global_options_set);
5103 new_optimize = build_optimization_node (&func_options);
5105 if (new_target == error_mark_node)
5106 ret = false;
5108 else if (fndecl && new_target)
5110 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5112 if (old_optimize != new_optimize)
5113 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5116 return ret;
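/* Editor's sketch (illustrative only; the identifiers are hypothetical):
   the source-level form this hook validates.  A misspelled option is
   rejected here with an error; target ("default") is accepted but has no
   effect beyond its role in function multi-versioning.  */
#if 0
__attribute__((target ("sse4.2,popcnt")))
static int
example_popcount (unsigned int x)
{
  return __builtin_popcount (x);	/* may use the POPCNT instruction */
}

__attribute__((target ("default")))
static int
example_identity (int x)
{
  return x;
}
#endif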
5120 /* Hook to determine if one function can safely inline another. */
5122 static bool
5123 ix86_can_inline_p (tree caller, tree callee)
5125 bool ret = false;
5126 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5127 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5129 /* If callee has no option attributes, then it is ok to inline. */
5130 if (!callee_tree)
5131 ret = true;
5133 /* If caller has no option attributes, but callee does then it is not ok to
5134 inline. */
5135 else if (!caller_tree)
5136 ret = false;
5138 else
5140 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5141 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5143 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5144 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5145 function. */
5146 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5147 != callee_opts->x_ix86_isa_flags)
5148 ret = false;
5150 /* See if we have the same non-isa options. */
5151 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5152 ret = false;
5154 /* See if arch, tune, etc. are the same. */
5155 else if (caller_opts->arch != callee_opts->arch)
5156 ret = false;
5158 else if (caller_opts->tune != callee_opts->tune)
5159 ret = false;
5161 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5162 ret = false;
5164 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5165 ret = false;
5167 else
5168 ret = true;
5171 return ret;
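/* Editor's sketch (illustrative only; identifiers hypothetical): the
   subset rule above in practice.  A callee without target attributes can
   be inlined anywhere; a callee that requires AVX2 cannot be inlined
   into a caller compiled without it.  */
#if 0
static inline int plain_add (int a, int b) { return a + b; }

__attribute__((target ("avx2")))
static inline int avx2_add (int a, int b) { return a + b; }

__attribute__((target ("avx2")))
int
avx2_caller (int a, int b)
{
  return plain_add (a, b);	/* OK to inline: callee needs nothing extra.  */
}

int
plain_caller (int a, int b)
{
  return avx2_add (a, b);	/* Not inlined: caller lacks AVX2.  */
}
#endif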
5175 /* Remember the last target of ix86_set_current_function. */
5176 static GTY(()) tree ix86_previous_fndecl;
5178 /* Set targets globals to the default (or current #pragma GCC target
5179 if active). Invalidate ix86_previous_fndecl cache. */
5181 void
5182 ix86_reset_previous_fndecl (void)
5184 tree new_tree = target_option_current_node;
5185 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5186 if (TREE_TARGET_GLOBALS (new_tree))
5187 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5188 else if (new_tree == target_option_default_node)
5189 restore_target_globals (&default_target_globals);
5190 else
5191 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5192 ix86_previous_fndecl = NULL_TREE;
5195 /* Establish appropriate back-end context for processing the function
5196 FNDECL. The argument might be NULL to indicate processing at top
5197 level, outside of any function scope. */
5198 static void
5199 ix86_set_current_function (tree fndecl)
5201 /* Only change the context if the function changes. This hook is called
5202 several times in the course of compiling a function, and we don't want to
5203 slow things down too much or call target_reinit when it isn't safe. */
5204 if (fndecl == ix86_previous_fndecl)
5205 return;
5207 tree old_tree;
5208 if (ix86_previous_fndecl == NULL_TREE)
5209 old_tree = target_option_current_node;
5210 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5211 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5212 else
5213 old_tree = target_option_default_node;
5215 if (fndecl == NULL_TREE)
5217 if (old_tree != target_option_current_node)
5218 ix86_reset_previous_fndecl ();
5219 return;
5222 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5223 if (new_tree == NULL_TREE)
5224 new_tree = target_option_default_node;
5226 if (old_tree != new_tree)
5228 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5229 if (TREE_TARGET_GLOBALS (new_tree))
5230 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5231 else if (new_tree == target_option_default_node)
5232 restore_target_globals (&default_target_globals);
5233 else
5234 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5236 ix86_previous_fndecl = fndecl;
5240 /* Return true if this goes in large data/bss. */
5242 static bool
5243 ix86_in_large_data_p (tree exp)
5245 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5246 return false;
5248 /* Functions are never large data. */
5249 if (TREE_CODE (exp) == FUNCTION_DECL)
5250 return false;
5252 /* Automatic variables are never large data. */
5253 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5254 return false;
5256 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5258 const char *section = DECL_SECTION_NAME (exp);
5259 if (strcmp (section, ".ldata") == 0
5260 || strcmp (section, ".lbss") == 0)
5261 return true;
5262 return false;
5264 else
5266 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5268 /* If this is an incomplete type with size 0, then we can't put it
5269 in data because it might be too big when completed. Also,
5270 int_size_in_bytes returns -1 if size can vary or is larger than
5271 an integer, in which case it is also safer to assume that it goes in
5272 large data. */
5273 if (size <= 0 || size > ix86_section_threshold)
5274 return true;
5277 return false;
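/* Editor's sketch (illustrative only; names hypothetical): with
   -mcmodel=medium, objects larger than -mlarge-data-threshold
   (ix86_section_threshold) satisfy the test above and are placed in the
   large sections, while small objects stay in the ordinary ones.  */
#if 0
static char small_buf[64];		/* below the threshold: .bss  */
static char big_buf[1024 * 1024];	/* above the threshold: .lbss */
#endif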
5280 /* Switch to the appropriate section for output of DECL.
5281 DECL is either a `VAR_DECL' node or a constant of some sort.
5282 RELOC indicates whether forming the initial value of DECL requires
5283 link-time relocations. */
5285 ATTRIBUTE_UNUSED static section *
5286 x86_64_elf_select_section (tree decl, int reloc,
5287 unsigned HOST_WIDE_INT align)
5289 if (ix86_in_large_data_p (decl))
5291 const char *sname = NULL;
5292 unsigned int flags = SECTION_WRITE;
5293 switch (categorize_decl_for_section (decl, reloc))
5295 case SECCAT_DATA:
5296 sname = ".ldata";
5297 break;
5298 case SECCAT_DATA_REL:
5299 sname = ".ldata.rel";
5300 break;
5301 case SECCAT_DATA_REL_LOCAL:
5302 sname = ".ldata.rel.local";
5303 break;
5304 case SECCAT_DATA_REL_RO:
5305 sname = ".ldata.rel.ro";
5306 break;
5307 case SECCAT_DATA_REL_RO_LOCAL:
5308 sname = ".ldata.rel.ro.local";
5309 break;
5310 case SECCAT_BSS:
5311 sname = ".lbss";
5312 flags |= SECTION_BSS;
5313 break;
5314 case SECCAT_RODATA:
5315 case SECCAT_RODATA_MERGE_STR:
5316 case SECCAT_RODATA_MERGE_STR_INIT:
5317 case SECCAT_RODATA_MERGE_CONST:
5318 sname = ".lrodata";
5319 flags = 0;
5320 break;
5321 case SECCAT_SRODATA:
5322 case SECCAT_SDATA:
5323 case SECCAT_SBSS:
5324 gcc_unreachable ();
5325 case SECCAT_TEXT:
5326 case SECCAT_TDATA:
5327 case SECCAT_TBSS:
5328 /* We don't split these for the medium model. Place them into
5329 default sections and hope for the best. */
5330 break;
5332 if (sname)
5334 /* We might get called with string constants, but get_named_section
5335 doesn't like them as they are not DECLs. Also, we need to set
5336 flags in that case. */
5337 if (!DECL_P (decl))
5338 return get_section (sname, flags, NULL);
5339 return get_named_section (decl, sname, reloc);
5342 return default_elf_select_section (decl, reloc, align);
5345 /* Select a set of attributes for section NAME based on the properties
5346 of DECL and whether or not RELOC indicates that DECL's initializer
5347 might contain runtime relocations. */
5349 static unsigned int ATTRIBUTE_UNUSED
5350 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5352 unsigned int flags = default_section_type_flags (decl, name, reloc);
5354 if (decl == NULL_TREE
5355 && (strcmp (name, ".ldata.rel.ro") == 0
5356 || strcmp (name, ".ldata.rel.ro.local") == 0))
5357 flags |= SECTION_RELRO;
5359 if (strcmp (name, ".lbss") == 0
5360 || strncmp (name, ".lbss.", 6) == 0
5361 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5362 flags |= SECTION_BSS;
5364 return flags;
5367 /* Build up a unique section name, expressed as a
5368 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5369 RELOC indicates whether the initial value of EXP requires
5370 link-time relocations. */
5372 static void ATTRIBUTE_UNUSED
5373 x86_64_elf_unique_section (tree decl, int reloc)
5375 if (ix86_in_large_data_p (decl))
5377 const char *prefix = NULL;
5378 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5379 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5381 switch (categorize_decl_for_section (decl, reloc))
5383 case SECCAT_DATA:
5384 case SECCAT_DATA_REL:
5385 case SECCAT_DATA_REL_LOCAL:
5386 case SECCAT_DATA_REL_RO:
5387 case SECCAT_DATA_REL_RO_LOCAL:
5388 prefix = one_only ? ".ld" : ".ldata";
5389 break;
5390 case SECCAT_BSS:
5391 prefix = one_only ? ".lb" : ".lbss";
5392 break;
5393 case SECCAT_RODATA:
5394 case SECCAT_RODATA_MERGE_STR:
5395 case SECCAT_RODATA_MERGE_STR_INIT:
5396 case SECCAT_RODATA_MERGE_CONST:
5397 prefix = one_only ? ".lr" : ".lrodata";
5398 break;
5399 case SECCAT_SRODATA:
5400 case SECCAT_SDATA:
5401 case SECCAT_SBSS:
5402 gcc_unreachable ();
5403 case SECCAT_TEXT:
5404 case SECCAT_TDATA:
5405 case SECCAT_TBSS:
5406 /* We don't split these for the medium model. Place them into
5407 default sections and hope for the best. */
5408 break;
5410 if (prefix)
5412 const char *name, *linkonce;
5413 char *string;
5415 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5416 name = targetm.strip_name_encoding (name);
5418 /* If we're using one_only, then there needs to be a .gnu.linkonce
5419 prefix to the section name. */
5420 linkonce = one_only ? ".gnu.linkonce" : "";
5422 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5424 set_decl_section_name (decl, string);
5425 return;
5428 default_unique_section (decl, reloc);
5431 #ifdef COMMON_ASM_OP
5432 /* This says how to output assembler code to declare an
5433 uninitialized external linkage data object.
5435 For medium model x86-64 we need to use .largecomm opcode for
5436 large objects. */
5437 void
5438 x86_elf_aligned_common (FILE *file,
5439 const char *name, unsigned HOST_WIDE_INT size,
5440 int align)
5442 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5443 && size > (unsigned int)ix86_section_threshold)
5444 fputs ("\t.largecomm\t", file);
5445 else
5446 fputs (COMMON_ASM_OP, file);
5447 assemble_name (file, name);
5448 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5449 size, align / BITS_PER_UNIT);
5451 #endif
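/* Editor's sketch of the directives emitted by x86_elf_aligned_common,
   assuming a hypothetical common symbol "big_common" of 0x200000 bytes
   aligned to 32 bytes under -mcmodel=medium (above the large-data
   threshold), and a small one that keeps the ordinary form:

	.largecomm	big_common,2097152,32
	.comm	small_common,64,8  */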
5453 /* Utility function for targets to use in implementing
5454 ASM_OUTPUT_ALIGNED_BSS. */
5456 void
5457 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5458 unsigned HOST_WIDE_INT size, int align)
5460 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5461 && size > (unsigned int)ix86_section_threshold)
5462 switch_to_section (get_named_section (decl, ".lbss", 0));
5463 else
5464 switch_to_section (bss_section);
5465 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5466 #ifdef ASM_DECLARE_OBJECT_NAME
5467 last_assemble_variable_decl = decl;
5468 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5469 #else
5470 /* Standard thing is just output label for the object. */
5471 ASM_OUTPUT_LABEL (file, name);
5472 #endif /* ASM_DECLARE_OBJECT_NAME */
5473 ASM_OUTPUT_SKIP (file, size ? size : 1);
5476 /* Decide whether we must probe the stack before any space allocation
5477 on this target. It's essentially TARGET_STACK_PROBE except when
5478 -fstack-check causes the stack to be already probed differently. */
5480 bool
5481 ix86_target_stack_probe (void)
5483 /* Do not probe the stack twice if static stack checking is enabled. */
5484 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5485 return false;
5487 return TARGET_STACK_PROBE;
5490 /* Decide whether we can make a sibling call to a function. DECL is the
5491 declaration of the function being targeted by the call and EXP is the
5492 CALL_EXPR representing the call. */
5494 static bool
5495 ix86_function_ok_for_sibcall (tree decl, tree exp)
5497 tree type, decl_or_type;
5498 rtx a, b;
5500 /* If we are generating position-independent code, we cannot sibcall
5501 optimize direct calls to global functions, as the PLT requires
5502 %ebx be live. (Darwin does not have a PLT.) */
5503 if (!TARGET_MACHO
5504 && !TARGET_64BIT
5505 && flag_pic
5506 && flag_plt
5507 && decl && !targetm.binds_local_p (decl))
5508 return false;
5510 /* If we need to align the outgoing stack, then sibcalling would
5511 unalign the stack, which may break the called function. */
5512 if (ix86_minimum_incoming_stack_boundary (true)
5513 < PREFERRED_STACK_BOUNDARY)
5514 return false;
5516 if (decl)
5518 decl_or_type = decl;
5519 type = TREE_TYPE (decl);
5521 else
5523 /* We're looking at the CALL_EXPR, we need the type of the function. */
5524 type = CALL_EXPR_FN (exp); /* pointer expression */
5525 type = TREE_TYPE (type); /* pointer type */
5526 type = TREE_TYPE (type); /* function type */
5527 decl_or_type = type;
5530 /* Check that the return value locations are the same. Like
5531 if we are returning floats on the 80387 register stack, we cannot
5532 make a sibcall from a function that doesn't return a float to a
5533 function that does or, conversely, from a function that does return
5534 a float to a function that doesn't; the necessary stack adjustment
5535 would not be executed. This is also the place we notice
5536 differences in the return value ABI. Note that it is ok for one
5537 of the functions to have void return type as long as the return
5538 value of the other is passed in a register. */
5539 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5540 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5541 cfun->decl, false);
5542 if (STACK_REG_P (a) || STACK_REG_P (b))
5544 if (!rtx_equal_p (a, b))
5545 return false;
5547 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5549 else if (!rtx_equal_p (a, b))
5550 return false;
5552 if (TARGET_64BIT)
5554 /* The SYSV ABI has more call-clobbered registers;
5555 disallow sibcalls from MS to SYSV. */
5556 if (cfun->machine->call_abi == MS_ABI
5557 && ix86_function_type_abi (type) == SYSV_ABI)
5558 return false;
5560 else
5562 /* If this call is indirect, we'll need to be able to use a
5563 call-clobbered register for the address of the target function.
5564 Make sure that all such registers are not used for passing
5565 parameters. Note that DLLIMPORT functions are indirect. */
5566 if (!decl
5567 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5569 if (ix86_function_regparm (type, NULL) >= 3)
5571 /* ??? Need to count the actual number of registers to be used,
5572 not the possible number of registers. Fix later. */
5573 return false;
5578 /* Otherwise okay. That also includes certain types of indirect calls. */
5579 return true;
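/* Editor's sketch (illustrative only; identifiers hypothetical): at
   -m32 -fpic with the PLT enabled, a tail call to a global function is
   rejected by the test above because the PLT needs %ebx, whereas a tail
   call to a local function may still become a sibcall.  */
#if 0
extern int global_helper (int);
static int local_helper (int x) { return x + 1; }

int
tail_local (int x)
{
  return local_helper (x);	/* may be emitted as jmp local_helper  */
}

int
tail_global (int x)
{
  return global_helper (x);	/* stays a real call at -m32 -fpic  */
}
#endif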
5582 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5583 and "sseregparm" calling convention attributes;
5584 arguments as in struct attribute_spec.handler. */
5586 static tree
5587 ix86_handle_cconv_attribute (tree *node, tree name,
5588 tree args,
5589 int,
5590 bool *no_add_attrs)
5592 if (TREE_CODE (*node) != FUNCTION_TYPE
5593 && TREE_CODE (*node) != METHOD_TYPE
5594 && TREE_CODE (*node) != FIELD_DECL
5595 && TREE_CODE (*node) != TYPE_DECL)
5597 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5598 name);
5599 *no_add_attrs = true;
5600 return NULL_TREE;
5603 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5604 if (is_attribute_p ("regparm", name))
5606 tree cst;
5608 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5610 error ("fastcall and regparm attributes are not compatible");
5613 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5615 error ("regparam and thiscall attributes are not compatible");
5618 cst = TREE_VALUE (args);
5619 if (TREE_CODE (cst) != INTEGER_CST)
5621 warning (OPT_Wattributes,
5622 "%qE attribute requires an integer constant argument",
5623 name);
5624 *no_add_attrs = true;
5626 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5628 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5629 name, REGPARM_MAX);
5630 *no_add_attrs = true;
5633 return NULL_TREE;
5636 if (TARGET_64BIT)
5638 /* Do not warn when emulating the MS ABI. */
5639 if ((TREE_CODE (*node) != FUNCTION_TYPE
5640 && TREE_CODE (*node) != METHOD_TYPE)
5641 || ix86_function_type_abi (*node) != MS_ABI)
5642 warning (OPT_Wattributes, "%qE attribute ignored",
5643 name);
5644 *no_add_attrs = true;
5645 return NULL_TREE;
5648 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5649 if (is_attribute_p ("fastcall", name))
5651 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5653 error ("fastcall and cdecl attributes are not compatible");
5655 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5657 error ("fastcall and stdcall attributes are not compatible");
5659 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5661 error ("fastcall and regparm attributes are not compatible");
5663 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5665 error ("fastcall and thiscall attributes are not compatible");
5669 /* Can combine stdcall with fastcall (redundant), regparm and
5670 sseregparm. */
5671 else if (is_attribute_p ("stdcall", name))
5673 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5675 error ("stdcall and cdecl attributes are not compatible");
5677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5679 error ("stdcall and fastcall attributes are not compatible");
5681 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5683 error ("stdcall and thiscall attributes are not compatible");
5687 /* Can combine cdecl with regparm and sseregparm. */
5688 else if (is_attribute_p ("cdecl", name))
5690 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5692 error ("stdcall and cdecl attributes are not compatible");
5694 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5696 error ("fastcall and cdecl attributes are not compatible");
5698 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5700 error ("cdecl and thiscall attributes are not compatible");
5703 else if (is_attribute_p ("thiscall", name))
5705 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5706 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5707 name);
5708 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5710 error ("stdcall and thiscall attributes are not compatible");
5712 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5714 error ("fastcall and thiscall attributes are not compatible");
5716 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5718 error ("cdecl and thiscall attributes are not compatible");
5722 /* Can combine sseregparm with all attributes. */
5724 return NULL_TREE;
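/* Editor's sketch (illustrative only, 32-bit; identifiers hypothetical):
   the calling-convention attributes this handler accepts, and one
   combination it diagnoses.  */
#if 0
int __attribute__((stdcall)) s_func (int a, int b);	/* callee pops args  */
int __attribute__((fastcall)) f_func (int a, int b);	/* args in %ecx/%edx */
int __attribute__((regparm (3))) r_func (int a, int b, int c);

/* Rejected: "fastcall and regparm attributes are not compatible".  */
int __attribute__((fastcall, regparm (2))) bad_func (int a, int b);
#endif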
5727 /* The transactional memory builtins are implicitly regparm or fastcall
5728 depending on the ABI. Override the generic do-nothing attribute that
5729 these builtins were declared with, and replace it with one of the two
5730 attributes that we expect elsewhere. */
5732 static tree
5733 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5734 int flags, bool *no_add_attrs)
5736 tree alt;
5738 /* In no case do we want to add the placeholder attribute. */
5739 *no_add_attrs = true;
5741 /* The 64-bit ABI is unchanged for transactional memory. */
5742 if (TARGET_64BIT)
5743 return NULL_TREE;
5745 /* ??? Is there a better way to validate 32-bit windows? We have
5746 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5747 if (CHECK_STACK_LIMIT > 0)
5748 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5749 else
5751 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5752 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5754 decl_attributes (node, alt, flags);
5756 return NULL_TREE;
5759 /* This function determines from TYPE the calling-convention. */
5761 unsigned int
5762 ix86_get_callcvt (const_tree type)
5764 unsigned int ret = 0;
5765 bool is_stdarg;
5766 tree attrs;
5768 if (TARGET_64BIT)
5769 return IX86_CALLCVT_CDECL;
5771 attrs = TYPE_ATTRIBUTES (type);
5772 if (attrs != NULL_TREE)
5774 if (lookup_attribute ("cdecl", attrs))
5775 ret |= IX86_CALLCVT_CDECL;
5776 else if (lookup_attribute ("stdcall", attrs))
5777 ret |= IX86_CALLCVT_STDCALL;
5778 else if (lookup_attribute ("fastcall", attrs))
5779 ret |= IX86_CALLCVT_FASTCALL;
5780 else if (lookup_attribute ("thiscall", attrs))
5781 ret |= IX86_CALLCVT_THISCALL;
5783 /* Regparm isn't allowed for thiscall or fastcall. */
5784 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5786 if (lookup_attribute ("regparm", attrs))
5787 ret |= IX86_CALLCVT_REGPARM;
5788 if (lookup_attribute ("sseregparm", attrs))
5789 ret |= IX86_CALLCVT_SSEREGPARM;
5792 if (IX86_BASE_CALLCVT(ret) != 0)
5793 return ret;
5796 is_stdarg = stdarg_p (type);
5797 if (TARGET_RTD && !is_stdarg)
5798 return IX86_CALLCVT_STDCALL | ret;
5800 if (ret != 0
5801 || is_stdarg
5802 || TREE_CODE (type) != METHOD_TYPE
5803 || ix86_function_type_abi (type) != MS_ABI)
5804 return IX86_CALLCVT_CDECL | ret;
5806 return IX86_CALLCVT_THISCALL;
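/* Editor's note: an illustrative reading of the result.  A 32-bit
   "fastcall" function yields IX86_CALLCVT_FASTCALL; an unadorned function
   yields IX86_CALLCVT_CDECL, or IX86_CALLCVT_STDCALL when -mrtd is in
   effect and the function is not stdarg; "regparm" and "sseregparm" are
   OR-ed in as modifier bits on top of a cdecl or stdcall base.  */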
5809 /* Return 0 if the attributes for two types are incompatible, 1 if they
5810 are compatible, and 2 if they are nearly compatible (which causes a
5811 warning to be generated). */
5813 static int
5814 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5816 unsigned int ccvt1, ccvt2;
5818 if (TREE_CODE (type1) != FUNCTION_TYPE
5819 && TREE_CODE (type1) != METHOD_TYPE)
5820 return 1;
5822 ccvt1 = ix86_get_callcvt (type1);
5823 ccvt2 = ix86_get_callcvt (type2);
5824 if (ccvt1 != ccvt2)
5825 return 0;
5826 if (ix86_function_regparm (type1, NULL)
5827 != ix86_function_regparm (type2, NULL))
5828 return 0;
5830 return 1;
5833 /* Return the regparm value for a function with the indicated TYPE and DECL.
5834 DECL may be NULL when calling function indirectly
5835 or considering a libcall. */
5837 static int
5838 ix86_function_regparm (const_tree type, const_tree decl)
5840 tree attr;
5841 int regparm;
5842 unsigned int ccvt;
5844 if (TARGET_64BIT)
5845 return (ix86_function_type_abi (type) == SYSV_ABI
5846 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5847 ccvt = ix86_get_callcvt (type);
5848 regparm = ix86_regparm;
5850 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5852 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5853 if (attr)
5855 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5856 return regparm;
5859 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5860 return 2;
5861 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5862 return 1;
5864 /* Use register calling convention for local functions when possible. */
5865 if (decl
5866 && TREE_CODE (decl) == FUNCTION_DECL)
5868 cgraph_node *target = cgraph_node::get (decl);
5869 if (target)
5870 target = target->function_symbol ();
5872 /* Caller and callee must agree on the calling convention, so
5873 checking just the current function's optimize setting here would mean
5874 that with __attribute__((optimize (...))) the caller could use the regparm
5875 convention and the callee not, or vice versa. Instead look at whether
5876 the callee is optimized or not. */
5877 if (target && opt_for_fn (target->decl, optimize)
5878 && !(profile_flag && !flag_fentry))
5880 cgraph_local_info *i = &target->local;
5881 if (i && i->local && i->can_change_signature)
5883 int local_regparm, globals = 0, regno;
5885 /* Make sure no regparm register is taken by a
5886 fixed register variable. */
5887 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5888 local_regparm++)
5889 if (fixed_regs[local_regparm])
5890 break;
5892 /* We don't want to use regparm(3) for nested functions as
5893 these use a static chain pointer in the third argument. */
5894 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5895 local_regparm = 2;
5897 /* Save a register for the split stack. */
5898 if (local_regparm == 3 && flag_split_stack)
5899 local_regparm = 2;
5901 /* Each fixed register usage increases register pressure,
5902 so less registers should be used for argument passing.
5903 This functionality can be overriden by an explicit
5904 regparm value. */
5905 for (regno = AX_REG; regno <= DI_REG; regno++)
5906 if (fixed_regs[regno])
5907 globals++;
5909 local_regparm
5910 = globals < local_regparm ? local_regparm - globals : 0;
5912 if (local_regparm > regparm)
5913 regparm = local_regparm;
5918 return regparm;
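/* Editor's sketch (illustrative only, 32-bit; the function name is
   hypothetical): with regparm (3) the first three integer arguments are
   passed in %eax, %edx and %ecx instead of on the stack.  Local static
   functions may get the same treatment automatically, as computed by the
   loop above.  */
#if 0
__attribute__((regparm (3)))
static int
example_madd (int a, int b, int c)	/* a in %eax, b in %edx, c in %ecx */
{
  return a * b + c;
}
#endif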
5921 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5922 DFmode (2) arguments in SSE registers for a function with the
5923 indicated TYPE and DECL. DECL may be NULL when calling function
5924 indirectly or considering a libcall. Return -1 if any FP parameter
5925 should be rejected by error. This is used in situations where we imply the SSE
5926 calling convention but the function is called from another function with
5927 SSE disabled. Otherwise return 0. */
5929 static int
5930 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5932 gcc_assert (!TARGET_64BIT);
5934 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5935 by the sseregparm attribute. */
5936 if (TARGET_SSEREGPARM
5937 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5939 if (!TARGET_SSE)
5941 if (warn)
5943 if (decl)
5944 error ("calling %qD with attribute sseregparm without "
5945 "SSE/SSE2 enabled", decl);
5946 else
5947 error ("calling %qT with attribute sseregparm without "
5948 "SSE/SSE2 enabled", type);
5950 return 0;
5953 return 2;
5956 if (!decl)
5957 return 0;
5959 cgraph_node *target = cgraph_node::get (decl);
5960 if (target)
5961 target = target->function_symbol ();
5963 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5964 (and DFmode for SSE2) arguments in SSE registers. */
5965 if (target
5966 /* TARGET_SSE_MATH */
5967 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5968 && opt_for_fn (target->decl, optimize)
5969 && !(profile_flag && !flag_fentry))
5971 cgraph_local_info *i = &target->local;
5972 if (i && i->local && i->can_change_signature)
5974 /* Refuse to produce wrong code when a local function with SSE enabled
5975 is called from an SSE-disabled function.
5976 FIXME: We need a way to detect these cases across ltrans partitions
5977 and avoid using SSE calling conventions on local functions called
5978 from functions with SSE disabled. For now at least delay the
5979 warning until we know we are going to produce wrong code.
5980 See PR66047 */
5981 if (!TARGET_SSE && warn)
5982 return -1;
5983 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5984 ->x_ix86_isa_flags) ? 2 : 1;
5988 return 0;
5991 /* Return true if EAX is live at the start of the function. Used by
5992 ix86_expand_prologue to determine if we need special help before
5993 calling allocate_stack_worker. */
5995 static bool
5996 ix86_eax_live_at_start_p (void)
5998 /* Cheat. Don't bother working forward from ix86_function_regparm
5999 to the function type to determine whether an actual argument is located in
6000 eax. Instead just look at cfg info, which is still close enough
6001 to correct at this point. This gives false positives for broken
6002 functions that might use uninitialized data that happens to be
6003 allocated in eax, but who cares? */
6004 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6007 static bool
6008 ix86_keep_aggregate_return_pointer (tree fntype)
6010 tree attr;
6012 if (!TARGET_64BIT)
6014 attr = lookup_attribute ("callee_pop_aggregate_return",
6015 TYPE_ATTRIBUTES (fntype));
6016 if (attr)
6017 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6019 /* For 32-bit MS-ABI the default is to keep aggregate
6020 return pointer. */
6021 if (ix86_function_type_abi (fntype) == MS_ABI)
6022 return true;
6024 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6027 /* Value is the number of bytes of arguments automatically
6028 popped when returning from a subroutine call.
6029 FUNDECL is the declaration node of the function (as a tree),
6030 FUNTYPE is the data type of the function (as a tree),
6031 or for a library call it is an identifier node for the subroutine name.
6032 SIZE is the number of bytes of arguments passed on the stack.
6034 On the 80386, the RTD insn may be used to pop them if the number
6035 of args is fixed, but if the number is variable then the caller
6036 must pop them all. RTD can't be used for library calls now
6037 because the library is compiled with the Unix compiler.
6038 Use of RTD is a selectable option, since it is incompatible with
6039 standard Unix calling sequences. If the option is not selected,
6040 the caller must always pop the args.
6042 The attribute stdcall is equivalent to RTD on a per module basis. */
6044 static int
6045 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6047 unsigned int ccvt;
6049 /* None of the 64-bit ABIs pop arguments. */
6050 if (TARGET_64BIT)
6051 return 0;
6053 ccvt = ix86_get_callcvt (funtype);
6055 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6056 | IX86_CALLCVT_THISCALL)) != 0
6057 && ! stdarg_p (funtype))
6058 return size;
6060 /* Lose any fake structure return argument if it is passed on the stack. */
6061 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6062 && !ix86_keep_aggregate_return_pointer (funtype))
6064 int nregs = ix86_function_regparm (funtype, fundecl);
6065 if (nregs == 0)
6066 return GET_MODE_SIZE (Pmode);
6069 return 0;
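/* Editor's sketch (illustrative only, 32-bit; identifiers hypothetical):
   for the stdcall declaration below this hook returns 8, so the callee
   finishes with "ret $8" and the caller does not adjust %esp after the
   call; for the plain cdecl declaration it returns 0 and the caller pops
   the arguments itself.  */
#if 0
__attribute__((stdcall)) int stdcall_add (int a, int b);
int cdecl_add (int a, int b);
#endif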
6072 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6074 static bool
6075 ix86_legitimate_combined_insn (rtx_insn *insn)
6077 /* Check operand constraints in case hard registers were propagated
6078 into insn pattern. This check prevents combine pass from
6079 generating insn patterns with invalid hard register operands.
6080 These invalid insns can eventually confuse reload to error out
6081 with a spill failure. See also PRs 46829 and 46843. */
6082 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6084 int i;
6086 extract_insn (insn);
6087 preprocess_constraints (insn);
6089 int n_operands = recog_data.n_operands;
6090 int n_alternatives = recog_data.n_alternatives;
6091 for (i = 0; i < n_operands; i++)
6093 rtx op = recog_data.operand[i];
6094 machine_mode mode = GET_MODE (op);
6095 const operand_alternative *op_alt;
6096 int offset = 0;
6097 bool win;
6098 int j;
6100 /* For pre-AVX disallow unaligned loads/stores where the
6101 instructions don't support it. */
6102 if (!TARGET_AVX
6103 && VECTOR_MODE_P (GET_MODE (op))
6104 && misaligned_operand (op, GET_MODE (op)))
6106 int min_align = get_attr_ssememalign (insn);
6107 if (min_align == 0)
6108 return false;
6111 /* A unary operator may be accepted by the predicate, but it
6112 is irrelevant for matching constraints. */
6113 if (UNARY_P (op))
6114 op = XEXP (op, 0);
6116 if (GET_CODE (op) == SUBREG)
6118 if (REG_P (SUBREG_REG (op))
6119 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6120 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6121 GET_MODE (SUBREG_REG (op)),
6122 SUBREG_BYTE (op),
6123 GET_MODE (op));
6124 op = SUBREG_REG (op);
6127 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6128 continue;
6130 op_alt = recog_op_alt;
6132 /* Operand has no constraints, anything is OK. */
6133 win = !n_alternatives;
6135 alternative_mask preferred = get_preferred_alternatives (insn);
6136 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6138 if (!TEST_BIT (preferred, j))
6139 continue;
6140 if (op_alt[i].anything_ok
6141 || (op_alt[i].matches != -1
6142 && operands_match_p
6143 (recog_data.operand[i],
6144 recog_data.operand[op_alt[i].matches]))
6145 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6147 win = true;
6148 break;
6152 if (!win)
6153 return false;
6157 return true;
6160 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6162 static unsigned HOST_WIDE_INT
6163 ix86_asan_shadow_offset (void)
6165 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6166 : HOST_WIDE_INT_C (0x7fff8000))
6167 : (HOST_WIDE_INT_1 << 29);
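/* Editor's note: a worked example of how this offset is used, assuming
   the usual ASan mapping shadow = (addr >> 3) + offset.  With the
   0x7fff8000 value above (64-bit Linux, LP64), the shadow byte for
   address 0x601040 is at (0x601040 >> 3) + 0x7fff8000 = 0x800b8208.  */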
6170 /* Argument support functions. */
6172 /* Return true when register may be used to pass function parameters. */
6173 bool
6174 ix86_function_arg_regno_p (int regno)
6176 int i;
6177 enum calling_abi call_abi;
6178 const int *parm_regs;
6180 if (TARGET_MPX && BND_REGNO_P (regno))
6181 return true;
6183 if (!TARGET_64BIT)
6185 if (TARGET_MACHO)
6186 return (regno < REGPARM_MAX
6187 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6188 else
6189 return (regno < REGPARM_MAX
6190 || (TARGET_MMX && MMX_REGNO_P (regno)
6191 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6192 || (TARGET_SSE && SSE_REGNO_P (regno)
6193 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6196 if (TARGET_SSE && SSE_REGNO_P (regno)
6197 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6198 return true;
6200 /* TODO: The function should depend on current function ABI but
6201 builtins.c would need updating then. Therefore we use the
6202 default ABI. */
6203 call_abi = ix86_cfun_abi ();
6205 /* RAX is used as hidden argument to va_arg functions. */
6206 if (call_abi == SYSV_ABI && regno == AX_REG)
6207 return true;
6209 if (call_abi == MS_ABI)
6210 parm_regs = x86_64_ms_abi_int_parameter_registers;
6211 else
6212 parm_regs = x86_64_int_parameter_registers;
6214 for (i = 0; i < (call_abi == MS_ABI
6215 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6216 if (regno == parm_regs[i])
6217 return true;
6218 return false;
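/* Editor's note (illustrative): the registers this predicate accepts for
   64-bit code.  The SYSV integer argument registers are RDI, RSI, RDX,
   RCX, R8 and R9 (plus RAX as the hidden argument for varargs calls),
   the MS ABI uses RCX, RDX, R8 and R9, and XMM0-XMM7 (SYSV) or
   XMM0-XMM3 (MS) carry floating-point and vector arguments.  */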
6221 /* Return true if we do not know how to pass TYPE solely in registers. */
6223 static bool
6224 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6226 if (must_pass_in_stack_var_size_or_pad (mode, type))
6227 return true;
6229 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6230 The layout_type routine is crafty and tries to trick us into passing
6231 currently unsupported vector types on the stack by using TImode. */
6232 return (!TARGET_64BIT && mode == TImode
6233 && type && TREE_CODE (type) != VECTOR_TYPE);
6236 /* Return the size, in bytes, of the area reserved for arguments passed
6237 in registers for the function represented by FNDECL, depending on the
6238 ABI in use. */
6239 int
6240 ix86_reg_parm_stack_space (const_tree fndecl)
6242 enum calling_abi call_abi = SYSV_ABI;
6243 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6244 call_abi = ix86_function_abi (fndecl);
6245 else
6246 call_abi = ix86_function_type_abi (fndecl);
6247 if (TARGET_64BIT && call_abi == MS_ABI)
6248 return 32;
6249 return 0;
6252 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6253 call ABI used. */
6254 enum calling_abi
6255 ix86_function_type_abi (const_tree fntype)
6257 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6259 enum calling_abi abi = ix86_abi;
6260 if (abi == SYSV_ABI)
6262 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6264 if (TARGET_X32)
6266 static bool warned = false;
6267 if (!warned)
6269 error ("X32 does not support ms_abi attribute");
6270 warned = true;
6273 abi = MS_ABI;
6276 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6277 abi = SYSV_ABI;
6278 return abi;
6280 return ix86_abi;
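/* Editor's sketch (illustrative only; identifiers hypothetical): forcing
   the MS calling convention for one callee from SYSV code on x86-64.
   The first four arguments go in RCX, RDX, R8 and R9 and the caller
   reserves 32 bytes of shadow space, as ix86_reg_parm_stack_space above
   reports.  */
#if 0
__attribute__((ms_abi)) extern int win_callback (int, int, int, int, int);

int
call_ms (void)
{
  return win_callback (1, 2, 3, 4, 5);	/* fifth argument on the stack */
}
#endif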
6283 /* We add this as a workaround in order to use libc_has_function
6284 hook in i386.md. */
6285 bool
6286 ix86_libc_has_function (enum function_class fn_class)
6288 return targetm.libc_has_function (fn_class);
6291 static bool
6292 ix86_function_ms_hook_prologue (const_tree fn)
6294 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6296 if (decl_function_context (fn) != NULL_TREE)
6297 error_at (DECL_SOURCE_LOCATION (fn),
6298 "ms_hook_prologue is not compatible with nested function");
6299 else
6300 return true;
6302 return false;
6305 static enum calling_abi
6306 ix86_function_abi (const_tree fndecl)
6308 if (! fndecl)
6309 return ix86_abi;
6310 return ix86_function_type_abi (TREE_TYPE (fndecl));
6313 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6314 call ABI used. */
6315 enum calling_abi
6316 ix86_cfun_abi (void)
6318 if (! cfun)
6319 return ix86_abi;
6320 return cfun->machine->call_abi;
6323 /* Write the extra assembler code needed to declare a function properly. */
6325 void
6326 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6327 tree decl)
6329 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6331 if (is_ms_hook)
6333 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6334 unsigned int filler_cc = 0xcccccccc;
6336 for (i = 0; i < filler_count; i += 4)
6337 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6340 #ifdef SUBTARGET_ASM_UNWIND_INIT
6341 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6342 #endif
6344 ASM_OUTPUT_LABEL (asm_out_file, fname);
6346 /* Output magic byte marker, if hot-patch attribute is set. */
6347 if (is_ms_hook)
6349 if (TARGET_64BIT)
6351 /* leaq [%rsp + 0], %rsp */
6352 asm_fprintf (asm_out_file, ASM_BYTE
6353 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6355 else
6357 /* movl.s %edi, %edi
6358 push %ebp
6359 movl.s %esp, %ebp */
6360 asm_fprintf (asm_out_file, ASM_BYTE
6361 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6366 /* regclass.c */
6367 extern void init_regs (void);
6369 /* Implementation of the call-ABI-switching target hook. Set up the
6370 call-used register sets specific to FNDECL. See also
6371 ix86_conditional_register_usage for more details. */
6372 void
6373 ix86_call_abi_override (const_tree fndecl)
6375 if (fndecl == NULL_TREE)
6376 cfun->machine->call_abi = ix86_abi;
6377 else
6378 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6381 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6382 expensive re-initialization of init_regs each time we switch function context,
6383 since this is needed only during RTL expansion. */
6384 static void
6385 ix86_maybe_switch_abi (void)
6387 if (TARGET_64BIT &&
6388 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6389 reinit_regs ();
6392 /* Return 1 if pseudo register should be created and used to hold
6393 GOT address for PIC code. */
6394 bool
6395 ix86_use_pseudo_pic_reg (void)
6397 if ((TARGET_64BIT
6398 && (ix86_cmodel == CM_SMALL_PIC
6399 || TARGET_PECOFF))
6400 || !flag_pic)
6401 return false;
6402 return true;
6405 /* Initialize large model PIC register. */
6407 static void
6408 ix86_init_large_pic_reg (unsigned int tmp_regno)
6410 rtx_code_label *label;
6411 rtx tmp_reg;
6413 gcc_assert (Pmode == DImode);
6414 label = gen_label_rtx ();
6415 emit_label (label);
6416 LABEL_PRESERVE_P (label) = 1;
6417 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6418 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6419 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6420 label));
6421 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6422 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6423 pic_offset_table_rtx, tmp_reg));
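/* Illustrative sketch (not part of the original GCC source): the sequence
   built above corresponds roughly to the following large-model PIC
   prologue, with pic_reg and tmp_reg standing in for the two registers:

     .L0:
       lea     .L0(%rip), %pic_reg
       movabs  $_GLOBAL_OFFSET_TABLE_-.L0, %tmp_reg
       add     %tmp_reg, %pic_reg

   i.e. the GOT address is materialized at run time because in the large
   code model it may be more than 2GB away from the code.  */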
6426 /* Create and initialize PIC register if required. */
6427 static void
6428 ix86_init_pic_reg (void)
6430 edge entry_edge;
6431 rtx_insn *seq;
6433 if (!ix86_use_pseudo_pic_reg ())
6434 return;
6436 start_sequence ();
6438 if (TARGET_64BIT)
6440 if (ix86_cmodel == CM_LARGE_PIC)
6441 ix86_init_large_pic_reg (R11_REG);
6442 else
6443 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6445 else
6447 /* If there is a future mcount call in the function, it is more profitable
6448 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6449 rtx reg = crtl->profile
6450 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6451 : pic_offset_table_rtx;
6452 rtx_insn *insn = emit_insn (gen_set_got (reg));
6453 RTX_FRAME_RELATED_P (insn) = 1;
6454 if (crtl->profile)
6455 emit_move_insn (pic_offset_table_rtx, reg);
6456 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6459 seq = get_insns ();
6460 end_sequence ();
6462 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6463 insert_insn_on_edge (seq, entry_edge);
6464 commit_one_edge_insertion (entry_edge);
6467 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6468 for a call to a function whose data type is FNTYPE.
6469 For a library call, FNTYPE is 0. */
6471 void
6472 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6473 tree fntype, /* tree ptr for function decl */
6474 rtx libname, /* SYMBOL_REF of library name or 0 */
6475 tree fndecl,
6476 int caller)
6478 struct cgraph_local_info *i = NULL;
6479 struct cgraph_node *target = NULL;
6481 memset (cum, 0, sizeof (*cum));
6483 if (fndecl)
6485 target = cgraph_node::get (fndecl);
6486 if (target)
6488 target = target->function_symbol ();
6489 i = cgraph_node::local_info (target->decl);
6490 cum->call_abi = ix86_function_abi (target->decl);
6492 else
6493 cum->call_abi = ix86_function_abi (fndecl);
6495 else
6496 cum->call_abi = ix86_function_type_abi (fntype);
6498 cum->caller = caller;
6500 /* Set up the number of registers to use for passing arguments. */
6501 cum->nregs = ix86_regparm;
6502 if (TARGET_64BIT)
6504 cum->nregs = (cum->call_abi == SYSV_ABI
6505 ? X86_64_REGPARM_MAX
6506 : X86_64_MS_REGPARM_MAX);
6508 if (TARGET_SSE)
6510 cum->sse_nregs = SSE_REGPARM_MAX;
6511 if (TARGET_64BIT)
6513 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6514 ? X86_64_SSE_REGPARM_MAX
6515 : X86_64_MS_SSE_REGPARM_MAX);
6518 if (TARGET_MMX)
6519 cum->mmx_nregs = MMX_REGPARM_MAX;
6520 cum->warn_avx512f = true;
6521 cum->warn_avx = true;
6522 cum->warn_sse = true;
6523 cum->warn_mmx = true;
6525 /* Because the type might mismatch between caller and callee, we need to
6526 use the actual type of the function for local calls.
6527 FIXME: cgraph_analyze can be told to actually record if a function uses
6528 va_start, so for local functions maybe_vaarg can be made more aggressive,
6529 helping K&R code.
6530 FIXME: once the type system is fixed, we won't need this code anymore. */
6531 if (i && i->local && i->can_change_signature)
6532 fntype = TREE_TYPE (target->decl);
6533 cum->stdarg = stdarg_p (fntype);
6534 cum->maybe_vaarg = (fntype
6535 ? (!prototype_p (fntype) || stdarg_p (fntype))
6536 : !libname);
6538 cum->bnd_regno = FIRST_BND_REG;
6539 cum->bnds_in_bt = 0;
6540 cum->force_bnd_pass = 0;
6541 cum->decl = fndecl;
6543 if (!TARGET_64BIT)
6545 /* If there are variable arguments, then we won't pass anything
6546 in registers in 32-bit mode. */
6547 if (stdarg_p (fntype))
6549 cum->nregs = 0;
6550 cum->sse_nregs = 0;
6551 cum->mmx_nregs = 0;
6552 cum->warn_avx512f = false;
6553 cum->warn_avx = false;
6554 cum->warn_sse = false;
6555 cum->warn_mmx = false;
6556 return;
6559 /* Use the ecx and edx registers if the function has the fastcall attribute,
6560 else look for regparm information. */
6561 if (fntype)
6563 unsigned int ccvt = ix86_get_callcvt (fntype);
6564 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6566 cum->nregs = 1;
6567 cum->fastcall = 1; /* Same first register as in fastcall. */
6569 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6571 cum->nregs = 2;
6572 cum->fastcall = 1;
6574 else
6575 cum->nregs = ix86_function_regparm (fntype, fndecl);
6578 /* Set up the number of SSE registers used for passing SFmode
6579 and DFmode arguments. Warn for mismatching ABI. */
6580 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
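/* Illustrative example (not part of the original GCC source): for 32-bit
   code the register counts set up above mean that

     void f1 (int, int) __attribute__ ((fastcall));     (args in ECX, EDX)
     void f2 (int, int) __attribute__ ((thiscall));     (first arg in ECX)
     void f3 (int, int) __attribute__ ((regparm (2)));  (args in EAX, EDX)

   receive their integer arguments in registers, while a plain cdecl
   declaration passes everything on the stack unless -mregparm is used.  */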
6584 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6585 But in the case of vector types, it is some vector mode.
6587 When we have only some of our vector isa extensions enabled, then there
6588 are some modes for which vector_mode_supported_p is false. For these
6589 modes, the generic vector support in gcc will choose some non-vector mode
6590 in order to implement the type. By computing the natural mode, we'll
6591 select the proper ABI location for the operand and not depend on whatever
6592 the middle-end decides to do with these vector types.
6594 The middle-end can't deal with vector types > 16 bytes. In this
6595 case, we return the original mode and warn about the ABI change if CUM
6596 isn't NULL.
6598 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6599 available for the function return value. */
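/* Illustrative example (not part of the original GCC source): compiling

     typedef double v4df __attribute__ ((vector_size (32)));
     void callee (v4df x);

   without -mavx makes type_natural_mode fall back to TYPE_MODE and,
   under -Wpsabi, emit the "AVX vector argument without AVX enabled
   changes the ABI" warning seen below, because the 256-bit value can no
   longer be passed in a YMM register.  */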
6601 static machine_mode
6602 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6603 bool in_return)
6605 machine_mode mode = TYPE_MODE (type);
6607 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6609 HOST_WIDE_INT size = int_size_in_bytes (type);
6610 if ((size == 8 || size == 16 || size == 32 || size == 64)
6611 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6612 && TYPE_VECTOR_SUBPARTS (type) > 1)
6614 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6616 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6617 mode = MIN_MODE_VECTOR_FLOAT;
6618 else
6619 mode = MIN_MODE_VECTOR_INT;
6621 /* Get the mode which has this inner mode and number of units. */
6622 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6623 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6624 && GET_MODE_INNER (mode) == innermode)
6626 if (size == 64 && !TARGET_AVX512F)
6628 static bool warnedavx512f;
6629 static bool warnedavx512f_ret;
6631 if (cum && cum->warn_avx512f && !warnedavx512f)
6633 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6634 "without AVX512F enabled changes the ABI"))
6635 warnedavx512f = true;
6637 else if (in_return && !warnedavx512f_ret)
6639 if (warning (OPT_Wpsabi, "AVX512F vector return "
6640 "without AVX512F enabled changes the ABI"))
6641 warnedavx512f_ret = true;
6644 return TYPE_MODE (type);
6646 else if (size == 32 && !TARGET_AVX)
6648 static bool warnedavx;
6649 static bool warnedavx_ret;
6651 if (cum && cum->warn_avx && !warnedavx)
6653 if (warning (OPT_Wpsabi, "AVX vector argument "
6654 "without AVX enabled changes the ABI"))
6655 warnedavx = true;
6657 else if (in_return && !warnedavx_ret)
6659 if (warning (OPT_Wpsabi, "AVX vector return "
6660 "without AVX enabled changes the ABI"))
6661 warnedavx_ret = true;
6664 return TYPE_MODE (type);
6666 else if (((size == 8 && TARGET_64BIT) || size == 16)
6667 && !TARGET_SSE)
6669 static bool warnedsse;
6670 static bool warnedsse_ret;
6672 if (cum && cum->warn_sse && !warnedsse)
6674 if (warning (OPT_Wpsabi, "SSE vector argument "
6675 "without SSE enabled changes the ABI"))
6676 warnedsse = true;
6678 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6680 if (warning (OPT_Wpsabi, "SSE vector return "
6681 "without SSE enabled changes the ABI"))
6682 warnedsse_ret = true;
6685 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6687 static bool warnedmmx;
6688 static bool warnedmmx_ret;
6690 if (cum && cum->warn_mmx && !warnedmmx)
6692 if (warning (OPT_Wpsabi, "MMX vector argument "
6693 "without MMX enabled changes the ABI"))
6694 warnedmmx = true;
6696 else if (in_return && !warnedmmx_ret)
6698 if (warning (OPT_Wpsabi, "MMX vector return "
6699 "without MMX enabled changes the ABI"))
6700 warnedmmx_ret = true;
6703 return mode;
6706 gcc_unreachable ();
6710 return mode;
6713 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6714 this may not agree with the mode that the type system has chosen for the
6715 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6716 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6718 static rtx
6719 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6720 unsigned int regno)
6722 rtx tmp;
6724 if (orig_mode != BLKmode)
6725 tmp = gen_rtx_REG (orig_mode, regno);
6726 else
6728 tmp = gen_rtx_REG (mode, regno);
6729 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6730 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6733 return tmp;
6736 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
6737 of this code is to classify each 8-byte chunk of an incoming argument by register
6738 class and assign registers accordingly. */
6740 /* Return the union class of CLASS1 and CLASS2.
6741 See the x86-64 PS ABI for details. */
6743 static enum x86_64_reg_class
6744 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6746 /* Rule #1: If both classes are equal, this is the resulting class. */
6747 if (class1 == class2)
6748 return class1;
6750 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6751 the other class. */
6752 if (class1 == X86_64_NO_CLASS)
6753 return class2;
6754 if (class2 == X86_64_NO_CLASS)
6755 return class1;
6757 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6758 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6759 return X86_64_MEMORY_CLASS;
6761 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6762 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6763 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6764 return X86_64_INTEGERSI_CLASS;
6765 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6766 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6767 return X86_64_INTEGER_CLASS;
6769 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6770 MEMORY is used. */
6771 if (class1 == X86_64_X87_CLASS
6772 || class1 == X86_64_X87UP_CLASS
6773 || class1 == X86_64_COMPLEX_X87_CLASS
6774 || class2 == X86_64_X87_CLASS
6775 || class2 == X86_64_X87UP_CLASS
6776 || class2 == X86_64_COMPLEX_X87_CLASS)
6777 return X86_64_MEMORY_CLASS;
6779 /* Rule #6: Otherwise class SSE is used. */
6780 return X86_64_SSE_CLASS;
6783 /* Classify the argument of type TYPE and mode MODE.
6784 CLASSES will be filled by the register class used to pass each word
6785 of the operand. The number of words is returned. In case the parameter
6786 should be passed in memory, 0 is returned. As a special case for zero
6787 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6789 BIT_OFFSET is used internally for handling records and specifies the
6790 offset in bits modulo 512 to avoid overflow cases.
6792 See the x86-64 PS ABI for details.
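/* Illustrative example (not part of the original GCC source): under the
   x86-64 SysV classification implemented below,

     struct s { double d; int a; int b; };   (16 bytes, two eightbytes)

   has its first eightbyte classified as SSE and its second as INTEGER,
   so the struct is passed in one SSE register and one general-purpose
   register, while an aggregate larger than 16 bytes that is not a pure
   SSE/SSEUP sequence is passed in memory.  */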
6795 static int
6796 classify_argument (machine_mode mode, const_tree type,
6797 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6799 HOST_WIDE_INT bytes =
6800 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6801 int words
6802 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6804 /* Variable sized entities are always passed/returned in memory. */
6805 if (bytes < 0)
6806 return 0;
6808 if (mode != VOIDmode
6809 && targetm.calls.must_pass_in_stack (mode, type))
6810 return 0;
6812 if (type && AGGREGATE_TYPE_P (type))
6814 int i;
6815 tree field;
6816 enum x86_64_reg_class subclasses[MAX_CLASSES];
6818 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6819 if (bytes > 64)
6820 return 0;
6822 for (i = 0; i < words; i++)
6823 classes[i] = X86_64_NO_CLASS;
6825 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6826 signal the memory class, so handle it as a special case. */
6827 if (!words)
6829 classes[0] = X86_64_NO_CLASS;
6830 return 1;
6833 /* Classify each field of record and merge classes. */
6834 switch (TREE_CODE (type))
6836 case RECORD_TYPE:
6837 /* And now merge the fields of structure. */
6838 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6840 if (TREE_CODE (field) == FIELD_DECL)
6842 int num;
6844 if (TREE_TYPE (field) == error_mark_node)
6845 continue;
6847 /* Bitfields are always classified as integer. Handle them
6848 early, since later code would consider them to be
6849 misaligned integers. */
6850 if (DECL_BIT_FIELD (field))
6852 for (i = (int_bit_position (field)
6853 + (bit_offset % 64)) / 8 / 8;
6854 i < ((int_bit_position (field) + (bit_offset % 64))
6855 + tree_to_shwi (DECL_SIZE (field))
6856 + 63) / 8 / 8; i++)
6857 classes[i] =
6858 merge_classes (X86_64_INTEGER_CLASS,
6859 classes[i]);
6861 else
6863 int pos;
6865 type = TREE_TYPE (field);
6867 /* Flexible array member is ignored. */
6868 if (TYPE_MODE (type) == BLKmode
6869 && TREE_CODE (type) == ARRAY_TYPE
6870 && TYPE_SIZE (type) == NULL_TREE
6871 && TYPE_DOMAIN (type) != NULL_TREE
6872 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6873 == NULL_TREE))
6875 static bool warned;
6877 if (!warned && warn_psabi)
6879 warned = true;
6880 inform (input_location,
6881 "the ABI of passing struct with"
6882 " a flexible array member has"
6883 " changed in GCC 4.4");
6885 continue;
6887 num = classify_argument (TYPE_MODE (type), type,
6888 subclasses,
6889 (int_bit_position (field)
6890 + bit_offset) % 512);
6891 if (!num)
6892 return 0;
6893 pos = (int_bit_position (field)
6894 + (bit_offset % 64)) / 8 / 8;
6895 for (i = 0; i < num && (i + pos) < words; i++)
6896 classes[i + pos] =
6897 merge_classes (subclasses[i], classes[i + pos]);
6901 break;
6903 case ARRAY_TYPE:
6904 /* Arrays are handled as small records. */
6906 int num;
6907 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6908 TREE_TYPE (type), subclasses, bit_offset);
6909 if (!num)
6910 return 0;
6912 /* The partial classes are now full classes. */
6913 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6914 subclasses[0] = X86_64_SSE_CLASS;
6915 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6916 && !((bit_offset % 64) == 0 && bytes == 4))
6917 subclasses[0] = X86_64_INTEGER_CLASS;
6919 for (i = 0; i < words; i++)
6920 classes[i] = subclasses[i % num];
6922 break;
6924 case UNION_TYPE:
6925 case QUAL_UNION_TYPE:
6926 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6928 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6930 if (TREE_CODE (field) == FIELD_DECL)
6932 int num;
6934 if (TREE_TYPE (field) == error_mark_node)
6935 continue;
6937 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6938 TREE_TYPE (field), subclasses,
6939 bit_offset);
6940 if (!num)
6941 return 0;
6942 for (i = 0; i < num && i < words; i++)
6943 classes[i] = merge_classes (subclasses[i], classes[i]);
6946 break;
6948 default:
6949 gcc_unreachable ();
6952 if (words > 2)
6954 /* When size > 16 bytes, if the first class isn't
6955 X86_64_SSE_CLASS or any of the other ones isn't
6956 X86_64_SSEUP_CLASS, everything should be passed in
6957 memory. */
6958 if (classes[0] != X86_64_SSE_CLASS)
6959 return 0;
6961 for (i = 1; i < words; i++)
6962 if (classes[i] != X86_64_SSEUP_CLASS)
6963 return 0;
6966 /* Final merger cleanup. */
6967 for (i = 0; i < words; i++)
6969 /* If one class is MEMORY, everything should be passed in
6970 memory. */
6971 if (classes[i] == X86_64_MEMORY_CLASS)
6972 return 0;
6974 /* X86_64_SSEUP_CLASS should always be preceded by
6975 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6976 if (classes[i] == X86_64_SSEUP_CLASS
6977 && classes[i - 1] != X86_64_SSE_CLASS
6978 && classes[i - 1] != X86_64_SSEUP_CLASS)
6980 /* The first one should never be X86_64_SSEUP_CLASS. */
6981 gcc_assert (i != 0);
6982 classes[i] = X86_64_SSE_CLASS;
6985 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6986 everything should be passed in memory. */
6987 if (classes[i] == X86_64_X87UP_CLASS
6988 && (classes[i - 1] != X86_64_X87_CLASS))
6990 static bool warned;
6992 /* The first one should never be X86_64_X87UP_CLASS. */
6993 gcc_assert (i != 0);
6994 if (!warned && warn_psabi)
6996 warned = true;
6997 inform (input_location,
6998 "the ABI of passing union with long double"
6999 " has changed in GCC 4.4");
7001 return 0;
7004 return words;
7007 /* Compute the alignment needed. We align all types to their natural boundaries, with
7008 the exception of XFmode, which is aligned to 64 bits. */
7009 if (mode != VOIDmode && mode != BLKmode)
7011 int mode_alignment = GET_MODE_BITSIZE (mode);
7013 if (mode == XFmode)
7014 mode_alignment = 128;
7015 else if (mode == XCmode)
7016 mode_alignment = 256;
7017 if (COMPLEX_MODE_P (mode))
7018 mode_alignment /= 2;
7019 /* Misaligned fields are always returned in memory. */
7020 if (bit_offset % mode_alignment)
7021 return 0;
7024 /* For V1xx modes, just use the base mode. */
7025 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7026 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
7027 mode = GET_MODE_INNER (mode);
7029 /* Classification of atomic types. */
7030 switch (mode)
7032 case SDmode:
7033 case DDmode:
7034 classes[0] = X86_64_SSE_CLASS;
7035 return 1;
7036 case TDmode:
7037 classes[0] = X86_64_SSE_CLASS;
7038 classes[1] = X86_64_SSEUP_CLASS;
7039 return 2;
7040 case DImode:
7041 case SImode:
7042 case HImode:
7043 case QImode:
7044 case CSImode:
7045 case CHImode:
7046 case CQImode:
7048 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7050 /* Analyze last 128 bits only. */
7051 size = (size - 1) & 0x7f;
7053 if (size < 32)
7055 classes[0] = X86_64_INTEGERSI_CLASS;
7056 return 1;
7058 else if (size < 64)
7060 classes[0] = X86_64_INTEGER_CLASS;
7061 return 1;
7063 else if (size < 64+32)
7065 classes[0] = X86_64_INTEGER_CLASS;
7066 classes[1] = X86_64_INTEGERSI_CLASS;
7067 return 2;
7069 else if (size < 64+64)
7071 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7072 return 2;
7074 else
7075 gcc_unreachable ();
7077 case CDImode:
7078 case TImode:
7079 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7080 return 2;
7081 case COImode:
7082 case OImode:
7083 /* OImode shouldn't be used directly. */
7084 gcc_unreachable ();
7085 case CTImode:
7086 return 0;
7087 case SFmode:
7088 if (!(bit_offset % 64))
7089 classes[0] = X86_64_SSESF_CLASS;
7090 else
7091 classes[0] = X86_64_SSE_CLASS;
7092 return 1;
7093 case DFmode:
7094 classes[0] = X86_64_SSEDF_CLASS;
7095 return 1;
7096 case XFmode:
7097 classes[0] = X86_64_X87_CLASS;
7098 classes[1] = X86_64_X87UP_CLASS;
7099 return 2;
7100 case TFmode:
7101 classes[0] = X86_64_SSE_CLASS;
7102 classes[1] = X86_64_SSEUP_CLASS;
7103 return 2;
7104 case SCmode:
7105 classes[0] = X86_64_SSE_CLASS;
7106 if (!(bit_offset % 64))
7107 return 1;
7108 else
7110 static bool warned;
7112 if (!warned && warn_psabi)
7114 warned = true;
7115 inform (input_location,
7116 "the ABI of passing structure with complex float"
7117 " member has changed in GCC 4.4");
7119 classes[1] = X86_64_SSESF_CLASS;
7120 return 2;
7122 case DCmode:
7123 classes[0] = X86_64_SSEDF_CLASS;
7124 classes[1] = X86_64_SSEDF_CLASS;
7125 return 2;
7126 case XCmode:
7127 classes[0] = X86_64_COMPLEX_X87_CLASS;
7128 return 1;
7129 case TCmode:
7130 /* This mode is larger than 16 bytes. */
7131 return 0;
7132 case V8SFmode:
7133 case V8SImode:
7134 case V32QImode:
7135 case V16HImode:
7136 case V4DFmode:
7137 case V4DImode:
7138 classes[0] = X86_64_SSE_CLASS;
7139 classes[1] = X86_64_SSEUP_CLASS;
7140 classes[2] = X86_64_SSEUP_CLASS;
7141 classes[3] = X86_64_SSEUP_CLASS;
7142 return 4;
7143 case V8DFmode:
7144 case V16SFmode:
7145 case V8DImode:
7146 case V16SImode:
7147 case V32HImode:
7148 case V64QImode:
7149 classes[0] = X86_64_SSE_CLASS;
7150 classes[1] = X86_64_SSEUP_CLASS;
7151 classes[2] = X86_64_SSEUP_CLASS;
7152 classes[3] = X86_64_SSEUP_CLASS;
7153 classes[4] = X86_64_SSEUP_CLASS;
7154 classes[5] = X86_64_SSEUP_CLASS;
7155 classes[6] = X86_64_SSEUP_CLASS;
7156 classes[7] = X86_64_SSEUP_CLASS;
7157 return 8;
7158 case V4SFmode:
7159 case V4SImode:
7160 case V16QImode:
7161 case V8HImode:
7162 case V2DFmode:
7163 case V2DImode:
7164 classes[0] = X86_64_SSE_CLASS;
7165 classes[1] = X86_64_SSEUP_CLASS;
7166 return 2;
7167 case V1TImode:
7168 case V1DImode:
7169 case V2SFmode:
7170 case V2SImode:
7171 case V4HImode:
7172 case V8QImode:
7173 classes[0] = X86_64_SSE_CLASS;
7174 return 1;
7175 case BLKmode:
7176 case VOIDmode:
7177 return 0;
7178 default:
7179 gcc_assert (VECTOR_MODE_P (mode));
7181 if (bytes > 16)
7182 return 0;
7184 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7186 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7187 classes[0] = X86_64_INTEGERSI_CLASS;
7188 else
7189 classes[0] = X86_64_INTEGER_CLASS;
7190 classes[1] = X86_64_INTEGER_CLASS;
7191 return 1 + (bytes > 8);
7195 /* Examine the argument and set the number of registers required in each
7196 class. Return true iff the parameter should be passed in memory. */
7198 static bool
7199 examine_argument (machine_mode mode, const_tree type, int in_return,
7200 int *int_nregs, int *sse_nregs)
7202 enum x86_64_reg_class regclass[MAX_CLASSES];
7203 int n = classify_argument (mode, type, regclass, 0);
7205 *int_nregs = 0;
7206 *sse_nregs = 0;
7208 if (!n)
7209 return true;
7210 for (n--; n >= 0; n--)
7211 switch (regclass[n])
7213 case X86_64_INTEGER_CLASS:
7214 case X86_64_INTEGERSI_CLASS:
7215 (*int_nregs)++;
7216 break;
7217 case X86_64_SSE_CLASS:
7218 case X86_64_SSESF_CLASS:
7219 case X86_64_SSEDF_CLASS:
7220 (*sse_nregs)++;
7221 break;
7222 case X86_64_NO_CLASS:
7223 case X86_64_SSEUP_CLASS:
7224 break;
7225 case X86_64_X87_CLASS:
7226 case X86_64_X87UP_CLASS:
7227 case X86_64_COMPLEX_X87_CLASS:
7228 if (!in_return)
7229 return true;
7230 break;
7231 case X86_64_MEMORY_CLASS:
7232 gcc_unreachable ();
7235 return false;
7238 /* Construct container for the argument used by GCC interface. See
7239 FUNCTION_ARG for the detailed description. */
7241 static rtx
7242 construct_container (machine_mode mode, machine_mode orig_mode,
7243 const_tree type, int in_return, int nintregs, int nsseregs,
7244 const int *intreg, int sse_regno)
7246 /* The following variables hold the static issued_error state. */
7247 static bool issued_sse_arg_error;
7248 static bool issued_sse_ret_error;
7249 static bool issued_x87_ret_error;
7251 machine_mode tmpmode;
7252 int bytes =
7253 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7254 enum x86_64_reg_class regclass[MAX_CLASSES];
7255 int n;
7256 int i;
7257 int nexps = 0;
7258 int needed_sseregs, needed_intregs;
7259 rtx exp[MAX_CLASSES];
7260 rtx ret;
7262 n = classify_argument (mode, type, regclass, 0);
7263 if (!n)
7264 return NULL;
7265 if (examine_argument (mode, type, in_return, &needed_intregs,
7266 &needed_sseregs))
7267 return NULL;
7268 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7269 return NULL;
7271 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7272 some less clueful developer tries to use floating-point anyway. */
7273 if (needed_sseregs && !TARGET_SSE)
7275 if (in_return)
7277 if (!issued_sse_ret_error)
7279 error ("SSE register return with SSE disabled");
7280 issued_sse_ret_error = true;
7283 else if (!issued_sse_arg_error)
7285 error ("SSE register argument with SSE disabled");
7286 issued_sse_arg_error = true;
7288 return NULL;
7291 /* Likewise, error if the ABI requires us to return values in the
7292 x87 registers and the user specified -mno-80387. */
7293 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7294 for (i = 0; i < n; i++)
7295 if (regclass[i] == X86_64_X87_CLASS
7296 || regclass[i] == X86_64_X87UP_CLASS
7297 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7299 if (!issued_x87_ret_error)
7301 error ("x87 register return with x87 disabled");
7302 issued_x87_ret_error = true;
7304 return NULL;
7307 /* First construct the simple cases. Avoid SCmode, since we want to use a
7308 single register to pass this type. */
7309 if (n == 1 && mode != SCmode)
7310 switch (regclass[0])
7312 case X86_64_INTEGER_CLASS:
7313 case X86_64_INTEGERSI_CLASS:
7314 return gen_rtx_REG (mode, intreg[0]);
7315 case X86_64_SSE_CLASS:
7316 case X86_64_SSESF_CLASS:
7317 case X86_64_SSEDF_CLASS:
7318 if (mode != BLKmode)
7319 return gen_reg_or_parallel (mode, orig_mode,
7320 SSE_REGNO (sse_regno));
7321 break;
7322 case X86_64_X87_CLASS:
7323 case X86_64_COMPLEX_X87_CLASS:
7324 return gen_rtx_REG (mode, FIRST_STACK_REG);
7325 case X86_64_NO_CLASS:
7326 /* Zero sized array, struct or class. */
7327 return NULL;
7328 default:
7329 gcc_unreachable ();
7331 if (n == 2
7332 && regclass[0] == X86_64_SSE_CLASS
7333 && regclass[1] == X86_64_SSEUP_CLASS
7334 && mode != BLKmode)
7335 return gen_reg_or_parallel (mode, orig_mode,
7336 SSE_REGNO (sse_regno));
7337 if (n == 4
7338 && regclass[0] == X86_64_SSE_CLASS
7339 && regclass[1] == X86_64_SSEUP_CLASS
7340 && regclass[2] == X86_64_SSEUP_CLASS
7341 && regclass[3] == X86_64_SSEUP_CLASS
7342 && mode != BLKmode)
7343 return gen_reg_or_parallel (mode, orig_mode,
7344 SSE_REGNO (sse_regno));
7345 if (n == 8
7346 && regclass[0] == X86_64_SSE_CLASS
7347 && regclass[1] == X86_64_SSEUP_CLASS
7348 && regclass[2] == X86_64_SSEUP_CLASS
7349 && regclass[3] == X86_64_SSEUP_CLASS
7350 && regclass[4] == X86_64_SSEUP_CLASS
7351 && regclass[5] == X86_64_SSEUP_CLASS
7352 && regclass[6] == X86_64_SSEUP_CLASS
7353 && regclass[7] == X86_64_SSEUP_CLASS
7354 && mode != BLKmode)
7355 return gen_reg_or_parallel (mode, orig_mode,
7356 SSE_REGNO (sse_regno));
7357 if (n == 2
7358 && regclass[0] == X86_64_X87_CLASS
7359 && regclass[1] == X86_64_X87UP_CLASS)
7360 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7362 if (n == 2
7363 && regclass[0] == X86_64_INTEGER_CLASS
7364 && regclass[1] == X86_64_INTEGER_CLASS
7365 && (mode == CDImode || mode == TImode)
7366 && intreg[0] + 1 == intreg[1])
7367 return gen_rtx_REG (mode, intreg[0]);
7369 /* Otherwise figure out the entries of the PARALLEL. */
7370 for (i = 0; i < n; i++)
7372 int pos;
7374 switch (regclass[i])
7376 case X86_64_NO_CLASS:
7377 break;
7378 case X86_64_INTEGER_CLASS:
7379 case X86_64_INTEGERSI_CLASS:
7380 /* Merge TImodes on aligned occasions here too. */
7381 if (i * 8 + 8 > bytes)
7382 tmpmode
7383 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7384 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7385 tmpmode = SImode;
7386 else
7387 tmpmode = DImode;
7388 /* We've requested 24 bytes for which we
7389 don't have a mode. Use DImode. */
7390 if (tmpmode == BLKmode)
7391 tmpmode = DImode;
7392 exp [nexps++]
7393 = gen_rtx_EXPR_LIST (VOIDmode,
7394 gen_rtx_REG (tmpmode, *intreg),
7395 GEN_INT (i*8));
7396 intreg++;
7397 break;
7398 case X86_64_SSESF_CLASS:
7399 exp [nexps++]
7400 = gen_rtx_EXPR_LIST (VOIDmode,
7401 gen_rtx_REG (SFmode,
7402 SSE_REGNO (sse_regno)),
7403 GEN_INT (i*8));
7404 sse_regno++;
7405 break;
7406 case X86_64_SSEDF_CLASS:
7407 exp [nexps++]
7408 = gen_rtx_EXPR_LIST (VOIDmode,
7409 gen_rtx_REG (DFmode,
7410 SSE_REGNO (sse_regno)),
7411 GEN_INT (i*8));
7412 sse_regno++;
7413 break;
7414 case X86_64_SSE_CLASS:
7415 pos = i;
7416 switch (n)
7418 case 1:
7419 tmpmode = DImode;
7420 break;
7421 case 2:
7422 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7424 tmpmode = TImode;
7425 i++;
7427 else
7428 tmpmode = DImode;
7429 break;
7430 case 4:
7431 gcc_assert (i == 0
7432 && regclass[1] == X86_64_SSEUP_CLASS
7433 && regclass[2] == X86_64_SSEUP_CLASS
7434 && regclass[3] == X86_64_SSEUP_CLASS);
7435 tmpmode = OImode;
7436 i += 3;
7437 break;
7438 case 8:
7439 gcc_assert (i == 0
7440 && regclass[1] == X86_64_SSEUP_CLASS
7441 && regclass[2] == X86_64_SSEUP_CLASS
7442 && regclass[3] == X86_64_SSEUP_CLASS
7443 && regclass[4] == X86_64_SSEUP_CLASS
7444 && regclass[5] == X86_64_SSEUP_CLASS
7445 && regclass[6] == X86_64_SSEUP_CLASS
7446 && regclass[7] == X86_64_SSEUP_CLASS);
7447 tmpmode = XImode;
7448 i += 7;
7449 break;
7450 default:
7451 gcc_unreachable ();
7453 exp [nexps++]
7454 = gen_rtx_EXPR_LIST (VOIDmode,
7455 gen_rtx_REG (tmpmode,
7456 SSE_REGNO (sse_regno)),
7457 GEN_INT (pos*8));
7458 sse_regno++;
7459 break;
7460 default:
7461 gcc_unreachable ();
7465 /* Empty aligned struct, union or class. */
7466 if (nexps == 0)
7467 return NULL;
7469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7470 for (i = 0; i < nexps; i++)
7471 XVECEXP (ret, 0, i) = exp [i];
7472 return ret;
7475 /* Update the data in CUM to advance over an argument of mode MODE
7476 and data type TYPE. (TYPE is null for libcalls where that information
7477 may not be available.)
7479 Return the number of integer registers advanced over. */
7481 static int
7482 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7483 const_tree type, HOST_WIDE_INT bytes,
7484 HOST_WIDE_INT words)
7486 int res = 0;
7487 bool error_p = false;
7489 if (TARGET_IAMCU)
7491 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7492 bytes in registers. */
7493 if (bytes <= 8)
7494 goto pass_in_reg;
7495 return res;
7498 switch (mode)
7500 default:
7501 break;
7503 case BLKmode:
7504 if (bytes < 0)
7505 break;
7506 /* FALLTHRU */
7508 case DImode:
7509 case SImode:
7510 case HImode:
7511 case QImode:
7512 pass_in_reg:
7513 cum->words += words;
7514 cum->nregs -= words;
7515 cum->regno += words;
7516 if (cum->nregs >= 0)
7517 res = words;
7518 if (cum->nregs <= 0)
7520 cum->nregs = 0;
7521 cum->regno = 0;
7523 break;
7525 case OImode:
7526 /* OImode shouldn't be used directly. */
7527 gcc_unreachable ();
7529 case DFmode:
7530 if (cum->float_in_sse == -1)
7531 error_p = 1;
7532 if (cum->float_in_sse < 2)
7533 break;
7534 case SFmode:
7535 if (cum->float_in_sse == -1)
7536 error_p = 1;
7537 if (cum->float_in_sse < 1)
7538 break;
7539 /* FALLTHRU */
7541 case V8SFmode:
7542 case V8SImode:
7543 case V64QImode:
7544 case V32HImode:
7545 case V16SImode:
7546 case V8DImode:
7547 case V16SFmode:
7548 case V8DFmode:
7549 case V32QImode:
7550 case V16HImode:
7551 case V4DFmode:
7552 case V4DImode:
7553 case TImode:
7554 case V16QImode:
7555 case V8HImode:
7556 case V4SImode:
7557 case V2DImode:
7558 case V4SFmode:
7559 case V2DFmode:
7560 if (!type || !AGGREGATE_TYPE_P (type))
7562 cum->sse_words += words;
7563 cum->sse_nregs -= 1;
7564 cum->sse_regno += 1;
7565 if (cum->sse_nregs <= 0)
7567 cum->sse_nregs = 0;
7568 cum->sse_regno = 0;
7571 break;
7573 case V8QImode:
7574 case V4HImode:
7575 case V2SImode:
7576 case V2SFmode:
7577 case V1TImode:
7578 case V1DImode:
7579 if (!type || !AGGREGATE_TYPE_P (type))
7581 cum->mmx_words += words;
7582 cum->mmx_nregs -= 1;
7583 cum->mmx_regno += 1;
7584 if (cum->mmx_nregs <= 0)
7586 cum->mmx_nregs = 0;
7587 cum->mmx_regno = 0;
7590 break;
7592 if (error_p)
7594 cum->float_in_sse = 0;
7595 error ("calling %qD with SSE calling convention without "
7596 "SSE/SSE2 enabled", cum->decl);
7597 sorry ("this is a GCC bug that can be worked around by adding "
7598 "attribute used to function called");
7601 return res;
7604 static int
7605 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7606 const_tree type, HOST_WIDE_INT words, bool named)
7608 int int_nregs, sse_nregs;
7610 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7611 if (!named && (VALID_AVX512F_REG_MODE (mode)
7612 || VALID_AVX256_REG_MODE (mode)))
7613 return 0;
7615 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7616 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7618 cum->nregs -= int_nregs;
7619 cum->sse_nregs -= sse_nregs;
7620 cum->regno += int_nregs;
7621 cum->sse_regno += sse_nregs;
7622 return int_nregs;
7624 else
7626 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7627 cum->words = (cum->words + align - 1) & ~(align - 1);
7628 cum->words += words;
7629 return 0;
7633 static int
7634 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7635 HOST_WIDE_INT words)
7637 /* Otherwise, this should be passed indirect. */
7638 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7640 cum->words += words;
7641 if (cum->nregs > 0)
7643 cum->nregs -= 1;
7644 cum->regno += 1;
7645 return 1;
7647 return 0;
7650 /* Update the data in CUM to advance over an argument of mode MODE and
7651 data type TYPE. (TYPE is null for libcalls where that information
7652 may not be available.) */
7654 static void
7655 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7656 const_tree type, bool named)
7658 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7659 HOST_WIDE_INT bytes, words;
7660 int nregs;
7662 if (mode == BLKmode)
7663 bytes = int_size_in_bytes (type);
7664 else
7665 bytes = GET_MODE_SIZE (mode);
7666 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7668 if (type)
7669 mode = type_natural_mode (type, NULL, false);
7671 if ((type && POINTER_BOUNDS_TYPE_P (type))
7672 || POINTER_BOUNDS_MODE_P (mode))
7674 /* If we pass bounds in BT then just update the remaining bounds count. */
7675 if (cum->bnds_in_bt)
7677 cum->bnds_in_bt--;
7678 return;
7681 /* Update the remaining number of bounds to force. */
7682 if (cum->force_bnd_pass)
7683 cum->force_bnd_pass--;
7685 cum->bnd_regno++;
7687 return;
7690 /* The first arg not going to Bounds Tables resets this counter. */
7691 cum->bnds_in_bt = 0;
7692 /* For unnamed args we always pass bounds to avoid bounds mess when
7693 passed and received types do not match. If bounds do not follow
7694 unnamed arg, still pretend required number of bounds were passed. */
7695 if (cum->force_bnd_pass)
7697 cum->bnd_regno += cum->force_bnd_pass;
7698 cum->force_bnd_pass = 0;
7701 if (TARGET_64BIT)
7703 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7705 if (call_abi == MS_ABI)
7706 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7707 else
7708 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7710 else
7711 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7713 /* For stdarg we expect bounds to be passed for each value passed
7714 in register. */
7715 if (cum->stdarg)
7716 cum->force_bnd_pass = nregs;
7717 /* For pointers passed in memory we expect bounds passed in Bounds
7718 Table. */
7719 if (!nregs)
7720 cum->bnds_in_bt = chkp_type_bounds_count (type);
7723 /* Define where to put the arguments to a function.
7724 Value is zero to push the argument on the stack,
7725 or a hard register in which to store the argument.
7727 MODE is the argument's machine mode.
7728 TYPE is the data type of the argument (as a tree).
7729 This is null for libcalls where that information may
7730 not be available.
7731 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7732 the preceding args and about the function being called.
7733 NAMED is nonzero if this argument is a named parameter
7734 (otherwise it is an extra parameter matching an ellipsis). */
7736 static rtx
7737 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7738 machine_mode orig_mode, const_tree type,
7739 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7741 bool error_p = false;
7742 /* Avoid the AL settings for the Unix64 ABI. */
7743 if (mode == VOIDmode)
7744 return constm1_rtx;
7746 if (TARGET_IAMCU)
7748 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7749 bytes in registers. */
7750 if (bytes <= 8)
7751 goto pass_in_reg;
7752 return NULL_RTX;
7755 switch (mode)
7757 default:
7758 break;
7760 case BLKmode:
7761 if (bytes < 0)
7762 break;
7763 /* FALLTHRU */
7764 case DImode:
7765 case SImode:
7766 case HImode:
7767 case QImode:
7768 pass_in_reg:
7769 if (words <= cum->nregs)
7771 int regno = cum->regno;
7773 /* Fastcall allocates the first two DWORD (SImode) or
7774 smaller arguments to ECX and EDX if it isn't an
7775 aggregate type. */
7776 if (cum->fastcall)
7778 if (mode == BLKmode
7779 || mode == DImode
7780 || (type && AGGREGATE_TYPE_P (type)))
7781 break;
7783 /* ECX not EAX is the first allocated register. */
7784 if (regno == AX_REG)
7785 regno = CX_REG;
7787 return gen_rtx_REG (mode, regno);
7789 break;
7791 case DFmode:
7792 if (cum->float_in_sse == -1)
7793 error_p = 1;
7794 if (cum->float_in_sse < 2)
7795 break;
7796 case SFmode:
7797 if (cum->float_in_sse == -1)
7798 error_p = 1;
7799 if (cum->float_in_sse < 1)
7800 break;
7801 /* FALLTHRU */
7802 case TImode:
7803 /* In 32bit, we pass TImode in xmm registers. */
7804 case V16QImode:
7805 case V8HImode:
7806 case V4SImode:
7807 case V2DImode:
7808 case V4SFmode:
7809 case V2DFmode:
7810 if (!type || !AGGREGATE_TYPE_P (type))
7812 if (cum->sse_nregs)
7813 return gen_reg_or_parallel (mode, orig_mode,
7814 cum->sse_regno + FIRST_SSE_REG);
7816 break;
7818 case OImode:
7819 case XImode:
7820 /* OImode and XImode shouldn't be used directly. */
7821 gcc_unreachable ();
7823 case V64QImode:
7824 case V32HImode:
7825 case V16SImode:
7826 case V8DImode:
7827 case V16SFmode:
7828 case V8DFmode:
7829 case V8SFmode:
7830 case V8SImode:
7831 case V32QImode:
7832 case V16HImode:
7833 case V4DFmode:
7834 case V4DImode:
7835 if (!type || !AGGREGATE_TYPE_P (type))
7837 if (cum->sse_nregs)
7838 return gen_reg_or_parallel (mode, orig_mode,
7839 cum->sse_regno + FIRST_SSE_REG);
7841 break;
7843 case V8QImode:
7844 case V4HImode:
7845 case V2SImode:
7846 case V2SFmode:
7847 case V1TImode:
7848 case V1DImode:
7849 if (!type || !AGGREGATE_TYPE_P (type))
7851 if (cum->mmx_nregs)
7852 return gen_reg_or_parallel (mode, orig_mode,
7853 cum->mmx_regno + FIRST_MMX_REG);
7855 break;
7857 if (error_p)
7859 cum->float_in_sse = 0;
7860 error ("calling %qD with SSE calling convention without "
7861 "SSE/SSE2 enabled", cum->decl);
7862 sorry ("this is a GCC bug that can be worked around by adding "
7863 "attribute used to function called");
7866 return NULL_RTX;
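/* Illustrative example (not part of the original GCC source): with the
   Intel MCU psABI (-miamcu) case handled above,

     struct s8  { int a; int b; };          (8 bytes, may go in registers)
     struct s12 { int a; int b; int c; };   (12 bytes, passed on the stack)

   i.e. scalars and aggregates of at most 8 bytes are eligible for the
   integer argument registers, while anything larger goes to memory.  */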
7869 static rtx
7870 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7871 machine_mode orig_mode, const_tree type, bool named)
7873 /* Handle a hidden AL argument containing number of registers
7874 for varargs x86-64 functions. */
7875 if (mode == VOIDmode)
7876 return GEN_INT (cum->maybe_vaarg
7877 ? (cum->sse_nregs < 0
7878 ? X86_64_SSE_REGPARM_MAX
7879 : cum->sse_regno)
7880 : -1);
7882 switch (mode)
7884 default:
7885 break;
7887 case V8SFmode:
7888 case V8SImode:
7889 case V32QImode:
7890 case V16HImode:
7891 case V4DFmode:
7892 case V4DImode:
7893 case V16SFmode:
7894 case V16SImode:
7895 case V64QImode:
7896 case V32HImode:
7897 case V8DFmode:
7898 case V8DImode:
7899 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7900 if (!named)
7901 return NULL;
7902 break;
7905 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7906 cum->sse_nregs,
7907 &x86_64_int_parameter_registers [cum->regno],
7908 cum->sse_regno);
7911 static rtx
7912 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7913 machine_mode orig_mode, bool named,
7914 HOST_WIDE_INT bytes)
7916 unsigned int regno;
7918 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7919 We use the value -2 to specify that the current function call is MS ABI. */
7920 if (mode == VOIDmode)
7921 return GEN_INT (-2);
7923 /* If we've run out of registers, it goes on the stack. */
7924 if (cum->nregs == 0)
7925 return NULL_RTX;
7927 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7929 /* Only floating point modes are passed in anything but integer regs. */
7930 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7932 if (named)
7933 regno = cum->regno + FIRST_SSE_REG;
7934 else
7936 rtx t1, t2;
7938 /* Unnamed floating parameters are passed in both the
7939 SSE and integer registers. */
7940 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7941 t2 = gen_rtx_REG (mode, regno);
7942 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7943 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7944 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7947 /* Handle aggregate types passed in registers. */
7948 if (orig_mode == BLKmode)
7950 if (bytes > 0 && bytes <= 8)
7951 mode = (bytes > 4 ? DImode : SImode);
7952 if (mode == BLKmode)
7953 mode = DImode;
7956 return gen_reg_or_parallel (mode, orig_mode, regno);
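/* Illustrative example (not part of the original GCC source): for a
   Win64 varargs call such as

     void vcall (const char *, ...);
     vcall ("%f", 3.14);

   the unnamed double is described by the two-element PARALLEL built
   above, so the caller places it in both the XMM register and the
   corresponding integer register, whichever one the callee reads.  */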
7959 /* Return where to put the arguments to a function.
7960 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7962 MODE is the argument's machine mode. TYPE is the data type of the
7963 argument. It is null for libcalls where that information may not be
7964 available. CUM gives information about the preceding args and about
7965 the function being called. NAMED is nonzero if this argument is a
7966 named parameter (otherwise it is an extra parameter matching an
7967 ellipsis). */
7969 static rtx
7970 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7971 const_tree type, bool named)
7973 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7974 machine_mode mode = omode;
7975 HOST_WIDE_INT bytes, words;
7976 rtx arg;
7978 /* All pointer bounds arguments are handled separately here. */
7979 if ((type && POINTER_BOUNDS_TYPE_P (type))
7980 || POINTER_BOUNDS_MODE_P (mode))
7982 /* Return NULL if bounds are forced to go in Bounds Table. */
7983 if (cum->bnds_in_bt)
7984 arg = NULL;
7985 /* Return the next available bound reg if any. */
7986 else if (cum->bnd_regno <= LAST_BND_REG)
7987 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7988 /* Return the next special slot number otherwise. */
7989 else
7990 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7992 return arg;
7995 if (mode == BLKmode)
7996 bytes = int_size_in_bytes (type);
7997 else
7998 bytes = GET_MODE_SIZE (mode);
7999 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8001 /* To simplify the code below, represent vector types with a vector mode
8002 even if MMX/SSE are not active. */
8003 if (type && TREE_CODE (type) == VECTOR_TYPE)
8004 mode = type_natural_mode (type, cum, false);
8006 if (TARGET_64BIT)
8008 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8010 if (call_abi == MS_ABI)
8011 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8012 else
8013 arg = function_arg_64 (cum, mode, omode, type, named);
8015 else
8016 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8018 return arg;
8021 /* A C expression that indicates when an argument must be passed by
8022 reference. If nonzero for an argument, a copy of that argument is
8023 made in memory and a pointer to the argument is passed instead of
8024 the argument itself. The pointer is passed in whatever way is
8025 appropriate for passing a pointer to that type. */
8027 static bool
8028 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8029 const_tree type, bool)
8031 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8033 /* Bounds are never passed by reference. */
8034 if ((type && POINTER_BOUNDS_TYPE_P (type))
8035 || POINTER_BOUNDS_MODE_P (mode))
8036 return false;
8038 if (TARGET_64BIT)
8040 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8042 /* See Windows x64 Software Convention. */
8043 if (call_abi == MS_ABI)
8045 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8047 if (type)
8049 /* Arrays are passed by reference. */
8050 if (TREE_CODE (type) == ARRAY_TYPE)
8051 return true;
8053 if (RECORD_OR_UNION_TYPE_P (type))
8055 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8056 are passed by reference. */
8057 msize = int_size_in_bytes (type);
8061 /* __m128 is passed by reference. */
8062 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8064 else if (type && int_size_in_bytes (type) == -1)
8065 return true;
8068 return false;
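/* Illustrative example (not part of the original GCC source): under the
   Win64 rules checked above,

     struct s3 { char c[3]; };      (3 bytes  -> passed by reference)
     struct s8 { int a; int b; };   (8 bytes  -> passed by value)
     __m128                         (16 bytes -> passed by reference)

   because only aggregates of exactly 1, 2, 4 or 8 bytes are passed
   directly.  */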
8071 /* Return true when TYPE should be 128bit aligned for 32bit argument
8072 passing ABI. XXX: This function is obsolete and is only used for
8073 checking psABI compatibility with previous versions of GCC. */
8075 static bool
8076 ix86_compat_aligned_value_p (const_tree type)
8078 machine_mode mode = TYPE_MODE (type);
8079 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8080 || mode == TDmode
8081 || mode == TFmode
8082 || mode == TCmode)
8083 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8084 return true;
8085 if (TYPE_ALIGN (type) < 128)
8086 return false;
8088 if (AGGREGATE_TYPE_P (type))
8090 /* Walk the aggregates recursively. */
8091 switch (TREE_CODE (type))
8093 case RECORD_TYPE:
8094 case UNION_TYPE:
8095 case QUAL_UNION_TYPE:
8097 tree field;
8099 /* Walk all the structure fields. */
8100 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8102 if (TREE_CODE (field) == FIELD_DECL
8103 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8104 return true;
8106 break;
8109 case ARRAY_TYPE:
8110 /* Just in case some languages pass arrays by value. */
8111 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8112 return true;
8113 break;
8115 default:
8116 gcc_unreachable ();
8119 return false;
8122 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8123 XXX: This function is obsolete and is only used for checking psABI
8124 compatibility with previous versions of GCC. */
8126 static unsigned int
8127 ix86_compat_function_arg_boundary (machine_mode mode,
8128 const_tree type, unsigned int align)
8130 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8131 natural boundaries. */
8132 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8134 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8135 make an exception for SSE modes since these require 128bit
8136 alignment.
8138 The handling here differs from field_alignment. ICC aligns MMX
8139 arguments to 4 byte boundaries, while structure fields are aligned
8140 to 8 byte boundaries. */
8141 if (!type)
8143 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8144 align = PARM_BOUNDARY;
8146 else
8148 if (!ix86_compat_aligned_value_p (type))
8149 align = PARM_BOUNDARY;
8152 if (align > BIGGEST_ALIGNMENT)
8153 align = BIGGEST_ALIGNMENT;
8154 return align;
8157 /* Return true when TYPE should be 128bit aligned for 32bit argument
8158 passing ABI. */
8160 static bool
8161 ix86_contains_aligned_value_p (const_tree type)
8163 machine_mode mode = TYPE_MODE (type);
8165 if (mode == XFmode || mode == XCmode)
8166 return false;
8168 if (TYPE_ALIGN (type) < 128)
8169 return false;
8171 if (AGGREGATE_TYPE_P (type))
8173 /* Walk the aggregates recursively. */
8174 switch (TREE_CODE (type))
8176 case RECORD_TYPE:
8177 case UNION_TYPE:
8178 case QUAL_UNION_TYPE:
8180 tree field;
8182 /* Walk all the structure fields. */
8183 for (field = TYPE_FIELDS (type);
8184 field;
8185 field = DECL_CHAIN (field))
8187 if (TREE_CODE (field) == FIELD_DECL
8188 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8189 return true;
8191 break;
8194 case ARRAY_TYPE:
8195 /* Just in case some languages pass arrays by value. */
8196 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8197 return true;
8198 break;
8200 default:
8201 gcc_unreachable ();
8204 else
8205 return TYPE_ALIGN (type) >= 128;
8207 return false;
8210 /* Gives the alignment boundary, in bits, of an argument with the
8211 specified mode and type. */
8213 static unsigned int
8214 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8216 unsigned int align;
8217 if (type)
8219 /* Since the main variant type is used for the call, we convert TYPE to
8220 its main variant. */
8221 type = TYPE_MAIN_VARIANT (type);
8222 align = TYPE_ALIGN (type);
8224 else
8225 align = GET_MODE_ALIGNMENT (mode);
8226 if (align < PARM_BOUNDARY)
8227 align = PARM_BOUNDARY;
8228 else
8230 static bool warned;
8231 unsigned int saved_align = align;
8233 if (!TARGET_64BIT)
8235 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8236 if (!type)
8238 if (mode == XFmode || mode == XCmode)
8239 align = PARM_BOUNDARY;
8241 else if (!ix86_contains_aligned_value_p (type))
8242 align = PARM_BOUNDARY;
8244 if (align < 128)
8245 align = PARM_BOUNDARY;
8248 if (warn_psabi
8249 && !warned
8250 && align != ix86_compat_function_arg_boundary (mode, type,
8251 saved_align))
8253 warned = true;
8254 inform (input_location,
8255 "The ABI for passing parameters with %d-byte"
8256 " alignment has changed in GCC 4.6",
8257 align / BITS_PER_UNIT);
8261 return align;
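/* Illustrative example (not part of the original GCC source): on ia32 a
   by-value argument whose type carries extra alignment, e.g.

     struct aligned_arg { int i; } __attribute__ ((aligned (16)));

   can get a boundary here that differs from what GCC releases before 4.6
   used; with -Wpsabi the note emitted above points that out.  */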
8264 /* Return true if N is a possible register number of function value. */
8266 static bool
8267 ix86_function_value_regno_p (const unsigned int regno)
8269 switch (regno)
8271 case AX_REG:
8272 return true;
8273 case DX_REG:
8274 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8275 case DI_REG:
8276 case SI_REG:
8277 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8279 case BND0_REG:
8280 case BND1_REG:
8281 return chkp_function_instrumented_p (current_function_decl);
8283 /* Complex values are returned in %st(0)/%st(1) pair. */
8284 case ST0_REG:
8285 case ST1_REG:
8286 /* TODO: The function should depend on current function ABI but
8287 builtins.c would need updating then. Therefore we use the
8288 default ABI. */
8289 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8290 return false;
8291 return TARGET_FLOAT_RETURNS_IN_80387;
8293 /* Complex values are returned in %xmm0/%xmm1 pair. */
8294 case XMM0_REG:
8295 case XMM1_REG:
8296 return TARGET_SSE;
8298 case MM0_REG:
8299 if (TARGET_MACHO || TARGET_64BIT)
8300 return false;
8301 return TARGET_MMX;
8304 return false;
8307 /* Define how to find the value returned by a function.
8308 VALTYPE is the data type of the value (as a tree).
8309 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8310 otherwise, FUNC is 0. */
8312 static rtx
8313 function_value_32 (machine_mode orig_mode, machine_mode mode,
8314 const_tree fntype, const_tree fn)
8316 unsigned int regno;
8318 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8319 we normally prevent this case when mmx is not available. However
8320 some ABIs may require the result to be returned like DImode. */
8321 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8322 regno = FIRST_MMX_REG;
8324 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8325 we prevent this case when sse is not available. However some ABIs
8326 may require the result to be returned like integer TImode. */
8327 else if (mode == TImode
8328 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8329 regno = FIRST_SSE_REG;
8331 /* 32-byte vector modes in %ymm0. */
8332 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8333 regno = FIRST_SSE_REG;
8335 /* 64-byte vector modes in %zmm0. */
8336 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8337 regno = FIRST_SSE_REG;
8339 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8340 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8341 regno = FIRST_FLOAT_REG;
8342 else
8343 /* Most things go in %eax. */
8344 regno = AX_REG;
8346 /* Override FP return register with %xmm0 for local functions when
8347 SSE math is enabled or for functions with sseregparm attribute. */
8348 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8350 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8351 if (sse_level == -1)
8353 error ("calling %qD with SSE caling convention without "
8354 "SSE/SSE2 enabled", fn);
8355 sorry ("this is a GCC bug that can be worked around by adding "
8356 "attribute used to function called");
8358 else if ((sse_level >= 1 && mode == SFmode)
8359 || (sse_level == 2 && mode == DFmode))
8360 regno = FIRST_SSE_REG;
8363 /* OImode shouldn't be used directly. */
8364 gcc_assert (mode != OImode);
8366 return gen_rtx_REG (orig_mode, regno);
8369 static rtx
8370 function_value_64 (machine_mode orig_mode, machine_mode mode,
8371 const_tree valtype)
8373 rtx ret;
8375 /* Handle libcalls, which don't provide a type node. */
8376 if (valtype == NULL)
8378 unsigned int regno;
8380 switch (mode)
8382 case SFmode:
8383 case SCmode:
8384 case DFmode:
8385 case DCmode:
8386 case TFmode:
8387 case SDmode:
8388 case DDmode:
8389 case TDmode:
8390 regno = FIRST_SSE_REG;
8391 break;
8392 case XFmode:
8393 case XCmode:
8394 regno = FIRST_FLOAT_REG;
8395 break;
8396 case TCmode:
8397 return NULL;
8398 default:
8399 regno = AX_REG;
8402 return gen_rtx_REG (mode, regno);
8404 else if (POINTER_TYPE_P (valtype))
8406 /* Pointers are always returned in word_mode. */
8407 mode = word_mode;
8410 ret = construct_container (mode, orig_mode, valtype, 1,
8411 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8412 x86_64_int_return_registers, 0);
8414 /* For zero-sized structures, construct_container returns NULL, but we
8415 need to keep the rest of the compiler happy by returning a meaningful value. */
8416 if (!ret)
8417 ret = gen_rtx_REG (orig_mode, AX_REG);
8419 return ret;
8422 static rtx
8423 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8424 const_tree valtype)
8426 unsigned int regno = AX_REG;
8428 if (TARGET_SSE)
8430 switch (GET_MODE_SIZE (mode))
8432 case 16:
8433 if (valtype != NULL_TREE
8434 && !VECTOR_INTEGER_TYPE_P (valtype)
8435 && !VECTOR_INTEGER_TYPE_P (valtype)
8436 && !INTEGRAL_TYPE_P (valtype)
8437 && !VECTOR_FLOAT_TYPE_P (valtype))
8438 break;
8439 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8440 && !COMPLEX_MODE_P (mode))
8441 regno = FIRST_SSE_REG;
8442 break;
8443 case 8:
8444 case 4:
8445 if (mode == SFmode || mode == DFmode)
8446 regno = FIRST_SSE_REG;
8447 break;
8448 default:
8449 break;
8452 return gen_rtx_REG (orig_mode, regno);
8455 static rtx
8456 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8457 machine_mode orig_mode, machine_mode mode)
8459 const_tree fn, fntype;
8461 fn = NULL_TREE;
8462 if (fntype_or_decl && DECL_P (fntype_or_decl))
8463 fn = fntype_or_decl;
8464 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8466 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8467 || POINTER_BOUNDS_MODE_P (mode))
8468 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8469 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8470 return function_value_ms_64 (orig_mode, mode, valtype);
8471 else if (TARGET_64BIT)
8472 return function_value_64 (orig_mode, mode, valtype);
8473 else
8474 return function_value_32 (orig_mode, mode, fntype, fn);
8477 static rtx
8478 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8480 machine_mode mode, orig_mode;
8482 orig_mode = TYPE_MODE (valtype);
8483 mode = type_natural_mode (valtype, NULL, true);
8484 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8487 /* Return an RTX representing a place where a function returns
8488 or receives pointer bounds, or NULL if no bounds are returned.
8490 VALTYPE is a data type of a value returned by the function.
8492 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8493 or FUNCTION_TYPE of the function.
8495 If OUTGOING is false, return a place in which the caller will
8496 see the return value. Otherwise, return a place where a
8497 function returns a value. */
8499 static rtx
8500 ix86_function_value_bounds (const_tree valtype,
8501 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8502 bool outgoing ATTRIBUTE_UNUSED)
8504 rtx res = NULL_RTX;
8506 if (BOUNDED_TYPE_P (valtype))
8507 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8508 else if (chkp_type_has_pointer (valtype))
8510 bitmap slots;
8511 rtx bounds[2];
8512 bitmap_iterator bi;
8513 unsigned i, bnd_no = 0;
8515 bitmap_obstack_initialize (NULL);
8516 slots = BITMAP_ALLOC (NULL);
8517 chkp_find_bound_slots (valtype, slots);
8519 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8521 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8522 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8523 gcc_assert (bnd_no < 2);
8524 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8527 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8529 BITMAP_FREE (slots);
8530 bitmap_obstack_release (NULL);
8532 else
8533 res = NULL_RTX;
8535 return res;
8538 /* Pointer function arguments and return values are promoted to
8539 word_mode. */
8541 static machine_mode
8542 ix86_promote_function_mode (const_tree type, machine_mode mode,
8543 int *punsignedp, const_tree fntype,
8544 int for_return)
8546 if (type != NULL_TREE && POINTER_TYPE_P (type))
8548 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8549 return word_mode;
8551 return default_promote_function_mode (type, mode, punsignedp, fntype,
8552 for_return);
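/* For example, with -mx32 a pointer argument or return value has SImode
   but travels in a full 64-bit register: the hook above reports word_mode
   (DImode) together with POINTERS_EXTEND_UNSIGNED, so caller and callee
   agree that the upper 32 bits are zero.  */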
8555 /* Return true if a structure, union or array with MODE containing FIELD
8556 should be accessed using BLKmode. */
8558 static bool
8559 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8561 /* Union with XFmode must be in BLKmode. */
8562 return (mode == XFmode
8563 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8564 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8568 ix86_libcall_value (machine_mode mode)
8570 return ix86_function_value_1 (NULL, NULL, mode, mode);
8573 /* Return true iff type is returned in memory. */
8575 static bool
8576 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8578 #ifdef SUBTARGET_RETURN_IN_MEMORY
8579 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8580 #else
8581 const machine_mode mode = type_natural_mode (type, NULL, true);
8582 HOST_WIDE_INT size;
8584 if (POINTER_BOUNDS_TYPE_P (type))
8585 return false;
8587 if (TARGET_64BIT)
8589 if (ix86_function_type_abi (fntype) == MS_ABI)
8591 size = int_size_in_bytes (type);
8593 /* __m128 is returned in xmm0. */
8594 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8595 || INTEGRAL_TYPE_P (type)
8596 || VECTOR_FLOAT_TYPE_P (type))
8597 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8598 && !COMPLEX_MODE_P (mode)
8599 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8600 return false;
8602 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
8603 return size != 1 && size != 2 && size != 4 && size != 8;
8605 else
8607 int needed_intregs, needed_sseregs;
8609 return examine_argument (mode, type, 1,
8610 &needed_intregs, &needed_sseregs);
8613 else
8615 size = int_size_in_bytes (type);
8617 /* Intel MCU psABI returns scalars and aggregates no larger than 8
8618 bytes in registers. */
8619 if (TARGET_IAMCU)
8620 return size > 8;
8622 if (mode == BLKmode)
8623 return true;
8625 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8626 return false;
8628 if (VECTOR_MODE_P (mode) || mode == TImode)
8630 /* User-created vectors small enough to fit in EAX. */
8631 if (size < 8)
8632 return false;
8634 /* Unless the ABI prescribes otherwise,
8635 MMX/3dNow values are returned in MM0 if available. */
8637 if (size == 8)
8638 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8640 /* SSE values are returned in XMM0 if available. */
8641 if (size == 16)
8642 return !TARGET_SSE;
8644 /* AVX values are returned in YMM0 if available. */
8645 if (size == 32)
8646 return !TARGET_AVX;
8648 /* AVX512F values are returned in ZMM0 if available. */
8649 if (size == 64)
8650 return !TARGET_AVX512F;
8653 if (mode == XFmode)
8654 return false;
8656 if (size > 12)
8657 return true;
8659 /* OImode shouldn't be used directly. */
8660 gcc_assert (mode != OImode);
8662 return false;
8664 #endif
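/* A couple of concrete cases of the 32-bit logic above: under the Intel
   MCU psABI any scalar or aggregate of at most 8 bytes comes back in
   registers, while a 16-byte vector on plain IA-32 is returned in %xmm0
   only when SSE is enabled and goes to memory otherwise.  */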
8668 /* Create the va_list data type. */
8670 /* Returns the calling convention specific va_list data type.
8671 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8673 static tree
8674 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8676 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8678 /* For i386 we use plain pointer to argument area. */
8679 if (!TARGET_64BIT || abi == MS_ABI)
8680 return build_pointer_type (char_type_node);
8682 record = lang_hooks.types.make_type (RECORD_TYPE);
8683 type_decl = build_decl (BUILTINS_LOCATION,
8684 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8686 f_gpr = build_decl (BUILTINS_LOCATION,
8687 FIELD_DECL, get_identifier ("gp_offset"),
8688 unsigned_type_node);
8689 f_fpr = build_decl (BUILTINS_LOCATION,
8690 FIELD_DECL, get_identifier ("fp_offset"),
8691 unsigned_type_node);
8692 f_ovf = build_decl (BUILTINS_LOCATION,
8693 FIELD_DECL, get_identifier ("overflow_arg_area"),
8694 ptr_type_node);
8695 f_sav = build_decl (BUILTINS_LOCATION,
8696 FIELD_DECL, get_identifier ("reg_save_area"),
8697 ptr_type_node);
8699 va_list_gpr_counter_field = f_gpr;
8700 va_list_fpr_counter_field = f_fpr;
8702 DECL_FIELD_CONTEXT (f_gpr) = record;
8703 DECL_FIELD_CONTEXT (f_fpr) = record;
8704 DECL_FIELD_CONTEXT (f_ovf) = record;
8705 DECL_FIELD_CONTEXT (f_sav) = record;
8707 TYPE_STUB_DECL (record) = type_decl;
8708 TYPE_NAME (record) = type_decl;
8709 TYPE_FIELDS (record) = f_gpr;
8710 DECL_CHAIN (f_gpr) = f_fpr;
8711 DECL_CHAIN (f_fpr) = f_ovf;
8712 DECL_CHAIN (f_ovf) = f_sav;
8714 layout_type (record);
8716 /* The correct type is an array type of one element. */
8717 return build_array_type (record, build_index_type (size_zero_node));
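/* The record built above matches the va_list layout mandated by the
   SysV AMD64 psABI; in C terms it is roughly:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/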
8720 /* Set up the builtin va_list data type and, for 64-bit, the additional
8721 calling convention specific va_list data types. */
8723 static tree
8724 ix86_build_builtin_va_list (void)
8726 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8728 /* Initialize abi specific va_list builtin types. */
8729 if (TARGET_64BIT)
8731 tree t;
8732 if (ix86_abi == MS_ABI)
8734 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8735 if (TREE_CODE (t) != RECORD_TYPE)
8736 t = build_variant_type_copy (t);
8737 sysv_va_list_type_node = t;
8739 else
8741 t = ret;
8742 if (TREE_CODE (t) != RECORD_TYPE)
8743 t = build_variant_type_copy (t);
8744 sysv_va_list_type_node = t;
8746 if (ix86_abi != MS_ABI)
8748 t = ix86_build_builtin_va_list_abi (MS_ABI);
8749 if (TREE_CODE (t) != RECORD_TYPE)
8750 t = build_variant_type_copy (t);
8751 ms_va_list_type_node = t;
8753 else
8755 t = ret;
8756 if (TREE_CODE (t) != RECORD_TYPE)
8757 t = build_variant_type_copy (t);
8758 ms_va_list_type_node = t;
8762 return ret;
8765 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8767 static void
8768 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8770 rtx save_area, mem;
8771 alias_set_type set;
8772 int i, max;
8774 /* GPR size of varargs save area. */
8775 if (cfun->va_list_gpr_size)
8776 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8777 else
8778 ix86_varargs_gpr_size = 0;
8780 /* FPR size of varargs save area. We don't need it if we don't pass
8781 anything in SSE registers. */
8782 if (TARGET_SSE && cfun->va_list_fpr_size)
8783 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8784 else
8785 ix86_varargs_fpr_size = 0;
8787 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8788 return;
8790 save_area = frame_pointer_rtx;
8791 set = get_varargs_alias_set ();
8793 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8794 if (max > X86_64_REGPARM_MAX)
8795 max = X86_64_REGPARM_MAX;
8797 for (i = cum->regno; i < max; i++)
8799 mem = gen_rtx_MEM (word_mode,
8800 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8801 MEM_NOTRAP_P (mem) = 1;
8802 set_mem_alias_set (mem, set);
8803 emit_move_insn (mem,
8804 gen_rtx_REG (word_mode,
8805 x86_64_int_parameter_registers[i]));
8808 if (ix86_varargs_fpr_size)
8810 machine_mode smode;
8811 rtx_code_label *label;
8812 rtx test;
8814 /* Now emit code to save SSE registers. The AX parameter contains number
8815 of SSE parameter registers used to call this function, though all we
8816 actually check here is the zero/non-zero status. */
8818 label = gen_label_rtx ();
8819 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8820 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8821 label));
8823 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8824 we used movdqa (i.e. TImode) instead? Perhaps even better would
8825 be if we could determine the real mode of the data, via a hook
8826 into pass_stdarg. Ignore all that for now. */
8827 smode = V4SFmode;
8828 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8829 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8831 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8832 if (max > X86_64_SSE_REGPARM_MAX)
8833 max = X86_64_SSE_REGPARM_MAX;
8835 for (i = cum->sse_regno; i < max; ++i)
8837 mem = plus_constant (Pmode, save_area,
8838 i * 16 + ix86_varargs_gpr_size);
8839 mem = gen_rtx_MEM (smode, mem);
8840 MEM_NOTRAP_P (mem) = 1;
8841 set_mem_alias_set (mem, set);
8842 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8844 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8847 emit_label (label);
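/* Sketch of the register save area written above (sizes follow from
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8):

     bytes   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
     bytes  48 .. 175   %xmm0 .. %xmm7                      (16 bytes each)

   The SSE half is skipped at run time when %al is zero, which is what
   the conditional branch on AX_REG above implements.  */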
8851 static void
8852 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8854 alias_set_type set = get_varargs_alias_set ();
8855 int i;
8857 /* Reset to zero, as there might have been a sysv va_arg used
8858 before. */
8859 ix86_varargs_gpr_size = 0;
8860 ix86_varargs_fpr_size = 0;
8862 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8864 rtx reg, mem;
8866 mem = gen_rtx_MEM (Pmode,
8867 plus_constant (Pmode, virtual_incoming_args_rtx,
8868 i * UNITS_PER_WORD));
8869 MEM_NOTRAP_P (mem) = 1;
8870 set_mem_alias_set (mem, set);
8872 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8873 emit_move_insn (mem, reg);
8877 static void
8878 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8879 tree type, int *, int no_rtl)
8881 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8882 CUMULATIVE_ARGS next_cum;
8883 tree fntype;
8885 /* This argument doesn't appear to be used anymore. Which is good,
8886 because the old code here didn't suppress rtl generation. */
8887 gcc_assert (!no_rtl);
8889 if (!TARGET_64BIT)
8890 return;
8892 fntype = TREE_TYPE (current_function_decl);
8894 /* For varargs, we do not want to skip the dummy va_dcl argument.
8895 For stdargs, we do want to skip the last named argument. */
8896 next_cum = *cum;
8897 if (stdarg_p (fntype))
8898 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8899 true);
8901 if (cum->call_abi == MS_ABI)
8902 setup_incoming_varargs_ms_64 (&next_cum);
8903 else
8904 setup_incoming_varargs_64 (&next_cum);
8907 static void
8908 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8909 enum machine_mode mode,
8910 tree type,
8911 int *pretend_size ATTRIBUTE_UNUSED,
8912 int no_rtl)
8914 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8915 CUMULATIVE_ARGS next_cum;
8916 tree fntype;
8917 rtx save_area;
8918 int bnd_reg, i, max;
8920 gcc_assert (!no_rtl);
8922 /* Do nothing if we use plain pointer to argument area. */
8923 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8924 return;
8926 fntype = TREE_TYPE (current_function_decl);
8928 /* For varargs, we do not want to skip the dummy va_dcl argument.
8929 For stdargs, we do want to skip the last named argument. */
8930 next_cum = *cum;
8931 if (stdarg_p (fntype))
8932 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8933 true);
8934 save_area = frame_pointer_rtx;
8936 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8937 if (max > X86_64_REGPARM_MAX)
8938 max = X86_64_REGPARM_MAX;
8940 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8941 if (chkp_function_instrumented_p (current_function_decl))
8942 for (i = cum->regno; i < max; i++)
8944 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8945 rtx ptr = gen_rtx_REG (Pmode,
8946 x86_64_int_parameter_registers[i]);
8947 rtx bounds;
8949 if (bnd_reg <= LAST_BND_REG)
8950 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8951 else
8953 rtx ldx_addr =
8954 plus_constant (Pmode, arg_pointer_rtx,
8955 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8956 bounds = gen_reg_rtx (BNDmode);
8957 emit_insn (BNDmode == BND64mode
8958 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8959 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8962 emit_insn (BNDmode == BND64mode
8963 ? gen_bnd64_stx (addr, ptr, bounds)
8964 : gen_bnd32_stx (addr, ptr, bounds));
8966 bnd_reg++;
8971 /* Check whether TYPE is a va_list type that is just a plain char pointer. */
8973 static bool
8974 is_va_list_char_pointer (tree type)
8976 tree canonic;
8978 /* For 32-bit it is always true. */
8979 if (!TARGET_64BIT)
8980 return true;
8981 canonic = ix86_canonical_va_list_type (type);
8982 return (canonic == ms_va_list_type_node
8983 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8986 /* Implement va_start. */
8988 static void
8989 ix86_va_start (tree valist, rtx nextarg)
8991 HOST_WIDE_INT words, n_gpr, n_fpr;
8992 tree f_gpr, f_fpr, f_ovf, f_sav;
8993 tree gpr, fpr, ovf, sav, t;
8994 tree type;
8995 rtx ovf_rtx;
8997 if (flag_split_stack
8998 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9000 unsigned int scratch_regno;
9002 /* When we are splitting the stack, we can't refer to the stack
9003 arguments using internal_arg_pointer, because they may be on
9004 the old stack. The split stack prologue will arrange to
9005 leave a pointer to the old stack arguments in a scratch
9006 register, which we here copy to a pseudo-register. The split
9007 stack prologue can't set the pseudo-register directly because
9008 it (the prologue) runs before any registers have been saved. */
9010 scratch_regno = split_stack_prologue_scratch_regno ();
9011 if (scratch_regno != INVALID_REGNUM)
9013 rtx reg;
9014 rtx_insn *seq;
9016 reg = gen_reg_rtx (Pmode);
9017 cfun->machine->split_stack_varargs_pointer = reg;
9019 start_sequence ();
9020 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9021 seq = get_insns ();
9022 end_sequence ();
9024 push_topmost_sequence ();
9025 emit_insn_after (seq, entry_of_function ());
9026 pop_topmost_sequence ();
9030 /* Only 64bit target needs something special. */
9031 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9033 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9034 std_expand_builtin_va_start (valist, nextarg);
9035 else
9037 rtx va_r, next;
9039 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9040 next = expand_binop (ptr_mode, add_optab,
9041 cfun->machine->split_stack_varargs_pointer,
9042 crtl->args.arg_offset_rtx,
9043 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9044 convert_move (va_r, next, 0);
9046 /* Store zero bounds for va_list. */
9047 if (chkp_function_instrumented_p (current_function_decl))
9048 chkp_expand_bounds_reset_for_mem (valist,
9049 make_tree (TREE_TYPE (valist),
9050 next));
9053 return;
9056 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9057 f_fpr = DECL_CHAIN (f_gpr);
9058 f_ovf = DECL_CHAIN (f_fpr);
9059 f_sav = DECL_CHAIN (f_ovf);
9061 valist = build_simple_mem_ref (valist);
9062 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9063 /* The following should be folded into the MEM_REF offset. */
9064 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9065 f_gpr, NULL_TREE);
9066 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9067 f_fpr, NULL_TREE);
9068 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9069 f_ovf, NULL_TREE);
9070 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9071 f_sav, NULL_TREE);
9073 /* Count number of gp and fp argument registers used. */
9074 words = crtl->args.info.words;
9075 n_gpr = crtl->args.info.regno;
9076 n_fpr = crtl->args.info.sse_regno;
9078 if (cfun->va_list_gpr_size)
9080 type = TREE_TYPE (gpr);
9081 t = build2 (MODIFY_EXPR, type,
9082 gpr, build_int_cst (type, n_gpr * 8));
9083 TREE_SIDE_EFFECTS (t) = 1;
9084 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9087 if (TARGET_SSE && cfun->va_list_fpr_size)
9089 type = TREE_TYPE (fpr);
9090 t = build2 (MODIFY_EXPR, type, fpr,
9091 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9092 TREE_SIDE_EFFECTS (t) = 1;
9093 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9096 /* Find the overflow area. */
9097 type = TREE_TYPE (ovf);
9098 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9099 ovf_rtx = crtl->args.internal_arg_pointer;
9100 else
9101 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9102 t = make_tree (type, ovf_rtx);
9103 if (words != 0)
9104 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9106 /* Store zero bounds for overflow area pointer. */
9107 if (chkp_function_instrumented_p (current_function_decl))
9108 chkp_expand_bounds_reset_for_mem (ovf, t);
9110 t = build2 (MODIFY_EXPR, type, ovf, t);
9111 TREE_SIDE_EFFECTS (t) = 1;
9112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9114 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9116 /* Find the register save area.
9117 The prologue of the function saves it right above the stack frame. */
9118 type = TREE_TYPE (sav);
9119 t = make_tree (type, frame_pointer_rtx);
9120 if (!ix86_varargs_gpr_size)
9121 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9123 /* Store zero bounds for save area pointer. */
9124 if (chkp_function_instrumented_p (current_function_decl))
9125 chkp_expand_bounds_reset_for_mem (sav, t);
9127 t = build2 (MODIFY_EXPR, type, sav, t);
9128 TREE_SIDE_EFFECTS (t) = 1;
9129 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
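/* Example of the va_start expansion above for

     int sum (int n, ...)

   where one integer register is consumed by the named argument:
   gp_offset is initialized to 1 * 8 = 8, fp_offset to 0 * 16 + 48 = 48,
   overflow_arg_area points at the incoming stack arguments, and
   reg_save_area at the block stored by setup_incoming_varargs_64.  */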
9133 /* Implement va_arg. */
9135 static tree
9136 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9137 gimple_seq *post_p)
9139 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9140 tree f_gpr, f_fpr, f_ovf, f_sav;
9141 tree gpr, fpr, ovf, sav, t;
9142 int size, rsize;
9143 tree lab_false, lab_over = NULL_TREE;
9144 tree addr, t2;
9145 rtx container;
9146 int indirect_p = 0;
9147 tree ptrtype;
9148 machine_mode nat_mode;
9149 unsigned int arg_boundary;
9151 /* Only 64bit target needs something special. */
9152 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9153 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9155 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9156 f_fpr = DECL_CHAIN (f_gpr);
9157 f_ovf = DECL_CHAIN (f_fpr);
9158 f_sav = DECL_CHAIN (f_ovf);
9160 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9161 valist, f_gpr, NULL_TREE);
9163 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9164 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9165 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9167 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9168 if (indirect_p)
9169 type = build_pointer_type (type);
9170 size = int_size_in_bytes (type);
9171 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9173 nat_mode = type_natural_mode (type, NULL, false);
9174 switch (nat_mode)
9176 case V8SFmode:
9177 case V8SImode:
9178 case V32QImode:
9179 case V16HImode:
9180 case V4DFmode:
9181 case V4DImode:
9182 case V16SFmode:
9183 case V16SImode:
9184 case V64QImode:
9185 case V32HImode:
9186 case V8DFmode:
9187 case V8DImode:
9188 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9189 if (!TARGET_64BIT_MS_ABI)
9191 container = NULL;
9192 break;
9195 default:
9196 container = construct_container (nat_mode, TYPE_MODE (type),
9197 type, 0, X86_64_REGPARM_MAX,
9198 X86_64_SSE_REGPARM_MAX, intreg,
9200 break;
9203 /* Pull the value out of the saved registers. */
9205 addr = create_tmp_var (ptr_type_node, "addr");
9207 if (container)
9209 int needed_intregs, needed_sseregs;
9210 bool need_temp;
9211 tree int_addr, sse_addr;
9213 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9214 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9216 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9218 need_temp = (!REG_P (container)
9219 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9220 || TYPE_ALIGN (type) > 128));
9222 /* In case we are passing structure, verify that it is consecutive block
9223 on the register save area. If not we need to do moves. */
9224 if (!need_temp && !REG_P (container))
9226 /* Verify that all registers are strictly consecutive */
9227 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9229 int i;
9231 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9233 rtx slot = XVECEXP (container, 0, i);
9234 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9235 || INTVAL (XEXP (slot, 1)) != i * 16)
9236 need_temp = true;
9239 else
9241 int i;
9243 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9245 rtx slot = XVECEXP (container, 0, i);
9246 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9247 || INTVAL (XEXP (slot, 1)) != i * 8)
9248 need_temp = true;
9252 if (!need_temp)
9254 int_addr = addr;
9255 sse_addr = addr;
9257 else
9259 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9260 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9263 /* First ensure that we fit completely in registers. */
9264 if (needed_intregs)
9266 t = build_int_cst (TREE_TYPE (gpr),
9267 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9268 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9269 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9270 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9271 gimplify_and_add (t, pre_p);
9273 if (needed_sseregs)
9275 t = build_int_cst (TREE_TYPE (fpr),
9276 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9277 + X86_64_REGPARM_MAX * 8);
9278 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9279 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9280 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9281 gimplify_and_add (t, pre_p);
9284 /* Compute index to start of area used for integer regs. */
9285 if (needed_intregs)
9287 /* int_addr = gpr + sav; */
9288 t = fold_build_pointer_plus (sav, gpr);
9289 gimplify_assign (int_addr, t, pre_p);
9291 if (needed_sseregs)
9293 /* sse_addr = fpr + sav; */
9294 t = fold_build_pointer_plus (sav, fpr);
9295 gimplify_assign (sse_addr, t, pre_p);
9297 if (need_temp)
9299 int i, prev_size = 0;
9300 tree temp = create_tmp_var (type, "va_arg_tmp");
9302 /* addr = &temp; */
9303 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9304 gimplify_assign (addr, t, pre_p);
9306 for (i = 0; i < XVECLEN (container, 0); i++)
9308 rtx slot = XVECEXP (container, 0, i);
9309 rtx reg = XEXP (slot, 0);
9310 machine_mode mode = GET_MODE (reg);
9311 tree piece_type;
9312 tree addr_type;
9313 tree daddr_type;
9314 tree src_addr, src;
9315 int src_offset;
9316 tree dest_addr, dest;
9317 int cur_size = GET_MODE_SIZE (mode);
9319 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9320 prev_size = INTVAL (XEXP (slot, 1));
9321 if (prev_size + cur_size > size)
9323 cur_size = size - prev_size;
9324 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9325 if (mode == BLKmode)
9326 mode = QImode;
9328 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9329 if (mode == GET_MODE (reg))
9330 addr_type = build_pointer_type (piece_type);
9331 else
9332 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9333 true);
9334 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9335 true);
9337 if (SSE_REGNO_P (REGNO (reg)))
9339 src_addr = sse_addr;
9340 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9342 else
9344 src_addr = int_addr;
9345 src_offset = REGNO (reg) * 8;
9347 src_addr = fold_convert (addr_type, src_addr);
9348 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9350 dest_addr = fold_convert (daddr_type, addr);
9351 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9352 if (cur_size == GET_MODE_SIZE (mode))
9354 src = build_va_arg_indirect_ref (src_addr);
9355 dest = build_va_arg_indirect_ref (dest_addr);
9357 gimplify_assign (dest, src, pre_p);
9359 else
9361 tree copy
9362 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9363 3, dest_addr, src_addr,
9364 size_int (cur_size));
9365 gimplify_and_add (copy, pre_p);
9367 prev_size += cur_size;
9371 if (needed_intregs)
9373 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9374 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9375 gimplify_assign (gpr, t, pre_p);
9378 if (needed_sseregs)
9380 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9381 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9382 gimplify_assign (unshare_expr (fpr), t, pre_p);
9385 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9387 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9390 /* ... otherwise out of the overflow area. */
9392 /* When the caller aligns a parameter on the stack, an alignment
9393 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9394 MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
9395 here with the caller. */
9396 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9397 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9398 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9400 /* Care for on-stack alignment if needed. */
9401 if (arg_boundary <= 64 || size == 0)
9402 t = ovf;
9403 else
9405 HOST_WIDE_INT align = arg_boundary / 8;
9406 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9407 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9408 build_int_cst (TREE_TYPE (t), -align));
9411 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9412 gimplify_assign (addr, t, pre_p);
9414 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9415 gimplify_assign (unshare_expr (ovf), t, pre_p);
9417 if (container)
9418 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9420 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9421 addr = fold_convert (ptrtype, addr);
9423 if (indirect_p)
9424 addr = build_va_arg_indirect_ref (addr);
9425 return build_va_arg_indirect_ref (addr);
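/* Rough shape of the code gimplified above for va_arg (ap, int), as a
   C-level sketch (the names are illustrative only):

     if (ap->gp_offset >= 6 * 8)
       goto overflow;                           // lab_false
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;                                 // lab_over
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 8;
   done:
     result = *(int *) addr;
*/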
9428 /* Return true if OPNUM's MEM should be matched
9429 in movabs* patterns. */
9431 bool
9432 ix86_check_movabs (rtx insn, int opnum)
9434 rtx set, mem;
9436 set = PATTERN (insn);
9437 if (GET_CODE (set) == PARALLEL)
9438 set = XVECEXP (set, 0, 0);
9439 gcc_assert (GET_CODE (set) == SET);
9440 mem = XEXP (set, opnum);
9441 while (GET_CODE (mem) == SUBREG)
9442 mem = SUBREG_REG (mem);
9443 gcc_assert (MEM_P (mem));
9444 return volatile_ok || !MEM_VOLATILE_P (mem);
9447 /* Initialize the table of extra 80387 mathematical constants. */
9449 static void
9450 init_ext_80387_constants (void)
9452 static const char * cst[5] =
9454 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9455 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9456 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9457 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9458 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9460 int i;
9462 for (i = 0; i < 5; i++)
9464 real_from_string (&ext_80387_constants_table[i], cst[i]);
9465 /* Ensure each constant is rounded to XFmode precision. */
9466 real_convert (&ext_80387_constants_table[i],
9467 XFmode, &ext_80387_constants_table[i]);
9470 ext_80387_constants_init = 1;
9473 /* Return non-zero if the constant is something that
9474 can be loaded with a special instruction. */
9477 standard_80387_constant_p (rtx x)
9479 machine_mode mode = GET_MODE (x);
9481 REAL_VALUE_TYPE r;
9483 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9484 return -1;
9486 if (x == CONST0_RTX (mode))
9487 return 1;
9488 if (x == CONST1_RTX (mode))
9489 return 2;
9491 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9493 /* For XFmode constants, try to find a special 80387 instruction when
9494 optimizing for size or on those CPUs that benefit from them. */
9495 if (mode == XFmode
9496 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9498 int i;
9500 if (! ext_80387_constants_init)
9501 init_ext_80387_constants ();
9503 for (i = 0; i < 5; i++)
9504 if (real_identical (&r, &ext_80387_constants_table[i]))
9505 return i + 3;
9508 /* Load of the constant -0.0 or -1.0 will be split as
9509 fldz;fchs or fld1;fchs sequence. */
9510 if (real_isnegzero (&r))
9511 return 8;
9512 if (real_identical (&r, &dconstm1))
9513 return 9;
9515 return 0;
9518 /* Return the opcode of the special instruction to be used to load
9519 the constant X. */
9521 const char *
9522 standard_80387_constant_opcode (rtx x)
9524 switch (standard_80387_constant_p (x))
9526 case 1:
9527 return "fldz";
9528 case 2:
9529 return "fld1";
9530 case 3:
9531 return "fldlg2";
9532 case 4:
9533 return "fldln2";
9534 case 5:
9535 return "fldl2e";
9536 case 6:
9537 return "fldl2t";
9538 case 7:
9539 return "fldpi";
9540 case 8:
9541 case 9:
9542 return "#";
9543 default:
9544 gcc_unreachable ();
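/* The mapping implemented by the two functions above, in one place
   (the value returned by standard_80387_constant_p on the left):

     1 -> fldz    (+0.0)          5 -> fldl2e  (log2 e)
     2 -> fld1    (+1.0)          6 -> fldl2t  (log2 10)
     3 -> fldlg2  (log10 2)       7 -> fldpi   (pi)
     4 -> fldln2  (ln 2)          8/9 -> "#", split into fldz/fld1 + fchs
*/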
9548 /* Return the CONST_DOUBLE representing the 80387 constant that is
9549 loaded by the specified special instruction. The argument IDX
9550 matches the return value from standard_80387_constant_p. */
9553 standard_80387_constant_rtx (int idx)
9555 int i;
9557 if (! ext_80387_constants_init)
9558 init_ext_80387_constants ();
9560 switch (idx)
9562 case 3:
9563 case 4:
9564 case 5:
9565 case 6:
9566 case 7:
9567 i = idx - 3;
9568 break;
9570 default:
9571 gcc_unreachable ();
9574 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9575 XFmode);
9578 /* Return 1 if X is all 0s and 2 if X is all 1s
9579 in a supported SSE/AVX vector mode. */
9582 standard_sse_constant_p (rtx x)
9584 machine_mode mode;
9586 if (!TARGET_SSE)
9587 return 0;
9589 mode = GET_MODE (x);
9591 if (x == const0_rtx || x == CONST0_RTX (mode))
9592 return 1;
9593 if (vector_all_ones_operand (x, mode))
9594 switch (mode)
9596 case V16QImode:
9597 case V8HImode:
9598 case V4SImode:
9599 case V2DImode:
9600 if (TARGET_SSE2)
9601 return 2;
9602 case V32QImode:
9603 case V16HImode:
9604 case V8SImode:
9605 case V4DImode:
9606 if (TARGET_AVX2)
9607 return 2;
9608 case V64QImode:
9609 case V32HImode:
9610 case V16SImode:
9611 case V8DImode:
9612 if (TARGET_AVX512F)
9613 return 2;
9614 default:
9615 break;
9618 return 0;
9621 /* Return the opcode of the special instruction to be used to load
9622 the constant X. */
9624 const char *
9625 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9627 switch (standard_sse_constant_p (x))
9629 case 1:
9630 switch (get_attr_mode (insn))
9632 case MODE_XI:
9633 return "vpxord\t%g0, %g0, %g0";
9634 case MODE_V16SF:
9635 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9636 : "vpxord\t%g0, %g0, %g0";
9637 case MODE_V8DF:
9638 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9639 : "vpxorq\t%g0, %g0, %g0";
9640 case MODE_TI:
9641 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9642 : "%vpxor\t%0, %d0";
9643 case MODE_V2DF:
9644 return "%vxorpd\t%0, %d0";
9645 case MODE_V4SF:
9646 return "%vxorps\t%0, %d0";
9648 case MODE_OI:
9649 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9650 : "vpxor\t%x0, %x0, %x0";
9651 case MODE_V4DF:
9652 return "vxorpd\t%x0, %x0, %x0";
9653 case MODE_V8SF:
9654 return "vxorps\t%x0, %x0, %x0";
9656 default:
9657 break;
9660 case 2:
9661 if (TARGET_AVX512VL
9662 || get_attr_mode (insn) == MODE_XI
9663 || get_attr_mode (insn) == MODE_V8DF
9664 || get_attr_mode (insn) == MODE_V16SF)
9665 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9666 if (TARGET_AVX)
9667 return "vpcmpeqd\t%0, %0, %0";
9668 else
9669 return "pcmpeqd\t%0, %0";
9671 default:
9672 break;
9674 gcc_unreachable ();
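/* Example: an all-zero V4SFmode constant yields standard_sse_constant_p
   == 1 and is emitted as "xorps %xmm0, %xmm0" (or the VEX form "vxorps");
   an all-ones V4SImode constant yields 2 and, without AVX, is emitted as
   "pcmpeqd %xmm0, %xmm0".  The %xmm0 operand is just illustrative.  */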
9677 /* Returns true if OP contains a symbol reference */
9679 bool
9680 symbolic_reference_mentioned_p (rtx op)
9682 const char *fmt;
9683 int i;
9685 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9686 return true;
9688 fmt = GET_RTX_FORMAT (GET_CODE (op));
9689 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9691 if (fmt[i] == 'E')
9693 int j;
9695 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9696 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9697 return true;
9700 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9701 return true;
9704 return false;
9707 /* Return true if it is appropriate to emit `ret' instructions in the
9708 body of a function. Do this only if the epilogue is simple, needing a
9709 couple of insns. Prior to reloading, we can't tell how many registers
9710 must be saved, so return false then. Return false if there is no frame
9711 marker to de-allocate. */
9713 bool
9714 ix86_can_use_return_insn_p (void)
9716 struct ix86_frame frame;
9718 if (! reload_completed || frame_pointer_needed)
9719 return 0;
9721 /* Don't allow more than 32k pop, since that's all we can do
9722 with one instruction. */
9723 if (crtl->args.pops_args && crtl->args.size >= 32768)
9724 return 0;
9726 ix86_compute_frame_layout (&frame);
9727 return (frame.stack_pointer_offset == UNITS_PER_WORD
9728 && (frame.nregs + frame.nsseregs) == 0);
9731 /* Value should be nonzero if functions must have frame pointers.
9732 Zero means the frame pointer need not be set up (and parms may
9733 be accessed via the stack pointer) in functions that seem suitable. */
9735 static bool
9736 ix86_frame_pointer_required (void)
9738 /* If we accessed previous frames, then the generated code expects
9739 to be able to access the saved ebp value in our frame. */
9740 if (cfun->machine->accesses_prev_frame)
9741 return true;
9743 /* Several x86 OSes need a frame pointer for other reasons,
9744 usually pertaining to setjmp. */
9745 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9746 return true;
9748 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9749 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9750 return true;
9752 /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
9753 allocation is 4GB. */
9754 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9755 return true;
9757 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9758 turns off the frame pointer by default. Turn it back on now if
9759 we've not got a leaf function. */
9760 if (TARGET_OMIT_LEAF_FRAME_POINTER
9761 && (!crtl->is_leaf
9762 || ix86_current_function_calls_tls_descriptor))
9763 return true;
9765 if (crtl->profile && !flag_fentry)
9766 return true;
9768 return false;
9771 /* Record that the current function accesses previous call frames. */
9773 void
9774 ix86_setup_frame_addresses (void)
9776 cfun->machine->accesses_prev_frame = 1;
9779 #ifndef USE_HIDDEN_LINKONCE
9780 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9781 # define USE_HIDDEN_LINKONCE 1
9782 # else
9783 # define USE_HIDDEN_LINKONCE 0
9784 # endif
9785 #endif
9787 static int pic_labels_used;
9789 /* Fills in the label name that should be used for a pc thunk for
9790 the given register. */
9792 static void
9793 get_pc_thunk_name (char name[32], unsigned int regno)
9795 gcc_assert (!TARGET_64BIT);
9797 if (USE_HIDDEN_LINKONCE)
9798 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9799 else
9800 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9804 /* This function generates code for -fpic that loads %ebx with
9805 the return address of the caller and then returns. */
9807 static void
9808 ix86_code_end (void)
9810 rtx xops[2];
9811 int regno;
9813 for (regno = AX_REG; regno <= SP_REG; regno++)
9815 char name[32];
9816 tree decl;
9818 if (!(pic_labels_used & (1 << regno)))
9819 continue;
9821 get_pc_thunk_name (name, regno);
9823 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9824 get_identifier (name),
9825 build_function_type_list (void_type_node, NULL_TREE));
9826 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9827 NULL_TREE, void_type_node);
9828 TREE_PUBLIC (decl) = 1;
9829 TREE_STATIC (decl) = 1;
9830 DECL_IGNORED_P (decl) = 1;
9832 #if TARGET_MACHO
9833 if (TARGET_MACHO)
9835 switch_to_section (darwin_sections[text_coal_section]);
9836 fputs ("\t.weak_definition\t", asm_out_file);
9837 assemble_name (asm_out_file, name);
9838 fputs ("\n\t.private_extern\t", asm_out_file);
9839 assemble_name (asm_out_file, name);
9840 putc ('\n', asm_out_file);
9841 ASM_OUTPUT_LABEL (asm_out_file, name);
9842 DECL_WEAK (decl) = 1;
9844 else
9845 #endif
9846 if (USE_HIDDEN_LINKONCE)
9848 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9850 targetm.asm_out.unique_section (decl, 0);
9851 switch_to_section (get_named_section (decl, NULL, 0));
9853 targetm.asm_out.globalize_label (asm_out_file, name);
9854 fputs ("\t.hidden\t", asm_out_file);
9855 assemble_name (asm_out_file, name);
9856 putc ('\n', asm_out_file);
9857 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9859 else
9861 switch_to_section (text_section);
9862 ASM_OUTPUT_LABEL (asm_out_file, name);
9865 DECL_INITIAL (decl) = make_node (BLOCK);
9866 current_function_decl = decl;
9867 init_function_start (decl);
9868 first_function_block_is_cold = false;
9869 /* Make sure unwind info is emitted for the thunk if needed. */
9870 final_start_function (emit_barrier (), asm_out_file, 1);
9872 /* Pad stack IP move with 4 instructions (two NOPs count
9873 as one instruction). */
9874 if (TARGET_PAD_SHORT_FUNCTION)
9876 int i = 8;
9878 while (i--)
9879 fputs ("\tnop\n", asm_out_file);
9882 xops[0] = gen_rtx_REG (Pmode, regno);
9883 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9884 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9885 output_asm_insn ("%!ret", NULL);
9886 final_end_function ();
9887 init_insn_lengths ();
9888 free_after_compilation (cfun);
9889 set_cfun (NULL);
9890 current_function_decl = NULL;
9893 if (flag_split_stack)
9894 file_end_indicate_split_stack ();
9897 /* Emit code for the SET_GOT patterns. */
9899 const char *
9900 output_set_got (rtx dest, rtx label)
9902 rtx xops[3];
9904 xops[0] = dest;
9906 if (TARGET_VXWORKS_RTP && flag_pic)
9908 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9909 xops[2] = gen_rtx_MEM (Pmode,
9910 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9911 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9913 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9914 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9915 an unadorned address. */
9916 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9917 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9918 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9919 return "";
9922 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9924 if (!flag_pic)
9926 if (TARGET_MACHO)
9927 /* We don't need a pic base, we're not producing pic. */
9928 gcc_unreachable ();
9930 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9931 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9932 targetm.asm_out.internal_label (asm_out_file, "L",
9933 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9935 else
9937 char name[32];
9938 get_pc_thunk_name (name, REGNO (dest));
9939 pic_labels_used |= 1 << REGNO (dest);
9941 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9942 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9943 output_asm_insn ("%!call\t%X2", xops);
9945 #if TARGET_MACHO
9946 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9947 This is what will be referenced by the Mach-O PIC subsystem. */
9948 if (machopic_should_output_picbase_label () || !label)
9949 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9951 /* When we are restoring the pic base at the site of a nonlocal label,
9952 and we decided to emit the pic base above, we will still output a
9953 local label used for calculating the correction offset (even though
9954 the offset will be 0 in that case). */
9955 if (label)
9956 targetm.asm_out.internal_label (asm_out_file, "L",
9957 CODE_LABEL_NUMBER (label));
9958 #endif
9961 if (!TARGET_MACHO)
9962 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9964 return "";
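/* For the common 32-bit PIC case (no VxWorks RTP, flag_pic set) the
   sequence printed above to load %ebx is, in AT&T syntax:

     call  __x86.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body, emitted by ix86_code_end, is simply
   "movl (%esp), %ebx; ret".  */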
9967 /* Generate an "push" pattern for input ARG. */
9969 static rtx
9970 gen_push (rtx arg)
9972 struct machine_function *m = cfun->machine;
9974 if (m->fs.cfa_reg == stack_pointer_rtx)
9975 m->fs.cfa_offset += UNITS_PER_WORD;
9976 m->fs.sp_offset += UNITS_PER_WORD;
9978 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9979 arg = gen_rtx_REG (word_mode, REGNO (arg));
9981 return gen_rtx_SET (gen_rtx_MEM (word_mode,
9982 gen_rtx_PRE_DEC (Pmode,
9983 stack_pointer_rtx)),
9984 arg);
9987 /* Generate an "pop" pattern for input ARG. */
9989 static rtx
9990 gen_pop (rtx arg)
9992 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9993 arg = gen_rtx_REG (word_mode, REGNO (arg));
9995 return gen_rtx_SET (arg,
9996 gen_rtx_MEM (word_mode,
9997 gen_rtx_POST_INC (Pmode,
9998 stack_pointer_rtx)));
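/* The two helpers above produce the canonical push/pop RTL; for %rbp on
   x86-64 the patterns are, schematically:

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bp))    ;; gen_push
     (set (reg:DI bp) (mem:DI (post_inc:DI (reg:DI sp))))   ;; gen_pop

   gen_push additionally updates the frame-state bookkeeping (cfa_offset
   and sp_offset) so later prologue code knows where the CFA is.  */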
10001 /* Return >= 0 if there is an unused call-clobbered register available
10002 for the entire function. */
10004 static unsigned int
10005 ix86_select_alt_pic_regnum (void)
10007 if (ix86_use_pseudo_pic_reg ())
10008 return INVALID_REGNUM;
10010 if (crtl->is_leaf
10011 && !crtl->profile
10012 && !ix86_current_function_calls_tls_descriptor)
10014 int i, drap;
10015 /* Can't use the same register for both PIC and DRAP. */
10016 if (crtl->drap_reg)
10017 drap = REGNO (crtl->drap_reg);
10018 else
10019 drap = -1;
10020 for (i = 2; i >= 0; --i)
10021 if (i != drap && !df_regs_ever_live_p (i))
10022 return i;
10025 return INVALID_REGNUM;
10028 /* Return TRUE if we need to save REGNO. */
10030 static bool
10031 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
10033 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10034 && pic_offset_table_rtx)
10036 if (ix86_use_pseudo_pic_reg ())
10038 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10039 _mcount in prologue. */
10040 if (!TARGET_64BIT && flag_pic && crtl->profile)
10041 return true;
10043 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10044 || crtl->profile
10045 || crtl->calls_eh_return
10046 || crtl->uses_const_pool
10047 || cfun->has_nonlocal_label)
10048 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10051 if (crtl->calls_eh_return && maybe_eh_return)
10053 unsigned i;
10054 for (i = 0; ; i++)
10056 unsigned test = EH_RETURN_DATA_REGNO (i);
10057 if (test == INVALID_REGNUM)
10058 break;
10059 if (test == regno)
10060 return true;
10064 if (crtl->drap_reg
10065 && regno == REGNO (crtl->drap_reg)
10066 && !cfun->machine->no_drap_save_restore)
10067 return true;
10069 return (df_regs_ever_live_p (regno)
10070 && !call_used_regs[regno]
10071 && !fixed_regs[regno]
10072 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10075 /* Return the number of saved general purpose registers. */
10077 static int
10078 ix86_nsaved_regs (void)
10080 int nregs = 0;
10081 int regno;
10083 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10084 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10085 nregs ++;
10086 return nregs;
10089 /* Return the number of saved SSE registers. */
10091 static int
10092 ix86_nsaved_sseregs (void)
10094 int nregs = 0;
10095 int regno;
10097 if (!TARGET_64BIT_MS_ABI)
10098 return 0;
10099 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10100 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10101 nregs ++;
10102 return nregs;
10105 /* Given FROM and TO register numbers, say whether this elimination is
10106 allowed. If stack alignment is needed, we can only replace argument
10107 pointer with hard frame pointer, or replace frame pointer with stack
10108 pointer. Otherwise, frame pointer elimination is automatically
10109 handled and all other eliminations are valid. */
10111 static bool
10112 ix86_can_eliminate (const int from, const int to)
10114 if (stack_realign_fp)
10115 return ((from == ARG_POINTER_REGNUM
10116 && to == HARD_FRAME_POINTER_REGNUM)
10117 || (from == FRAME_POINTER_REGNUM
10118 && to == STACK_POINTER_REGNUM));
10119 else
10120 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10123 /* Return the offset between two registers, one to be eliminated, and the other
10124 its replacement, at the start of a routine. */
10126 HOST_WIDE_INT
10127 ix86_initial_elimination_offset (int from, int to)
10129 struct ix86_frame frame;
10130 ix86_compute_frame_layout (&frame);
10132 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10133 return frame.hard_frame_pointer_offset;
10134 else if (from == FRAME_POINTER_REGNUM
10135 && to == HARD_FRAME_POINTER_REGNUM)
10136 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10137 else
10139 gcc_assert (to == STACK_POINTER_REGNUM);
10141 if (from == ARG_POINTER_REGNUM)
10142 return frame.stack_pointer_offset;
10144 gcc_assert (from == FRAME_POINTER_REGNUM);
10145 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10149 /* In a dynamically-aligned function, we can't know the offset from
10150 stack pointer to frame pointer, so we must ensure that setjmp
10151 eliminates fp against the hard fp (%ebp) rather than trying to
10152 index from %esp up to the top of the frame across a gap that is
10153 of unknown (at compile-time) size. */
10154 static rtx
10155 ix86_builtin_setjmp_frame_value (void)
10157 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10160 /* When using -fsplit-stack, the allocation routines set a field in
10161 the TCB to the bottom of the stack plus this much space, measured
10162 in bytes. */
10164 #define SPLIT_STACK_AVAILABLE 256
10166 /* Fill structure ix86_frame about frame of currently computed function. */
10168 static void
10169 ix86_compute_frame_layout (struct ix86_frame *frame)
10171 unsigned HOST_WIDE_INT stack_alignment_needed;
10172 HOST_WIDE_INT offset;
10173 unsigned HOST_WIDE_INT preferred_alignment;
10174 HOST_WIDE_INT size = get_frame_size ();
10175 HOST_WIDE_INT to_allocate;
10177 frame->nregs = ix86_nsaved_regs ();
10178 frame->nsseregs = ix86_nsaved_sseregs ();
10180 /* The 64-bit MS ABI seems to require the stack alignment to always be 16, except
10181 in function prologues and leaf functions. */
10182 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10183 && (!crtl->is_leaf || cfun->calls_alloca != 0
10184 || ix86_current_function_calls_tls_descriptor))
10186 crtl->preferred_stack_boundary = 128;
10187 crtl->stack_alignment_needed = 128;
10189 /* preferred_stack_boundary is never updated for calls
10190 expanded from the tls descriptor. Update it here. We don't update it in
10191 the expand stage because, according to the comments before
10192 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10193 away. */
10194 else if (ix86_current_function_calls_tls_descriptor
10195 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10197 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10198 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10199 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10202 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10203 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10205 gcc_assert (!size || stack_alignment_needed);
10206 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10207 gcc_assert (preferred_alignment <= stack_alignment_needed);
10209 /* For SEH we have to limit the amount of code movement into the prologue.
10210 At present we do this via a BLOCKAGE, at which point there's very little
10211 scheduling that can be done, which means that there's very little point
10212 in doing anything except PUSHs. */
10213 if (TARGET_SEH)
10214 cfun->machine->use_fast_prologue_epilogue = false;
10216 /* During reload iterations the number of registers saved can change.
10217 Recompute the value as needed. Do not recompute when the number of registers
10218 didn't change, as reload makes multiple calls to this function and does not
10219 expect the decision to change within a single iteration. */
10220 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10221 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10223 int count = frame->nregs;
10224 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10226 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10228 /* The fast prologue uses move instead of push to save registers. This
10229 is significantly longer, but also executes faster as modern hardware
10230 can execute the moves in parallel, but can't do that for push/pop.
10232 Be careful about choosing which prologue to emit: when the function takes
10233 many instructions to execute, we may use the slow version, as well as when
10234 the function is known to be outside a hot spot (this is known with
10235 feedback only). Weight the size of the function by the number of registers
10236 to save, as it is cheap to use one or two push instructions but very
10237 slow to use many of them. */
10238 if (count)
10239 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10240 if (node->frequency < NODE_FREQUENCY_NORMAL
10241 || (flag_branch_probabilities
10242 && node->frequency < NODE_FREQUENCY_HOT))
10243 cfun->machine->use_fast_prologue_epilogue = false;
10244 else
10245 cfun->machine->use_fast_prologue_epilogue
10246 = !expensive_function_p (count);
10249 frame->save_regs_using_mov
10250 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10251 /* If static stack checking is enabled and done with probes,
10252 the registers need to be saved before allocating the frame. */
10253 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10255 /* Skip return address. */
10256 offset = UNITS_PER_WORD;
10258 /* Skip pushed static chain. */
10259 if (ix86_static_chain_on_stack)
10260 offset += UNITS_PER_WORD;
10262 /* Skip saved base pointer. */
10263 if (frame_pointer_needed)
10264 offset += UNITS_PER_WORD;
10265 frame->hfp_save_offset = offset;
10267 /* The traditional frame pointer location is at the top of the frame. */
10268 frame->hard_frame_pointer_offset = offset;
10270 /* Register save area */
10271 offset += frame->nregs * UNITS_PER_WORD;
10272 frame->reg_save_offset = offset;
10274 /* On SEH target, registers are pushed just before the frame pointer
10275 location. */
10276 if (TARGET_SEH)
10277 frame->hard_frame_pointer_offset = offset;
10279 /* Align and set SSE register save area. */
10280 if (frame->nsseregs)
10282 /* The only ABI that has saved SSE registers (Win64) also has a
10283 16-byte aligned default stack, and thus we don't need to be
10284 within the re-aligned local stack frame to save them. */
10285 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10286 offset = (offset + 16 - 1) & -16;
10287 offset += frame->nsseregs * 16;
10289 frame->sse_reg_save_offset = offset;
10291 /* The re-aligned stack starts here. Values before this point are not
10292 directly comparable with values below this point. In order to make
10293 sure that no value happens to be the same before and after, force
10294 the alignment computation below to add a non-zero value. */
10295 if (stack_realign_fp)
10296 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10298 /* Va-arg area */
10299 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10300 offset += frame->va_arg_size;
10302 /* Align start of frame for local function. */
10303 if (stack_realign_fp
10304 || offset != frame->sse_reg_save_offset
10305 || size != 0
10306 || !crtl->is_leaf
10307 || cfun->calls_alloca
10308 || ix86_current_function_calls_tls_descriptor)
10309 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10311 /* Frame pointer points here. */
10312 frame->frame_pointer_offset = offset;
10314 offset += size;
10316 /* Add the outgoing arguments area. It can be skipped if we eliminated
10317 all the function calls as dead code.
10318 Skipping is however impossible when the function calls alloca, since the
10319 alloca expander assumes that the last crtl->outgoing_args_size bytes
10320 of the stack frame are unused. */
10321 if (ACCUMULATE_OUTGOING_ARGS
10322 && (!crtl->is_leaf || cfun->calls_alloca
10323 || ix86_current_function_calls_tls_descriptor))
10325 offset += crtl->outgoing_args_size;
10326 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10328 else
10329 frame->outgoing_arguments_size = 0;
10331 /* Align stack boundary. Only needed if we're calling another function
10332 or using alloca. */
10333 if (!crtl->is_leaf || cfun->calls_alloca
10334 || ix86_current_function_calls_tls_descriptor)
10335 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10337 /* We've reached end of stack frame. */
10338 frame->stack_pointer_offset = offset;
10340 /* Size prologue needs to allocate. */
10341 to_allocate = offset - frame->sse_reg_save_offset;
10343 if ((!to_allocate && frame->nregs <= 1)
10344 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10345 frame->save_regs_using_mov = false;
10347 if (ix86_using_red_zone ()
10348 && crtl->sp_is_unchanging
10349 && crtl->is_leaf
10350 && !ix86_current_function_calls_tls_descriptor)
10352 frame->red_zone_size = to_allocate;
10353 if (frame->save_regs_using_mov)
10354 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10355 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10356 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10358 else
10359 frame->red_zone_size = 0;
10360 frame->stack_pointer_offset -= frame->red_zone_size;
10362 /* The SEH frame pointer location is near the bottom of the frame.
10363 This is enforced by the fact that the difference between the
10364 stack pointer and the frame pointer is limited to 240 bytes in
10365 the unwind data structure. */
10366 if (TARGET_SEH)
10368 HOST_WIDE_INT diff;
10370 /* If we can leave the frame pointer where it is, do so. Also, returns
10371 the establisher frame for __builtin_frame_address (0). */
10372 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10373 if (diff <= SEH_MAX_FRAME_SIZE
10374 && (diff > 240 || (diff & 15) != 0)
10375 && !crtl->accesses_prior_frames)
10377 /* Ideally we'd determine what portion of the local stack frame
10378 (within the constraint of the lowest 240) is most heavily used.
10379 But without that complication, simply bias the frame pointer
10380 by 128 bytes so as to maximize the amount of the local stack
10381 frame that is addressable with 8-bit offsets. */
10382 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
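/* A sketch of the frame areas laid out above, from the return address
   downwards; any of them may be empty for a given function, and each
   *_offset field records the running byte count once the corresponding
   area has been accounted for:

     return address
     pushed static chain, saved frame pointer   (hard_frame_pointer_offset)
     general-purpose register save area         (reg_save_offset)
     SSE register save area, 16-byte aligned    (sse_reg_save_offset)
     va_arg register save area
     local variables                            (frame_pointer_offset marks their top)
     outgoing argument area                     (stack_pointer_offset)
     red zone, subtracted from stack_pointer_offset at the very end
*/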
10387 /* This is semi-inlined memory_address_length, but simplified
10388 since we know that we're always dealing with reg+offset, and
10389 to avoid having to create and discard all that rtl. */
10391 static inline int
10392 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10394 int len = 4;
10396 if (offset == 0)
10398 /* EBP and R13 cannot be encoded without an offset. */
10399 len = (regno == BP_REG || regno == R13_REG);
10401 else if (IN_RANGE (offset, -128, 127))
10402 len = 1;
10404 /* ESP and R12 must be encoded with a SIB byte. */
10405 if (regno == SP_REG || regno == R12_REG)
10406 len++;
10408 return len;
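/* A few sample lengths returned above (address bytes needed beyond the
   ModRM byte): (%rax) -> 0, 16(%rax) -> 1, (%rbp) -> 1 because a zero
   disp8 is still required, (%rsp) -> 1 for the SIB byte, and
   1024(%rsp) -> 5 (disp32 plus SIB).  */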
10411 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10412 The valid base registers are taken from CFUN->MACHINE->FS. */
10414 static rtx
10415 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10417 const struct machine_function *m = cfun->machine;
10418 rtx base_reg = NULL;
10419 HOST_WIDE_INT base_offset = 0;
10421 if (m->use_fast_prologue_epilogue)
10423 /* Choose the base register most likely to allow the most scheduling
10424 opportunities. Generally FP is valid throughout the function,
10425 while DRAP must be reloaded within the epilogue. But choose either
10426 over the SP due to increased encoding size. */
10428 if (m->fs.fp_valid)
10430 base_reg = hard_frame_pointer_rtx;
10431 base_offset = m->fs.fp_offset - cfa_offset;
10433 else if (m->fs.drap_valid)
10435 base_reg = crtl->drap_reg;
10436 base_offset = 0 - cfa_offset;
10438 else if (m->fs.sp_valid)
10440 base_reg = stack_pointer_rtx;
10441 base_offset = m->fs.sp_offset - cfa_offset;
10444 else
10446 HOST_WIDE_INT toffset;
10447 int len = 16, tlen;
10449 /* Choose the base register with the smallest address encoding.
10450 With a tie, choose FP > DRAP > SP. */
10451 if (m->fs.sp_valid)
10453 base_reg = stack_pointer_rtx;
10454 base_offset = m->fs.sp_offset - cfa_offset;
10455 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10457 if (m->fs.drap_valid)
10459 toffset = 0 - cfa_offset;
10460 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10461 if (tlen <= len)
10463 base_reg = crtl->drap_reg;
10464 base_offset = toffset;
10465 len = tlen;
10468 if (m->fs.fp_valid)
10470 toffset = m->fs.fp_offset - cfa_offset;
10471 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10472 if (tlen <= len)
10474 base_reg = hard_frame_pointer_rtx;
10475 base_offset = toffset;
10476 len = tlen;
10480 gcc_assert (base_reg != NULL);
10482 return plus_constant (Pmode, base_reg, base_offset);
10485 /* Emit code to save registers in the prologue. */
10487 static void
10488 ix86_emit_save_regs (void)
10490 unsigned int regno;
10491 rtx_insn *insn;
10493 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10494 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10496 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10497 RTX_FRAME_RELATED_P (insn) = 1;
10501 /* Emit a single register save at CFA - CFA_OFFSET. */
10503 static void
10504 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10505 HOST_WIDE_INT cfa_offset)
10507 struct machine_function *m = cfun->machine;
10508 rtx reg = gen_rtx_REG (mode, regno);
10509 rtx mem, addr, base, insn;
10511 addr = choose_baseaddr (cfa_offset);
10512 mem = gen_frame_mem (mode, addr);
10514 /* For SSE saves, we need to indicate the 128-bit alignment. */
10515 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10517 insn = emit_move_insn (mem, reg);
10518 RTX_FRAME_RELATED_P (insn) = 1;
10520 base = addr;
10521 if (GET_CODE (base) == PLUS)
10522 base = XEXP (base, 0);
10523 gcc_checking_assert (REG_P (base));
10525 /* When saving registers into a re-aligned local stack frame, avoid
10526 any tricky guessing by dwarf2out. */
10527 if (m->fs.realigned)
10529 gcc_checking_assert (stack_realign_drap);
10531 if (regno == REGNO (crtl->drap_reg))
10533 /* A bit of a hack. We force the DRAP register to be saved in
10534 the re-aligned stack frame, which provides us with a copy
10535 of the CFA that will last past the prologue. Install it. */
10536 gcc_checking_assert (cfun->machine->fs.fp_valid);
10537 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10538 cfun->machine->fs.fp_offset - cfa_offset);
10539 mem = gen_rtx_MEM (mode, addr);
10540 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10542 else
10544 /* The frame pointer is a stable reference within the
10545 aligned frame. Use it. */
10546 gcc_checking_assert (cfun->machine->fs.fp_valid);
10547 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10548 cfun->machine->fs.fp_offset - cfa_offset);
10549 mem = gen_rtx_MEM (mode, addr);
10550 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10554 /* The memory may not be relative to the current CFA register,
10555 which means that we may need to generate a new pattern for
10556 use by the unwind info. */
10557 else if (base != m->fs.cfa_reg)
10559 addr = plus_constant (Pmode, m->fs.cfa_reg,
10560 m->fs.cfa_offset - cfa_offset);
10561 mem = gen_rtx_MEM (mode, addr);
10562 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10566 /* Emit code to save registers using MOV insns.
10567 First register is stored at CFA - CFA_OFFSET. */
10568 static void
10569 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10571 unsigned int regno;
10573 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10574 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10576 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10577 cfa_offset -= UNITS_PER_WORD;
10581 /* Emit code to save SSE registers using MOV insns.
10582 First register is stored at CFA - CFA_OFFSET. */
10583 static void
10584 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10586 unsigned int regno;
10588 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10589 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10591 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10592 cfa_offset -= 16;
10596 static GTY(()) rtx queued_cfa_restores;
10598 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10599 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10600 Don't add the note if the previously saved value will be left untouched
10601 within the stack red zone till return, as unwinders can find the same value
10602 in the register and on the stack. */
10604 static void
10605 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10607 if (!crtl->shrink_wrapped
10608 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10609 return;
10611 if (insn)
10613 add_reg_note (insn, REG_CFA_RESTORE, reg);
10614 RTX_FRAME_RELATED_P (insn) = 1;
10616 else
10617 queued_cfa_restores
10618 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10621 /* Add any queued REG_CFA_RESTORE notes to INSN. */
10623 static void
10624 ix86_add_queued_cfa_restore_notes (rtx insn)
10626 rtx last;
10627 if (!queued_cfa_restores)
10628 return;
10629 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10631 XEXP (last, 1) = REG_NOTES (insn);
10632 REG_NOTES (insn) = queued_cfa_restores;
10633 queued_cfa_restores = NULL_RTX;
10634 RTX_FRAME_RELATED_P (insn) = 1;
10637 /* Expand prologue or epilogue stack adjustment.
10638 The pattern exists to put a dependency on all ebp-based memory accesses.
10639 STYLE should be negative if instructions should be marked as frame related,
10640 zero if %r11 register is live and cannot be freely used and positive
10641 otherwise. */
10643 static void
10644 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10645 int style, bool set_cfa)
10647 struct machine_function *m = cfun->machine;
10648 rtx insn;
10649 bool add_frame_related_expr = false;
10651 if (Pmode == SImode)
10652 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10653 else if (x86_64_immediate_operand (offset, DImode))
10654 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10655 else
10657 rtx tmp;
10658 /* r11 is used by indirect sibcall return as well, set before the
10659 epilogue and used after the epilogue. */
10660 if (style)
10661 tmp = gen_rtx_REG (DImode, R11_REG);
10662 else
10664 gcc_assert (src != hard_frame_pointer_rtx
10665 && dest != hard_frame_pointer_rtx);
10666 tmp = hard_frame_pointer_rtx;
10668 insn = emit_insn (gen_rtx_SET (tmp, offset));
10669 if (style < 0)
10670 add_frame_related_expr = true;
10672 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10675 insn = emit_insn (insn);
10676 if (style >= 0)
10677 ix86_add_queued_cfa_restore_notes (insn);
10679 if (set_cfa)
10681 rtx r;
10683 gcc_assert (m->fs.cfa_reg == src);
10684 m->fs.cfa_offset += INTVAL (offset);
10685 m->fs.cfa_reg = dest;
10687 r = gen_rtx_PLUS (Pmode, src, offset);
10688 r = gen_rtx_SET (dest, r);
10689 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10690 RTX_FRAME_RELATED_P (insn) = 1;
10692 else if (style < 0)
10694 RTX_FRAME_RELATED_P (insn) = 1;
10695 if (add_frame_related_expr)
10697 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10698 r = gen_rtx_SET (dest, r);
10699 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10703 if (dest == stack_pointer_rtx)
10705 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10706 bool valid = m->fs.sp_valid;
10708 if (src == hard_frame_pointer_rtx)
10710 valid = m->fs.fp_valid;
10711 ooffset = m->fs.fp_offset;
10713 else if (src == crtl->drap_reg)
10715 valid = m->fs.drap_valid;
10716 ooffset = 0;
10718 else
10720 /* Else there are two possibilities: SP itself, which we set
10721 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10722 taken care of by hand along the eh_return path. */
10723 gcc_checking_assert (src == stack_pointer_rtx
10724 || offset == const0_rtx);
10727 m->fs.sp_offset = ooffset - INTVAL (offset);
10728 m->fs.sp_valid = valid;
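/* For instance, the prologue code in ix86_expand_prologue below calls
   pro_epilogue_adjust_stack with STYLE == -1 so that the adjustments are
   marked frame related, while ix86_expand_epilogue simply forwards its own
   STYLE argument.  */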
10732 /* Find an available register to be used as the dynamic realign argument
10733 pointer register. Such a register will be written in the prologue and
10734 used at the beginning of the function body, so it must not be
10735 1. parameter passing register.
10736 2. GOT pointer.
10737 We reuse static-chain register if it is available. Otherwise, we
10738 use DI for i386 and R13 for x86-64. We chose R13 since it has
10739 shorter encoding.
10741 Return: the regno of chosen register. */
10743 static unsigned int
10744 find_drap_reg (void)
10746 tree decl = cfun->decl;
10748 if (TARGET_64BIT)
10750 /* Use R13 for a nested function or a function that needs a static chain.
10751 Since a function with a tail call may use any caller-saved
10752 register in the epilogue, DRAP must not use a caller-saved
10753 register in that case. */
10754 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10755 return R13_REG;
10757 return R10_REG;
10759 else
10761 /* Use DI for a nested function or a function that needs a static chain.
10762 Since a function with a tail call may use any caller-saved
10763 register in the epilogue, DRAP must not use a caller-saved
10764 register in that case. */
10765 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10766 return DI_REG;
10768 /* Reuse static chain register if it isn't used for parameter
10769 passing. */
10770 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10772 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10773 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10774 return CX_REG;
10776 return DI_REG;
10780 /* Return minimum incoming stack alignment. */
10782 static unsigned int
10783 ix86_minimum_incoming_stack_boundary (bool sibcall)
10785 unsigned int incoming_stack_boundary;
10787 /* Prefer the one specified at command line. */
10788 if (ix86_user_incoming_stack_boundary)
10789 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10790 /* In 32-bit mode, use MIN_STACK_BOUNDARY as the incoming stack boundary
10791 if -mstackrealign is used, this isn't a sibcall check, and the
10792 estimated stack alignment is 128 bits. */
10793 else if (!sibcall
10794 && !TARGET_64BIT
10795 && ix86_force_align_arg_pointer
10796 && crtl->stack_alignment_estimated == 128)
10797 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10798 else
10799 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10801 /* Incoming stack alignment can be changed on individual functions
10802 via force_align_arg_pointer attribute. We use the smallest
10803 incoming stack boundary. */
10804 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10805 && lookup_attribute (ix86_force_align_arg_pointer_string,
10806 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10807 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10809 /* The incoming stack frame has to be aligned at least at
10810 parm_stack_boundary. */
10811 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10812 incoming_stack_boundary = crtl->parm_stack_boundary;
10814 /* The stack at the entry of main is aligned by the runtime. We use the
10815 smallest incoming stack boundary. */
10816 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10817 && DECL_NAME (current_function_decl)
10818 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10819 && DECL_FILE_SCOPE_P (current_function_decl))
10820 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10822 return incoming_stack_boundary;
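/* Note that ix86_minimum_incoming_stack_boundary, like the other boundary
   values above, is measured in bits rather than bytes; e.g. the 128 compared
   against crtl->stack_alignment_estimated means a 16-byte alignment.  */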
10825 /* Update incoming stack boundary and estimated stack alignment. */
10827 static void
10828 ix86_update_stack_boundary (void)
10830 ix86_incoming_stack_boundary
10831 = ix86_minimum_incoming_stack_boundary (false);
10833 /* x86_64 varargs functions need 16-byte stack alignment for the register
10834 save area. */
10835 if (TARGET_64BIT
10836 && cfun->stdarg
10837 && crtl->stack_alignment_estimated < 128)
10838 crtl->stack_alignment_estimated = 128;
10841 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10842 needed or an rtx for DRAP otherwise. */
10844 static rtx
10845 ix86_get_drap_rtx (void)
10847 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10848 crtl->need_drap = true;
10850 if (stack_realign_drap)
10852 /* Assign DRAP to vDRAP and return vDRAP. */
10853 unsigned int regno = find_drap_reg ();
10854 rtx drap_vreg;
10855 rtx arg_ptr;
10856 rtx_insn *seq, *insn;
10858 arg_ptr = gen_rtx_REG (Pmode, regno);
10859 crtl->drap_reg = arg_ptr;
10861 start_sequence ();
10862 drap_vreg = copy_to_reg (arg_ptr);
10863 seq = get_insns ();
10864 end_sequence ();
10866 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10867 if (!optimize)
10869 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10870 RTX_FRAME_RELATED_P (insn) = 1;
10872 return drap_vreg;
10874 else
10875 return NULL;
10878 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10880 static rtx
10881 ix86_internal_arg_pointer (void)
10883 return virtual_incoming_args_rtx;
10886 struct scratch_reg {
10887 rtx reg;
10888 bool saved;
10891 /* Return a short-lived scratch register for use on function entry.
10892 In 32-bit mode, it is valid only after the registers are saved
10893 in the prologue. This register must be released by means of
10894 release_scratch_register_on_entry once it is dead. */
10896 static void
10897 get_scratch_register_on_entry (struct scratch_reg *sr)
10899 int regno;
10901 sr->saved = false;
10903 if (TARGET_64BIT)
10905 /* We always use R11 in 64-bit mode. */
10906 regno = R11_REG;
10908 else
10910 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10911 bool fastcall_p
10912 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10913 bool thiscall_p
10914 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10915 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10916 int regparm = ix86_function_regparm (fntype, decl);
10917 int drap_regno
10918 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10920 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10921 for the static chain register. */
10922 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10923 && drap_regno != AX_REG)
10924 regno = AX_REG;
10925 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10926 for the static chain register. */
10927 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10928 regno = AX_REG;
10929 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10930 regno = DX_REG;
10931 /* ecx is the static chain register. */
10932 else if (regparm < 3 && !fastcall_p && !thiscall_p
10933 && !static_chain_p
10934 && drap_regno != CX_REG)
10935 regno = CX_REG;
10936 else if (ix86_save_reg (BX_REG, true))
10937 regno = BX_REG;
10938 /* esi is the static chain register. */
10939 else if (!(regparm == 3 && static_chain_p)
10940 && ix86_save_reg (SI_REG, true))
10941 regno = SI_REG;
10942 else if (ix86_save_reg (DI_REG, true))
10943 regno = DI_REG;
10944 else
10946 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10947 sr->saved = true;
10951 sr->reg = gen_rtx_REG (Pmode, regno);
10952 if (sr->saved)
10954 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10955 RTX_FRAME_RELATED_P (insn) = 1;
10959 /* Release a scratch register obtained from the preceding function. */
10961 static void
10962 release_scratch_register_on_entry (struct scratch_reg *sr)
10964 if (sr->saved)
10966 struct machine_function *m = cfun->machine;
10967 rtx x, insn = emit_insn (gen_pop (sr->reg));
10969 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10970 RTX_FRAME_RELATED_P (insn) = 1;
10971 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10972 x = gen_rtx_SET (stack_pointer_rtx, x);
10973 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10974 m->fs.sp_offset -= UNITS_PER_WORD;
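/* These two routines are used as a pair; e.g. the stack probing code below
   does roughly:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);  */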
10978 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
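/* PROBE_INTERVAL is 2**STACK_CHECK_PROBE_INTERVAL_EXP bytes; with the usual
   default exponent of 12 this is a 4 KiB probing step.  */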
10980 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10982 static void
10983 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10985 /* We skip the probe for the first interval + a small dope of 4 words and
10986 probe that many bytes past the specified size to maintain a protection
10987 area at the bottom of the stack. */
10988 const int dope = 4 * UNITS_PER_WORD;
10989 rtx size_rtx = GEN_INT (size), last;
10991 /* See if we have a constant small number of probes to generate. If so,
10992 that's the easy case. The run-time loop is made up of 11 insns in the
10993 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10994 for n # of intervals. */
10995 if (size <= 5 * PROBE_INTERVAL)
10997 HOST_WIDE_INT i, adjust;
10998 bool first_probe = true;
11000 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
11001 values of N from 1 until it exceeds SIZE. If only one probe is
11002 needed, this will not generate any code. Then adjust and probe
11003 to PROBE_INTERVAL + SIZE. */
11004 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11006 if (first_probe)
11008 adjust = 2 * PROBE_INTERVAL + dope;
11009 first_probe = false;
11011 else
11012 adjust = PROBE_INTERVAL;
11014 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11015 plus_constant (Pmode, stack_pointer_rtx,
11016 -adjust)));
11017 emit_stack_probe (stack_pointer_rtx);
11020 if (first_probe)
11021 adjust = size + PROBE_INTERVAL + dope;
11022 else
11023 adjust = size + PROBE_INTERVAL - i;
11025 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11026 plus_constant (Pmode, stack_pointer_rtx,
11027 -adjust)));
11028 emit_stack_probe (stack_pointer_rtx);
11030 /* Adjust back to account for the additional first interval. */
11031 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11032 plus_constant (Pmode, stack_pointer_rtx,
11033 PROBE_INTERVAL + dope)));
11036 /* Otherwise, do the same as above, but in a loop. Note that we must be
11037 extra careful with variables wrapping around because we might be at
11038 the very top (or the very bottom) of the address space and we have
11039 to be able to handle this case properly; in particular, we use an
11040 equality test for the loop condition. */
11041 else
11043 HOST_WIDE_INT rounded_size;
11044 struct scratch_reg sr;
11046 get_scratch_register_on_entry (&sr);
11049 /* Step 1: round SIZE to the previous multiple of the interval. */
11051 rounded_size = size & -PROBE_INTERVAL;
11054 /* Step 2: compute initial and final value of the loop counter. */
11056 /* SP = SP_0 + PROBE_INTERVAL. */
11057 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11058 plus_constant (Pmode, stack_pointer_rtx,
11059 - (PROBE_INTERVAL + dope))));
11061 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11062 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11063 emit_insn (gen_rtx_SET (sr.reg,
11064 gen_rtx_PLUS (Pmode, sr.reg,
11065 stack_pointer_rtx)));
11068 /* Step 3: the loop
11070 while (SP != LAST_ADDR)
11072 SP = SP + PROBE_INTERVAL
11073 probe at SP
11076 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11077 values of N from 1 until it is equal to ROUNDED_SIZE. */
11079 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11082 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11083 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11085 if (size != rounded_size)
11087 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11088 plus_constant (Pmode, stack_pointer_rtx,
11089 rounded_size - size)));
11090 emit_stack_probe (stack_pointer_rtx);
11093 /* Adjust back to account for the additional first interval. */
11094 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11095 plus_constant (Pmode, stack_pointer_rtx,
11096 PROBE_INTERVAL + dope)));
11098 release_scratch_register_on_entry (&sr);
11101 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11103 /* Even if the stack pointer isn't the CFA register, we need to correctly
11104 describe the adjustments made to it, in particular differentiate the
11105 frame-related ones from the frame-unrelated ones. */
11106 if (size > 0)
11108 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11109 XVECEXP (expr, 0, 0)
11110 = gen_rtx_SET (stack_pointer_rtx,
11111 plus_constant (Pmode, stack_pointer_rtx, -size));
11112 XVECEXP (expr, 0, 1)
11113 = gen_rtx_SET (stack_pointer_rtx,
11114 plus_constant (Pmode, stack_pointer_rtx,
11115 PROBE_INTERVAL + dope + size));
11116 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11117 RTX_FRAME_RELATED_P (last) = 1;
11119 cfun->machine->fs.sp_offset += size;
11122 /* Make sure nothing is scheduled before we are done. */
11123 emit_insn (gen_blockage ());
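/* As an illustration of the arithmetic above, assume PROBE_INTERVAL is
   4096 bytes: a SIZE of 10000 takes the unrolled branch (10000 <= 5 * 4096),
   probing every 4096 bytes and once more past the requested size, while a
   SIZE of 300000 rounds down to a ROUNDED_SIZE of 299008 for the loop and
   probes the remaining 992 bytes separately in step 4.  */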
11126 /* Adjust the stack pointer up to REG while probing it. */
11128 const char *
11129 output_adjust_stack_and_probe (rtx reg)
11131 static int labelno = 0;
11132 char loop_lab[32], end_lab[32];
11133 rtx xops[2];
11135 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11136 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11138 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11140 /* Jump to END_LAB if SP == LAST_ADDR. */
11141 xops[0] = stack_pointer_rtx;
11142 xops[1] = reg;
11143 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11144 fputs ("\tje\t", asm_out_file);
11145 assemble_name_raw (asm_out_file, end_lab);
11146 fputc ('\n', asm_out_file);
11148 /* SP = SP + PROBE_INTERVAL. */
11149 xops[1] = GEN_INT (PROBE_INTERVAL);
11150 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11152 /* Probe at SP. */
11153 xops[1] = const0_rtx;
11154 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11156 fprintf (asm_out_file, "\tjmp\t");
11157 assemble_name_raw (asm_out_file, loop_lab);
11158 fputc ('\n', asm_out_file);
11160 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11162 return "";
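/* For 32-bit AT&T output the loop above expands to something like

     .LPSRL0: cmpl %eax, %esp
              je .LPSRE0
              subl $4096, %esp
              orl $0, (%esp)
              jmp .LPSRL0
     .LPSRE0:

   where %eax stands in for the limit register and 4096 for PROBE_INTERVAL,
   both chosen here purely for illustration.  */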
11165 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11166 inclusive. These are offsets from the current stack pointer. */
11168 static void
11169 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11171 /* See if we have a constant small number of probes to generate. If so,
11172 that's the easy case. The run-time loop is made up of 7 insns in the
11173 generic case while the compile-time loop is made up of n insns for n #
11174 of intervals. */
11175 if (size <= 7 * PROBE_INTERVAL)
11177 HOST_WIDE_INT i;
11179 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11180 it exceeds SIZE. If only one probe is needed, this will not
11181 generate any code. Then probe at FIRST + SIZE. */
11182 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11183 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11184 -(first + i)));
11186 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11187 -(first + size)));
11190 /* Otherwise, do the same as above, but in a loop. Note that we must be
11191 extra careful with variables wrapping around because we might be at
11192 the very top (or the very bottom) of the address space and we have
11193 to be able to handle this case properly; in particular, we use an
11194 equality test for the loop condition. */
11195 else
11197 HOST_WIDE_INT rounded_size, last;
11198 struct scratch_reg sr;
11200 get_scratch_register_on_entry (&sr);
11203 /* Step 1: round SIZE to the previous multiple of the interval. */
11205 rounded_size = size & -PROBE_INTERVAL;
11208 /* Step 2: compute initial and final value of the loop counter. */
11210 /* TEST_OFFSET = FIRST. */
11211 emit_move_insn (sr.reg, GEN_INT (-first));
11213 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11214 last = first + rounded_size;
11217 /* Step 3: the loop
11219 while (TEST_ADDR != LAST_ADDR)
11221 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11222 probe at TEST_ADDR
11225 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11226 until it is equal to ROUNDED_SIZE. */
11228 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11231 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11232 that SIZE is equal to ROUNDED_SIZE. */
11234 if (size != rounded_size)
11235 emit_stack_probe (plus_constant (Pmode,
11236 gen_rtx_PLUS (Pmode,
11237 stack_pointer_rtx,
11238 sr.reg),
11239 rounded_size - size));
11241 release_scratch_register_on_entry (&sr);
11244 /* Make sure nothing is scheduled before we are done. */
11245 emit_insn (gen_blockage ());
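/* Illustration, with PROBE_INTERVAL == 4096 and, say, FIRST == 16384 and
   SIZE == 9000: the unrolled branch above emits probes at sp - 20480,
   sp - 24576 and finally sp - 25384 (i.e. FIRST + SIZE below the current
   stack pointer).  */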
11248 /* Probe a range of stack addresses from REG to END, inclusive. These are
11249 offsets from the current stack pointer. */
11251 const char *
11252 output_probe_stack_range (rtx reg, rtx end)
11254 static int labelno = 0;
11255 char loop_lab[32], end_lab[32];
11256 rtx xops[3];
11258 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11259 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11261 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11263 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11264 xops[0] = reg;
11265 xops[1] = end;
11266 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11267 fputs ("\tje\t", asm_out_file);
11268 assemble_name_raw (asm_out_file, end_lab);
11269 fputc ('\n', asm_out_file);
11271 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11272 xops[1] = GEN_INT (PROBE_INTERVAL);
11273 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11275 /* Probe at TEST_ADDR. */
11276 xops[0] = stack_pointer_rtx;
11277 xops[1] = reg;
11278 xops[2] = const0_rtx;
11279 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11281 fprintf (asm_out_file, "\tjmp\t");
11282 assemble_name_raw (asm_out_file, loop_lab);
11283 fputc ('\n', asm_out_file);
11285 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11287 return "";
11290 /* Finalize the stack_realign_needed flag, which guides the prologue and
11291 epilogue to be generated in the correct form. */
11292 static void
11293 ix86_finalize_stack_realign_flags (void)
11295 /* Check if stack realignment is really needed after reload, and
11296 store the result in cfun. */
11297 unsigned int incoming_stack_boundary
11298 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11299 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11300 unsigned int stack_realign = (incoming_stack_boundary
11301 < (crtl->is_leaf
11302 ? crtl->max_used_stack_slot_alignment
11303 : crtl->stack_alignment_needed));
11305 if (crtl->stack_realign_finalized)
11307 /* After stack_realign_needed is finalized, we can no longer
11308 change it. */
11309 gcc_assert (crtl->stack_realign_needed == stack_realign);
11310 return;
11313 /* If the only reason for frame_pointer_needed is that we conservatively
11314 assumed stack realignment might be needed, but in the end nothing that
11315 needed the stack alignment had been spilled, clear frame_pointer_needed
11316 and say we don't need stack realignment. */
11317 if (stack_realign
11318 && frame_pointer_needed
11319 && crtl->is_leaf
11320 && flag_omit_frame_pointer
11321 && crtl->sp_is_unchanging
11322 && !ix86_current_function_calls_tls_descriptor
11323 && !crtl->accesses_prior_frames
11324 && !cfun->calls_alloca
11325 && !crtl->calls_eh_return
11326 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11327 && !ix86_frame_pointer_required ()
11328 && get_frame_size () == 0
11329 && ix86_nsaved_sseregs () == 0
11330 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11332 HARD_REG_SET set_up_by_prologue, prologue_used;
11333 basic_block bb;
11335 CLEAR_HARD_REG_SET (prologue_used);
11336 CLEAR_HARD_REG_SET (set_up_by_prologue);
11337 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11338 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11339 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11340 HARD_FRAME_POINTER_REGNUM);
11341 FOR_EACH_BB_FN (bb, cfun)
11343 rtx_insn *insn;
11344 FOR_BB_INSNS (bb, insn)
11345 if (NONDEBUG_INSN_P (insn)
11346 && requires_stack_frame_p (insn, prologue_used,
11347 set_up_by_prologue))
11349 crtl->stack_realign_needed = stack_realign;
11350 crtl->stack_realign_finalized = true;
11351 return;
11355 /* If drap has been set, but it actually isn't live at the start
11356 of the function, there is no reason to set it up. */
11357 if (crtl->drap_reg)
11359 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11360 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11362 crtl->drap_reg = NULL_RTX;
11363 crtl->need_drap = false;
11366 else
11367 cfun->machine->no_drap_save_restore = true;
11369 frame_pointer_needed = false;
11370 stack_realign = false;
11371 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11372 crtl->stack_alignment_needed = incoming_stack_boundary;
11373 crtl->stack_alignment_estimated = incoming_stack_boundary;
11374 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11375 crtl->preferred_stack_boundary = incoming_stack_boundary;
11376 df_finish_pass (true);
11377 df_scan_alloc (NULL);
11378 df_scan_blocks ();
11379 df_compute_regs_ever_live (true);
11380 df_analyze ();
11383 crtl->stack_realign_needed = stack_realign;
11384 crtl->stack_realign_finalized = true;
11387 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11389 static void
11390 ix86_elim_entry_set_got (rtx reg)
11392 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11393 rtx_insn *c_insn = BB_HEAD (bb);
11394 if (!NONDEBUG_INSN_P (c_insn))
11395 c_insn = next_nonnote_nondebug_insn (c_insn);
11396 if (c_insn && NONJUMP_INSN_P (c_insn))
11398 rtx pat = PATTERN (c_insn);
11399 if (GET_CODE (pat) == PARALLEL)
11401 rtx vec = XVECEXP (pat, 0, 0);
11402 if (GET_CODE (vec) == SET
11403 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11404 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11405 delete_insn (c_insn);
11410 /* Expand the prologue into a bunch of separate insns. */
11412 void
11413 ix86_expand_prologue (void)
11415 struct machine_function *m = cfun->machine;
11416 rtx insn, t;
11417 struct ix86_frame frame;
11418 HOST_WIDE_INT allocate;
11419 bool int_registers_saved;
11420 bool sse_registers_saved;
11422 ix86_finalize_stack_realign_flags ();
11424 /* DRAP should not coexist with stack_realign_fp */
11425 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11427 memset (&m->fs, 0, sizeof (m->fs));
11429 /* Initialize CFA state for before the prologue. */
11430 m->fs.cfa_reg = stack_pointer_rtx;
11431 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11433 /* Track SP offset to the CFA. We continue tracking this after we've
11434 swapped the CFA register away from SP. In the case of re-alignment
11435 this is fudged; we're interested in offsets within the local frame. */
11436 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11437 m->fs.sp_valid = true;
11439 ix86_compute_frame_layout (&frame);
11441 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11443 /* We should have already generated an error for any use of
11444 ms_hook on a nested function. */
11445 gcc_checking_assert (!ix86_static_chain_on_stack);
11447 /* Check if profiling is active and we shall use the profiling-before-prologue
11448 variant. If so, sorry. */
11449 if (crtl->profile && flag_fentry != 0)
11450 sorry ("ms_hook_prologue attribute isn%'t compatible "
11451 "with -mfentry for 32-bit");
11453 /* In ix86_asm_output_function_label we emitted:
11454 8b ff movl.s %edi,%edi
11455 55 push %ebp
11456 8b ec movl.s %esp,%ebp
11458 This matches the hookable function prologue in Win32 API
11459 functions in Microsoft Windows XP Service Pack 2 and newer.
11460 Wine uses this to enable Windows apps to hook the Win32 API
11461 functions provided by Wine.
11463 What that means is that we've already set up the frame pointer. */
11465 if (frame_pointer_needed
11466 && !(crtl->drap_reg && crtl->stack_realign_needed))
11468 rtx push, mov;
11470 /* We've decided to use the frame pointer already set up.
11471 Describe this to the unwinder by pretending that both
11472 push and mov insns happen right here.
11474 Putting the unwind info here at the end of the ms_hook
11475 is done so that we can make absolutely certain we get
11476 the required byte sequence at the start of the function,
11477 rather than relying on an assembler that can produce
11478 the exact encoding required.
11480 However it does mean (in the unpatched case) that we have
11481 a 1 insn window where the asynchronous unwind info is
11482 incorrect. However, if we placed the unwind info at
11483 its correct location we would have incorrect unwind info
11484 in the patched case. Which is probably all moot since
11485 I don't expect Wine generates dwarf2 unwind info for the
11486 system libraries that use this feature. */
11488 insn = emit_insn (gen_blockage ());
11490 push = gen_push (hard_frame_pointer_rtx);
11491 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11492 stack_pointer_rtx);
11493 RTX_FRAME_RELATED_P (push) = 1;
11494 RTX_FRAME_RELATED_P (mov) = 1;
11496 RTX_FRAME_RELATED_P (insn) = 1;
11497 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11498 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11500 /* Note that gen_push incremented m->fs.cfa_offset, even
11501 though we didn't emit the push insn here. */
11502 m->fs.cfa_reg = hard_frame_pointer_rtx;
11503 m->fs.fp_offset = m->fs.cfa_offset;
11504 m->fs.fp_valid = true;
11506 else
11508 /* The frame pointer is not needed so pop %ebp again.
11509 This leaves us with a pristine state. */
11510 emit_insn (gen_pop (hard_frame_pointer_rtx));
11514 /* The first insn of a function that accepts its static chain on the
11515 stack is to push the register that would be filled in by a direct
11516 call. This insn will be skipped by the trampoline. */
11517 else if (ix86_static_chain_on_stack)
11519 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11520 emit_insn (gen_blockage ());
11522 /* We don't want to interpret this push insn as a register save,
11523 only as a stack adjustment. The real copy of the register as
11524 a save will be done later, if needed. */
11525 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11526 t = gen_rtx_SET (stack_pointer_rtx, t);
11527 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11528 RTX_FRAME_RELATED_P (insn) = 1;
11531 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11532 DRAP is needed and stack realignment is really needed after reload. */
11533 if (stack_realign_drap)
11535 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11537 /* Only need to push the parameter pointer reg if it must be preserved, i.e. it is not call-used. */
11538 if (!call_used_regs[REGNO (crtl->drap_reg)])
11540 /* Push the arg pointer reg. */
11541 insn = emit_insn (gen_push (crtl->drap_reg));
11542 RTX_FRAME_RELATED_P (insn) = 1;
11545 /* Grab the argument pointer. */
11546 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11547 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11548 RTX_FRAME_RELATED_P (insn) = 1;
11549 m->fs.cfa_reg = crtl->drap_reg;
11550 m->fs.cfa_offset = 0;
11552 /* Align the stack. */
11553 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11554 stack_pointer_rtx,
11555 GEN_INT (-align_bytes)));
11556 RTX_FRAME_RELATED_P (insn) = 1;
11558 /* Replicate the return address on the stack so that the return
11559 address can be reached via the (argp - 1) slot. This is needed
11560 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11561 expand_builtin_return_addr etc. */
11562 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11563 t = gen_frame_mem (word_mode, t);
11564 insn = emit_insn (gen_push (t));
11565 RTX_FRAME_RELATED_P (insn) = 1;
11567 /* For the purposes of frame and register save area addressing,
11568 we've started over with a new frame. */
11569 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11570 m->fs.realigned = true;
11573 int_registers_saved = (frame.nregs == 0);
11574 sse_registers_saved = (frame.nsseregs == 0);
11576 if (frame_pointer_needed && !m->fs.fp_valid)
11578 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11579 slower on all targets. Also sdb doesn't like it. */
11580 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11581 RTX_FRAME_RELATED_P (insn) = 1;
11583 /* Push registers now, before setting the frame pointer
11584 on SEH target. */
11585 if (!int_registers_saved
11586 && TARGET_SEH
11587 && !frame.save_regs_using_mov)
11589 ix86_emit_save_regs ();
11590 int_registers_saved = true;
11591 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11594 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11596 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11597 RTX_FRAME_RELATED_P (insn) = 1;
11599 if (m->fs.cfa_reg == stack_pointer_rtx)
11600 m->fs.cfa_reg = hard_frame_pointer_rtx;
11601 m->fs.fp_offset = m->fs.sp_offset;
11602 m->fs.fp_valid = true;
11606 if (!int_registers_saved)
11608 /* If saving registers via PUSH, do so now. */
11609 if (!frame.save_regs_using_mov)
11611 ix86_emit_save_regs ();
11612 int_registers_saved = true;
11613 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11616 /* When using the red zone we may start register saving before allocating
11617 the stack frame, saving one cycle of the prologue. However, avoid
11618 doing this if we have to probe the stack; at least on x86_64 the
11619 stack probe can turn into a call that clobbers a red zone location. */
11620 else if (ix86_using_red_zone ()
11621 && (! TARGET_STACK_PROBE
11622 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11624 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11625 int_registers_saved = true;
11629 if (stack_realign_fp)
11631 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11632 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11634 /* The computation of the size of the re-aligned stack frame means
11635 that we must allocate the size of the register save area before
11636 performing the actual alignment. Otherwise we cannot guarantee
11637 that there's enough storage above the realignment point. */
11638 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11639 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11640 GEN_INT (m->fs.sp_offset
11641 - frame.sse_reg_save_offset),
11642 -1, false);
11644 /* Align the stack. */
11645 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11646 stack_pointer_rtx,
11647 GEN_INT (-align_bytes)));
11649 /* For the purposes of register save area addressing, the stack
11650 pointer is no longer valid. As for the value of sp_offset,
11651 see ix86_compute_frame_layout, which we need to match in order
11652 to pass verification of stack_pointer_offset at the end. */
11653 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11654 m->fs.sp_valid = false;
11657 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11659 if (flag_stack_usage_info)
11661 /* We start to count from ARG_POINTER. */
11662 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11664 /* If it was realigned, take into account the fake frame. */
11665 if (stack_realign_drap)
11667 if (ix86_static_chain_on_stack)
11668 stack_size += UNITS_PER_WORD;
11670 if (!call_used_regs[REGNO (crtl->drap_reg)])
11671 stack_size += UNITS_PER_WORD;
11673 /* This over-estimates by 1 minimal-stack-alignment-unit but
11674 mitigates that by counting in the new return address slot. */
11675 current_function_dynamic_stack_size
11676 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11679 current_function_static_stack_size = stack_size;
11682 /* On SEH target with very large frame size, allocate an area to save
11683 SSE registers (as the very large allocation won't be described). */
11684 if (TARGET_SEH
11685 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11686 && !sse_registers_saved)
11688 HOST_WIDE_INT sse_size =
11689 frame.sse_reg_save_offset - frame.reg_save_offset;
11691 gcc_assert (int_registers_saved);
11693 /* No need to do stack checking as the area will be immediately
11694 written. */
11695 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11696 GEN_INT (-sse_size), -1,
11697 m->fs.cfa_reg == stack_pointer_rtx);
11698 allocate -= sse_size;
11699 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11700 sse_registers_saved = true;
11703 /* The stack has already been decremented by the instruction calling us
11704 so probe if the size is non-negative to preserve the protection area. */
11705 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11707 /* We expect the registers to be saved when probes are used. */
11708 gcc_assert (int_registers_saved);
11710 if (STACK_CHECK_MOVING_SP)
11712 if (!(crtl->is_leaf && !cfun->calls_alloca
11713 && allocate <= PROBE_INTERVAL))
11715 ix86_adjust_stack_and_probe (allocate);
11716 allocate = 0;
11719 else
11721 HOST_WIDE_INT size = allocate;
11723 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11724 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11726 if (TARGET_STACK_PROBE)
11728 if (crtl->is_leaf && !cfun->calls_alloca)
11730 if (size > PROBE_INTERVAL)
11731 ix86_emit_probe_stack_range (0, size);
11733 else
11734 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11736 else
11738 if (crtl->is_leaf && !cfun->calls_alloca)
11740 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11741 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11742 size - STACK_CHECK_PROTECT);
11744 else
11745 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11750 if (allocate == 0)
11752 else if (!ix86_target_stack_probe ()
11753 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11755 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11756 GEN_INT (-allocate), -1,
11757 m->fs.cfa_reg == stack_pointer_rtx);
11759 else
11761 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11762 rtx r10 = NULL;
11763 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11764 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11765 bool eax_live = ix86_eax_live_at_start_p ();
11766 bool r10_live = false;
11768 if (TARGET_64BIT)
11769 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11771 if (eax_live)
11773 insn = emit_insn (gen_push (eax));
11774 allocate -= UNITS_PER_WORD;
11775 /* Note that SEH directives need to continue tracking the stack
11776 pointer even after the frame pointer has been set up. */
11777 if (sp_is_cfa_reg || TARGET_SEH)
11779 if (sp_is_cfa_reg)
11780 m->fs.cfa_offset += UNITS_PER_WORD;
11781 RTX_FRAME_RELATED_P (insn) = 1;
11782 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11783 gen_rtx_SET (stack_pointer_rtx,
11784 plus_constant (Pmode, stack_pointer_rtx,
11785 -UNITS_PER_WORD)));
11789 if (r10_live)
11791 r10 = gen_rtx_REG (Pmode, R10_REG);
11792 insn = emit_insn (gen_push (r10));
11793 allocate -= UNITS_PER_WORD;
11794 if (sp_is_cfa_reg || TARGET_SEH)
11796 if (sp_is_cfa_reg)
11797 m->fs.cfa_offset += UNITS_PER_WORD;
11798 RTX_FRAME_RELATED_P (insn) = 1;
11799 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11800 gen_rtx_SET (stack_pointer_rtx,
11801 plus_constant (Pmode, stack_pointer_rtx,
11802 -UNITS_PER_WORD)));
11806 emit_move_insn (eax, GEN_INT (allocate));
11807 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11809 /* Use the fact that AX still contains ALLOCATE. */
11810 adjust_stack_insn = (Pmode == DImode
11811 ? gen_pro_epilogue_adjust_stack_di_sub
11812 : gen_pro_epilogue_adjust_stack_si_sub);
11814 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11815 stack_pointer_rtx, eax));
11817 if (sp_is_cfa_reg || TARGET_SEH)
11819 if (sp_is_cfa_reg)
11820 m->fs.cfa_offset += allocate;
11821 RTX_FRAME_RELATED_P (insn) = 1;
11822 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11823 gen_rtx_SET (stack_pointer_rtx,
11824 plus_constant (Pmode, stack_pointer_rtx,
11825 -allocate)));
11827 m->fs.sp_offset += allocate;
11829 /* Use stack_pointer_rtx for relative addressing so that code
11830 works for realigned stack, too. */
11831 if (r10_live && eax_live)
11833 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11834 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11835 gen_frame_mem (word_mode, t));
11836 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11837 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11838 gen_frame_mem (word_mode, t));
11840 else if (eax_live || r10_live)
11842 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11843 emit_move_insn (gen_rtx_REG (word_mode,
11844 (eax_live ? AX_REG : R10_REG)),
11845 gen_frame_mem (word_mode, t));
11848 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11850 /* If we haven't already set up the frame pointer, do so now. */
11851 if (frame_pointer_needed && !m->fs.fp_valid)
11853 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11854 GEN_INT (frame.stack_pointer_offset
11855 - frame.hard_frame_pointer_offset));
11856 insn = emit_insn (insn);
11857 RTX_FRAME_RELATED_P (insn) = 1;
11858 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11860 if (m->fs.cfa_reg == stack_pointer_rtx)
11861 m->fs.cfa_reg = hard_frame_pointer_rtx;
11862 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11863 m->fs.fp_valid = true;
11866 if (!int_registers_saved)
11867 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11868 if (!sse_registers_saved)
11869 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11871 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11872 in the prologue. */
11873 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11875 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11876 insn = emit_insn (gen_set_got (pic));
11877 RTX_FRAME_RELATED_P (insn) = 1;
11878 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11879 emit_insn (gen_prologue_use (pic));
11880 /* Delete an already emitted SET_GOT if it exists and is allocated to
11881 REAL_PIC_OFFSET_TABLE_REGNUM. */
11882 ix86_elim_entry_set_got (pic);
11885 if (crtl->drap_reg && !crtl->stack_realign_needed)
11887 /* vDRAP is set up, but after reload it turns out stack realignment
11888 isn't necessary; here we emit prologue code to set up DRAP
11889 without the stack realignment adjustment. */
11890 t = choose_baseaddr (0);
11891 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11894 /* Prevent instructions from being scheduled into the register save push
11895 sequence when access to the red-zone area is done through the frame
11896 pointer. The offset between the frame pointer and the stack pointer is
11897 calculated relative to the value of the stack pointer at the end of the
11898 function prologue, and moving instructions that access the red-zone area
11899 via the frame pointer inside the push sequence violates this assumption. */
11900 if (frame_pointer_needed && frame.red_zone_size)
11901 emit_insn (gen_memory_blockage ());
11903 /* Emit cld instruction if stringops are used in the function. */
11904 if (TARGET_CLD && ix86_current_function_needs_cld)
11905 emit_insn (gen_cld ());
11907 /* SEH requires that the prologue end within 256 bytes of the start of
11908 the function. Prevent instruction schedules that would extend that.
11909 Further, prevent alloca modifications to the stack pointer from being
11910 combined with prologue modifications. */
11911 if (TARGET_SEH)
11912 emit_insn (gen_prologue_use (stack_pointer_rtx));
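/* To summarize the simplest case handled above (frame pointer wanted, no
   realignment, no stack probing): the emitted prologue is push %ebp /
   mov %esp, %ebp, a push for each call-saved register that needs saving,
   and a single subtraction from the stack pointer for the local frame.  */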
11915 /* Emit code to restore REG using a POP insn. */
11917 static void
11918 ix86_emit_restore_reg_using_pop (rtx reg)
11920 struct machine_function *m = cfun->machine;
11921 rtx_insn *insn = emit_insn (gen_pop (reg));
11923 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11924 m->fs.sp_offset -= UNITS_PER_WORD;
11926 if (m->fs.cfa_reg == crtl->drap_reg
11927 && REGNO (reg) == REGNO (crtl->drap_reg))
11929 /* Previously we'd represented the CFA as an expression
11930 like *(%ebp - 8). We've just popped that value from
11931 the stack, which means we need to reset the CFA to
11932 the drap register. This will remain until we restore
11933 the stack pointer. */
11934 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11935 RTX_FRAME_RELATED_P (insn) = 1;
11937 /* This means that the DRAP register is valid for addressing too. */
11938 m->fs.drap_valid = true;
11939 return;
11942 if (m->fs.cfa_reg == stack_pointer_rtx)
11944 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11945 x = gen_rtx_SET (stack_pointer_rtx, x);
11946 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11947 RTX_FRAME_RELATED_P (insn) = 1;
11949 m->fs.cfa_offset -= UNITS_PER_WORD;
11952 /* When the frame pointer is the CFA, and we pop it, we are
11953 swapping back to the stack pointer as the CFA. This happens
11954 for stack frames that don't allocate other data, so we assume
11955 the stack pointer is now pointing at the return address, i.e.
11956 the function entry state, which makes the offset be 1 word. */
11957 if (reg == hard_frame_pointer_rtx)
11959 m->fs.fp_valid = false;
11960 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11962 m->fs.cfa_reg = stack_pointer_rtx;
11963 m->fs.cfa_offset -= UNITS_PER_WORD;
11965 add_reg_note (insn, REG_CFA_DEF_CFA,
11966 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11967 GEN_INT (m->fs.cfa_offset)));
11968 RTX_FRAME_RELATED_P (insn) = 1;
11973 /* Emit code to restore saved registers using POP insns. */
11975 static void
11976 ix86_emit_restore_regs_using_pop (void)
11978 unsigned int regno;
11980 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11981 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11982 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11985 /* Emit code and notes for the LEAVE instruction. */
11987 static void
11988 ix86_emit_leave (void)
11990 struct machine_function *m = cfun->machine;
11991 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11993 ix86_add_queued_cfa_restore_notes (insn);
11995 gcc_assert (m->fs.fp_valid);
11996 m->fs.sp_valid = true;
11997 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11998 m->fs.fp_valid = false;
12000 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12002 m->fs.cfa_reg = stack_pointer_rtx;
12003 m->fs.cfa_offset = m->fs.sp_offset;
12005 add_reg_note (insn, REG_CFA_DEF_CFA,
12006 plus_constant (Pmode, stack_pointer_rtx,
12007 m->fs.sp_offset));
12008 RTX_FRAME_RELATED_P (insn) = 1;
12010 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
12011 m->fs.fp_offset);
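/* A leave instruction is equivalent to "movl %ebp, %esp; popl %ebp", which
   is why the stack pointer becomes valid again at fp_offset - UNITS_PER_WORD
   and the frame pointer is marked invalid above.  */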
12014 /* Emit code to restore saved registers using MOV insns.
12015 First register is restored from CFA - CFA_OFFSET. */
12016 static void
12017 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
12018 bool maybe_eh_return)
12020 struct machine_function *m = cfun->machine;
12021 unsigned int regno;
12023 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12024 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12026 rtx reg = gen_rtx_REG (word_mode, regno);
12027 rtx mem;
12028 rtx_insn *insn;
12030 mem = choose_baseaddr (cfa_offset);
12031 mem = gen_frame_mem (word_mode, mem);
12032 insn = emit_move_insn (reg, mem);
12034 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12036 /* Previously we'd represented the CFA as an expression
12037 like *(%ebp - 8). We've just reloaded that value from
12038 the stack, which means we need to reset the CFA to
12039 the drap register. This will remain until we restore
12040 the stack pointer. */
12041 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12042 RTX_FRAME_RELATED_P (insn) = 1;
12044 /* This means that the DRAP register is valid for addressing. */
12045 m->fs.drap_valid = true;
12047 else
12048 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12050 cfa_offset -= UNITS_PER_WORD;
12054 /* Emit code to restore saved SSE registers using MOV insns.
12055 First register is restored from CFA - CFA_OFFSET. */
12056 static void
12057 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12058 bool maybe_eh_return)
12060 unsigned int regno;
12062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12063 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12065 rtx reg = gen_rtx_REG (V4SFmode, regno);
12066 rtx mem;
12068 mem = choose_baseaddr (cfa_offset);
12069 mem = gen_rtx_MEM (V4SFmode, mem);
12070 set_mem_align (mem, 128);
12071 emit_move_insn (reg, mem);
12073 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12075 cfa_offset -= 16;
12079 /* Restore function stack, frame, and registers. */
12081 void
12082 ix86_expand_epilogue (int style)
12084 struct machine_function *m = cfun->machine;
12085 struct machine_frame_state frame_state_save = m->fs;
12086 struct ix86_frame frame;
12087 bool restore_regs_via_mov;
12088 bool using_drap;
12090 ix86_finalize_stack_realign_flags ();
12091 ix86_compute_frame_layout (&frame);
12093 m->fs.sp_valid = (!frame_pointer_needed
12094 || (crtl->sp_is_unchanging
12095 && !stack_realign_fp));
12096 gcc_assert (!m->fs.sp_valid
12097 || m->fs.sp_offset == frame.stack_pointer_offset);
12099 /* The FP must be valid if the frame pointer is present. */
12100 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12101 gcc_assert (!m->fs.fp_valid
12102 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12104 /* We must have *some* valid pointer to the stack frame. */
12105 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12107 /* The DRAP is never valid at this point. */
12108 gcc_assert (!m->fs.drap_valid);
12110 /* See the comment about red zone and frame
12111 pointer usage in ix86_expand_prologue. */
12112 if (frame_pointer_needed && frame.red_zone_size)
12113 emit_insn (gen_memory_blockage ());
12115 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12116 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12118 /* Determine the CFA offset of the end of the red-zone. */
12119 m->fs.red_zone_offset = 0;
12120 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12122 /* The red-zone begins below the return address. */
12123 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12125 /* When the register save area is in the aligned portion of
12126 the stack, determine the maximum runtime displacement that
12127 matches up with the aligned frame. */
12128 if (stack_realign_drap)
12129 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12130 + UNITS_PER_WORD);
12133 /* Special care must be taken for the normal return case of a function
12134 using eh_return: the eax and edx registers are marked as saved, but
12135 not restored along this path. Adjust the save location to match. */
12136 if (crtl->calls_eh_return && style != 2)
12137 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12139 /* EH_RETURN requires the use of moves to function properly. */
12140 if (crtl->calls_eh_return)
12141 restore_regs_via_mov = true;
12142 /* SEH requires the use of pops to identify the epilogue. */
12143 else if (TARGET_SEH)
12144 restore_regs_via_mov = false;
12145 /* If we're only restoring one register and sp is not valid, then
12146 use a move instruction to restore the register, since it's
12147 less work than reloading sp and popping the register. */
12148 else if (!m->fs.sp_valid && frame.nregs <= 1)
12149 restore_regs_via_mov = true;
12150 else if (TARGET_EPILOGUE_USING_MOVE
12151 && cfun->machine->use_fast_prologue_epilogue
12152 && (frame.nregs > 1
12153 || m->fs.sp_offset != frame.reg_save_offset))
12154 restore_regs_via_mov = true;
12155 else if (frame_pointer_needed
12156 && !frame.nregs
12157 && m->fs.sp_offset != frame.reg_save_offset)
12158 restore_regs_via_mov = true;
12159 else if (frame_pointer_needed
12160 && TARGET_USE_LEAVE
12161 && cfun->machine->use_fast_prologue_epilogue
12162 && frame.nregs == 1)
12163 restore_regs_via_mov = true;
12164 else
12165 restore_regs_via_mov = false;
12167 if (restore_regs_via_mov || frame.nsseregs)
12169 /* Ensure that the entire register save area is addressable via
12170 the stack pointer, if we will restore via sp. */
12171 if (TARGET_64BIT
12172 && m->fs.sp_offset > 0x7fffffff
12173 && !(m->fs.fp_valid || m->fs.drap_valid)
12174 && (frame.nsseregs + frame.nregs) != 0)
12176 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12177 GEN_INT (m->fs.sp_offset
12178 - frame.sse_reg_save_offset),
12179 style,
12180 m->fs.cfa_reg == stack_pointer_rtx);
12184 /* If there are any SSE registers to restore, then we have to do it
12185 via moves, since there's obviously no pop for SSE regs. */
12186 if (frame.nsseregs)
12187 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12188 style == 2);
12190 if (restore_regs_via_mov)
12192 rtx t;
12194 if (frame.nregs)
12195 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12197 /* eh_return epilogues need %ecx added to the stack pointer. */
12198 if (style == 2)
12200 rtx sa = EH_RETURN_STACKADJ_RTX;
12201 rtx_insn *insn;
12203 /* Stack align doesn't work with eh_return. */
12204 gcc_assert (!stack_realign_drap);
12205 /* Neither do regparm nested functions. */
12206 gcc_assert (!ix86_static_chain_on_stack);
12208 if (frame_pointer_needed)
12210 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12211 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12212 emit_insn (gen_rtx_SET (sa, t));
12214 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12215 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12217 /* Note that we use SA as a temporary CFA, as the return
12218 address is at the proper place relative to it. We
12219 pretend this happens at the FP restore insn because
12220 prior to this insn the FP would be stored at the wrong
12221 offset relative to SA, and after this insn we have no
12222 other reasonable register to use for the CFA. We don't
12223 bother resetting the CFA to the SP for the duration of
12224 the return insn. */
12225 add_reg_note (insn, REG_CFA_DEF_CFA,
12226 plus_constant (Pmode, sa, UNITS_PER_WORD));
12227 ix86_add_queued_cfa_restore_notes (insn);
12228 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12229 RTX_FRAME_RELATED_P (insn) = 1;
12231 m->fs.cfa_reg = sa;
12232 m->fs.cfa_offset = UNITS_PER_WORD;
12233 m->fs.fp_valid = false;
12235 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12236 const0_rtx, style, false);
12238 else
12240 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12241 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12242 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12243 ix86_add_queued_cfa_restore_notes (insn);
12245 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12246 if (m->fs.cfa_offset != UNITS_PER_WORD)
12248 m->fs.cfa_offset = UNITS_PER_WORD;
12249 add_reg_note (insn, REG_CFA_DEF_CFA,
12250 plus_constant (Pmode, stack_pointer_rtx,
12251 UNITS_PER_WORD));
12252 RTX_FRAME_RELATED_P (insn) = 1;
12255 m->fs.sp_offset = UNITS_PER_WORD;
12256 m->fs.sp_valid = true;
12259 else
12261 /* SEH requires that the function end with (1) a stack adjustment
12262 if necessary, (2) a sequence of pops, and (3) a return or
12263 jump instruction. Prevent insns from the function body from
12264 being scheduled into this sequence. */
12265 if (TARGET_SEH)
12267 /* Prevent a catch region from being adjacent to the standard
12268 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12269 several other flags that would be interesting to test here are
12270 set up yet. */
12271 if (flag_non_call_exceptions)
12272 emit_insn (gen_nops (const1_rtx));
12273 else
12274 emit_insn (gen_blockage ());
12277 /* The first step is to deallocate the stack frame so that we can
12278 pop the registers. Also do this on SEH targets for very large
12279 frames, as the emitted instructions aren't allowed by the ABI
12280 in epilogues. */
12281 if (!m->fs.sp_valid
12282 || (TARGET_SEH
12283 && (m->fs.sp_offset - frame.reg_save_offset
12284 >= SEH_MAX_FRAME_SIZE)))
12286 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12287 GEN_INT (m->fs.fp_offset
12288 - frame.reg_save_offset),
12289 style, false);
12291 else if (m->fs.sp_offset != frame.reg_save_offset)
12293 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12294 GEN_INT (m->fs.sp_offset
12295 - frame.reg_save_offset),
12296 style,
12297 m->fs.cfa_reg == stack_pointer_rtx);
12300 ix86_emit_restore_regs_using_pop ();
12303 /* If we used a frame pointer and haven't already got rid of it,
12304 then do so now. */
12305 if (m->fs.fp_valid)
12307 /* If the stack pointer is valid and pointing at the frame
12308 pointer store address, then we only need a pop. */
12309 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12310 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12311 /* Leave results in shorter dependency chains on CPUs that are
12312 able to grok it fast. */
12313 else if (TARGET_USE_LEAVE
12314 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12315 || !cfun->machine->use_fast_prologue_epilogue)
12316 ix86_emit_leave ();
12317 else
12319 pro_epilogue_adjust_stack (stack_pointer_rtx,
12320 hard_frame_pointer_rtx,
12321 const0_rtx, style, !using_drap);
12322 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12326 if (using_drap)
12328 int param_ptr_offset = UNITS_PER_WORD;
12329 rtx_insn *insn;
12331 gcc_assert (stack_realign_drap);
12333 if (ix86_static_chain_on_stack)
12334 param_ptr_offset += UNITS_PER_WORD;
12335 if (!call_used_regs[REGNO (crtl->drap_reg)])
12336 param_ptr_offset += UNITS_PER_WORD;
12338 insn = emit_insn (gen_rtx_SET
12339 (stack_pointer_rtx,
12340 gen_rtx_PLUS (Pmode,
12341 crtl->drap_reg,
12342 GEN_INT (-param_ptr_offset))));
12343 m->fs.cfa_reg = stack_pointer_rtx;
12344 m->fs.cfa_offset = param_ptr_offset;
12345 m->fs.sp_offset = param_ptr_offset;
12346 m->fs.realigned = false;
12348 add_reg_note (insn, REG_CFA_DEF_CFA,
12349 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12350 GEN_INT (param_ptr_offset)));
12351 RTX_FRAME_RELATED_P (insn) = 1;
12353 if (!call_used_regs[REGNO (crtl->drap_reg)])
12354 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12357 /* At this point the stack pointer must be valid, and we must have
12358 restored all of the registers. We may not have deallocated the
12359 entire stack frame. We've delayed this until now because it may
12360 be possible to merge the local stack deallocation with the
12361 deallocation forced by ix86_static_chain_on_stack. */
12362 gcc_assert (m->fs.sp_valid);
12363 gcc_assert (!m->fs.fp_valid);
12364 gcc_assert (!m->fs.realigned);
12365 if (m->fs.sp_offset != UNITS_PER_WORD)
12367 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12368 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12369 style, true);
12371 else
12372 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12374 /* Sibcall epilogues don't want a return instruction. */
12375 if (style == 0)
12377 m->fs = frame_state_save;
12378 return;
12381 if (crtl->args.pops_args && crtl->args.size)
12383 rtx popc = GEN_INT (crtl->args.pops_args);
12385 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12386 address, do an explicit add, and jump indirectly to the caller. */
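/* For illustration only (not emitted verbatim): for a pops_args value N
   that doesn't fit the 16-bit "ret N" immediate, the code below amounts to
       pop  %ecx        ; pop the return address into a scratch register
       add  $N, %esp    ; discard the pushed arguments explicitly
       jmp  *%ecx       ; return to the caller indirectly
   whereas the common case simply emits "ret $N".  */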
12388 if (crtl->args.pops_args >= 65536)
12390 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12391 rtx_insn *insn;
12393 /* There is no "pascal" calling convention in any 64bit ABI. */
12394 gcc_assert (!TARGET_64BIT);
12396 insn = emit_insn (gen_pop (ecx));
12397 m->fs.cfa_offset -= UNITS_PER_WORD;
12398 m->fs.sp_offset -= UNITS_PER_WORD;
12400 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12401 x = gen_rtx_SET (stack_pointer_rtx, x);
12402 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12403 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12404 RTX_FRAME_RELATED_P (insn) = 1;
12406 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12407 popc, -1, true);
12408 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12410 else
12411 emit_jump_insn (gen_simple_return_pop_internal (popc));
12413 else
12414 emit_jump_insn (gen_simple_return_internal ());
12416 /* Restore the state back to the state from the prologue,
12417 so that it's correct for the next epilogue. */
12418 m->fs = frame_state_save;
12421 /* Reset from the function's potential modifications. */
12423 static void
12424 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12426 if (pic_offset_table_rtx
12427 && !ix86_use_pseudo_pic_reg ())
12428 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12429 #if TARGET_MACHO
12430 /* Mach-O doesn't support labels at the end of objects, so if
12431 it looks like we might want one, insert a NOP. */
12433 rtx_insn *insn = get_last_insn ();
12434 rtx_insn *deleted_debug_label = NULL;
12435 while (insn
12436 && NOTE_P (insn)
12437 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12439 /* If we only find NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert
12440 a nop; instead set their CODE_LABEL_NUMBER to -1, otherwise
12441 there would be code generation differences
12442 between -g and -g0. */
12443 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12444 deleted_debug_label = insn;
12445 insn = PREV_INSN (insn);
12447 if (insn
12448 && (LABEL_P (insn)
12449 || (NOTE_P (insn)
12450 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12451 fputs ("\tnop\n", file);
12452 else if (deleted_debug_label)
12453 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12454 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12455 CODE_LABEL_NUMBER (insn) = -1;
12457 #endif
12461 /* Return a scratch register to use in the split stack prologue. The
12462 split stack prologue is used for -fsplit-stack. It consists of the first
12463 instructions in the function, even before the regular prologue.
12464 The scratch register can be any caller-saved register which is not
12465 used for parameters or for the static chain. */
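/* Rough summary of the choice made below (the code is authoritative):
     64-bit                        -> %r11
     32-bit fastcall               -> %eax (a static chain is unsupported)
     32-bit thiscall               -> %edx, or %eax if there is a static chain
     32-bit, fewer than 3 regparm  -> %ecx, or %edx with a static chain and
                                      at most one register parameter
     otherwise                     -> no usable register (sorry () is issued).  */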
12467 static unsigned int
12468 split_stack_prologue_scratch_regno (void)
12470 if (TARGET_64BIT)
12471 return R11_REG;
12472 else
12474 bool is_fastcall, is_thiscall;
12475 int regparm;
12477 is_fastcall = (lookup_attribute ("fastcall",
12478 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12479 != NULL);
12480 is_thiscall = (lookup_attribute ("thiscall",
12481 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12482 != NULL);
12483 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12485 if (is_fastcall)
12487 if (DECL_STATIC_CHAIN (cfun->decl))
12489 sorry ("-fsplit-stack does not support fastcall with "
12490 "nested function");
12491 return INVALID_REGNUM;
12493 return AX_REG;
12495 else if (is_thiscall)
12497 if (!DECL_STATIC_CHAIN (cfun->decl))
12498 return DX_REG;
12499 return AX_REG;
12501 else if (regparm < 3)
12503 if (!DECL_STATIC_CHAIN (cfun->decl))
12504 return CX_REG;
12505 else
12507 if (regparm >= 2)
12509 sorry ("-fsplit-stack does not support 2 register "
12510 "parameters for a nested function");
12511 return INVALID_REGNUM;
12513 return DX_REG;
12516 else
12518 /* FIXME: We could make this work by pushing a register
12519 around the addition and comparison. */
12520 sorry ("-fsplit-stack does not support 3 register parameters");
12521 return INVALID_REGNUM;
12526 /* A SYMBOL_REF for the function which allocates new stack space for
12527 -fsplit-stack. */
12529 static GTY(()) rtx split_stack_fn;
12531 /* A SYMBOL_REF for the more-stack function when using the large
12532 model. */
12534 static GTY(()) rtx split_stack_fn_large;
12536 /* Handle -fsplit-stack. These are the first instructions in the
12537 function, even before the regular prologue. */
12539 void
12540 ix86_expand_split_stack_prologue (void)
12542 struct ix86_frame frame;
12543 HOST_WIDE_INT allocate;
12544 unsigned HOST_WIDE_INT args_size;
12545 rtx_code_label *label;
12546 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12547 rtx scratch_reg = NULL_RTX;
12548 rtx_code_label *varargs_label = NULL;
12549 rtx fn;
12551 gcc_assert (flag_split_stack && reload_completed);
12553 ix86_finalize_stack_realign_flags ();
12554 ix86_compute_frame_layout (&frame);
12555 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12557 /* This is the label we will branch to if we have enough stack
12558 space. We expect the basic block reordering pass to reverse this
12559 branch if optimizing, so that we branch in the unlikely case. */
12560 label = gen_label_rtx ();
12562 /* We need to compare the stack pointer minus the frame size with
12563 the stack boundary in the TCB. The stack boundary always gives
12564 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12565 can compare directly. Otherwise we need to do an addition. */
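/* Illustrative sketch, assuming a glibc-style TCB layout: the comparison
   below typically assembles to something like
       cmp  %fs:<split-stack boundary slot>, %rsp
   (or a %gs:-based compare of %esp in 32-bit mode), possibly with the
   frame size folded into a scratch register first, followed by a branch
   over the __morestack call when there is enough stack available.  */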
12567 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12568 UNSPEC_STACK_CHECK);
12569 limit = gen_rtx_CONST (Pmode, limit);
12570 limit = gen_rtx_MEM (Pmode, limit);
12571 if (allocate < SPLIT_STACK_AVAILABLE)
12572 current = stack_pointer_rtx;
12573 else
12575 unsigned int scratch_regno;
12576 rtx offset;
12578 /* We need a scratch register to hold the stack pointer minus
12579 the required frame size. Since this is the very start of the
12580 function, the scratch register can be any caller-saved
12581 register which is not used for parameters. */
12582 offset = GEN_INT (- allocate);
12583 scratch_regno = split_stack_prologue_scratch_regno ();
12584 if (scratch_regno == INVALID_REGNUM)
12585 return;
12586 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12587 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12589 /* We don't use ix86_gen_add3 in this case because it will
12590 want to split to lea, but when not optimizing the insn
12591 will not be split after this point. */
12592 emit_insn (gen_rtx_SET (scratch_reg,
12593 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12594 offset)));
12596 else
12598 emit_move_insn (scratch_reg, offset);
12599 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12600 stack_pointer_rtx));
12602 current = scratch_reg;
12605 ix86_expand_branch (GEU, current, limit, label);
12606 jump_insn = get_last_insn ();
12607 JUMP_LABEL (jump_insn) = label;
12609 /* Mark the jump as very likely to be taken. */
12610 add_int_reg_note (jump_insn, REG_BR_PROB,
12611 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12613 if (split_stack_fn == NULL_RTX)
12615 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12616 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12618 fn = split_stack_fn;
12620 /* Get more stack space. We pass in the desired stack space and the
12621 size of the arguments to copy to the new stack. In 32-bit mode
12622 we push the parameters; __morestack will return on a new stack
12623 anyhow. In 64-bit mode we pass the parameters in r10 and
12624 r11. */
12625 allocate_rtx = GEN_INT (allocate);
12626 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12627 call_fusage = NULL_RTX;
12628 if (TARGET_64BIT)
12630 rtx reg10, reg11;
12632 reg10 = gen_rtx_REG (Pmode, R10_REG);
12633 reg11 = gen_rtx_REG (Pmode, R11_REG);
12635 /* If this function uses a static chain, it will be in %r10.
12636 Preserve it across the call to __morestack. */
12637 if (DECL_STATIC_CHAIN (cfun->decl))
12639 rtx rax;
12641 rax = gen_rtx_REG (word_mode, AX_REG);
12642 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12643 use_reg (&call_fusage, rax);
12646 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12647 && !TARGET_PECOFF)
12649 HOST_WIDE_INT argval;
12651 gcc_assert (Pmode == DImode);
12652 /* When using the large model we need to load the address
12653 into a register, and we've run out of registers. So we
12654 switch to a different calling convention, and we call a
12655 different function: __morestack_large. We pass the
12656 argument size in the upper 32 bits of r10 and pass the
12657 frame size in the lower 32 bits. */
12658 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12659 gcc_assert ((args_size & 0xffffffff) == args_size);
12661 if (split_stack_fn_large == NULL_RTX)
12663 split_stack_fn_large =
12664 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12665 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12667 if (ix86_cmodel == CM_LARGE_PIC)
12669 rtx_code_label *label;
12670 rtx x;
12672 label = gen_label_rtx ();
12673 emit_label (label);
12674 LABEL_PRESERVE_P (label) = 1;
12675 emit_insn (gen_set_rip_rex64 (reg10, label));
12676 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12677 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12678 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12679 UNSPEC_GOT);
12680 x = gen_rtx_CONST (Pmode, x);
12681 emit_move_insn (reg11, x);
12682 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12683 x = gen_const_mem (Pmode, x);
12684 emit_move_insn (reg11, x);
12686 else
12687 emit_move_insn (reg11, split_stack_fn_large);
12689 fn = reg11;
12691 argval = ((args_size << 16) << 16) + allocate;
12692 emit_move_insn (reg10, GEN_INT (argval));
12694 else
12696 emit_move_insn (reg10, allocate_rtx);
12697 emit_move_insn (reg11, GEN_INT (args_size));
12698 use_reg (&call_fusage, reg11);
12701 use_reg (&call_fusage, reg10);
12703 else
12705 emit_insn (gen_push (GEN_INT (args_size)));
12706 emit_insn (gen_push (allocate_rtx));
12708 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12709 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12710 NULL_RTX, false);
12711 add_function_usage_to (call_insn, call_fusage);
12713 /* In order to make call/return prediction work right, we now need
12714 to execute a return instruction. See
12715 libgcc/config/i386/morestack.S for the details on how this works.
12717 For flow purposes gcc must not see this as a return
12718 instruction--we need control flow to continue at the subsequent
12719 label. Therefore, we use an unspec. */
12720 gcc_assert (crtl->args.pops_args < 65536);
12721 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12723 /* If we are in 64-bit mode and this function uses a static chain,
12724 we saved %r10 in %rax before calling __morestack. */
12725 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12726 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12727 gen_rtx_REG (word_mode, AX_REG));
12729 /* If this function calls va_start, we need to store a pointer to
12730 the arguments on the old stack, because they may not have been
12731 all copied to the new stack. At this point the old stack can be
12732 found at the frame pointer value used by __morestack, because
12733 __morestack has set that up before calling back to us. Here we
12734 store that pointer in a scratch register, and in
12735 ix86_expand_prologue we store the scratch register in a stack
12736 slot. */
12737 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12739 unsigned int scratch_regno;
12740 rtx frame_reg;
12741 int words;
12743 scratch_regno = split_stack_prologue_scratch_regno ();
12744 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12745 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12747 /* 64-bit:
12748 fp -> old fp value
12749 return address within this function
12750 return address of caller of this function
12751 stack arguments
12752 So we add three words to get to the stack arguments.
12754 32-bit:
12755 fp -> old fp value
12756 return address within this function
12757 first argument to __morestack
12758 second argument to __morestack
12759 return address of caller of this function
12760 stack arguments
12761 So we add five words to get to the stack arguments.
12763 words = TARGET_64BIT ? 3 : 5;
12764 emit_insn (gen_rtx_SET (scratch_reg,
12765 gen_rtx_PLUS (Pmode, frame_reg,
12766 GEN_INT (words * UNITS_PER_WORD))));
12768 varargs_label = gen_label_rtx ();
12769 emit_jump_insn (gen_jump (varargs_label));
12770 JUMP_LABEL (get_last_insn ()) = varargs_label;
12772 emit_barrier ();
12775 emit_label (label);
12776 LABEL_NUSES (label) = 1;
12778 /* If this function calls va_start, we now have to set the scratch
12779 register for the case where we do not call __morestack. In this
12780 case we need to set it based on the stack pointer. */
12781 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12783 emit_insn (gen_rtx_SET (scratch_reg,
12784 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12785 GEN_INT (UNITS_PER_WORD))));
12787 emit_label (varargs_label);
12788 LABEL_NUSES (varargs_label) = 1;
12792 /* We may have to tell the dataflow pass that the split stack prologue
12793 is initializing a scratch register. */
12795 static void
12796 ix86_live_on_entry (bitmap regs)
12798 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12800 gcc_assert (flag_split_stack);
12801 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12805 /* Extract the parts of an RTL expression that is a valid memory address
12806 for an instruction. Return 0 if the structure of the address is
12807 grossly off. Return -1 if the address contains ASHIFT, so it is not
12808 strictly valid, but still usable for computing the length of an lea insn. */
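/* Illustrative example (not from the sources): the DImode address
       (plus:DI (plus:DI (mult:DI (reg:DI %rbx) (const_int 4))
                         (reg:DI %rbp))
                (const_int 8))
   would decompose into base = %rbp, index = %rbx, scale = 4, disp = 8,
   seg = SEG_DEFAULT, with a return value of 1.  */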
12810 int
12811 ix86_decompose_address (rtx addr, struct ix86_address *out)
12813 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12814 rtx base_reg, index_reg;
12815 HOST_WIDE_INT scale = 1;
12816 rtx scale_rtx = NULL_RTX;
12817 rtx tmp;
12818 int retval = 1;
12819 enum ix86_address_seg seg = SEG_DEFAULT;
12821 /* Allow zero-extended SImode addresses,
12822 they will be emitted with addr32 prefix. */
12823 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12825 if (GET_CODE (addr) == ZERO_EXTEND
12826 && GET_MODE (XEXP (addr, 0)) == SImode)
12828 addr = XEXP (addr, 0);
12829 if (CONST_INT_P (addr))
12830 return 0;
12832 else if (GET_CODE (addr) == AND
12833 && const_32bit_mask (XEXP (addr, 1), DImode))
12835 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12836 if (addr == NULL_RTX)
12837 return 0;
12839 if (CONST_INT_P (addr))
12840 return 0;
12844 /* Allow SImode subregs of DImode addresses,
12845 they will be emitted with addr32 prefix. */
12846 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12848 if (GET_CODE (addr) == SUBREG
12849 && GET_MODE (SUBREG_REG (addr)) == DImode)
12851 addr = SUBREG_REG (addr);
12852 if (CONST_INT_P (addr))
12853 return 0;
12857 if (REG_P (addr))
12858 base = addr;
12859 else if (GET_CODE (addr) == SUBREG)
12861 if (REG_P (SUBREG_REG (addr)))
12862 base = addr;
12863 else
12864 return 0;
12866 else if (GET_CODE (addr) == PLUS)
12868 rtx addends[4], op;
12869 int n = 0, i;
12871 op = addr;
12872 do
12874 if (n >= 4)
12875 return 0;
12876 addends[n++] = XEXP (op, 1);
12877 op = XEXP (op, 0);
12879 while (GET_CODE (op) == PLUS);
12880 if (n >= 4)
12881 return 0;
12882 addends[n] = op;
12884 for (i = n; i >= 0; --i)
12886 op = addends[i];
12887 switch (GET_CODE (op))
12889 case MULT:
12890 if (index)
12891 return 0;
12892 index = XEXP (op, 0);
12893 scale_rtx = XEXP (op, 1);
12894 break;
12896 case ASHIFT:
12897 if (index)
12898 return 0;
12899 index = XEXP (op, 0);
12900 tmp = XEXP (op, 1);
12901 if (!CONST_INT_P (tmp))
12902 return 0;
12903 scale = INTVAL (tmp);
12904 if ((unsigned HOST_WIDE_INT) scale > 3)
12905 return 0;
12906 scale = 1 << scale;
12907 break;
12909 case ZERO_EXTEND:
12910 op = XEXP (op, 0);
12911 if (GET_CODE (op) != UNSPEC)
12912 return 0;
12913 /* FALLTHRU */
12915 case UNSPEC:
12916 if (XINT (op, 1) == UNSPEC_TP
12917 && TARGET_TLS_DIRECT_SEG_REFS
12918 && seg == SEG_DEFAULT)
12919 seg = DEFAULT_TLS_SEG_REG;
12920 else
12921 return 0;
12922 break;
12924 case SUBREG:
12925 if (!REG_P (SUBREG_REG (op)))
12926 return 0;
12927 /* FALLTHRU */
12929 case REG:
12930 if (!base)
12931 base = op;
12932 else if (!index)
12933 index = op;
12934 else
12935 return 0;
12936 break;
12938 case CONST:
12939 case CONST_INT:
12940 case SYMBOL_REF:
12941 case LABEL_REF:
12942 if (disp)
12943 return 0;
12944 disp = op;
12945 break;
12947 default:
12948 return 0;
12952 else if (GET_CODE (addr) == MULT)
12954 index = XEXP (addr, 0); /* index*scale */
12955 scale_rtx = XEXP (addr, 1);
12957 else if (GET_CODE (addr) == ASHIFT)
12959 /* We're called for lea too, which implements ashift on occasion. */
12960 index = XEXP (addr, 0);
12961 tmp = XEXP (addr, 1);
12962 if (!CONST_INT_P (tmp))
12963 return 0;
12964 scale = INTVAL (tmp);
12965 if ((unsigned HOST_WIDE_INT) scale > 3)
12966 return 0;
12967 scale = 1 << scale;
12968 retval = -1;
12970 else
12971 disp = addr; /* displacement */
12973 if (index)
12975 if (REG_P (index))
12977 else if (GET_CODE (index) == SUBREG
12978 && REG_P (SUBREG_REG (index)))
12980 else
12981 return 0;
12984 /* Extract the integral value of scale. */
12985 if (scale_rtx)
12987 if (!CONST_INT_P (scale_rtx))
12988 return 0;
12989 scale = INTVAL (scale_rtx);
12992 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12993 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12995 /* Avoid useless 0 displacement. */
12996 if (disp == const0_rtx && (base || index))
12997 disp = NULL_RTX;
12999 /* Allow arg pointer and stack pointer as index if there is no scaling. */
13000 if (base_reg && index_reg && scale == 1
13001 && (index_reg == arg_pointer_rtx
13002 || index_reg == frame_pointer_rtx
13003 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
13005 std::swap (base, index);
13006 std::swap (base_reg, index_reg);
13009 /* Special case: %ebp cannot be encoded as a base without a displacement.
13010 Similarly %r13. */
13011 if (!disp
13012 && base_reg
13013 && (base_reg == hard_frame_pointer_rtx
13014 || base_reg == frame_pointer_rtx
13015 || base_reg == arg_pointer_rtx
13016 || (REG_P (base_reg)
13017 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13018 || REGNO (base_reg) == R13_REG))))
13019 disp = const0_rtx;
13021 /* Special case: on K6, [%esi] makes the instruction vector decoded.
13022 Avoid this by transforming to [%esi+0].
13023 Reload calls address legitimization without cfun defined, so we need
13024 to test cfun for being non-NULL. */
13025 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13026 && base_reg && !index_reg && !disp
13027 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13028 disp = const0_rtx;
13030 /* Special case: encode reg+reg instead of reg*2. */
13031 if (!base && index && scale == 2)
13032 base = index, base_reg = index_reg, scale = 1;
13034 /* Special case: scaling cannot be encoded without base or displacement. */
13035 if (!base && !disp && index && scale != 1)
13036 disp = const0_rtx;
13038 out->base = base;
13039 out->index = index;
13040 out->disp = disp;
13041 out->scale = scale;
13042 out->seg = seg;
13044 return retval;
13047 /* Return cost of the memory address x.
13048 For i386, it is better to use a complex address than let gcc copy
13049 the address into a reg and make a new pseudo. But not if the address
13050 requires two regs - that would mean more pseudos with longer
13051 lifetimes. */
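/* Illustrative only: with the rules below, an address formed from two
   not-yet-allocated pseudos (base plus index) costs 3, an address using a
   single pseudo costs 2, and a bare constant or hard-register address
   costs 1, plus the AMD-K6 penalty where that applies.  */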
13052 static int
13053 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13055 struct ix86_address parts;
13056 int cost = 1;
13057 int ok = ix86_decompose_address (x, &parts);
13059 gcc_assert (ok);
13061 if (parts.base && GET_CODE (parts.base) == SUBREG)
13062 parts.base = SUBREG_REG (parts.base);
13063 if (parts.index && GET_CODE (parts.index) == SUBREG)
13064 parts.index = SUBREG_REG (parts.index);
13066 /* Attempt to minimize the number of registers in the address by increasing
13067 the address cost for each register used. We don't increase the address cost
13068 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
13069 is not invariant itself, it most likely means that the base or index is not
13070 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
13071 which is not profitable for x86. */
13072 if (parts.base
13073 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13074 && (current_pass->type == GIMPLE_PASS
13075 || !pic_offset_table_rtx
13076 || !REG_P (parts.base)
13077 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13078 cost++;
13080 if (parts.index
13081 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13082 && (current_pass->type == GIMPLE_PASS
13083 || !pic_offset_table_rtx
13084 || !REG_P (parts.index)
13085 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13086 cost++;
13088 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13089 since its predecode logic can't detect the length of instructions
13090 and decoding degenerates to the vector decoder. Increase the cost of such
13091 addresses here. The penalty is at least 2 cycles. It may be worthwhile
13092 to split such addresses or even refuse such addresses at all.
13094 The following addressing modes are affected:
13095 [base+scale*index]
13096 [scale*index+disp]
13097 [base+index]
13099 The first and last cases may be avoidable by explicitly coding the zero in
13100 the memory address, but I don't have an AMD-K6 machine handy to check this
13101 theory. */
13103 if (TARGET_K6
13104 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13105 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13106 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13107 cost += 10;
13109 return cost;
13112 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13113 this is used to form addresses to local data when -fPIC is in
13114 use. */
13116 static bool
13117 darwin_local_data_pic (rtx disp)
13119 return (GET_CODE (disp) == UNSPEC
13120 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13123 /* Determine if a given RTX is a valid constant. We already know this
13124 satisfies CONSTANT_P. */
13126 static bool
13127 ix86_legitimate_constant_p (machine_mode, rtx x)
13129 /* Pointer bounds constants are not valid. */
13130 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13131 return false;
13133 switch (GET_CODE (x))
13135 case CONST:
13136 x = XEXP (x, 0);
13138 if (GET_CODE (x) == PLUS)
13140 if (!CONST_INT_P (XEXP (x, 1)))
13141 return false;
13142 x = XEXP (x, 0);
13145 if (TARGET_MACHO && darwin_local_data_pic (x))
13146 return true;
13148 /* Only some unspecs are valid as "constants". */
13149 if (GET_CODE (x) == UNSPEC)
13150 switch (XINT (x, 1))
13152 case UNSPEC_GOT:
13153 case UNSPEC_GOTOFF:
13154 case UNSPEC_PLTOFF:
13155 return TARGET_64BIT;
13156 case UNSPEC_TPOFF:
13157 case UNSPEC_NTPOFF:
13158 x = XVECEXP (x, 0, 0);
13159 return (GET_CODE (x) == SYMBOL_REF
13160 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13161 case UNSPEC_DTPOFF:
13162 x = XVECEXP (x, 0, 0);
13163 return (GET_CODE (x) == SYMBOL_REF
13164 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13165 default:
13166 return false;
13169 /* We must have drilled down to a symbol. */
13170 if (GET_CODE (x) == LABEL_REF)
13171 return true;
13172 if (GET_CODE (x) != SYMBOL_REF)
13173 return false;
13174 /* FALLTHRU */
13176 case SYMBOL_REF:
13177 /* TLS symbols are never valid. */
13178 if (SYMBOL_REF_TLS_MODEL (x))
13179 return false;
13181 /* DLLIMPORT symbols are never valid. */
13182 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13183 && SYMBOL_REF_DLLIMPORT_P (x))
13184 return false;
13186 #if TARGET_MACHO
13187 /* mdynamic-no-pic */
13188 if (MACHO_DYNAMIC_NO_PIC_P)
13189 return machopic_symbol_defined_p (x);
13190 #endif
13191 break;
13193 case CONST_WIDE_INT:
13194 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13195 return false;
13196 break;
13198 case CONST_VECTOR:
13199 if (!standard_sse_constant_p (x))
13200 return false;
13202 default:
13203 break;
13206 /* Otherwise we handle everything else in the move patterns. */
13207 return true;
13210 /* Determine if it's legal to put X into the constant pool. This
13211 is not possible for the address of thread-local symbols, which
13212 is checked above. */
13214 static bool
13215 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13217 /* We can always put integral constants and vectors in memory. */
13218 switch (GET_CODE (x))
13220 case CONST_INT:
13221 case CONST_WIDE_INT:
13222 case CONST_DOUBLE:
13223 case CONST_VECTOR:
13224 return false;
13226 default:
13227 break;
13229 return !ix86_legitimate_constant_p (mode, x);
13232 /* Return true if the symbol is marked as dllimport, or as a stub variable,
13233 otherwise false. */
13235 static bool
13236 is_imported_p (rtx x)
13238 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13239 || GET_CODE (x) != SYMBOL_REF)
13240 return false;
13242 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13246 /* Nonzero if the constant value X is a legitimate general operand
13247 when generating PIC code. It is given that flag_pic is on and
13248 that X satisfies CONSTANT_P. */
13250 bool
13251 legitimate_pic_operand_p (rtx x)
13253 rtx inner;
13255 switch (GET_CODE (x))
13257 case CONST:
13258 inner = XEXP (x, 0);
13259 if (GET_CODE (inner) == PLUS
13260 && CONST_INT_P (XEXP (inner, 1)))
13261 inner = XEXP (inner, 0);
13263 /* Only some unspecs are valid as "constants". */
13264 if (GET_CODE (inner) == UNSPEC)
13265 switch (XINT (inner, 1))
13267 case UNSPEC_GOT:
13268 case UNSPEC_GOTOFF:
13269 case UNSPEC_PLTOFF:
13270 return TARGET_64BIT;
13271 case UNSPEC_TPOFF:
13272 x = XVECEXP (inner, 0, 0);
13273 return (GET_CODE (x) == SYMBOL_REF
13274 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13275 case UNSPEC_MACHOPIC_OFFSET:
13276 return legitimate_pic_address_disp_p (x);
13277 default:
13278 return false;
13280 /* FALLTHRU */
13282 case SYMBOL_REF:
13283 case LABEL_REF:
13284 return legitimate_pic_address_disp_p (x);
13286 default:
13287 return true;
13291 /* Determine if a given CONST RTX is a valid memory displacement
13292 in PIC mode. */
13294 bool
13295 legitimate_pic_address_disp_p (rtx disp)
13297 bool saw_plus;
13299 /* In 64bit mode we can allow direct addresses of symbols and labels
13300 when they are not dynamic symbols. */
13301 if (TARGET_64BIT)
13303 rtx op0 = disp, op1;
13305 switch (GET_CODE (disp))
13307 case LABEL_REF:
13308 return true;
13310 case CONST:
13311 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13312 break;
13313 op0 = XEXP (XEXP (disp, 0), 0);
13314 op1 = XEXP (XEXP (disp, 0), 1);
13315 if (!CONST_INT_P (op1)
13316 || INTVAL (op1) >= 16*1024*1024
13317 || INTVAL (op1) < -16*1024*1024)
13318 break;
13319 if (GET_CODE (op0) == LABEL_REF)
13320 return true;
13321 if (GET_CODE (op0) == CONST
13322 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13323 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13324 return true;
13325 if (GET_CODE (op0) == UNSPEC
13326 && XINT (op0, 1) == UNSPEC_PCREL)
13327 return true;
13328 if (GET_CODE (op0) != SYMBOL_REF)
13329 break;
13330 /* FALLTHRU */
13332 case SYMBOL_REF:
13333 /* TLS references should always be enclosed in UNSPEC.
13334 A dllimported symbol always needs to be resolved. */
13335 if (SYMBOL_REF_TLS_MODEL (op0)
13336 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13337 return false;
13339 if (TARGET_PECOFF)
13341 if (is_imported_p (op0))
13342 return true;
13344 if (SYMBOL_REF_FAR_ADDR_P (op0)
13345 || !SYMBOL_REF_LOCAL_P (op0))
13346 break;
13348 /* Function symbols need to be resolved only for
13349 the large model.
13350 For the small model we don't need to resolve anything
13351 here. */
13352 if ((ix86_cmodel != CM_LARGE_PIC
13353 && SYMBOL_REF_FUNCTION_P (op0))
13354 || ix86_cmodel == CM_SMALL_PIC)
13355 return true;
13356 /* Non-external symbols don't need to be resolved for
13357 the large and medium models. */
13358 if ((ix86_cmodel == CM_LARGE_PIC
13359 || ix86_cmodel == CM_MEDIUM_PIC)
13360 && !SYMBOL_REF_EXTERNAL_P (op0))
13361 return true;
13363 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13364 && (SYMBOL_REF_LOCAL_P (op0)
13365 || (HAVE_LD_PIE_COPYRELOC
13366 && flag_pie
13367 && !SYMBOL_REF_WEAK (op0)
13368 && !SYMBOL_REF_FUNCTION_P (op0)))
13369 && ix86_cmodel != CM_LARGE_PIC)
13370 return true;
13371 break;
13373 default:
13374 break;
13377 if (GET_CODE (disp) != CONST)
13378 return false;
13379 disp = XEXP (disp, 0);
13381 if (TARGET_64BIT)
13383 /* It is unsafe to allow PLUS expressions here; that would defeat the limit
13384 on the allowed distance of GOT references. We should not need these anyway. */
13385 if (GET_CODE (disp) != UNSPEC
13386 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13387 && XINT (disp, 1) != UNSPEC_GOTOFF
13388 && XINT (disp, 1) != UNSPEC_PCREL
13389 && XINT (disp, 1) != UNSPEC_PLTOFF))
13390 return false;
13392 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13393 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13394 return false;
13395 return true;
13398 saw_plus = false;
13399 if (GET_CODE (disp) == PLUS)
13401 if (!CONST_INT_P (XEXP (disp, 1)))
13402 return false;
13403 disp = XEXP (disp, 0);
13404 saw_plus = true;
13407 if (TARGET_MACHO && darwin_local_data_pic (disp))
13408 return true;
13410 if (GET_CODE (disp) != UNSPEC)
13411 return false;
13413 switch (XINT (disp, 1))
13415 case UNSPEC_GOT:
13416 if (saw_plus)
13417 return false;
13418 /* We need to check for both symbols and labels because VxWorks loads
13419 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13420 details. */
13421 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13422 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13423 case UNSPEC_GOTOFF:
13424 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13425 The ABI also specifies a 32bit relocation, but we don't produce it in
13426 the small PIC model at all. */
13427 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13428 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13429 && !TARGET_64BIT)
13430 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13431 return false;
13432 case UNSPEC_GOTTPOFF:
13433 case UNSPEC_GOTNTPOFF:
13434 case UNSPEC_INDNTPOFF:
13435 if (saw_plus)
13436 return false;
13437 disp = XVECEXP (disp, 0, 0);
13438 return (GET_CODE (disp) == SYMBOL_REF
13439 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13440 case UNSPEC_NTPOFF:
13441 disp = XVECEXP (disp, 0, 0);
13442 return (GET_CODE (disp) == SYMBOL_REF
13443 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13444 case UNSPEC_DTPOFF:
13445 disp = XVECEXP (disp, 0, 0);
13446 return (GET_CODE (disp) == SYMBOL_REF
13447 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13450 return false;
13453 /* Determine if OP is a suitable RTX for an address register.
13454 Return the naked register if a register or a register subreg is
13455 found, otherwise return NULL_RTX. */
13457 static rtx
13458 ix86_validate_address_register (rtx op)
13460 machine_mode mode = GET_MODE (op);
13462 /* Only SImode or DImode registers can form the address. */
13463 if (mode != SImode && mode != DImode)
13464 return NULL_RTX;
13466 if (REG_P (op))
13467 return op;
13468 else if (GET_CODE (op) == SUBREG)
13470 rtx reg = SUBREG_REG (op);
13472 if (!REG_P (reg))
13473 return NULL_RTX;
13475 mode = GET_MODE (reg);
13477 /* Don't allow SUBREGs that span more than a word. It can
13478 lead to spill failures when the register is one word out
13479 of a two word structure. */
13480 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13481 return NULL_RTX;
13483 /* Allow only SUBREGs of non-eliminable hard registers. */
13484 if (register_no_elim_operand (reg, mode))
13485 return reg;
13488 /* Op is not a register. */
13489 return NULL_RTX;
13492 /* Recognizes RTL expressions that are valid memory addresses for an
13493 instruction. The MODE argument is the machine mode for the MEM
13494 expression that wants to use this address.
13496 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13497 convert common non-canonical forms to canonical form so that they will
13498 be recognized. */
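/* Illustrative only: the checks below amount to the hardware
   base + index*scale + disp form; e.g. an address equivalent to
   8(%rax,%rbx,4) is accepted, while a scale of 3 or a stack-pointer
   index is rejected.  */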
13500 static bool
13501 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13503 struct ix86_address parts;
13504 rtx base, index, disp;
13505 HOST_WIDE_INT scale;
13506 enum ix86_address_seg seg;
13508 if (ix86_decompose_address (addr, &parts) <= 0)
13509 /* Decomposition failed. */
13510 return false;
13512 base = parts.base;
13513 index = parts.index;
13514 disp = parts.disp;
13515 scale = parts.scale;
13516 seg = parts.seg;
13518 /* Validate base register. */
13519 if (base)
13521 rtx reg = ix86_validate_address_register (base);
13523 if (reg == NULL_RTX)
13524 return false;
13526 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13527 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13528 /* Base is not valid. */
13529 return false;
13532 /* Validate index register. */
13533 if (index)
13535 rtx reg = ix86_validate_address_register (index);
13537 if (reg == NULL_RTX)
13538 return false;
13540 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13541 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13542 /* Index is not valid. */
13543 return false;
13546 /* Index and base should have the same mode. */
13547 if (base && index
13548 && GET_MODE (base) != GET_MODE (index))
13549 return false;
13551 /* Address override works only on the (%reg) part of %fs:(%reg). */
13552 if (seg != SEG_DEFAULT
13553 && ((base && GET_MODE (base) != word_mode)
13554 || (index && GET_MODE (index) != word_mode)))
13555 return false;
13557 /* Validate scale factor. */
13558 if (scale != 1)
13560 if (!index)
13561 /* Scale without index. */
13562 return false;
13564 if (scale != 2 && scale != 4 && scale != 8)
13565 /* Scale is not a valid multiplier. */
13566 return false;
13569 /* Validate displacement. */
13570 if (disp)
13572 if (GET_CODE (disp) == CONST
13573 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13574 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13575 switch (XINT (XEXP (disp, 0), 1))
13577 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
13578 used. While the ABI also specifies 32bit relocations, we don't produce
13579 them at all and use IP-relative addressing instead. */
13580 case UNSPEC_GOT:
13581 case UNSPEC_GOTOFF:
13582 gcc_assert (flag_pic);
13583 if (!TARGET_64BIT)
13584 goto is_legitimate_pic;
13586 /* 64bit address unspec. */
13587 return false;
13589 case UNSPEC_GOTPCREL:
13590 case UNSPEC_PCREL:
13591 gcc_assert (flag_pic);
13592 goto is_legitimate_pic;
13594 case UNSPEC_GOTTPOFF:
13595 case UNSPEC_GOTNTPOFF:
13596 case UNSPEC_INDNTPOFF:
13597 case UNSPEC_NTPOFF:
13598 case UNSPEC_DTPOFF:
13599 break;
13601 case UNSPEC_STACK_CHECK:
13602 gcc_assert (flag_split_stack);
13603 break;
13605 default:
13606 /* Invalid address unspec. */
13607 return false;
13610 else if (SYMBOLIC_CONST (disp)
13611 && (flag_pic
13612 || (TARGET_MACHO
13613 #if TARGET_MACHO
13614 && MACHOPIC_INDIRECT
13615 && !machopic_operand_p (disp)
13616 #endif
13620 is_legitimate_pic:
13621 if (TARGET_64BIT && (index || base))
13623 /* foo@dtpoff(%rX) is ok. */
13624 if (GET_CODE (disp) != CONST
13625 || GET_CODE (XEXP (disp, 0)) != PLUS
13626 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13627 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13628 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13629 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13630 /* Non-constant pic memory reference. */
13631 return false;
13633 else if ((!TARGET_MACHO || flag_pic)
13634 && ! legitimate_pic_address_disp_p (disp))
13635 /* Displacement is an invalid pic construct. */
13636 return false;
13637 #if TARGET_MACHO
13638 else if (MACHO_DYNAMIC_NO_PIC_P
13639 && !ix86_legitimate_constant_p (Pmode, disp))
13640 /* The displacement must be referenced via a non_lazy_pointer. */
13641 return false;
13642 #endif
13644 /* This code used to verify that a symbolic pic displacement
13645 includes the pic_offset_table_rtx register.
13647 While this is a good idea, unfortunately these constructs may
13648 be created by the "adds using lea" optimization for incorrect
13649 code like:
13651 int a;
13652 int foo(int i)
13654 return *(&a+i);
13657 This code is nonsensical, but results in addressing the
13658 GOT table with a pic_offset_table_rtx base. We can't
13659 just refuse it easily, since it gets matched by the
13660 "addsi3" pattern, which later gets split to an lea when the
13661 output register differs from the input. While this
13662 could be handled by a separate addsi pattern for this case
13663 that never results in an lea, disabling this test seems to be
13664 the easier and correct fix for the crash. */
13666 else if (GET_CODE (disp) != LABEL_REF
13667 && !CONST_INT_P (disp)
13668 && (GET_CODE (disp) != CONST
13669 || !ix86_legitimate_constant_p (Pmode, disp))
13670 && (GET_CODE (disp) != SYMBOL_REF
13671 || !ix86_legitimate_constant_p (Pmode, disp)))
13672 /* Displacement is not constant. */
13673 return false;
13674 else if (TARGET_64BIT
13675 && !x86_64_immediate_operand (disp, VOIDmode))
13676 /* Displacement is out of range. */
13677 return false;
13678 /* In x32 mode, constant addresses are sign extended to 64bit, so
13679 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13680 else if (TARGET_X32 && !(index || base)
13681 && CONST_INT_P (disp)
13682 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13683 return false;
13686 /* Everything looks valid. */
13687 return true;
13690 /* Determine if a given RTX is a valid constant address. */
13692 bool
13693 constant_address_p (rtx x)
13695 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13698 /* Return a unique alias set for the GOT. */
13700 static alias_set_type
13701 ix86_GOT_alias_set (void)
13703 static alias_set_type set = -1;
13704 if (set == -1)
13705 set = new_alias_set ();
13706 return set;
13709 /* Return a legitimate reference for ORIG (an address) using the
13710 register REG. If REG is 0, a new pseudo is generated.
13712 There are two types of references that must be handled:
13714 1. Global data references must load the address from the GOT, via
13715 the PIC reg. An insn is emitted to do this load, and the reg is
13716 returned.
13718 2. Static data references, constant pool addresses, and code labels
13719 compute the address as an offset from the GOT, whose base is in
13720 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13721 differentiate them from global data objects. The returned
13722 address is the PIC reg + an unspec constant.
13724 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13725 reg also appears in the address. */
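/* Illustrative only: in 32-bit PIC code a global symbol "foo" ends up as
   roughly (mem (plus pic_reg (const (unspec [foo] UNSPEC_GOT)))), i.e. a
   load of foo's address from its GOT slot, while a local symbol becomes
   (plus pic_reg (const (unspec [foo] UNSPEC_GOTOFF))), a direct offset
   from the GOT base; the 64-bit small model instead uses a %rip-relative
   @GOTPCREL load.  */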
13727 static rtx
13728 legitimize_pic_address (rtx orig, rtx reg)
13730 rtx addr = orig;
13731 rtx new_rtx = orig;
13733 #if TARGET_MACHO
13734 if (TARGET_MACHO && !TARGET_64BIT)
13736 if (reg == 0)
13737 reg = gen_reg_rtx (Pmode);
13738 /* Use the generic Mach-O PIC machinery. */
13739 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13741 #endif
13743 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13745 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13746 if (tmp)
13747 return tmp;
13750 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13751 new_rtx = addr;
13752 else if (TARGET_64BIT && !TARGET_PECOFF
13753 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13755 rtx tmpreg;
13756 /* This symbol may be referenced via a displacement from the PIC
13757 base address (@GOTOFF). */
13759 if (GET_CODE (addr) == CONST)
13760 addr = XEXP (addr, 0);
13761 if (GET_CODE (addr) == PLUS)
13763 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13764 UNSPEC_GOTOFF);
13765 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13767 else
13768 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13769 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13770 if (!reg)
13771 tmpreg = gen_reg_rtx (Pmode);
13772 else
13773 tmpreg = reg;
13774 emit_move_insn (tmpreg, new_rtx);
13776 if (reg != 0)
13778 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13779 tmpreg, 1, OPTAB_DIRECT);
13780 new_rtx = reg;
13782 else
13783 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13785 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13787 /* This symbol may be referenced via a displacement from the PIC
13788 base address (@GOTOFF). */
13790 if (GET_CODE (addr) == CONST)
13791 addr = XEXP (addr, 0);
13792 if (GET_CODE (addr) == PLUS)
13794 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13795 UNSPEC_GOTOFF);
13796 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13798 else
13799 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13800 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13801 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13803 if (reg != 0)
13805 emit_move_insn (reg, new_rtx);
13806 new_rtx = reg;
13809 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13810 /* We can't use @GOTOFF for text labels on VxWorks;
13811 see gotoff_operand. */
13812 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13814 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13815 if (tmp)
13816 return tmp;
13818 /* For x64 PE-COFF there is no GOT table, so we use the address
13819 directly. */
13820 if (TARGET_64BIT && TARGET_PECOFF)
13822 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13823 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13825 if (reg == 0)
13826 reg = gen_reg_rtx (Pmode);
13827 emit_move_insn (reg, new_rtx);
13828 new_rtx = reg;
13830 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13832 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13833 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13834 new_rtx = gen_const_mem (Pmode, new_rtx);
13835 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13837 if (reg == 0)
13838 reg = gen_reg_rtx (Pmode);
13839 /* Use gen_movsi directly, otherwise the address is loaded
13840 into a register for CSE. We don't want to CSE these addresses;
13841 instead we CSE addresses from the GOT table, so skip this. */
13842 emit_insn (gen_movsi (reg, new_rtx));
13843 new_rtx = reg;
13845 else
13847 /* This symbol must be referenced via a load from the
13848 Global Offset Table (@GOT). */
13850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13851 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13852 if (TARGET_64BIT)
13853 new_rtx = force_reg (Pmode, new_rtx);
13854 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13855 new_rtx = gen_const_mem (Pmode, new_rtx);
13856 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13858 if (reg == 0)
13859 reg = gen_reg_rtx (Pmode);
13860 emit_move_insn (reg, new_rtx);
13861 new_rtx = reg;
13864 else
13866 if (CONST_INT_P (addr)
13867 && !x86_64_immediate_operand (addr, VOIDmode))
13869 if (reg)
13871 emit_move_insn (reg, addr);
13872 new_rtx = reg;
13874 else
13875 new_rtx = force_reg (Pmode, addr);
13877 else if (GET_CODE (addr) == CONST)
13879 addr = XEXP (addr, 0);
13881 /* We must match stuff we generate before. Assume the only
13882 unspecs that can get here are ours. Not that we could do
13883 anything with them anyway.... */
13884 if (GET_CODE (addr) == UNSPEC
13885 || (GET_CODE (addr) == PLUS
13886 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13887 return orig;
13888 gcc_assert (GET_CODE (addr) == PLUS);
13890 if (GET_CODE (addr) == PLUS)
13892 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13894 /* Check first to see if this is a constant offset from a @GOTOFF
13895 symbol reference. */
13896 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13897 && CONST_INT_P (op1))
13899 if (!TARGET_64BIT)
13901 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13902 UNSPEC_GOTOFF);
13903 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13904 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13905 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13907 if (reg != 0)
13909 emit_move_insn (reg, new_rtx);
13910 new_rtx = reg;
13913 else
13915 if (INTVAL (op1) < -16*1024*1024
13916 || INTVAL (op1) >= 16*1024*1024)
13918 if (!x86_64_immediate_operand (op1, Pmode))
13919 op1 = force_reg (Pmode, op1);
13920 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13924 else
13926 rtx base = legitimize_pic_address (op0, reg);
13927 machine_mode mode = GET_MODE (base);
13928 new_rtx
13929 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13931 if (CONST_INT_P (new_rtx))
13933 if (INTVAL (new_rtx) < -16*1024*1024
13934 || INTVAL (new_rtx) >= 16*1024*1024)
13936 if (!x86_64_immediate_operand (new_rtx, mode))
13937 new_rtx = force_reg (mode, new_rtx);
13938 new_rtx
13939 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13941 else
13942 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13944 else
13946 /* For %rip addressing, we have to use just disp32, with
13947 neither base nor index. */
13948 if (TARGET_64BIT
13949 && (GET_CODE (base) == SYMBOL_REF
13950 || GET_CODE (base) == LABEL_REF))
13951 base = force_reg (mode, base);
13952 if (GET_CODE (new_rtx) == PLUS
13953 && CONSTANT_P (XEXP (new_rtx, 1)))
13955 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13956 new_rtx = XEXP (new_rtx, 1);
13958 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13963 return new_rtx;
13966 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13968 static rtx
13969 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13971 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13973 if (GET_MODE (tp) != tp_mode)
13975 gcc_assert (GET_MODE (tp) == SImode);
13976 gcc_assert (tp_mode == DImode);
13978 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13981 if (to_reg)
13982 tp = copy_to_mode_reg (tp_mode, tp);
13984 return tp;
13987 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13989 static GTY(()) rtx ix86_tls_symbol;
13991 static rtx
13992 ix86_tls_get_addr (void)
13994 if (!ix86_tls_symbol)
13996 const char *sym
13997 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13998 ? "___tls_get_addr" : "__tls_get_addr");
14000 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14003 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14005 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14006 UNSPEC_PLTOFF);
14007 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14008 gen_rtx_CONST (Pmode, unspec));
14011 return ix86_tls_symbol;
14014 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14016 static GTY(()) rtx ix86_tls_module_base_symbol;
14018 static rtx
14019 ix86_tls_module_base (void)
14021 if (!ix86_tls_module_base_symbol)
14023 ix86_tls_module_base_symbol
14024 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14026 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14027 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14030 return ix86_tls_module_base_symbol;
14033 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
14034 false if we expect this to be used for a memory address and true if
14035 we expect to load the address into a register. */
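/* Rough summary of the models handled below (the switch is authoritative):
   global-dynamic and local-dynamic call __tls_get_addr (or use the GNU2
   TLS descriptor sequence when TARGET_GNU2_TLS), initial-exec loads the
   offset from the GOT via @gottpoff/@gotntpoff/@indntpoff and combines it
   with the thread pointer, and local-exec applies the known @tpoff/@ntpoff
   offset to the thread pointer directly.  */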
14037 static rtx
14038 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14040 rtx dest, base, off;
14041 rtx pic = NULL_RTX, tp = NULL_RTX;
14042 machine_mode tp_mode = Pmode;
14043 int type;
14045 /* Fall back to the global dynamic model if the toolchain cannot support
14046 local dynamic. */
14047 if (TARGET_SUN_TLS && !TARGET_64BIT
14048 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14049 && model == TLS_MODEL_LOCAL_DYNAMIC)
14050 model = TLS_MODEL_GLOBAL_DYNAMIC;
14052 switch (model)
14054 case TLS_MODEL_GLOBAL_DYNAMIC:
14055 dest = gen_reg_rtx (Pmode);
14057 if (!TARGET_64BIT)
14059 if (flag_pic && !TARGET_PECOFF)
14060 pic = pic_offset_table_rtx;
14061 else
14063 pic = gen_reg_rtx (Pmode);
14064 emit_insn (gen_set_got (pic));
14068 if (TARGET_GNU2_TLS)
14070 if (TARGET_64BIT)
14071 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14072 else
14073 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14075 tp = get_thread_pointer (Pmode, true);
14076 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14078 if (GET_MODE (x) != Pmode)
14079 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14081 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14083 else
14085 rtx caddr = ix86_tls_get_addr ();
14087 if (TARGET_64BIT)
14089 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14090 rtx_insn *insns;
14092 start_sequence ();
14093 emit_call_insn
14094 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14095 insns = get_insns ();
14096 end_sequence ();
14098 if (GET_MODE (x) != Pmode)
14099 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14101 RTL_CONST_CALL_P (insns) = 1;
14102 emit_libcall_block (insns, dest, rax, x);
14104 else
14105 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14107 break;
14109 case TLS_MODEL_LOCAL_DYNAMIC:
14110 base = gen_reg_rtx (Pmode);
14112 if (!TARGET_64BIT)
14114 if (flag_pic)
14115 pic = pic_offset_table_rtx;
14116 else
14118 pic = gen_reg_rtx (Pmode);
14119 emit_insn (gen_set_got (pic));
14123 if (TARGET_GNU2_TLS)
14125 rtx tmp = ix86_tls_module_base ();
14127 if (TARGET_64BIT)
14128 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14129 else
14130 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14132 tp = get_thread_pointer (Pmode, true);
14133 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14134 gen_rtx_MINUS (Pmode, tmp, tp));
14136 else
14138 rtx caddr = ix86_tls_get_addr ();
14140 if (TARGET_64BIT)
14142 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14143 rtx_insn *insns;
14144 rtx eqv;
14146 start_sequence ();
14147 emit_call_insn
14148 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14149 insns = get_insns ();
14150 end_sequence ();
14152 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14153 share the LD_BASE result with other LD model accesses. */
14154 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14155 UNSPEC_TLS_LD_BASE);
14157 RTL_CONST_CALL_P (insns) = 1;
14158 emit_libcall_block (insns, base, rax, eqv);
14160 else
14161 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14164 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14165 off = gen_rtx_CONST (Pmode, off);
14167 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14169 if (TARGET_GNU2_TLS)
14171 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14173 if (GET_MODE (x) != Pmode)
14174 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14176 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14178 break;
14180 case TLS_MODEL_INITIAL_EXEC:
14181 if (TARGET_64BIT)
14183 if (TARGET_SUN_TLS && !TARGET_X32)
14185 /* The Sun linker took the AMD64 TLS spec literally
14186 and can only handle %rax as destination of the
14187 initial executable code sequence. */
14189 dest = gen_reg_rtx (DImode);
14190 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14191 return dest;
14194 /* Generate DImode references to avoid %fs:(%reg32)
14195 problems and the linker IE->LE relaxation bug. */
14196 tp_mode = DImode;
14197 pic = NULL;
14198 type = UNSPEC_GOTNTPOFF;
14200 else if (flag_pic)
14202 pic = pic_offset_table_rtx;
14203 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14205 else if (!TARGET_ANY_GNU_TLS)
14207 pic = gen_reg_rtx (Pmode);
14208 emit_insn (gen_set_got (pic));
14209 type = UNSPEC_GOTTPOFF;
14211 else
14213 pic = NULL;
14214 type = UNSPEC_INDNTPOFF;
14217 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14218 off = gen_rtx_CONST (tp_mode, off);
14219 if (pic)
14220 off = gen_rtx_PLUS (tp_mode, pic, off);
14221 off = gen_const_mem (tp_mode, off);
14222 set_mem_alias_set (off, ix86_GOT_alias_set ());
14224 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14226 base = get_thread_pointer (tp_mode,
14227 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14228 off = force_reg (tp_mode, off);
14229 return gen_rtx_PLUS (tp_mode, base, off);
14231 else
14233 base = get_thread_pointer (Pmode, true);
14234 dest = gen_reg_rtx (Pmode);
14235 emit_insn (ix86_gen_sub3 (dest, base, off));
14237 break;
14239 case TLS_MODEL_LOCAL_EXEC:
14240 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14241 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14242 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14243 off = gen_rtx_CONST (Pmode, off);
14245 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14247 base = get_thread_pointer (Pmode,
14248 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14249 return gen_rtx_PLUS (Pmode, base, off);
14251 else
14253 base = get_thread_pointer (Pmode, true);
14254 dest = gen_reg_rtx (Pmode);
14255 emit_insn (ix86_gen_sub3 (dest, base, off));
14257 break;
14259 default:
14260 gcc_unreachable ();
14263 return dest;
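/* Editorial sketch, not part of the original source: the source-level shape
   of a thread-local access that legitimize_tls_address above expands.  The
   models handled above correspond to the usual ELF sequences: global- and
   local-dynamic call __tls_get_addr, initial-exec loads the offset from the
   GOT via @gottpoff, and local-exec adds a constant @tpoff/@ntpoff offset to
   the thread pointer.  The identifiers below are hypothetical.  */
static __thread int tls_example_counter;

static int
tls_example_read (void)
{
  /* This read is what gets expanded through one of the models above,
     depending on -fpic and the symbol's visibility.  */
  return tls_example_counter;
}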
14266 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14267 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14268 unique refptr-DECL symbol corresponding to symbol DECL. */
14270 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
14272 static inline hashval_t hash (tree_map *m) { return m->hash; }
14273 static inline bool
14274 equal (tree_map *a, tree_map *b)
14276 return a->base.from == b->base.from;
14279 static int
14280 keep_cache_entry (tree_map *&m)
14282 return ggc_marked_p (m->base.from);
14286 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14288 static tree
14289 get_dllimport_decl (tree decl, bool beimport)
14291 struct tree_map *h, in;
14292 const char *name;
14293 const char *prefix;
14294 size_t namelen, prefixlen;
14295 char *imp_name;
14296 tree to;
14297 rtx rtl;
14299 if (!dllimport_map)
14300 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14302 in.hash = htab_hash_pointer (decl);
14303 in.base.from = decl;
14304 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14305 h = *loc;
14306 if (h)
14307 return h->to;
14309 *loc = h = ggc_alloc<tree_map> ();
14310 h->hash = in.hash;
14311 h->base.from = decl;
14312 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14313 VAR_DECL, NULL, ptr_type_node);
14314 DECL_ARTIFICIAL (to) = 1;
14315 DECL_IGNORED_P (to) = 1;
14316 DECL_EXTERNAL (to) = 1;
14317 TREE_READONLY (to) = 1;
14319 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14320 name = targetm.strip_name_encoding (name);
14321 if (beimport)
14322 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14323 ? "*__imp_" : "*__imp__";
14324 else
14325 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14326 namelen = strlen (name);
14327 prefixlen = strlen (prefix);
14328 imp_name = (char *) alloca (namelen + prefixlen + 1);
14329 memcpy (imp_name, prefix, prefixlen);
14330 memcpy (imp_name + prefixlen, name, namelen + 1);
14332 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14333 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14334 SET_SYMBOL_REF_DECL (rtl, to);
14335 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14336 if (!beimport)
14338 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14339 #ifdef SUB_TARGET_RECORD_STUB
14340 SUB_TARGET_RECORD_STUB (name);
14341 #endif
14344 rtl = gen_const_mem (Pmode, rtl);
14345 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14347 SET_DECL_RTL (to, rtl);
14348 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14350 return to;
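/* Editorial sketch, not part of the original source: a standalone
   illustration of the stub-name construction in get_dllimport_decl above
   (the fastcall special case is ignored).  For a decl named "foo" the
   dllimport stub is "*__imp__foo" ("*__imp_foo" without a user label
   prefix) and the refptr stub is "*.refptr.foo".  The helper name and
   parameters are hypothetical.  */
static inline char *
sketch_make_stub_name (const char *name, bool beimport, bool no_prefix)
{
  const char *prefix = beimport
		       ? (no_prefix ? "*__imp_" : "*__imp__")
		       : (no_prefix ? "*.refptr." : "*refptr.");
  size_t prefixlen = strlen (prefix);
  size_t namelen = strlen (name);
  char *buf = XNEWVEC (char, prefixlen + namelen + 1);
  memcpy (buf, prefix, prefixlen);
  memcpy (buf + prefixlen, name, namelen + 1);
  return buf;
}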
14353 /* Expand SYMBOL into its corresponding far-address symbol.
14354 WANT_REG is true if we require the result be a register. */
14356 static rtx
14357 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14359 tree imp_decl;
14360 rtx x;
14362 gcc_assert (SYMBOL_REF_DECL (symbol));
14363 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14365 x = DECL_RTL (imp_decl);
14366 if (want_reg)
14367 x = force_reg (Pmode, x);
14368 return x;
14371 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14372 true if we require the result be a register. */
14374 static rtx
14375 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14377 tree imp_decl;
14378 rtx x;
14380 gcc_assert (SYMBOL_REF_DECL (symbol));
14381 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14383 x = DECL_RTL (imp_decl);
14384 if (want_reg)
14385 x = force_reg (Pmode, x);
14386 return x;
14389 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14390 is true if we require the result be a register. */
14392 static rtx
14393 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14395 if (!TARGET_PECOFF)
14396 return NULL_RTX;
14398 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14400 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14401 return legitimize_dllimport_symbol (addr, inreg);
14402 if (GET_CODE (addr) == CONST
14403 && GET_CODE (XEXP (addr, 0)) == PLUS
14404 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14405 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14407 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14408 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14412 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14413 return NULL_RTX;
14414 if (GET_CODE (addr) == SYMBOL_REF
14415 && !is_imported_p (addr)
14416 && SYMBOL_REF_EXTERNAL_P (addr)
14417 && SYMBOL_REF_DECL (addr))
14418 return legitimize_pe_coff_extern_decl (addr, inreg);
14420 if (GET_CODE (addr) == CONST
14421 && GET_CODE (XEXP (addr, 0)) == PLUS
14422 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14423 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14424 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14425 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14427 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14428 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14430 return NULL_RTX;
14433 /* Try machine-dependent ways of modifying an illegitimate address
14434 to be legitimate. If we find one, return the new, valid address.
14435 This macro is used in only one place: `memory_address' in explow.c.
14437 OLDX is the address as it was before break_out_memory_refs was called.
14438 In some cases it is useful to look at this to decide what needs to be done.
14440 It is always safe for this macro to do nothing. It exists to recognize
14441 opportunities to optimize the output.
14443 For the 80386, we handle X+REG by loading X into a register R and
14444 using R+REG. R will go in a general reg and indexing will be used.
14445 However, if REG is a broken-out memory address or multiplication,
14446 nothing needs to be done because REG can certainly go in a general reg.
14448 When -fpic is used, special handling is needed for symbolic references.
14449 See comments by legitimize_pic_address in i386.c for details. */
14451 static rtx
14452 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14454 bool changed = false;
14455 unsigned log;
14457 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14458 if (log)
14459 return legitimize_tls_address (x, (enum tls_model) log, false);
14460 if (GET_CODE (x) == CONST
14461 && GET_CODE (XEXP (x, 0)) == PLUS
14462 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14463 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14465 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14466 (enum tls_model) log, false);
14467 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14470 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14472 rtx tmp = legitimize_pe_coff_symbol (x, true);
14473 if (tmp)
14474 return tmp;
14477 if (flag_pic && SYMBOLIC_CONST (x))
14478 return legitimize_pic_address (x, 0);
14480 #if TARGET_MACHO
14481 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14482 return machopic_indirect_data_reference (x, 0);
14483 #endif
14485 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14486 if (GET_CODE (x) == ASHIFT
14487 && CONST_INT_P (XEXP (x, 1))
14488 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14490 changed = true;
14491 log = INTVAL (XEXP (x, 1));
14492 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14493 GEN_INT (1 << log));
14496 if (GET_CODE (x) == PLUS)
14498 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14500 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14501 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14502 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14504 changed = true;
14505 log = INTVAL (XEXP (XEXP (x, 0), 1));
14506 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14507 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14508 GEN_INT (1 << log));
14511 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14512 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14513 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14515 changed = true;
14516 log = INTVAL (XEXP (XEXP (x, 1), 1));
14517 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14518 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14519 GEN_INT (1 << log));
14522 /* Put multiply first if it isn't already. */
14523 if (GET_CODE (XEXP (x, 1)) == MULT)
14525 std::swap (XEXP (x, 0), XEXP (x, 1));
14526 changed = true;
14529 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14530 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14531 created by virtual register instantiation, register elimination, and
14532 similar optimizations. */
14533 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14535 changed = true;
14536 x = gen_rtx_PLUS (Pmode,
14537 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14538 XEXP (XEXP (x, 1), 0)),
14539 XEXP (XEXP (x, 1), 1));
14542 /* Canonicalize
14543 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14544 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14545 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14546 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14547 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14548 && CONSTANT_P (XEXP (x, 1)))
14550 rtx constant;
14551 rtx other = NULL_RTX;
14553 if (CONST_INT_P (XEXP (x, 1)))
14555 constant = XEXP (x, 1);
14556 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14558 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14560 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14561 other = XEXP (x, 1);
14563 else
14564 constant = 0;
14566 if (constant)
14568 changed = true;
14569 x = gen_rtx_PLUS (Pmode,
14570 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14571 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14572 plus_constant (Pmode, other,
14573 INTVAL (constant)));
14577 if (changed && ix86_legitimate_address_p (mode, x, false))
14578 return x;
14580 if (GET_CODE (XEXP (x, 0)) == MULT)
14582 changed = true;
14583 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14586 if (GET_CODE (XEXP (x, 1)) == MULT)
14588 changed = true;
14589 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14592 if (changed
14593 && REG_P (XEXP (x, 1))
14594 && REG_P (XEXP (x, 0)))
14595 return x;
14597 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14599 changed = true;
14600 x = legitimize_pic_address (x, 0);
14603 if (changed && ix86_legitimate_address_p (mode, x, false))
14604 return x;
14606 if (REG_P (XEXP (x, 0)))
14608 rtx temp = gen_reg_rtx (Pmode);
14609 rtx val = force_operand (XEXP (x, 1), temp);
14610 if (val != temp)
14612 val = convert_to_mode (Pmode, val, 1);
14613 emit_move_insn (temp, val);
14616 XEXP (x, 1) = temp;
14617 return x;
14620 else if (REG_P (XEXP (x, 1)))
14622 rtx temp = gen_reg_rtx (Pmode);
14623 rtx val = force_operand (XEXP (x, 0), temp);
14624 if (val != temp)
14626 val = convert_to_mode (Pmode, val, 1);
14627 emit_move_insn (temp, val);
14630 XEXP (x, 0) = temp;
14631 return x;
14635 return x;
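/* Editorial sketch, not part of the original source: the shift-to-multiply
   canonicalization done in ix86_legitimize_address above turns, e.g.,
	(plus (ashift (reg) (const_int 2)) (reg))
   into
	(plus (mult (reg) (const_int 4)) (reg))
   so the address matches the base + index*scale form directly.  Only shifts
   by 0..3 qualify, since the valid scales are 1, 2, 4 and 8.  The helper
   name is hypothetical.  */
static inline int
sketch_shift_to_scale (int shift_count)
{
  if (shift_count < 0 || shift_count > 3)
    return 0;			/* not representable as an address scale */
  return 1 << shift_count;	/* 1, 2, 4 or 8 */
}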
14638 /* Print an integer constant expression in assembler syntax. Addition
14639 and subtraction are the only arithmetic that may appear in these
14640 expressions. FILE is the stdio stream to write to, X is the rtx, and
14641 CODE is the operand print code from the output string. */
14643 static void
14644 output_pic_addr_const (FILE *file, rtx x, int code)
14646 char buf[256];
14648 switch (GET_CODE (x))
14650 case PC:
14651 gcc_assert (flag_pic);
14652 putc ('.', file);
14653 break;
14655 case SYMBOL_REF:
14656 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14657 output_addr_const (file, x);
14658 else
14660 const char *name = XSTR (x, 0);
14662 /* Mark the decl as referenced so that cgraph will
14663 output the function. */
14664 if (SYMBOL_REF_DECL (x))
14665 mark_decl_referenced (SYMBOL_REF_DECL (x));
14667 #if TARGET_MACHO
14668 if (MACHOPIC_INDIRECT
14669 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14670 name = machopic_indirection_name (x, /*stub_p=*/true);
14671 #endif
14672 assemble_name (file, name);
14674 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14675 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14676 fputs ("@PLT", file);
14677 break;
14679 case LABEL_REF:
14680 x = XEXP (x, 0);
14681 /* FALLTHRU */
14682 case CODE_LABEL:
14683 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14684 assemble_name (asm_out_file, buf);
14685 break;
14687 case CONST_INT:
14688 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14689 break;
14691 case CONST:
14692 /* This used to output parentheses around the expression,
14693 but that does not work on the 386 (either ATT or BSD assembler). */
14694 output_pic_addr_const (file, XEXP (x, 0), code);
14695 break;
14697 case CONST_DOUBLE:
14698 /* We can't handle floating point constants;
14699 TARGET_PRINT_OPERAND must handle them. */
14700 output_operand_lossage ("floating constant misused");
14701 break;
14703 case PLUS:
14704 /* Some assemblers need integer constants to appear first. */
14705 if (CONST_INT_P (XEXP (x, 0)))
14707 output_pic_addr_const (file, XEXP (x, 0), code);
14708 putc ('+', file);
14709 output_pic_addr_const (file, XEXP (x, 1), code);
14711 else
14713 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14714 output_pic_addr_const (file, XEXP (x, 1), code);
14715 putc ('+', file);
14716 output_pic_addr_const (file, XEXP (x, 0), code);
14718 break;
14720 case MINUS:
14721 if (!TARGET_MACHO)
14722 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14723 output_pic_addr_const (file, XEXP (x, 0), code);
14724 putc ('-', file);
14725 output_pic_addr_const (file, XEXP (x, 1), code);
14726 if (!TARGET_MACHO)
14727 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14728 break;
14730 case UNSPEC:
14731 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14733 bool f = i386_asm_output_addr_const_extra (file, x);
14734 gcc_assert (f);
14735 break;
14738 gcc_assert (XVECLEN (x, 0) == 1);
14739 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14740 switch (XINT (x, 1))
14742 case UNSPEC_GOT:
14743 fputs ("@GOT", file);
14744 break;
14745 case UNSPEC_GOTOFF:
14746 fputs ("@GOTOFF", file);
14747 break;
14748 case UNSPEC_PLTOFF:
14749 fputs ("@PLTOFF", file);
14750 break;
14751 case UNSPEC_PCREL:
14752 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14753 "(%rip)" : "[rip]", file);
14754 break;
14755 case UNSPEC_GOTPCREL:
14756 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14757 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14758 break;
14759 case UNSPEC_GOTTPOFF:
14760 /* FIXME: This might be @TPOFF in Sun ld too. */
14761 fputs ("@gottpoff", file);
14762 break;
14763 case UNSPEC_TPOFF:
14764 fputs ("@tpoff", file);
14765 break;
14766 case UNSPEC_NTPOFF:
14767 if (TARGET_64BIT)
14768 fputs ("@tpoff", file);
14769 else
14770 fputs ("@ntpoff", file);
14771 break;
14772 case UNSPEC_DTPOFF:
14773 fputs ("@dtpoff", file);
14774 break;
14775 case UNSPEC_GOTNTPOFF:
14776 if (TARGET_64BIT)
14777 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14778 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14779 else
14780 fputs ("@gotntpoff", file);
14781 break;
14782 case UNSPEC_INDNTPOFF:
14783 fputs ("@indntpoff", file);
14784 break;
14785 #if TARGET_MACHO
14786 case UNSPEC_MACHOPIC_OFFSET:
14787 putc ('-', file);
14788 machopic_output_function_base_name (file);
14789 break;
14790 #endif
14791 default:
14792 output_operand_lossage ("invalid UNSPEC as operand");
14793 break;
14795 break;
14797 default:
14798 output_operand_lossage ("invalid expression as operand");
14802 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14803 We need to emit DTP-relative relocations. */
14805 static void ATTRIBUTE_UNUSED
14806 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14808 fputs (ASM_LONG, file);
14809 output_addr_const (file, x);
14810 fputs ("@dtpoff", file);
14811 switch (size)
14813 case 4:
14814 break;
14815 case 8:
14816 fputs (", 0", file);
14817 break;
14818 default:
14819 gcc_unreachable ();
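/* Editorial note, not part of the original source: assuming ASM_LONG expands
   to the ".long" pseudo-op, the function above emits roughly
	.long	sym@dtpoff
   for a 4-byte entry and
	.long	sym@dtpoff, 0
   for an 8-byte one.  */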
14823 /* Return true if X is a representation of the PIC register. This copes
14824 with calls from ix86_find_base_term, where the register might have
14825 been replaced by a cselib value. */
14827 static bool
14828 ix86_pic_register_p (rtx x)
14830 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14831 return (pic_offset_table_rtx
14832 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14833 else if (!REG_P (x))
14834 return false;
14835 else if (pic_offset_table_rtx)
14837 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14838 return true;
14839 if (HARD_REGISTER_P (x)
14840 && !HARD_REGISTER_P (pic_offset_table_rtx)
14841 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14842 return true;
14843 return false;
14845 else
14846 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14849 /* Helper function for ix86_delegitimize_address.
14850 Attempt to delegitimize TLS local-exec accesses. */
14852 static rtx
14853 ix86_delegitimize_tls_address (rtx orig_x)
14855 rtx x = orig_x, unspec;
14856 struct ix86_address addr;
14858 if (!TARGET_TLS_DIRECT_SEG_REFS)
14859 return orig_x;
14860 if (MEM_P (x))
14861 x = XEXP (x, 0);
14862 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14863 return orig_x;
14864 if (ix86_decompose_address (x, &addr) == 0
14865 || addr.seg != DEFAULT_TLS_SEG_REG
14866 || addr.disp == NULL_RTX
14867 || GET_CODE (addr.disp) != CONST)
14868 return orig_x;
14869 unspec = XEXP (addr.disp, 0);
14870 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14871 unspec = XEXP (unspec, 0);
14872 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14873 return orig_x;
14874 x = XVECEXP (unspec, 0, 0);
14875 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14876 if (unspec != XEXP (addr.disp, 0))
14877 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14878 if (addr.index)
14880 rtx idx = addr.index;
14881 if (addr.scale != 1)
14882 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14883 x = gen_rtx_PLUS (Pmode, idx, x);
14885 if (addr.base)
14886 x = gen_rtx_PLUS (Pmode, addr.base, x);
14887 if (MEM_P (orig_x))
14888 x = replace_equiv_address_nv (orig_x, x);
14889 return x;
14892 /* In the name of slightly smaller debug output, and to cater to
14893 general assembler lossage, recognize PIC+GOTOFF and turn it back
14894 into a direct symbol reference.
14896 On Darwin, this is necessary to avoid a crash, because Darwin
14897 has a different PIC label for each routine but the DWARF debugging
14898 information is not associated with any particular routine, so it's
14899 necessary to remove references to the PIC label from RTL stored by
14900 the DWARF output code. */
14902 static rtx
14903 ix86_delegitimize_address (rtx x)
14905 rtx orig_x = delegitimize_mem_from_attrs (x);
14906 /* addend is NULL or some rtx if x is something+GOTOFF where
14907 something doesn't include the PIC register. */
14908 rtx addend = NULL_RTX;
14909 /* reg_addend is NULL or a multiple of some register. */
14910 rtx reg_addend = NULL_RTX;
14911 /* const_addend is NULL or a const_int. */
14912 rtx const_addend = NULL_RTX;
14913 /* This is the result, or NULL. */
14914 rtx result = NULL_RTX;
14916 x = orig_x;
14918 if (MEM_P (x))
14919 x = XEXP (x, 0);
14921 if (TARGET_64BIT)
14923 if (GET_CODE (x) == CONST
14924 && GET_CODE (XEXP (x, 0)) == PLUS
14925 && GET_MODE (XEXP (x, 0)) == Pmode
14926 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14928 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14930 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14931 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14932 if (MEM_P (orig_x))
14933 x = replace_equiv_address_nv (orig_x, x);
14934 return x;
14937 if (GET_CODE (x) == CONST
14938 && GET_CODE (XEXP (x, 0)) == UNSPEC
14939 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14940 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14941 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14943 x = XVECEXP (XEXP (x, 0), 0, 0);
14944 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14946 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14947 GET_MODE (x), 0);
14948 if (x == NULL_RTX)
14949 return orig_x;
14951 return x;
14954 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14955 return ix86_delegitimize_tls_address (orig_x);
14957 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14958 and -mcmodel=medium -fpic. */
14961 if (GET_CODE (x) != PLUS
14962 || GET_CODE (XEXP (x, 1)) != CONST)
14963 return ix86_delegitimize_tls_address (orig_x);
14965 if (ix86_pic_register_p (XEXP (x, 0)))
14966 /* %ebx + GOT/GOTOFF */
14968 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14970 /* %ebx + %reg * scale + GOT/GOTOFF */
14971 reg_addend = XEXP (x, 0);
14972 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14973 reg_addend = XEXP (reg_addend, 1);
14974 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14975 reg_addend = XEXP (reg_addend, 0);
14976 else
14978 reg_addend = NULL_RTX;
14979 addend = XEXP (x, 0);
14982 else
14983 addend = XEXP (x, 0);
14985 x = XEXP (XEXP (x, 1), 0);
14986 if (GET_CODE (x) == PLUS
14987 && CONST_INT_P (XEXP (x, 1)))
14989 const_addend = XEXP (x, 1);
14990 x = XEXP (x, 0);
14993 if (GET_CODE (x) == UNSPEC
14994 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14995 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14996 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14997 && !MEM_P (orig_x) && !addend)))
14998 result = XVECEXP (x, 0, 0);
15000 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15001 && !MEM_P (orig_x))
15002 result = XVECEXP (x, 0, 0);
15004 if (! result)
15005 return ix86_delegitimize_tls_address (orig_x);
15007 if (const_addend)
15008 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15009 if (reg_addend)
15010 result = gen_rtx_PLUS (Pmode, reg_addend, result);
15011 if (addend)
15013 /* If the rest of original X doesn't involve the PIC register, add
15014 addend and subtract pic_offset_table_rtx. This can happen e.g.
15015 for code like:
15016 leal (%ebx, %ecx, 4), %ecx
15018 movl foo@GOTOFF(%ecx), %edx
15019 in which case we return (%ecx - %ebx) + foo
15020 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15021 and reload has completed. */
15022 if (pic_offset_table_rtx
15023 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15024 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15025 pic_offset_table_rtx),
15026 result);
15027 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15029 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15030 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15031 result = gen_rtx_PLUS (Pmode, tmp, result);
15033 else
15034 return orig_x;
15036 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15038 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15039 if (result == NULL_RTX)
15040 return orig_x;
15042 return result;
15045 /* If X is a machine specific address (i.e. a symbol or label being
15046 referenced as a displacement from the GOT implemented using an
15047 UNSPEC), then return the base term. Otherwise return X. */
15050 ix86_find_base_term (rtx x)
15052 rtx term;
15054 if (TARGET_64BIT)
15056 if (GET_CODE (x) != CONST)
15057 return x;
15058 term = XEXP (x, 0);
15059 if (GET_CODE (term) == PLUS
15060 && CONST_INT_P (XEXP (term, 1)))
15061 term = XEXP (term, 0);
15062 if (GET_CODE (term) != UNSPEC
15063 || (XINT (term, 1) != UNSPEC_GOTPCREL
15064 && XINT (term, 1) != UNSPEC_PCREL))
15065 return x;
15067 return XVECEXP (term, 0, 0);
15070 return ix86_delegitimize_address (x);
15073 static void
15074 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15075 bool fp, FILE *file)
15077 const char *suffix;
15079 if (mode == CCFPmode || mode == CCFPUmode)
15081 code = ix86_fp_compare_code_to_integer (code);
15082 mode = CCmode;
15084 if (reverse)
15085 code = reverse_condition (code);
15087 switch (code)
15089 case EQ:
15090 switch (mode)
15092 case CCAmode:
15093 suffix = "a";
15094 break;
15095 case CCCmode:
15096 suffix = "c";
15097 break;
15098 case CCOmode:
15099 suffix = "o";
15100 break;
15101 case CCPmode:
15102 suffix = "p";
15103 break;
15104 case CCSmode:
15105 suffix = "s";
15106 break;
15107 default:
15108 suffix = "e";
15109 break;
15111 break;
15112 case NE:
15113 switch (mode)
15115 case CCAmode:
15116 suffix = "na";
15117 break;
15118 case CCCmode:
15119 suffix = "nc";
15120 break;
15121 case CCOmode:
15122 suffix = "no";
15123 break;
15124 case CCPmode:
15125 suffix = "np";
15126 break;
15127 case CCSmode:
15128 suffix = "ns";
15129 break;
15130 default:
15131 suffix = "ne";
15132 break;
15134 break;
15135 case GT:
15136 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15137 suffix = "g";
15138 break;
15139 case GTU:
15140 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15141 Those same assemblers have the same but opposite lossage on cmov. */
15142 if (mode == CCmode)
15143 suffix = fp ? "nbe" : "a";
15144 else
15145 gcc_unreachable ();
15146 break;
15147 case LT:
15148 switch (mode)
15150 case CCNOmode:
15151 case CCGOCmode:
15152 suffix = "s";
15153 break;
15155 case CCmode:
15156 case CCGCmode:
15157 suffix = "l";
15158 break;
15160 default:
15161 gcc_unreachable ();
15163 break;
15164 case LTU:
15165 if (mode == CCmode)
15166 suffix = "b";
15167 else if (mode == CCCmode)
15168 suffix = fp ? "b" : "c";
15169 else
15170 gcc_unreachable ();
15171 break;
15172 case GE:
15173 switch (mode)
15175 case CCNOmode:
15176 case CCGOCmode:
15177 suffix = "ns";
15178 break;
15180 case CCmode:
15181 case CCGCmode:
15182 suffix = "ge";
15183 break;
15185 default:
15186 gcc_unreachable ();
15188 break;
15189 case GEU:
15190 if (mode == CCmode)
15191 suffix = "nb";
15192 else if (mode == CCCmode)
15193 suffix = fp ? "nb" : "nc";
15194 else
15195 gcc_unreachable ();
15196 break;
15197 case LE:
15198 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15199 suffix = "le";
15200 break;
15201 case LEU:
15202 if (mode == CCmode)
15203 suffix = "be";
15204 else
15205 gcc_unreachable ();
15206 break;
15207 case UNORDERED:
15208 suffix = fp ? "u" : "p";
15209 break;
15210 case ORDERED:
15211 suffix = fp ? "nu" : "np";
15212 break;
15213 default:
15214 gcc_unreachable ();
15216 fputs (suffix, file);
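/* Editorial note, not part of the original source: with the suffixes chosen
   above, an output template such as "j%C1\t%l0" prints "je", "jne", "jl",
   "jge", "jb" and so on depending on the comparison, while "%c1" prints the
   reversed condition.  */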
15219 /* Print the name of register X to FILE based on its machine mode and number.
15220 If CODE is 'w', pretend the mode is HImode.
15221 If CODE is 'b', pretend the mode is QImode.
15222 If CODE is 'k', pretend the mode is SImode.
15223 If CODE is 'q', pretend the mode is DImode.
15224 If CODE is 'x', pretend the mode is V4SFmode.
15225 If CODE is 't', pretend the mode is V8SFmode.
15226 If CODE is 'g', pretend the mode is V16SFmode.
15227 If CODE is 'h', pretend the reg is the 'high' byte register.
15228 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15229 If CODE is 'd', duplicate the operand for AVX instruction.
15232 void
15233 print_reg (rtx x, int code, FILE *file)
15235 const char *reg;
15236 int msize;
15237 unsigned int regno;
15238 bool duplicated;
15240 if (ASSEMBLER_DIALECT == ASM_ATT)
15241 putc ('%', file);
15243 if (x == pc_rtx)
15245 gcc_assert (TARGET_64BIT);
15246 fputs ("rip", file);
15247 return;
15250 if (code == 'y' && STACK_TOP_P (x))
15252 fputs ("st(0)", file);
15253 return;
15256 if (code == 'w')
15257 msize = 2;
15258 else if (code == 'b')
15259 msize = 1;
15260 else if (code == 'k')
15261 msize = 4;
15262 else if (code == 'q')
15263 msize = 8;
15264 else if (code == 'h')
15265 msize = 0;
15266 else if (code == 'x')
15267 msize = 16;
15268 else if (code == 't')
15269 msize = 32;
15270 else if (code == 'g')
15271 msize = 64;
15272 else
15273 msize = GET_MODE_SIZE (GET_MODE (x));
15275 regno = true_regnum (x);
15277 gcc_assert (regno != ARG_POINTER_REGNUM
15278 && regno != FRAME_POINTER_REGNUM
15279 && regno != FLAGS_REG
15280 && regno != FPSR_REG
15281 && regno != FPCR_REG);
15283 duplicated = code == 'd' && TARGET_AVX;
15285 switch (msize)
15287 case 8:
15288 case 4:
15289 if (LEGACY_INT_REGNO_P (regno))
15290 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
15291 case 16:
15292 case 12:
15293 case 2:
15294 normal:
15295 reg = hi_reg_name[regno];
15296 break;
15297 case 1:
15298 if (regno >= ARRAY_SIZE (qi_reg_name))
15299 goto normal;
15300 reg = qi_reg_name[regno];
15301 break;
15302 case 0:
15303 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15304 goto normal;
15305 reg = qi_high_reg_name[regno];
15306 break;
15307 case 32:
15308 case 64:
15309 if (SSE_REGNO_P (regno))
15311 gcc_assert (!duplicated);
15312 putc (msize == 32 ? 'y' : 'z', file);
15313 reg = hi_reg_name[regno] + 1;
15314 break;
15316 goto normal;
15317 default:
15318 gcc_unreachable ();
15321 fputs (reg, file);
15323 /* Irritatingly, AMD extended registers use a
15324 different naming convention: "r%d[bwd]". */
15325 if (REX_INT_REGNO_P (regno))
15327 gcc_assert (TARGET_64BIT);
15328 switch (msize)
15330 case 0:
15331 error ("extended registers have no high halves");
15332 break;
15333 case 1:
15334 putc ('b', file);
15335 break;
15336 case 2:
15337 putc ('w', file);
15338 break;
15339 case 4:
15340 putc ('d', file);
15341 break;
15342 case 8:
15343 /* no suffix */
15344 break;
15345 default:
15346 error ("unsupported operand size for extended register");
15347 break;
15349 return;
15352 if (duplicated)
15354 if (ASSEMBLER_DIALECT == ASM_ATT)
15355 fprintf (file, ", %%%s", reg);
15356 else
15357 fprintf (file, ", %s", reg);
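/* Editorial sketch, not part of the original source: the operand size each
   print_reg code above selects.  For register 0, code 'b' prints %al, 'w'
   prints %ax, 'k' prints %eax and 'q' prints %rax.  The helper name is
   hypothetical.  */
static inline int
sketch_print_reg_code_size (int code)
{
  switch (code)
    {
    case 'b': return 1;		/* QImode name */
    case 'w': return 2;		/* HImode name */
    case 'k': return 4;		/* SImode name */
    case 'q': return 8;		/* DImode name */
    case 'x': return 16;	/* V4SFmode name */
    case 't': return 32;	/* V8SFmode name */
    case 'g': return 64;	/* V16SFmode name */
    case 'h': return 0;		/* high byte register */
    default: return -1;		/* use the operand's own mode */
    }
}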
15361 /* Meaning of CODE:
15362 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15363 C -- print opcode suffix for set/cmov insn.
15364 c -- like C, but print reversed condition
15365 F,f -- likewise, but for floating-point.
15366 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15367 otherwise nothing
15368 R -- print embedded rounding and sae.
15369 r -- print only sae.
15370 z -- print the opcode suffix for the size of the current operand.
15371 Z -- likewise, with special suffixes for x87 instructions.
15372 * -- print a star (in certain assembler syntax)
15373 A -- print an absolute memory reference.
15374 E -- print address with DImode register names if TARGET_64BIT.
15375 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15376 s -- print a shift double count, followed by the assemblers argument
15377 delimiter.
15378 b -- print the QImode name of the register for the indicated operand.
15379 %b0 would print %al if operands[0] is reg 0.
15380 w -- likewise, print the HImode name of the register.
15381 k -- likewise, print the SImode name of the register.
15382 q -- likewise, print the DImode name of the register.
15383 x -- likewise, print the V4SFmode name of the register.
15384 t -- likewise, print the V8SFmode name of the register.
15385 g -- likewise, print the V16SFmode name of the register.
15386 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15387 y -- print "st(0)" instead of "st" as a register.
15388 d -- print duplicated register operand for AVX instruction.
15389 D -- print condition for SSE cmp instruction.
15390 P -- if PIC, print an @PLT suffix.
15391 p -- print raw symbol name.
15392 X -- don't print any sort of PIC '@' suffix for a symbol.
15393 & -- print some in-use local-dynamic symbol name.
15394 H -- print a memory address offset by 8; used for sse high-parts
15395 Y -- print condition for XOP pcom* instruction.
15396 + -- print a branch hint as 'cs' or 'ds' prefix
15397 ; -- print a semicolon (after prefixes, due to a bug in older gas).
15398 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15399 @ -- print a segment register of thread base pointer load
15400 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15401 ! -- print MPX prefix for jxx/call/ret instructions if required.
15404 void
15405 ix86_print_operand (FILE *file, rtx x, int code)
15407 if (code)
15409 switch (code)
15411 case 'A':
15412 switch (ASSEMBLER_DIALECT)
15414 case ASM_ATT:
15415 putc ('*', file);
15416 break;
15418 case ASM_INTEL:
15419 /* Intel syntax. For absolute addresses, registers should not
15420 be surrounded by brackets. */
15421 if (!REG_P (x))
15423 putc ('[', file);
15424 ix86_print_operand (file, x, 0);
15425 putc (']', file);
15426 return;
15428 break;
15430 default:
15431 gcc_unreachable ();
15434 ix86_print_operand (file, x, 0);
15435 return;
15437 case 'E':
15438 /* Wrap address in an UNSPEC to declare special handling. */
15439 if (TARGET_64BIT)
15440 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15442 output_address (x);
15443 return;
15445 case 'L':
15446 if (ASSEMBLER_DIALECT == ASM_ATT)
15447 putc ('l', file);
15448 return;
15450 case 'W':
15451 if (ASSEMBLER_DIALECT == ASM_ATT)
15452 putc ('w', file);
15453 return;
15455 case 'B':
15456 if (ASSEMBLER_DIALECT == ASM_ATT)
15457 putc ('b', file);
15458 return;
15460 case 'Q':
15461 if (ASSEMBLER_DIALECT == ASM_ATT)
15462 putc ('l', file);
15463 return;
15465 case 'S':
15466 if (ASSEMBLER_DIALECT == ASM_ATT)
15467 putc ('s', file);
15468 return;
15470 case 'T':
15471 if (ASSEMBLER_DIALECT == ASM_ATT)
15472 putc ('t', file);
15473 return;
15475 case 'O':
15476 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15477 if (ASSEMBLER_DIALECT != ASM_ATT)
15478 return;
15480 switch (GET_MODE_SIZE (GET_MODE (x)))
15482 case 2:
15483 putc ('w', file);
15484 break;
15486 case 4:
15487 putc ('l', file);
15488 break;
15490 case 8:
15491 putc ('q', file);
15492 break;
15494 default:
15495 output_operand_lossage
15496 ("invalid operand size for operand code 'O'");
15497 return;
15500 putc ('.', file);
15501 #endif
15502 return;
15504 case 'z':
15505 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15507 /* Opcodes don't get size suffixes when using Intel syntax. */
15508 if (ASSEMBLER_DIALECT == ASM_INTEL)
15509 return;
15511 switch (GET_MODE_SIZE (GET_MODE (x)))
15513 case 1:
15514 putc ('b', file);
15515 return;
15517 case 2:
15518 putc ('w', file);
15519 return;
15521 case 4:
15522 putc ('l', file);
15523 return;
15525 case 8:
15526 putc ('q', file);
15527 return;
15529 default:
15530 output_operand_lossage
15531 ("invalid operand size for operand code 'z'");
15532 return;
15536 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15537 warning
15538 (0, "non-integer operand used with operand code 'z'");
15539 /* FALLTHRU */
15541 case 'Z':
15542 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15543 if (ASSEMBLER_DIALECT == ASM_INTEL)
15544 return;
15546 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15548 switch (GET_MODE_SIZE (GET_MODE (x)))
15550 case 2:
15551 #ifdef HAVE_AS_IX86_FILDS
15552 putc ('s', file);
15553 #endif
15554 return;
15556 case 4:
15557 putc ('l', file);
15558 return;
15560 case 8:
15561 #ifdef HAVE_AS_IX86_FILDQ
15562 putc ('q', file);
15563 #else
15564 fputs ("ll", file);
15565 #endif
15566 return;
15568 default:
15569 break;
15572 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15574 /* 387 opcodes don't get size suffixes
15575 if the operands are registers. */
15576 if (STACK_REG_P (x))
15577 return;
15579 switch (GET_MODE_SIZE (GET_MODE (x)))
15581 case 4:
15582 putc ('s', file);
15583 return;
15585 case 8:
15586 putc ('l', file);
15587 return;
15589 case 12:
15590 case 16:
15591 putc ('t', file);
15592 return;
15594 default:
15595 break;
15598 else
15600 output_operand_lossage
15601 ("invalid operand type used with operand code 'Z'");
15602 return;
15605 output_operand_lossage
15606 ("invalid operand size for operand code 'Z'");
15607 return;
15609 case 'd':
15610 case 'b':
15611 case 'w':
15612 case 'k':
15613 case 'q':
15614 case 'h':
15615 case 't':
15616 case 'g':
15617 case 'y':
15618 case 'x':
15619 case 'X':
15620 case 'P':
15621 case 'p':
15622 break;
15624 case 's':
15625 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15627 ix86_print_operand (file, x, 0);
15628 fputs (", ", file);
15630 return;
15632 case 'Y':
15633 switch (GET_CODE (x))
15635 case NE:
15636 fputs ("neq", file);
15637 break;
15638 case EQ:
15639 fputs ("eq", file);
15640 break;
15641 case GE:
15642 case GEU:
15643 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15644 break;
15645 case GT:
15646 case GTU:
15647 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15648 break;
15649 case LE:
15650 case LEU:
15651 fputs ("le", file);
15652 break;
15653 case LT:
15654 case LTU:
15655 fputs ("lt", file);
15656 break;
15657 case UNORDERED:
15658 fputs ("unord", file);
15659 break;
15660 case ORDERED:
15661 fputs ("ord", file);
15662 break;
15663 case UNEQ:
15664 fputs ("ueq", file);
15665 break;
15666 case UNGE:
15667 fputs ("nlt", file);
15668 break;
15669 case UNGT:
15670 fputs ("nle", file);
15671 break;
15672 case UNLE:
15673 fputs ("ule", file);
15674 break;
15675 case UNLT:
15676 fputs ("ult", file);
15677 break;
15678 case LTGT:
15679 fputs ("une", file);
15680 break;
15681 default:
15682 output_operand_lossage ("operand is not a condition code, "
15683 "invalid operand code 'Y'");
15684 return;
15686 return;
15688 case 'D':
15689 /* Little bit of braindamage here. The SSE compare instructions
15690 use completely different names for the comparisons than the
15691 fp conditional moves do. */
15692 switch (GET_CODE (x))
15694 case UNEQ:
15695 if (TARGET_AVX)
15697 fputs ("eq_us", file);
15698 break;
15700 case EQ:
15701 fputs ("eq", file);
15702 break;
15703 case UNLT:
15704 if (TARGET_AVX)
15706 fputs ("nge", file);
15707 break;
15709 case LT:
15710 fputs ("lt", file);
15711 break;
15712 case UNLE:
15713 if (TARGET_AVX)
15715 fputs ("ngt", file);
15716 break;
15718 case LE:
15719 fputs ("le", file);
15720 break;
15721 case UNORDERED:
15722 fputs ("unord", file);
15723 break;
15724 case LTGT:
15725 if (TARGET_AVX)
15727 fputs ("neq_oq", file);
15728 break;
15730 case NE:
15731 fputs ("neq", file);
15732 break;
15733 case GE:
15734 if (TARGET_AVX)
15736 fputs ("ge", file);
15737 break;
15739 case UNGE:
15740 fputs ("nlt", file);
15741 break;
15742 case GT:
15743 if (TARGET_AVX)
15745 fputs ("gt", file);
15746 break;
15748 case UNGT:
15749 fputs ("nle", file);
15750 break;
15751 case ORDERED:
15752 fputs ("ord", file);
15753 break;
15754 default:
15755 output_operand_lossage ("operand is not a condition code, "
15756 "invalid operand code 'D'");
15757 return;
15759 return;
15761 case 'F':
15762 case 'f':
15763 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15764 if (ASSEMBLER_DIALECT == ASM_ATT)
15765 putc ('.', file);
15766 #endif
15768 case 'C':
15769 case 'c':
15770 if (!COMPARISON_P (x))
15772 output_operand_lossage ("operand is not a condition code, "
15773 "invalid operand code '%c'", code);
15774 return;
15776 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15777 code == 'c' || code == 'f',
15778 code == 'F' || code == 'f',
15779 file);
15780 return;
15782 case 'H':
15783 if (!offsettable_memref_p (x))
15785 output_operand_lossage ("operand is not an offsettable memory "
15786 "reference, invalid operand code 'H'");
15787 return;
15789 /* It doesn't actually matter what mode we use here, as we're
15790 only going to use this for printing. */
15791 x = adjust_address_nv (x, DImode, 8);
15792 /* Output 'qword ptr' for intel assembler dialect. */
15793 if (ASSEMBLER_DIALECT == ASM_INTEL)
15794 code = 'q';
15795 break;
15797 case 'K':
15798 gcc_assert (CONST_INT_P (x));
15800 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15801 #ifdef HAVE_AS_IX86_HLE
15802 fputs ("xacquire ", file);
15803 #else
15804 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15805 #endif
15806 else if (INTVAL (x) & IX86_HLE_RELEASE)
15807 #ifdef HAVE_AS_IX86_HLE
15808 fputs ("xrelease ", file);
15809 #else
15810 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15811 #endif
15812 /* We do not want to print the value of the operand. */
15813 return;
15815 case 'N':
15816 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15817 fputs ("{z}", file);
15818 return;
15820 case 'r':
15821 gcc_assert (CONST_INT_P (x));
15822 gcc_assert (INTVAL (x) == ROUND_SAE);
15824 if (ASSEMBLER_DIALECT == ASM_INTEL)
15825 fputs (", ", file);
15827 fputs ("{sae}", file);
15829 if (ASSEMBLER_DIALECT == ASM_ATT)
15830 fputs (", ", file);
15832 return;
15834 case 'R':
15835 gcc_assert (CONST_INT_P (x));
15837 if (ASSEMBLER_DIALECT == ASM_INTEL)
15838 fputs (", ", file);
15840 switch (INTVAL (x))
15842 case ROUND_NEAREST_INT | ROUND_SAE:
15843 fputs ("{rn-sae}", file);
15844 break;
15845 case ROUND_NEG_INF | ROUND_SAE:
15846 fputs ("{rd-sae}", file);
15847 break;
15848 case ROUND_POS_INF | ROUND_SAE:
15849 fputs ("{ru-sae}", file);
15850 break;
15851 case ROUND_ZERO | ROUND_SAE:
15852 fputs ("{rz-sae}", file);
15853 break;
15854 default:
15855 gcc_unreachable ();
15858 if (ASSEMBLER_DIALECT == ASM_ATT)
15859 fputs (", ", file);
15861 return;
15863 case '*':
15864 if (ASSEMBLER_DIALECT == ASM_ATT)
15865 putc ('*', file);
15866 return;
15868 case '&':
15870 const char *name = get_some_local_dynamic_name ();
15871 if (name == NULL)
15872 output_operand_lossage ("'%%&' used without any "
15873 "local dynamic TLS references");
15874 else
15875 assemble_name (file, name);
15876 return;
15879 case '+':
15881 rtx x;
15883 if (!optimize
15884 || optimize_function_for_size_p (cfun)
15885 || !TARGET_BRANCH_PREDICTION_HINTS)
15886 return;
15888 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15889 if (x)
15891 int pred_val = XINT (x, 0);
15893 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15894 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15896 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15897 bool cputaken
15898 = final_forward_branch_p (current_output_insn) == 0;
15900 /* Emit hints only in the cases where the default branch prediction
15901 heuristics would fail. */
15902 if (taken != cputaken)
15904 /* We use 3e (DS) prefix for taken branches and
15905 2e (CS) prefix for not taken branches. */
15906 if (taken)
15907 fputs ("ds ; ", file);
15908 else
15909 fputs ("cs ; ", file);
15913 return;
15916 case ';':
15917 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15918 putc (';', file);
15919 #endif
15920 return;
15922 case '@':
15923 if (ASSEMBLER_DIALECT == ASM_ATT)
15924 putc ('%', file);
15926 /* The kernel uses a different segment register for performance
15927 reasons; this way a system call does not have to trash the userspace
15928 segment register, which would be expensive. */
15929 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15930 fputs ("fs", file);
15931 else
15932 fputs ("gs", file);
15933 return;
15935 case '~':
15936 putc (TARGET_AVX2 ? 'i' : 'f', file);
15937 return;
15939 case '^':
15940 if (TARGET_64BIT && Pmode != word_mode)
15941 fputs ("addr32 ", file);
15942 return;
15944 case '!':
15945 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15946 fputs ("bnd ", file);
15947 return;
15949 default:
15950 output_operand_lossage ("invalid operand code '%c'", code);
15954 if (REG_P (x))
15955 print_reg (x, code, file);
15957 else if (MEM_P (x))
15959 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15960 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15961 && GET_MODE (x) != BLKmode)
15963 const char * size;
15964 switch (GET_MODE_SIZE (GET_MODE (x)))
15966 case 1: size = "BYTE"; break;
15967 case 2: size = "WORD"; break;
15968 case 4: size = "DWORD"; break;
15969 case 8: size = "QWORD"; break;
15970 case 12: size = "TBYTE"; break;
15971 case 16:
15972 if (GET_MODE (x) == XFmode)
15973 size = "TBYTE";
15974 else
15975 size = "XMMWORD";
15976 break;
15977 case 32: size = "YMMWORD"; break;
15978 case 64: size = "ZMMWORD"; break;
15979 default:
15980 gcc_unreachable ();
15983 /* Check for explicit size override (codes 'b', 'w', 'k',
15984 'q' and 'x') */
15985 if (code == 'b')
15986 size = "BYTE";
15987 else if (code == 'w')
15988 size = "WORD";
15989 else if (code == 'k')
15990 size = "DWORD";
15991 else if (code == 'q')
15992 size = "QWORD";
15993 else if (code == 'x')
15994 size = "XMMWORD";
15996 fputs (size, file);
15997 fputs (" PTR ", file);
16000 x = XEXP (x, 0);
16001 /* Avoid (%rip) for call operands. */
16002 if (CONSTANT_ADDRESS_P (x) && code == 'P'
16003 && !CONST_INT_P (x))
16004 output_addr_const (file, x);
16005 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16006 output_operand_lossage ("invalid constraints for operand");
16007 else
16008 output_address (x);
16011 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
16013 REAL_VALUE_TYPE r;
16014 long l;
16016 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16017 REAL_VALUE_TO_TARGET_SINGLE (r, l);
16019 if (ASSEMBLER_DIALECT == ASM_ATT)
16020 putc ('$', file);
16021 /* Sign extend 32bit SFmode immediate to 8 bytes. */
16022 if (code == 'q')
16023 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16024 (unsigned long long) (int) l);
16025 else
16026 fprintf (file, "0x%08x", (unsigned int) l);
16029 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
16031 REAL_VALUE_TYPE r;
16032 long l[2];
16034 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16035 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16037 if (ASSEMBLER_DIALECT == ASM_ATT)
16038 putc ('$', file);
16039 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16042 /* These float cases don't actually occur as immediate operands. */
16043 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
16045 char dstr[30];
16047 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16048 fputs (dstr, file);
16051 else
16053 /* We have patterns that allow zero sets of memory, for instance.
16054 In 64-bit mode, we should probably support all 8-byte vectors,
16055 since we can in fact encode that into an immediate. */
16056 if (GET_CODE (x) == CONST_VECTOR)
16058 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16059 x = const0_rtx;
16062 if (code != 'P' && code != 'p')
16064 if (CONST_INT_P (x))
16066 if (ASSEMBLER_DIALECT == ASM_ATT)
16067 putc ('$', file);
16069 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16070 || GET_CODE (x) == LABEL_REF)
16072 if (ASSEMBLER_DIALECT == ASM_ATT)
16073 putc ('$', file);
16074 else
16075 fputs ("OFFSET FLAT:", file);
16078 if (CONST_INT_P (x))
16079 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16080 else if (flag_pic || MACHOPIC_INDIRECT)
16081 output_pic_addr_const (file, x, code);
16082 else
16083 output_addr_const (file, x);
16087 static bool
16088 ix86_print_operand_punct_valid_p (unsigned char code)
16090 return (code == '@' || code == '*' || code == '+' || code == '&'
16091 || code == ';' || code == '~' || code == '^' || code == '!');
16094 /* Print a memory operand whose address is ADDR. */
16096 static void
16097 ix86_print_operand_address (FILE *file, rtx addr)
16099 struct ix86_address parts;
16100 rtx base, index, disp;
16101 int scale;
16102 int ok;
16103 bool vsib = false;
16104 int code = 0;
16106 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16108 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16109 gcc_assert (parts.index == NULL_RTX);
16110 parts.index = XVECEXP (addr, 0, 1);
16111 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16112 addr = XVECEXP (addr, 0, 0);
16113 vsib = true;
16115 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16117 gcc_assert (TARGET_64BIT);
16118 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16119 code = 'q';
16121 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16123 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16124 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16125 if (parts.base != NULL_RTX)
16127 parts.index = parts.base;
16128 parts.scale = 1;
16130 parts.base = XVECEXP (addr, 0, 0);
16131 addr = XVECEXP (addr, 0, 0);
16133 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16135 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16136 gcc_assert (parts.index == NULL_RTX);
16137 parts.index = XVECEXP (addr, 0, 1);
16138 addr = XVECEXP (addr, 0, 0);
16140 else
16141 ok = ix86_decompose_address (addr, &parts);
16143 gcc_assert (ok);
16145 base = parts.base;
16146 index = parts.index;
16147 disp = parts.disp;
16148 scale = parts.scale;
16150 switch (parts.seg)
16152 case SEG_DEFAULT:
16153 break;
16154 case SEG_FS:
16155 case SEG_GS:
16156 if (ASSEMBLER_DIALECT == ASM_ATT)
16157 putc ('%', file);
16158 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16159 break;
16160 default:
16161 gcc_unreachable ();
16164 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16165 if (TARGET_64BIT && !base && !index)
16167 rtx symbol = disp;
16169 if (GET_CODE (disp) == CONST
16170 && GET_CODE (XEXP (disp, 0)) == PLUS
16171 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16172 symbol = XEXP (XEXP (disp, 0), 0);
16174 if (GET_CODE (symbol) == LABEL_REF
16175 || (GET_CODE (symbol) == SYMBOL_REF
16176 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16177 base = pc_rtx;
16179 if (!base && !index)
16181 /* A displacement-only address requires special attention. */
16183 if (CONST_INT_P (disp))
16185 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16186 fputs ("ds:", file);
16187 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16189 else if (flag_pic)
16190 output_pic_addr_const (file, disp, 0);
16191 else
16192 output_addr_const (file, disp);
16194 else
16196 /* Print SImode register names to force addr32 prefix. */
16197 if (SImode_address_operand (addr, VOIDmode))
16199 #ifdef ENABLE_CHECKING
16200 gcc_assert (TARGET_64BIT);
16201 switch (GET_CODE (addr))
16203 case SUBREG:
16204 gcc_assert (GET_MODE (addr) == SImode);
16205 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16206 break;
16207 case ZERO_EXTEND:
16208 case AND:
16209 gcc_assert (GET_MODE (addr) == DImode);
16210 break;
16211 default:
16212 gcc_unreachable ();
16214 #endif
16215 gcc_assert (!code);
16216 code = 'k';
16218 else if (code == 0
16219 && TARGET_X32
16220 && disp
16221 && CONST_INT_P (disp)
16222 && INTVAL (disp) < -16*1024*1024)
16224 /* X32 runs in 64-bit mode, where displacement, DISP, in
16225 address DISP(%r64), is encoded as 32-bit immediate sign-
16226 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16227 address is %r64 + 0xffffffffbffffd00. When %r64 <
16228 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16229 which is invalid for x32. The correct address is %r64
16230 - 0x40000300 == 0xf7ffdd64. To properly encode
16231 -0x40000300(%r64) for x32, we zero-extend negative
16232 displacement by forcing addr32 prefix which truncates
16233 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16234 zero-extend all negative displacements, including -1(%rsp).
16235 However, for small negative displacements, sign-extension
16236 won't cause overflow. We only zero-extend negative
16237 displacements if they < -16*1024*1024, which is also used
16238 to check legitimate address displacements for PIC. */
16239 code = 'k';
16242 if (ASSEMBLER_DIALECT == ASM_ATT)
16244 if (disp)
16246 if (flag_pic)
16247 output_pic_addr_const (file, disp, 0);
16248 else if (GET_CODE (disp) == LABEL_REF)
16249 output_asm_label (disp);
16250 else
16251 output_addr_const (file, disp);
16254 putc ('(', file);
16255 if (base)
16256 print_reg (base, code, file);
16257 if (index)
16259 putc (',', file);
16260 print_reg (index, vsib ? 0 : code, file);
16261 if (scale != 1 || vsib)
16262 fprintf (file, ",%d", scale);
16264 putc (')', file);
16266 else
16268 rtx offset = NULL_RTX;
16270 if (disp)
16272 /* Pull out the offset of a symbol; print any symbol itself. */
16273 if (GET_CODE (disp) == CONST
16274 && GET_CODE (XEXP (disp, 0)) == PLUS
16275 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16277 offset = XEXP (XEXP (disp, 0), 1);
16278 disp = gen_rtx_CONST (VOIDmode,
16279 XEXP (XEXP (disp, 0), 0));
16282 if (flag_pic)
16283 output_pic_addr_const (file, disp, 0);
16284 else if (GET_CODE (disp) == LABEL_REF)
16285 output_asm_label (disp);
16286 else if (CONST_INT_P (disp))
16287 offset = disp;
16288 else
16289 output_addr_const (file, disp);
16292 putc ('[', file);
16293 if (base)
16295 print_reg (base, code, file);
16296 if (offset)
16298 if (INTVAL (offset) >= 0)
16299 putc ('+', file);
16300 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16303 else if (offset)
16304 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16305 else
16306 putc ('0', file);
16308 if (index)
16310 putc ('+', file);
16311 print_reg (index, vsib ? 0 : code, file);
16312 if (scale != 1 || vsib)
16313 fprintf (file, "*%d", scale);
16315 putc (']', file);
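/* Editorial sketch, not part of the original source: the two dialect layouts
   ix86_print_operand_address above produces for a base+index*scale+disp
   address, e.g. "-4(%rbp,%rax,4)" in AT&T syntax versus "[rbp+rax*4-4]" in
   Intel syntax.  The helper name and parameters are hypothetical; FILE and
   fprintf come from the stdio support already pulled in by this file.  */
static inline void
sketch_print_address (FILE *f, bool att, const char *base,
		      const char *index, int scale, long disp)
{
  if (att)
    /* AT&T: disp(%base,%index,scale).  */
    fprintf (f, "%ld(%%%s,%%%s,%d)", disp, base, index, scale);
  else
    /* Intel: [base+index*scale+disp].  */
    fprintf (f, "[%s+%s*%d%+ld]", base, index, scale, disp);
}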
16320 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16322 static bool
16323 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16325 rtx op;
16327 if (GET_CODE (x) != UNSPEC)
16328 return false;
16330 op = XVECEXP (x, 0, 0);
16331 switch (XINT (x, 1))
16333 case UNSPEC_GOTTPOFF:
16334 output_addr_const (file, op);
16335 /* FIXME: This might be @TPOFF in Sun ld. */
16336 fputs ("@gottpoff", file);
16337 break;
16338 case UNSPEC_TPOFF:
16339 output_addr_const (file, op);
16340 fputs ("@tpoff", file);
16341 break;
16342 case UNSPEC_NTPOFF:
16343 output_addr_const (file, op);
16344 if (TARGET_64BIT)
16345 fputs ("@tpoff", file);
16346 else
16347 fputs ("@ntpoff", file);
16348 break;
16349 case UNSPEC_DTPOFF:
16350 output_addr_const (file, op);
16351 fputs ("@dtpoff", file);
16352 break;
16353 case UNSPEC_GOTNTPOFF:
16354 output_addr_const (file, op);
16355 if (TARGET_64BIT)
16356 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16357 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16358 else
16359 fputs ("@gotntpoff", file);
16360 break;
16361 case UNSPEC_INDNTPOFF:
16362 output_addr_const (file, op);
16363 fputs ("@indntpoff", file);
16364 break;
16365 #if TARGET_MACHO
16366 case UNSPEC_MACHOPIC_OFFSET:
16367 output_addr_const (file, op);
16368 putc ('-', file);
16369 machopic_output_function_base_name (file);
16370 break;
16371 #endif
16373 case UNSPEC_STACK_CHECK:
16375 int offset;
16377 gcc_assert (flag_split_stack);
16379 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16380 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16381 #else
16382 gcc_unreachable ();
16383 #endif
16385 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16387 break;
16389 default:
16390 return false;
16393 return true;
16396 /* Split one or more double-mode RTL references into pairs of half-mode
16397 references. The RTL can be REG, offsettable MEM, integer constant, or
16398 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16399 split and "num" is its length. lo_half and hi_half are output arrays
16400 that parallel "operands". */
16402 void
16403 split_double_mode (machine_mode mode, rtx operands[],
16404 int num, rtx lo_half[], rtx hi_half[])
16406 machine_mode half_mode;
16407 unsigned int byte;
16409 switch (mode)
16411 case TImode:
16412 half_mode = DImode;
16413 break;
16414 case DImode:
16415 half_mode = SImode;
16416 break;
16417 default:
16418 gcc_unreachable ();
16421 byte = GET_MODE_SIZE (half_mode);
16423 while (num--)
16425 rtx op = operands[num];
16427 /* simplify_subreg refuses to split volatile memory addresses,
16428 but we still have to handle them. */
16429 if (MEM_P (op))
16431 lo_half[num] = adjust_address (op, half_mode, 0);
16432 hi_half[num] = adjust_address (op, half_mode, byte);
16434 else
16436 lo_half[num] = simplify_gen_subreg (half_mode, op,
16437 GET_MODE (op) == VOIDmode
16438 ? mode : GET_MODE (op), 0);
16439 hi_half[num] = simplify_gen_subreg (half_mode, op,
16440 GET_MODE (op) == VOIDmode
16441 ? mode : GET_MODE (op), byte);
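/* Editorial sketch, not part of the original source: the arithmetic that
   split_double_mode above performs when a DImode constant is split into two
   SImode halves, written over plain integers (little-endian lo/hi layout, as
   used here).  The helper name is hypothetical.  */
static inline void
sketch_split_di_constant (unsigned long long value,
			  unsigned int *lo_half, unsigned int *hi_half)
{
  *lo_half = (unsigned int) (value & 0xffffffffULL);
  *hi_half = (unsigned int) (value >> 32);
}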
16446 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16447 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16448 is the expression of the binary operation. The output may either be
16449 emitted here, or returned to the caller, like all output_* functions.
16451 There is no guarantee that the operands are the same mode, as they
16452 might be within FLOAT or FLOAT_EXTEND expressions. */
16454 #ifndef SYSV386_COMPAT
16455 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16456 wants to fix the assemblers because that causes incompatibility
16457 with gcc. No-one wants to fix gcc because that causes
16458 incompatibility with assemblers... You can use the option of
16459 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16460 #define SYSV386_COMPAT 1
16461 #endif
16463 const char *
16464 output_387_binary_op (rtx insn, rtx *operands)
16466 static char buf[40];
16467 const char *p;
16468 const char *ssep;
16469 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16471 #ifdef ENABLE_CHECKING
16472 /* Even if we do not want to check the inputs, this documents the input
16473 constraints, which helps in understanding the following code. */
16474 if (STACK_REG_P (operands[0])
16475 && ((REG_P (operands[1])
16476 && REGNO (operands[0]) == REGNO (operands[1])
16477 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16478 || (REG_P (operands[2])
16479 && REGNO (operands[0]) == REGNO (operands[2])
16480 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16481 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16482 ; /* ok */
16483 else
16484 gcc_assert (is_sse);
16485 #endif
16487 switch (GET_CODE (operands[3]))
16489 case PLUS:
16490 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16491 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16492 p = "fiadd";
16493 else
16494 p = "fadd";
16495 ssep = "vadd";
16496 break;
16498 case MINUS:
16499 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16500 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16501 p = "fisub";
16502 else
16503 p = "fsub";
16504 ssep = "vsub";
16505 break;
16507 case MULT:
16508 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16509 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16510 p = "fimul";
16511 else
16512 p = "fmul";
16513 ssep = "vmul";
16514 break;
16516 case DIV:
16517 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16518 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16519 p = "fidiv";
16520 else
16521 p = "fdiv";
16522 ssep = "vdiv";
16523 break;
16525 default:
16526 gcc_unreachable ();
16529 if (is_sse)
16531 if (TARGET_AVX)
16533 strcpy (buf, ssep);
16534 if (GET_MODE (operands[0]) == SFmode)
16535 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16536 else
16537 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16539 else
16541 strcpy (buf, ssep + 1);
16542 if (GET_MODE (operands[0]) == SFmode)
16543 strcat (buf, "ss\t{%2, %0|%0, %2}");
16544 else
16545 strcat (buf, "sd\t{%2, %0|%0, %2}");
16547 return buf;
16549 strcpy (buf, p);
16551 switch (GET_CODE (operands[3]))
16553 case MULT:
16554 case PLUS:
16555 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16556 std::swap (operands[1], operands[2]);
16558 /* We now know operands[0] == operands[1]. */
16560 if (MEM_P (operands[2]))
16562 p = "%Z2\t%2";
16563 break;
16566 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16568 if (STACK_TOP_P (operands[0]))
16569 /* How is it that we are storing to a dead operand[2]?
16570 Well, presumably operands[1] is dead too. We can't
16571 store the result to st(0) as st(0) gets popped on this
16572 instruction. Instead store to operands[2] (which I
16573 think has to be st(1)). st(1) will be popped later.
16574 gcc <= 2.8.1 didn't have this check and generated
16575 assembly code that the Unixware assembler rejected. */
16576 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16577 else
16578 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16579 break;
16582 if (STACK_TOP_P (operands[0]))
16583 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16584 else
16585 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16586 break;
16588 case MINUS:
16589 case DIV:
16590 if (MEM_P (operands[1]))
16592 p = "r%Z1\t%1";
16593 break;
16596 if (MEM_P (operands[2]))
16598 p = "%Z2\t%2";
16599 break;
16602 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16604 #if SYSV386_COMPAT
16605 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16606 derived assemblers, confusingly reverse the direction of
16607 the operation for fsub{r} and fdiv{r} when the
16608 destination register is not st(0). The Intel assembler
16609 doesn't have this brain damage. Read !SYSV386_COMPAT to
16610 figure out what the hardware really does. */
16611 if (STACK_TOP_P (operands[0]))
16612 p = "{p\t%0, %2|rp\t%2, %0}";
16613 else
16614 p = "{rp\t%2, %0|p\t%0, %2}";
16615 #else
16616 if (STACK_TOP_P (operands[0]))
16617 /* As above for fmul/fadd, we can't store to st(0). */
16618 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16619 else
16620 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16621 #endif
16622 break;
16625 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16627 #if SYSV386_COMPAT
16628 if (STACK_TOP_P (operands[0]))
16629 p = "{rp\t%0, %1|p\t%1, %0}";
16630 else
16631 p = "{p\t%1, %0|rp\t%0, %1}";
16632 #else
16633 if (STACK_TOP_P (operands[0]))
16634 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16635 else
16636 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16637 #endif
16638 break;
16641 if (STACK_TOP_P (operands[0]))
16643 if (STACK_TOP_P (operands[1]))
16644 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16645 else
16646 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16647 break;
16649 else if (STACK_TOP_P (operands[1]))
16651 #if SYSV386_COMPAT
16652 p = "{\t%1, %0|r\t%0, %1}";
16653 #else
16654 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16655 #endif
16657 else
16659 #if SYSV386_COMPAT
16660 p = "{r\t%2, %0|\t%0, %2}";
16661 #else
16662 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16663 #endif
16665 break;
16667 default:
16668 gcc_unreachable ();
16671 strcat (buf, p);
16672 return buf;
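/* Worked example (added for illustration, not part of the original source):
   for an AVX scalar SFmode addition this routine returns
   "vaddss\t{%2, %1, %0|%0, %1, %2}", which the output machinery prints as
   "vaddss %xmm2, %xmm1, %xmm0" in AT&T syntax; without AVX the SSE variant
   is the two-operand "addss\t{%2, %0|%0, %2}".  */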
16675 /* Check if a 256bit AVX register is referenced inside of EXP. */
16677 static bool
16678 ix86_check_avx256_register (const_rtx exp)
16680 if (GET_CODE (exp) == SUBREG)
16681 exp = SUBREG_REG (exp);
16683 return (REG_P (exp)
16684 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16687 /* Return needed mode for entity in optimize_mode_switching pass. */
16689 static int
16690 ix86_avx_u128_mode_needed (rtx_insn *insn)
16692 if (CALL_P (insn))
16694 rtx link;
16696 /* Needed mode is set to AVX_U128_CLEAN if there are
16697 no 256bit modes used in function arguments. */
16698 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16699 link;
16700 link = XEXP (link, 1))
16702 if (GET_CODE (XEXP (link, 0)) == USE)
16704 rtx arg = XEXP (XEXP (link, 0), 0);
16706 if (ix86_check_avx256_register (arg))
16707 return AVX_U128_DIRTY;
16711 return AVX_U128_CLEAN;
16714 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16715 changes state only when a 256bit register is written to, but we need
16716 to prevent the compiler from moving the optimal insertion point above
16717 an eventual read from a 256bit register. */
16718 subrtx_iterator::array_type array;
16719 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16720 if (ix86_check_avx256_register (*iter))
16721 return AVX_U128_DIRTY;
16723 return AVX_U128_ANY;
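/* Illustrative sketch (not part of the original source; function names are
   hypothetical): the situation the AVX_U128 entity models.  Assuming code
   compiled with -mavx, the 256-bit store below leaves the upper halves of
   the ymm registers "dirty"; because bar takes no 256-bit arguments, the
   call needs AVX_U128_CLEAN, so mode switching inserts a vzeroupper before
   it and avoids the AVX/SSE transition penalty in legacy-SSE callees.  */
#if 0
#include <immintrin.h>
extern void bar (void);

void
foo (float *p, __m256 v)
{
  _mm256_storeu_ps (p, v);  /* makes the upper ymm state dirty */
  bar ();                   /* vzeroupper is emitted before this call */
}
#endif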
16726 /* Return mode that i387 must be switched into
16727 prior to the execution of insn. */
16729 static int
16730 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16732 enum attr_i387_cw mode;
16734 /* The mode UNINITIALIZED is used to store the control word after a
16735 function call or ASM pattern. The mode ANY specifies that the function
16736 has no requirements on the control word and makes no changes to the
16737 bits we are interested in. */
16739 if (CALL_P (insn)
16740 || (NONJUMP_INSN_P (insn)
16741 && (asm_noperands (PATTERN (insn)) >= 0
16742 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16743 return I387_CW_UNINITIALIZED;
16745 if (recog_memoized (insn) < 0)
16746 return I387_CW_ANY;
16748 mode = get_attr_i387_cw (insn);
16750 switch (entity)
16752 case I387_TRUNC:
16753 if (mode == I387_CW_TRUNC)
16754 return mode;
16755 break;
16757 case I387_FLOOR:
16758 if (mode == I387_CW_FLOOR)
16759 return mode;
16760 break;
16762 case I387_CEIL:
16763 if (mode == I387_CW_CEIL)
16764 return mode;
16765 break;
16767 case I387_MASK_PM:
16768 if (mode == I387_CW_MASK_PM)
16769 return mode;
16770 break;
16772 default:
16773 gcc_unreachable ();
16776 return I387_CW_ANY;
16779 /* Return mode that entity must be switched into
16780 prior to the execution of insn. */
16782 static int
16783 ix86_mode_needed (int entity, rtx_insn *insn)
16785 switch (entity)
16787 case AVX_U128:
16788 return ix86_avx_u128_mode_needed (insn);
16789 case I387_TRUNC:
16790 case I387_FLOOR:
16791 case I387_CEIL:
16792 case I387_MASK_PM:
16793 return ix86_i387_mode_needed (entity, insn);
16794 default:
16795 gcc_unreachable ();
16797 return 0;
16800 /* Check if a 256bit AVX register is referenced in stores. */
16802 static void
16803 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16805 if (ix86_check_avx256_register (dest))
16807 bool *used = (bool *) data;
16808 *used = true;
16812 /* Calculate mode of upper 128bit AVX registers after the insn. */
16814 static int
16815 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16817 rtx pat = PATTERN (insn);
16819 if (vzeroupper_operation (pat, VOIDmode)
16820 || vzeroall_operation (pat, VOIDmode))
16821 return AVX_U128_CLEAN;
16823 /* We know that state is clean after CALL insn if there are no
16824 256bit registers used in the function return register. */
16825 if (CALL_P (insn))
16827 bool avx_reg256_found = false;
16828 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16830 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16833 /* Otherwise, return current mode. Remember that if insn
16834 references AVX 256bit registers, the mode was already changed
16835 to DIRTY from MODE_NEEDED. */
16836 return mode;
16839 /* Return the mode that an insn results in. */
16841 static int
16842 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16844 switch (entity)
16846 case AVX_U128:
16847 return ix86_avx_u128_mode_after (mode, insn);
16848 case I387_TRUNC:
16849 case I387_FLOOR:
16850 case I387_CEIL:
16851 case I387_MASK_PM:
16852 return mode;
16853 default:
16854 gcc_unreachable ();
16858 static int
16859 ix86_avx_u128_mode_entry (void)
16861 tree arg;
16863 /* Entry mode is set to AVX_U128_DIRTY if there are
16864 256bit modes used in function arguments. */
16865 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16866 arg = TREE_CHAIN (arg))
16868 rtx incoming = DECL_INCOMING_RTL (arg);
16870 if (incoming && ix86_check_avx256_register (incoming))
16871 return AVX_U128_DIRTY;
16874 return AVX_U128_CLEAN;
16877 /* Return a mode that ENTITY is assumed to be
16878 switched to at function entry. */
16880 static int
16881 ix86_mode_entry (int entity)
16883 switch (entity)
16885 case AVX_U128:
16886 return ix86_avx_u128_mode_entry ();
16887 case I387_TRUNC:
16888 case I387_FLOOR:
16889 case I387_CEIL:
16890 case I387_MASK_PM:
16891 return I387_CW_ANY;
16892 default:
16893 gcc_unreachable ();
16897 static int
16898 ix86_avx_u128_mode_exit (void)
16900 rtx reg = crtl->return_rtx;
16902 /* Exit mode is set to AVX_U128_DIRTY if there are
16903 256bit modes used in the function return register. */
16904 if (reg && ix86_check_avx256_register (reg))
16905 return AVX_U128_DIRTY;
16907 return AVX_U128_CLEAN;
16910 /* Return a mode that ENTITY is assumed to be
16911 switched to at function exit. */
16913 static int
16914 ix86_mode_exit (int entity)
16916 switch (entity)
16918 case AVX_U128:
16919 return ix86_avx_u128_mode_exit ();
16920 case I387_TRUNC:
16921 case I387_FLOOR:
16922 case I387_CEIL:
16923 case I387_MASK_PM:
16924 return I387_CW_ANY;
16925 default:
16926 gcc_unreachable ();
16930 static int
16931 ix86_mode_priority (int, int n)
16933 return n;
16936 /* Output code to initialize control word copies used by trunc?f?i and
16937 rounding patterns. CURRENT_MODE is set to current control word,
16938 while NEW_MODE is set to new control word. */
16940 static void
16941 emit_i387_cw_initialization (int mode)
16943 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16944 rtx new_mode;
16946 enum ix86_stack_slot slot;
16948 rtx reg = gen_reg_rtx (HImode);
16950 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16951 emit_move_insn (reg, copy_rtx (stored_mode));
16953 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16954 || optimize_insn_for_size_p ())
16956 switch (mode)
16958 case I387_CW_TRUNC:
16959 /* round toward zero (truncate) */
16960 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16961 slot = SLOT_CW_TRUNC;
16962 break;
16964 case I387_CW_FLOOR:
16965 /* round down toward -oo */
16966 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16967 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16968 slot = SLOT_CW_FLOOR;
16969 break;
16971 case I387_CW_CEIL:
16972 /* round up toward +oo */
16973 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16974 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16975 slot = SLOT_CW_CEIL;
16976 break;
16978 case I387_CW_MASK_PM:
16979 /* mask precision exception for nearbyint() */
16980 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16981 slot = SLOT_CW_MASK_PM;
16982 break;
16984 default:
16985 gcc_unreachable ();
16988 else
16990 switch (mode)
16992 case I387_CW_TRUNC:
16993 /* round toward zero (truncate) */
16994 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16995 slot = SLOT_CW_TRUNC;
16996 break;
16998 case I387_CW_FLOOR:
16999 /* round down toward -oo */
17000 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
17001 slot = SLOT_CW_FLOOR;
17002 break;
17004 case I387_CW_CEIL:
17005 /* round up toward +oo */
17006 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
17007 slot = SLOT_CW_CEIL;
17008 break;
17010 case I387_CW_MASK_PM:
17011 /* mask precision exception for nearbyint() */
17012 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17013 slot = SLOT_CW_MASK_PM;
17014 break;
17016 default:
17017 gcc_unreachable ();
17021 gcc_assert (slot < MAX_386_STACK_LOCALS);
17023 new_mode = assign_386_stack_local (HImode, slot);
17024 emit_move_insn (new_mode, reg);
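/* Illustrative sketch (not part of the original source; the macro and
   function names are hypothetical): the x87 control word fields manipulated
   above.  Bits 10-11 hold the rounding control and bit 5 masks the precision
   exception; the constants mirror the GEN_INT values used in
   emit_i387_cw_initialization.  */
#if 0
#define X87_CW_RC_MASK   0x0c00  /* rounding-control field (bits 10-11) */
#define X87_CW_RC_TRUNC  0x0c00  /* 11: round toward zero */
#define X87_CW_RC_FLOOR  0x0400  /* 01: round toward -infinity */
#define X87_CW_RC_CEIL   0x0800  /* 10: round toward +infinity */
#define X87_CW_PM        0x0020  /* bit 5: mask the precision exception */

static unsigned short
example_cw_for_floor (unsigned short cw)
{
  return (cw & ~X87_CW_RC_MASK) | X87_CW_RC_FLOOR;
}
#endif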
17027 /* Emit vzeroupper. */
17029 void
17030 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17032 int i;
17034 /* Cancel automatic vzeroupper insertion if there are
17035 live call-saved SSE registers at the insertion point. */
17037 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17038 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17039 return;
17041 if (TARGET_64BIT)
17042 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17043 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17044 return;
17046 emit_insn (gen_avx_vzeroupper ());
17051 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17052 is the set of hard registers live at the point where the insn(s)
17053 are to be inserted. */
17055 static void
17056 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17057 HARD_REG_SET regs_live)
17059 switch (entity)
17061 case AVX_U128:
17062 if (mode == AVX_U128_CLEAN)
17063 ix86_avx_emit_vzeroupper (regs_live);
17064 break;
17065 case I387_TRUNC:
17066 case I387_FLOOR:
17067 case I387_CEIL:
17068 case I387_MASK_PM:
17069 if (mode != I387_CW_ANY
17070 && mode != I387_CW_UNINITIALIZED)
17071 emit_i387_cw_initialization (mode);
17072 break;
17073 default:
17074 gcc_unreachable ();
17078 /* Output code for INSN to convert a float to a signed int. OPERANDS
17079 are the insn operands. The output may be [HSD]Imode and the input
17080 operand may be [SDX]Fmode. */
17082 const char *
17083 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17085 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17086 int dimode_p = GET_MODE (operands[0]) == DImode;
17087 int round_mode = get_attr_i387_cw (insn);
17089 /* Jump through a hoop or two for DImode, since the hardware has no
17090 non-popping instruction. We used to do this a different way, but
17091 that was somewhat fragile and broke with post-reload splitters. */
17092 if ((dimode_p || fisttp) && !stack_top_dies)
17093 output_asm_insn ("fld\t%y1", operands);
17095 gcc_assert (STACK_TOP_P (operands[1]));
17096 gcc_assert (MEM_P (operands[0]));
17097 gcc_assert (GET_MODE (operands[1]) != TFmode);
17099 if (fisttp)
17100 output_asm_insn ("fisttp%Z0\t%0", operands);
17101 else
17103 if (round_mode != I387_CW_ANY)
17104 output_asm_insn ("fldcw\t%3", operands);
17105 if (stack_top_dies || dimode_p)
17106 output_asm_insn ("fistp%Z0\t%0", operands);
17107 else
17108 output_asm_insn ("fist%Z0\t%0", operands);
17109 if (round_mode != I387_CW_ANY)
17110 output_asm_insn ("fldcw\t%2", operands);
17113 return "";
17116 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17117 have the values zero or one, indicates the ffreep insn's operand
17118 from the OPERANDS array. */
17120 static const char *
17121 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17123 if (TARGET_USE_FFREEP)
17124 #ifdef HAVE_AS_IX86_FFREEP
17125 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17126 #else
17128 static char retval[32];
17129 int regno = REGNO (operands[opno]);
17131 gcc_assert (STACK_REGNO_P (regno));
17133 regno -= FIRST_STACK_REG;
17135 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17136 return retval;
17138 #endif
17140 return opno ? "fstp\t%y1" : "fstp\t%y0";
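/* Worked example (added for illustration, not part of the original source):
   without assembler support, ffreep %st(1) is emitted as raw bytes via
   ".short 0xc1df", which on a little-endian target stores the opcode bytes
   0xdf 0xc1 (ffreep st(i) encodes as DF C0+i).  */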
17144 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17145 should be used. UNORDERED_P is true when fucom should be used. */
17147 const char *
17148 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17150 int stack_top_dies;
17151 rtx cmp_op0, cmp_op1;
17152 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17154 if (eflags_p)
17156 cmp_op0 = operands[0];
17157 cmp_op1 = operands[1];
17159 else
17161 cmp_op0 = operands[1];
17162 cmp_op1 = operands[2];
17165 if (is_sse)
17167 if (GET_MODE (operands[0]) == SFmode)
17168 if (unordered_p)
17169 return "%vucomiss\t{%1, %0|%0, %1}";
17170 else
17171 return "%vcomiss\t{%1, %0|%0, %1}";
17172 else
17173 if (unordered_p)
17174 return "%vucomisd\t{%1, %0|%0, %1}";
17175 else
17176 return "%vcomisd\t{%1, %0|%0, %1}";
17179 gcc_assert (STACK_TOP_P (cmp_op0));
17181 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17183 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17185 if (stack_top_dies)
17187 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17188 return output_387_ffreep (operands, 1);
17190 else
17191 return "ftst\n\tfnstsw\t%0";
17194 if (STACK_REG_P (cmp_op1)
17195 && stack_top_dies
17196 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17197 && REGNO (cmp_op1) != FIRST_STACK_REG)
17199 /* If both the top of the 387 stack dies and the other operand
17200 is also a stack register that dies, then this must be a
17201 `fcompp' float compare. */
17203 if (eflags_p)
17205 /* There is no double popping fcomi variant. Fortunately,
17206 eflags is immune from the fstp's cc clobbering. */
17207 if (unordered_p)
17208 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17209 else
17210 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17211 return output_387_ffreep (operands, 0);
17213 else
17215 if (unordered_p)
17216 return "fucompp\n\tfnstsw\t%0";
17217 else
17218 return "fcompp\n\tfnstsw\t%0";
17221 else
17223 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17225 static const char * const alt[16] =
17227 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17228 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17229 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17230 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17232 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17233 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17234 NULL,
17235 NULL,
17237 "fcomi\t{%y1, %0|%0, %y1}",
17238 "fcomip\t{%y1, %0|%0, %y1}",
17239 "fucomi\t{%y1, %0|%0, %y1}",
17240 "fucomip\t{%y1, %0|%0, %y1}",
17242 NULL,
17243 NULL,
17244 NULL,
17245 NULL
17248 int mask;
17249 const char *ret;
17251 mask = eflags_p << 3;
17252 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17253 mask |= unordered_p << 1;
17254 mask |= stack_top_dies;
17256 gcc_assert (mask < 16);
17257 ret = alt[mask];
17258 gcc_assert (ret);
17260 return ret;
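/* Worked example (added for illustration, not part of the original source):
   for an eflags-setting, floating-point, unordered compare where the stack
   top dies, the index is (1 << 3) | (0 << 2) | (1 << 1) | 1 == 11, which
   selects "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */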
17264 void
17265 ix86_output_addr_vec_elt (FILE *file, int value)
17267 const char *directive = ASM_LONG;
17269 #ifdef ASM_QUAD
17270 if (TARGET_LP64)
17271 directive = ASM_QUAD;
17272 #else
17273 gcc_assert (!TARGET_64BIT);
17274 #endif
17276 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17279 void
17280 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17282 const char *directive = ASM_LONG;
17284 #ifdef ASM_QUAD
17285 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17286 directive = ASM_QUAD;
17287 #else
17288 gcc_assert (!TARGET_64BIT);
17289 #endif
17290 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17291 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17292 fprintf (file, "%s%s%d-%s%d\n",
17293 directive, LPREFIX, value, LPREFIX, rel);
17294 else if (HAVE_AS_GOTOFF_IN_DATA)
17295 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17296 #if TARGET_MACHO
17297 else if (TARGET_MACHO)
17299 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17300 machopic_output_function_base_name (file);
17301 putc ('\n', file);
17303 #endif
17304 else
17305 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17306 GOT_SYMBOL_NAME, LPREFIX, value);
17309 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17310 for the target. */
17312 void
17313 ix86_expand_clear (rtx dest)
17315 rtx tmp;
17317 /* We play register width games, which are only valid after reload. */
17318 gcc_assert (reload_completed);
17320 /* Avoid HImode and its attendant prefix byte. */
17321 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17322 dest = gen_rtx_REG (SImode, REGNO (dest));
17323 tmp = gen_rtx_SET (dest, const0_rtx);
17325 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17327 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17328 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17331 emit_insn (tmp);
17334 /* X is an unchanging MEM. If it is a constant pool reference, return
17335 the constant pool rtx, else NULL. */
17338 maybe_get_pool_constant (rtx x)
17340 x = ix86_delegitimize_address (XEXP (x, 0));
17342 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17343 return get_pool_constant (x);
17345 return NULL_RTX;
17348 void
17349 ix86_expand_move (machine_mode mode, rtx operands[])
17351 rtx op0, op1;
17352 enum tls_model model;
17354 op0 = operands[0];
17355 op1 = operands[1];
17357 if (GET_CODE (op1) == SYMBOL_REF)
17359 rtx tmp;
17361 model = SYMBOL_REF_TLS_MODEL (op1);
17362 if (model)
17364 op1 = legitimize_tls_address (op1, model, true);
17365 op1 = force_operand (op1, op0);
17366 if (op1 == op0)
17367 return;
17368 op1 = convert_to_mode (mode, op1, 1);
17370 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17371 op1 = tmp;
17373 else if (GET_CODE (op1) == CONST
17374 && GET_CODE (XEXP (op1, 0)) == PLUS
17375 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17377 rtx addend = XEXP (XEXP (op1, 0), 1);
17378 rtx symbol = XEXP (XEXP (op1, 0), 0);
17379 rtx tmp;
17381 model = SYMBOL_REF_TLS_MODEL (symbol);
17382 if (model)
17383 tmp = legitimize_tls_address (symbol, model, true);
17384 else
17385 tmp = legitimize_pe_coff_symbol (symbol, true);
17387 if (tmp)
17389 tmp = force_operand (tmp, NULL);
17390 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17391 op0, 1, OPTAB_DIRECT);
17392 if (tmp == op0)
17393 return;
17394 op1 = convert_to_mode (mode, tmp, 1);
17398 if ((flag_pic || MACHOPIC_INDIRECT)
17399 && symbolic_operand (op1, mode))
17401 if (TARGET_MACHO && !TARGET_64BIT)
17403 #if TARGET_MACHO
17404 /* dynamic-no-pic */
17405 if (MACHOPIC_INDIRECT)
17407 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17408 ? op0 : gen_reg_rtx (Pmode);
17409 op1 = machopic_indirect_data_reference (op1, temp);
17410 if (MACHOPIC_PURE)
17411 op1 = machopic_legitimize_pic_address (op1, mode,
17412 temp == op1 ? 0 : temp);
17414 if (op0 != op1 && GET_CODE (op0) != MEM)
17416 rtx insn = gen_rtx_SET (op0, op1);
17417 emit_insn (insn);
17418 return;
17420 if (GET_CODE (op0) == MEM)
17421 op1 = force_reg (Pmode, op1);
17422 else
17424 rtx temp = op0;
17425 if (GET_CODE (temp) != REG)
17426 temp = gen_reg_rtx (Pmode);
17427 temp = legitimize_pic_address (op1, temp);
17428 if (temp == op0)
17429 return;
17430 op1 = temp;
17432 /* dynamic-no-pic */
17433 #endif
17435 else
17437 if (MEM_P (op0))
17438 op1 = force_reg (mode, op1);
17439 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17441 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17442 op1 = legitimize_pic_address (op1, reg);
17443 if (op0 == op1)
17444 return;
17445 op1 = convert_to_mode (mode, op1, 1);
17449 else
17451 if (MEM_P (op0)
17452 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17453 || !push_operand (op0, mode))
17454 && MEM_P (op1))
17455 op1 = force_reg (mode, op1);
17457 if (push_operand (op0, mode)
17458 && ! general_no_elim_operand (op1, mode))
17459 op1 = copy_to_mode_reg (mode, op1);
17461 /* Force large constants in 64bit compilation into a register
17462 to get them CSEed. */
17463 if (can_create_pseudo_p ()
17464 && (mode == DImode) && TARGET_64BIT
17465 && immediate_operand (op1, mode)
17466 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17467 && !register_operand (op0, mode)
17468 && optimize)
17469 op1 = copy_to_mode_reg (mode, op1);
17471 if (can_create_pseudo_p ()
17472 && CONST_DOUBLE_P (op1))
17474 /* If we are loading a floating point constant to a register,
17475 force the value to memory now, since we'll get better code
17476 out the back end. */
17478 op1 = validize_mem (force_const_mem (mode, op1));
17479 if (!register_operand (op0, mode))
17481 rtx temp = gen_reg_rtx (mode);
17482 emit_insn (gen_rtx_SET (temp, op1));
17483 emit_move_insn (op0, temp);
17484 return;
17489 emit_insn (gen_rtx_SET (op0, op1));
17492 void
17493 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17495 rtx op0 = operands[0], op1 = operands[1];
17496 unsigned int align = GET_MODE_ALIGNMENT (mode);
17498 if (push_operand (op0, VOIDmode))
17499 op0 = emit_move_resolve_push (mode, op0);
17501 /* Force constants other than zero into memory. We do not know how
17502 the instructions used to build constants modify the upper 64 bits
17503 of the register; once we have that information we may be able
17504 to handle some of them more efficiently. */
17505 if (can_create_pseudo_p ()
17506 && register_operand (op0, mode)
17507 && (CONSTANT_P (op1)
17508 || (GET_CODE (op1) == SUBREG
17509 && CONSTANT_P (SUBREG_REG (op1))))
17510 && !standard_sse_constant_p (op1))
17511 op1 = validize_mem (force_const_mem (mode, op1));
17513 /* We need to check memory alignment for SSE mode since attribute
17514 can make operands unaligned. */
17515 if (can_create_pseudo_p ()
17516 && SSE_REG_MODE_P (mode)
17517 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17518 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17520 rtx tmp[2];
17522 /* ix86_expand_vector_move_misalign() does not like constants ... */
17523 if (CONSTANT_P (op1)
17524 || (GET_CODE (op1) == SUBREG
17525 && CONSTANT_P (SUBREG_REG (op1))))
17526 op1 = validize_mem (force_const_mem (mode, op1));
17528 /* ... nor both arguments in memory. */
17529 if (!register_operand (op0, mode)
17530 && !register_operand (op1, mode))
17531 op1 = force_reg (mode, op1);
17533 tmp[0] = op0; tmp[1] = op1;
17534 ix86_expand_vector_move_misalign (mode, tmp);
17535 return;
17538 /* Make operand1 a register if it isn't already. */
17539 if (can_create_pseudo_p ()
17540 && !register_operand (op0, mode)
17541 && !register_operand (op1, mode))
17543 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17544 return;
17547 emit_insn (gen_rtx_SET (op0, op1));
17550 /* Split 32-byte AVX unaligned load and store if needed. */
17552 static void
17553 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17555 rtx m;
17556 rtx (*extract) (rtx, rtx, rtx);
17557 rtx (*load_unaligned) (rtx, rtx);
17558 rtx (*store_unaligned) (rtx, rtx);
17559 machine_mode mode;
17561 switch (GET_MODE (op0))
17563 default:
17564 gcc_unreachable ();
17565 case V32QImode:
17566 extract = gen_avx_vextractf128v32qi;
17567 load_unaligned = gen_avx_loaddquv32qi;
17568 store_unaligned = gen_avx_storedquv32qi;
17569 mode = V16QImode;
17570 break;
17571 case V8SFmode:
17572 extract = gen_avx_vextractf128v8sf;
17573 load_unaligned = gen_avx_loadups256;
17574 store_unaligned = gen_avx_storeups256;
17575 mode = V4SFmode;
17576 break;
17577 case V4DFmode:
17578 extract = gen_avx_vextractf128v4df;
17579 load_unaligned = gen_avx_loadupd256;
17580 store_unaligned = gen_avx_storeupd256;
17581 mode = V2DFmode;
17582 break;
17585 if (MEM_P (op1))
17587 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17588 && optimize_insn_for_speed_p ())
17590 rtx r = gen_reg_rtx (mode);
17591 m = adjust_address (op1, mode, 0);
17592 emit_move_insn (r, m);
17593 m = adjust_address (op1, mode, 16);
17594 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17595 emit_move_insn (op0, r);
17597 /* Normal *mov<mode>_internal pattern will handle
17598 unaligned loads just fine if misaligned_operand
17599 is true, and without the UNSPEC it can be combined
17600 with arithmetic instructions. */
17601 else if (misaligned_operand (op1, GET_MODE (op1)))
17602 emit_insn (gen_rtx_SET (op0, op1));
17603 else
17604 emit_insn (load_unaligned (op0, op1));
17606 else if (MEM_P (op0))
17608 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17609 && optimize_insn_for_speed_p ())
17611 m = adjust_address (op0, mode, 0);
17612 emit_insn (extract (m, op1, const0_rtx));
17613 m = adjust_address (op0, mode, 16);
17614 emit_insn (extract (m, op1, const1_rtx));
17616 else
17617 emit_insn (store_unaligned (op0, op1));
17619 else
17620 gcc_unreachable ();
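/* Illustrative sketch (not part of the original source): when the split is
   enabled, the 32-byte unaligned access is performed as two 16-byte halves,
   roughly
     vmovups     (%rax), %xmm0
     vinsertf128 $1, 16(%rax), %ymm0, %ymm0
   for a load, and a 128-bit unaligned store plus a "vextractf128 $1" of the
   high half for a store; %rax stands for the memory operand's address.  */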
17623 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17624 straight to ix86_expand_vector_move. */
17625 /* Code generation for scalar reg-reg moves of single and double precision data:
17626 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17627 movaps reg, reg
17628 else
17629 movss reg, reg
17630 if (x86_sse_partial_reg_dependency == true)
17631 movapd reg, reg
17632 else
17633 movsd reg, reg
17635 Code generation for scalar loads of double precision data:
17636 if (x86_sse_split_regs == true)
17637 movlpd mem, reg (gas syntax)
17638 else
17639 movsd mem, reg
17641 Code generation for unaligned packed loads of single precision data
17642 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17643 if (x86_sse_unaligned_move_optimal)
17644 movups mem, reg
17646 if (x86_sse_partial_reg_dependency == true)
17648 xorps reg, reg
17649 movlps mem, reg
17650 movhps mem+8, reg
17652 else
17654 movlps mem, reg
17655 movhps mem+8, reg
17658 Code generation for unaligned packed loads of double precision data
17659 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17660 if (x86_sse_unaligned_move_optimal)
17661 movupd mem, reg
17663 if (x86_sse_split_regs == true)
17665 movlpd mem, reg
17666 movhpd mem+8, reg
17668 else
17670 movsd mem, reg
17671 movhpd mem+8, reg
17675 void
17676 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17678 rtx op0, op1, orig_op0 = NULL_RTX, m;
17679 rtx (*load_unaligned) (rtx, rtx);
17680 rtx (*store_unaligned) (rtx, rtx);
17682 op0 = operands[0];
17683 op1 = operands[1];
17685 if (GET_MODE_SIZE (mode) == 64)
17687 switch (GET_MODE_CLASS (mode))
17689 case MODE_VECTOR_INT:
17690 case MODE_INT:
17691 if (GET_MODE (op0) != V16SImode)
17693 if (!MEM_P (op0))
17695 orig_op0 = op0;
17696 op0 = gen_reg_rtx (V16SImode);
17698 else
17699 op0 = gen_lowpart (V16SImode, op0);
17701 op1 = gen_lowpart (V16SImode, op1);
17702 /* FALLTHRU */
17704 case MODE_VECTOR_FLOAT:
17705 switch (GET_MODE (op0))
17707 default:
17708 gcc_unreachable ();
17709 case V16SImode:
17710 load_unaligned = gen_avx512f_loaddquv16si;
17711 store_unaligned = gen_avx512f_storedquv16si;
17712 break;
17713 case V16SFmode:
17714 load_unaligned = gen_avx512f_loadups512;
17715 store_unaligned = gen_avx512f_storeups512;
17716 break;
17717 case V8DFmode:
17718 load_unaligned = gen_avx512f_loadupd512;
17719 store_unaligned = gen_avx512f_storeupd512;
17720 break;
17723 if (MEM_P (op1))
17724 emit_insn (load_unaligned (op0, op1));
17725 else if (MEM_P (op0))
17726 emit_insn (store_unaligned (op0, op1));
17727 else
17728 gcc_unreachable ();
17729 if (orig_op0)
17730 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17731 break;
17733 default:
17734 gcc_unreachable ();
17737 return;
17740 if (TARGET_AVX
17741 && GET_MODE_SIZE (mode) == 32)
17743 switch (GET_MODE_CLASS (mode))
17745 case MODE_VECTOR_INT:
17746 case MODE_INT:
17747 if (GET_MODE (op0) != V32QImode)
17749 if (!MEM_P (op0))
17751 orig_op0 = op0;
17752 op0 = gen_reg_rtx (V32QImode);
17754 else
17755 op0 = gen_lowpart (V32QImode, op0);
17757 op1 = gen_lowpart (V32QImode, op1);
17758 /* FALLTHRU */
17760 case MODE_VECTOR_FLOAT:
17761 ix86_avx256_split_vector_move_misalign (op0, op1);
17762 if (orig_op0)
17763 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17764 break;
17766 default:
17767 gcc_unreachable ();
17770 return;
17773 if (MEM_P (op1))
17775 /* Normal *mov<mode>_internal pattern will handle
17776 unaligned loads just fine if misaligned_operand
17777 is true, and without the UNSPEC it can be combined
17778 with arithmetic instructions. */
17779 if (TARGET_AVX
17780 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17781 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17782 && misaligned_operand (op1, GET_MODE (op1)))
17783 emit_insn (gen_rtx_SET (op0, op1));
17784 /* ??? If we have typed data, then it would appear that using
17785 movdqu is the only way to get unaligned data loaded with
17786 integer type. */
17787 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17789 if (GET_MODE (op0) != V16QImode)
17791 orig_op0 = op0;
17792 op0 = gen_reg_rtx (V16QImode);
17794 op1 = gen_lowpart (V16QImode, op1);
17795 /* We will eventually emit movups based on insn attributes. */
17796 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17797 if (orig_op0)
17798 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17800 else if (TARGET_SSE2 && mode == V2DFmode)
17802 rtx zero;
17804 if (TARGET_AVX
17805 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17806 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17807 || optimize_insn_for_size_p ())
17809 /* We will eventually emit movups based on insn attributes. */
17810 emit_insn (gen_sse2_loadupd (op0, op1));
17811 return;
17814 /* When SSE registers are split into halves, we can avoid
17815 writing to the top half twice. */
17816 if (TARGET_SSE_SPLIT_REGS)
17818 emit_clobber (op0);
17819 zero = op0;
17821 else
17823 /* ??? Not sure about the best option for the Intel chips.
17824 The following would seem to satisfy; the register is
17825 entirely cleared, breaking the dependency chain. We
17826 then store to the upper half, with a dependency depth
17827 of one. A rumor has it that Intel recommends two movsd
17828 followed by an unpacklpd, but this is unconfirmed. And
17829 given that the dependency depth of the unpacklpd would
17830 still be one, I'm not sure why this would be better. */
17831 zero = CONST0_RTX (V2DFmode);
17834 m = adjust_address (op1, DFmode, 0);
17835 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17836 m = adjust_address (op1, DFmode, 8);
17837 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17839 else
17841 rtx t;
17843 if (TARGET_AVX
17844 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17845 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17846 || optimize_insn_for_size_p ())
17848 if (GET_MODE (op0) != V4SFmode)
17850 orig_op0 = op0;
17851 op0 = gen_reg_rtx (V4SFmode);
17853 op1 = gen_lowpart (V4SFmode, op1);
17854 emit_insn (gen_sse_loadups (op0, op1));
17855 if (orig_op0)
17856 emit_move_insn (orig_op0,
17857 gen_lowpart (GET_MODE (orig_op0), op0));
17858 return;
17861 if (mode != V4SFmode)
17862 t = gen_reg_rtx (V4SFmode);
17863 else
17864 t = op0;
17866 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17867 emit_move_insn (t, CONST0_RTX (V4SFmode));
17868 else
17869 emit_clobber (t);
17871 m = adjust_address (op1, V2SFmode, 0);
17872 emit_insn (gen_sse_loadlps (t, t, m));
17873 m = adjust_address (op1, V2SFmode, 8);
17874 emit_insn (gen_sse_loadhps (t, t, m));
17875 if (mode != V4SFmode)
17876 emit_move_insn (op0, gen_lowpart (mode, t));
17879 else if (MEM_P (op0))
17881 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17883 op0 = gen_lowpart (V16QImode, op0);
17884 op1 = gen_lowpart (V16QImode, op1);
17885 /* We will eventually emit movups based on insn attributes. */
17886 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17888 else if (TARGET_SSE2 && mode == V2DFmode)
17890 if (TARGET_AVX
17891 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17892 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17893 || optimize_insn_for_size_p ())
17894 /* We will eventually emit movups based on insn attributes. */
17895 emit_insn (gen_sse2_storeupd (op0, op1));
17896 else
17898 m = adjust_address (op0, DFmode, 0);
17899 emit_insn (gen_sse2_storelpd (m, op1));
17900 m = adjust_address (op0, DFmode, 8);
17901 emit_insn (gen_sse2_storehpd (m, op1));
17904 else
17906 if (mode != V4SFmode)
17907 op1 = gen_lowpart (V4SFmode, op1);
17909 if (TARGET_AVX
17910 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17911 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17912 || optimize_insn_for_size_p ())
17914 op0 = gen_lowpart (V4SFmode, op0);
17915 emit_insn (gen_sse_storeups (op0, op1));
17917 else
17919 m = adjust_address (op0, V2SFmode, 0);
17920 emit_insn (gen_sse_storelps (m, op1));
17921 m = adjust_address (op0, V2SFmode, 8);
17922 emit_insn (gen_sse_storehps (m, op1));
17926 else
17927 gcc_unreachable ();
17930 /* Helper function of ix86_fixup_binary_operands to canonicalize
17931 operand order. Returns true if the operands should be swapped. */
17933 static bool
17934 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17935 rtx operands[])
17937 rtx dst = operands[0];
17938 rtx src1 = operands[1];
17939 rtx src2 = operands[2];
17941 /* If the operation is not commutative, we can't do anything. */
17942 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17943 return false;
17945 /* Highest priority is that src1 should match dst. */
17946 if (rtx_equal_p (dst, src1))
17947 return false;
17948 if (rtx_equal_p (dst, src2))
17949 return true;
17951 /* Next highest priority is that immediate constants come second. */
17952 if (immediate_operand (src2, mode))
17953 return false;
17954 if (immediate_operand (src1, mode))
17955 return true;
17957 /* Lowest priority is that memory references should come second. */
17958 if (MEM_P (src2))
17959 return false;
17960 if (MEM_P (src1))
17961 return true;
17963 return false;
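/* Worked example (added for illustration, not part of the original source):
   for "a = b + a" the destination matches src2, so the operands are swapped
   to make src1 match the destination and fit the two-address add; for
   "a = a + mem" nothing is swapped, keeping the memory reference second.  */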
17967 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17968 destination to use for the operation. If different from the true
17969 destination in operands[0], a copy operation will be required. */
17972 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17973 rtx operands[])
17975 rtx dst = operands[0];
17976 rtx src1 = operands[1];
17977 rtx src2 = operands[2];
17979 /* Canonicalize operand order. */
17980 if (ix86_swap_binary_operands_p (code, mode, operands))
17982 /* It is invalid to swap operands of different modes. */
17983 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17985 std::swap (src1, src2);
17988 /* Both source operands cannot be in memory. */
17989 if (MEM_P (src1) && MEM_P (src2))
17991 /* Optimization: Only read from memory once. */
17992 if (rtx_equal_p (src1, src2))
17994 src2 = force_reg (mode, src2);
17995 src1 = src2;
17997 else if (rtx_equal_p (dst, src1))
17998 src2 = force_reg (mode, src2);
17999 else
18000 src1 = force_reg (mode, src1);
18003 /* If the destination is memory, and we do not have matching source
18004 operands, do things in registers. */
18005 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18006 dst = gen_reg_rtx (mode);
18008 /* Source 1 cannot be a constant. */
18009 if (CONSTANT_P (src1))
18010 src1 = force_reg (mode, src1);
18012 /* Source 1 cannot be a non-matching memory. */
18013 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18014 src1 = force_reg (mode, src1);
18016 /* Improve address combine. */
18017 if (code == PLUS
18018 && GET_MODE_CLASS (mode) == MODE_INT
18019 && MEM_P (src2))
18020 src2 = force_reg (mode, src2);
18022 operands[1] = src1;
18023 operands[2] = src2;
18024 return dst;
18027 /* Similarly, but assume that the destination has already been
18028 set up properly. */
18030 void
18031 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18032 machine_mode mode, rtx operands[])
18034 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18035 gcc_assert (dst == operands[0]);
18038 /* Attempt to expand a binary operator. Make the expansion closer to the
18039 actual machine, than just general_operand, which will allow 3 separate
18040 memory references (one output, two input) in a single insn. */
18042 void
18043 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18044 rtx operands[])
18046 rtx src1, src2, dst, op, clob;
18048 dst = ix86_fixup_binary_operands (code, mode, operands);
18049 src1 = operands[1];
18050 src2 = operands[2];
18052 /* Emit the instruction. */
18054 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18056 if (reload_completed
18057 && code == PLUS
18058 && !rtx_equal_p (dst, src1))
18060 /* This is going to be an LEA; avoid splitting it later. */
18061 emit_insn (op);
18063 else
18065 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18066 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18069 /* Fix up the destination if needed. */
18070 if (dst != operands[0])
18071 emit_move_insn (operands[0], dst);
18074 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18075 the given OPERANDS. */
18077 void
18078 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18079 rtx operands[])
18081 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18082 if (GET_CODE (operands[1]) == SUBREG)
18084 op1 = operands[1];
18085 op2 = operands[2];
18087 else if (GET_CODE (operands[2]) == SUBREG)
18089 op1 = operands[2];
18090 op2 = operands[1];
18092 /* Optimize (__m128i) d | (__m128i) e and similar code
18093 when d and e are float vectors into a float vector logical
18094 insn. In C/C++ without using intrinsics there is no other way
18095 to express a vector logical operation on float vectors than
18096 to cast them temporarily to integer vectors. */
18097 if (op1
18098 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18099 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18100 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18101 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18102 && SUBREG_BYTE (op1) == 0
18103 && (GET_CODE (op2) == CONST_VECTOR
18104 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18105 && SUBREG_BYTE (op2) == 0))
18106 && can_create_pseudo_p ())
18108 rtx dst;
18109 switch (GET_MODE (SUBREG_REG (op1)))
18111 case V4SFmode:
18112 case V8SFmode:
18113 case V16SFmode:
18114 case V2DFmode:
18115 case V4DFmode:
18116 case V8DFmode:
18117 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18118 if (GET_CODE (op2) == CONST_VECTOR)
18120 op2 = gen_lowpart (GET_MODE (dst), op2);
18121 op2 = force_reg (GET_MODE (dst), op2);
18123 else
18125 op1 = operands[1];
18126 op2 = SUBREG_REG (operands[2]);
18127 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18128 op2 = force_reg (GET_MODE (dst), op2);
18130 op1 = SUBREG_REG (op1);
18131 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18132 op1 = force_reg (GET_MODE (dst), op1);
18133 emit_insn (gen_rtx_SET (dst,
18134 gen_rtx_fmt_ee (code, GET_MODE (dst),
18135 op1, op2)));
18136 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18137 return;
18138 default:
18139 break;
18142 if (!nonimmediate_operand (operands[1], mode))
18143 operands[1] = force_reg (mode, operands[1]);
18144 if (!nonimmediate_operand (operands[2], mode))
18145 operands[2] = force_reg (mode, operands[2]);
18146 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18147 emit_insn (gen_rtx_SET (operands[0],
18148 gen_rtx_fmt_ee (code, mode, operands[1],
18149 operands[2])));
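/* Illustrative sketch (not part of the original source; uses the generic
   vector extension rather than intrinsics, and the type and function names
   are hypothetical): the C idiom the transformation above targets.  The OR
   must be written through an integer vector cast, and the expander rewrites
   it to a float-vector logical instruction (e.g. orps) when profitable.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

static v4sf
example_or (v4sf a, v4sf b)
{
  return (v4sf) ((v4si) a | (v4si) b);
}
#endif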
18152 /* Return TRUE or FALSE depending on whether the binary operator meets the
18153 appropriate constraints. */
18155 bool
18156 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18157 rtx operands[3])
18159 rtx dst = operands[0];
18160 rtx src1 = operands[1];
18161 rtx src2 = operands[2];
18163 /* Both source operands cannot be in memory. */
18164 if (MEM_P (src1) && MEM_P (src2))
18165 return false;
18167 /* Canonicalize operand order for commutative operators. */
18168 if (ix86_swap_binary_operands_p (code, mode, operands))
18169 std::swap (src1, src2);
18171 /* If the destination is memory, we must have a matching source operand. */
18172 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18173 return false;
18175 /* Source 1 cannot be a constant. */
18176 if (CONSTANT_P (src1))
18177 return false;
18179 /* Source 1 cannot be a non-matching memory. */
18180 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18181 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18182 return (code == AND
18183 && (mode == HImode
18184 || mode == SImode
18185 || (TARGET_64BIT && mode == DImode))
18186 && satisfies_constraint_L (src2));
18188 return true;
18191 /* Attempt to expand a unary operator. Make the expansion closer to the
18192 actual machine, than just general_operand, which will allow 2 separate
18193 memory references (one output, one input) in a single insn. */
18195 void
18196 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18197 rtx operands[])
18199 bool matching_memory = false;
18200 rtx src, dst, op, clob;
18202 dst = operands[0];
18203 src = operands[1];
18205 /* If the destination is memory, and we do not have matching source
18206 operands, do things in registers. */
18207 if (MEM_P (dst))
18209 if (rtx_equal_p (dst, src))
18210 matching_memory = true;
18211 else
18212 dst = gen_reg_rtx (mode);
18215 /* When source operand is memory, destination must match. */
18216 if (MEM_P (src) && !matching_memory)
18217 src = force_reg (mode, src);
18219 /* Emit the instruction. */
18221 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18223 if (code == NOT)
18224 emit_insn (op);
18225 else
18227 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18228 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18231 /* Fix up the destination if needed. */
18232 if (dst != operands[0])
18233 emit_move_insn (operands[0], dst);
18236 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18237 divisor are within the range [0-255]. */
18239 void
18240 ix86_split_idivmod (machine_mode mode, rtx operands[],
18241 bool signed_p)
18243 rtx_code_label *end_label, *qimode_label;
18244 rtx insn, div, mod;
18245 rtx scratch, tmp0, tmp1, tmp2;
18246 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18247 rtx (*gen_zero_extend) (rtx, rtx);
18248 rtx (*gen_test_ccno_1) (rtx, rtx);
18250 switch (mode)
18252 case SImode:
18253 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18254 gen_test_ccno_1 = gen_testsi_ccno_1;
18255 gen_zero_extend = gen_zero_extendqisi2;
18256 break;
18257 case DImode:
18258 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18259 gen_test_ccno_1 = gen_testdi_ccno_1;
18260 gen_zero_extend = gen_zero_extendqidi2;
18261 break;
18262 default:
18263 gcc_unreachable ();
18266 end_label = gen_label_rtx ();
18267 qimode_label = gen_label_rtx ();
18269 scratch = gen_reg_rtx (mode);
18271 /* Use 8bit unsigned divmod if dividend and divisor are within
18272 the range [0-255]. */
18273 emit_move_insn (scratch, operands[2]);
18274 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18275 scratch, 1, OPTAB_DIRECT);
18276 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18277 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18278 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18279 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18280 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18281 pc_rtx);
18282 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18283 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18284 JUMP_LABEL (insn) = qimode_label;
18286 /* Generate original signed/unsigned divmod. */
18287 div = gen_divmod4_1 (operands[0], operands[1],
18288 operands[2], operands[3]);
18289 emit_insn (div);
18291 /* Branch to the end. */
18292 emit_jump_insn (gen_jump (end_label));
18293 emit_barrier ();
18295 /* Generate 8bit unsigned divide. */
18296 emit_label (qimode_label);
18297 /* Don't use operands[0] for result of 8bit divide since not all
18298 registers support QImode ZERO_EXTRACT. */
18299 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18300 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18301 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18302 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18304 if (signed_p)
18306 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18307 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18309 else
18311 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18312 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18315 /* Extract remainder from AH. */
18316 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18317 if (REG_P (operands[1]))
18318 insn = emit_move_insn (operands[1], tmp1);
18319 else
18321 /* Need a new scratch register since the old one has result
18322 of 8bit divide. */
18323 scratch = gen_reg_rtx (mode);
18324 emit_move_insn (scratch, tmp1);
18325 insn = emit_move_insn (operands[1], scratch);
18327 set_unique_reg_note (insn, REG_EQUAL, mod);
18329 /* Zero extend quotient from AL. */
18330 tmp1 = gen_lowpart (QImode, tmp0);
18331 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18332 set_unique_reg_note (insn, REG_EQUAL, div);
18334 emit_label (end_label);
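/* Illustrative sketch (not part of the original source; names are
   hypothetical): the runtime behaviour of the RTL emitted above for an
   unsigned 32-bit divmod.  */
#if 0
static void
example_divmod_u32 (unsigned int a, unsigned int b,
                    unsigned int *quot, unsigned int *rem)
{
  if ((a | b) < 0x100)
    {
      /* Both values fit in 8 bits: one cheap 8-bit divide suffices.  */
      unsigned char q = (unsigned char) a / (unsigned char) b;
      unsigned char r = (unsigned char) a % (unsigned char) b;
      *quot = q;
      *rem = r;
    }
  else
    {
      /* Full-width divide on the slow path.  */
      *quot = a / b;
      *rem = a % b;
    }
}
#endif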
18337 #define LEA_MAX_STALL (3)
18338 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18340 /* Increase given DISTANCE in half-cycles according to
18341 dependencies between PREV and NEXT instructions.
18342 Add 1 half-cycle if there is no dependency and
18343 go to the next cycle if there is some dependency. */
18345 static unsigned int
18346 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18348 df_ref def, use;
18350 if (!prev || !next)
18351 return distance + (distance & 1) + 2;
18353 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18354 return distance + 1;
18356 FOR_EACH_INSN_USE (use, next)
18357 FOR_EACH_INSN_DEF (def, prev)
18358 if (!DF_REF_IS_ARTIFICIAL (def)
18359 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18360 return distance + (distance & 1) + 2;
18362 return distance + 1;
18365 /* Function checks if instruction INSN defines register number
18366 REGNO1 or REGNO2. */
18368 static bool
18369 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18370 rtx_insn *insn)
18372 df_ref def;
18374 FOR_EACH_INSN_DEF (def, insn)
18375 if (DF_REF_REG_DEF_P (def)
18376 && !DF_REF_IS_ARTIFICIAL (def)
18377 && (regno1 == DF_REF_REGNO (def)
18378 || regno2 == DF_REF_REGNO (def)))
18379 return true;
18381 return false;
18384 /* Function checks if instruction INSN uses register number
18385 REGNO as part of an address expression. */
18387 static bool
18388 insn_uses_reg_mem (unsigned int regno, rtx insn)
18390 df_ref use;
18392 FOR_EACH_INSN_USE (use, insn)
18393 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18394 return true;
18396 return false;
18399 /* Search backward for non-agu definition of register number REGNO1
18400 or register number REGNO2 in basic block starting from instruction
18401 START up to head of basic block or instruction INSN.
18403 The function sets *FOUND to true if a definition was found
18404 and to false otherwise.
18406 Distance in half-cycles between START and found instruction or head
18407 of BB is added to DISTANCE and returned. */
18409 static int
18410 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18411 rtx_insn *insn, int distance,
18412 rtx_insn *start, bool *found)
18414 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18415 rtx_insn *prev = start;
18416 rtx_insn *next = NULL;
18418 *found = false;
18420 while (prev
18421 && prev != insn
18422 && distance < LEA_SEARCH_THRESHOLD)
18424 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18426 distance = increase_distance (prev, next, distance);
18427 if (insn_defines_reg (regno1, regno2, prev))
18429 if (recog_memoized (prev) < 0
18430 || get_attr_type (prev) != TYPE_LEA)
18432 *found = true;
18433 return distance;
18437 next = prev;
18439 if (prev == BB_HEAD (bb))
18440 break;
18442 prev = PREV_INSN (prev);
18445 return distance;
18448 /* Search backward for non-agu definition of register number REGNO1
18449 or register number REGNO2 in INSN's basic block until
18450 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18451 2. Reach neighbour BBs boundary, or
18452 3. Reach agu definition.
18453 Returns the distance between the non-agu definition point and INSN.
18454 If no definition point, returns -1. */
18456 static int
18457 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18458 rtx_insn *insn)
18460 basic_block bb = BLOCK_FOR_INSN (insn);
18461 int distance = 0;
18462 bool found = false;
18464 if (insn != BB_HEAD (bb))
18465 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18466 distance, PREV_INSN (insn),
18467 &found);
18469 if (!found && distance < LEA_SEARCH_THRESHOLD)
18471 edge e;
18472 edge_iterator ei;
18473 bool simple_loop = false;
18475 FOR_EACH_EDGE (e, ei, bb->preds)
18476 if (e->src == bb)
18478 simple_loop = true;
18479 break;
18482 if (simple_loop)
18483 distance = distance_non_agu_define_in_bb (regno1, regno2,
18484 insn, distance,
18485 BB_END (bb), &found);
18486 else
18488 int shortest_dist = -1;
18489 bool found_in_bb = false;
18491 FOR_EACH_EDGE (e, ei, bb->preds)
18493 int bb_dist
18494 = distance_non_agu_define_in_bb (regno1, regno2,
18495 insn, distance,
18496 BB_END (e->src),
18497 &found_in_bb);
18498 if (found_in_bb)
18500 if (shortest_dist < 0)
18501 shortest_dist = bb_dist;
18502 else if (bb_dist > 0)
18503 shortest_dist = MIN (bb_dist, shortest_dist);
18505 found = true;
18509 distance = shortest_dist;
18513 /* get_attr_type may modify recog data. We want to make sure
18514 that recog data is valid for instruction INSN, on which
18515 distance_non_agu_define is called. INSN is unchanged here. */
18516 extract_insn_cached (insn);
18518 if (!found)
18519 return -1;
18521 return distance >> 1;
18524 /* Return the distance in half-cycles between INSN and the next
18525 insn that uses register number REGNO in a memory address, added
18526 to DISTANCE. Return -1 if REGNO is set.
18528 Put true value into *FOUND if register usage was found and
18529 false otherwise.
18530 Put true value into *REDEFINED if register redefinition was
18531 found and false otherwise. */
18533 static int
18534 distance_agu_use_in_bb (unsigned int regno,
18535 rtx_insn *insn, int distance, rtx_insn *start,
18536 bool *found, bool *redefined)
18538 basic_block bb = NULL;
18539 rtx_insn *next = start;
18540 rtx_insn *prev = NULL;
18542 *found = false;
18543 *redefined = false;
18545 if (start != NULL_RTX)
18547 bb = BLOCK_FOR_INSN (start);
18548 if (start != BB_HEAD (bb))
18549 /* If insn and start belong to the same bb, set prev to insn,
18550 so the call to increase_distance will increase the distance
18551 between insns by 1. */
18552 prev = insn;
18555 while (next
18556 && next != insn
18557 && distance < LEA_SEARCH_THRESHOLD)
18559 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18561 distance = increase_distance(prev, next, distance);
18562 if (insn_uses_reg_mem (regno, next))
18564 /* Return DISTANCE if OP0 is used in memory
18565 address in NEXT. */
18566 *found = true;
18567 return distance;
18570 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18572 /* Return -1 if OP0 is set in NEXT. */
18573 *redefined = true;
18574 return -1;
18577 prev = next;
18580 if (next == BB_END (bb))
18581 break;
18583 next = NEXT_INSN (next);
18586 return distance;
18589 /* Return the distance between INSN and the next insn that uses
18590 register number REGNO0 in a memory address. Return -1 if no
18591 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18593 static int
18594 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18596 basic_block bb = BLOCK_FOR_INSN (insn);
18597 int distance = 0;
18598 bool found = false;
18599 bool redefined = false;
18601 if (insn != BB_END (bb))
18602 distance = distance_agu_use_in_bb (regno0, insn, distance,
18603 NEXT_INSN (insn),
18604 &found, &redefined);
18606 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18608 edge e;
18609 edge_iterator ei;
18610 bool simple_loop = false;
18612 FOR_EACH_EDGE (e, ei, bb->succs)
18613 if (e->dest == bb)
18615 simple_loop = true;
18616 break;
18619 if (simple_loop)
18620 distance = distance_agu_use_in_bb (regno0, insn,
18621 distance, BB_HEAD (bb),
18622 &found, &redefined);
18623 else
18625 int shortest_dist = -1;
18626 bool found_in_bb = false;
18627 bool redefined_in_bb = false;
18629 FOR_EACH_EDGE (e, ei, bb->succs)
18631 int bb_dist
18632 = distance_agu_use_in_bb (regno0, insn,
18633 distance, BB_HEAD (e->dest),
18634 &found_in_bb, &redefined_in_bb);
18635 if (found_in_bb)
18637 if (shortest_dist < 0)
18638 shortest_dist = bb_dist;
18639 else if (bb_dist > 0)
18640 shortest_dist = MIN (bb_dist, shortest_dist);
18642 found = true;
18646 distance = shortest_dist;
18650 if (!found || redefined)
18651 return -1;
18653 return distance >> 1;
18656 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18657 there is a dilemma of choosing LEA or ADD.
18658 Negative value: ADD is preferred over LEA
18659 Zero: Neutral
18660 Positive value: LEA is preferred over ADD. */
18661 #define IX86_LEA_PRIORITY 0
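/* For illustration (hypothetical values): IX86_LEA_PRIORITY is added to
   the backward definition distance in ix86_lea_outperforms below, so a
   value of, say, 2 would let a LEA whose inputs were defined two
   half-cycles more recently still be considered profitable, while the
   default of 0 applies no bias either way. */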
18663 /* Return true if using the lea INSN has a performance advantage
18664 over a sequence of instructions. The instruction sequence has
18665 SPLIT_COST cycles higher latency than the lea latency. */
18667 static bool
18668 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18669 unsigned int regno2, int split_cost, bool has_scale)
18671 int dist_define, dist_use;
18673 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18674 non-destructive destination, or because the ability to use a
18675 scale factor is wanted, the use of LEA is justified. */
18676 if (TARGET_SILVERMONT || TARGET_INTEL)
18678 if (has_scale)
18679 return true;
18680 if (split_cost < 1)
18681 return false;
18682 if (regno0 == regno1 || regno0 == regno2)
18683 return false;
18684 return true;
18687 dist_define = distance_non_agu_define (regno1, regno2, insn);
18688 dist_use = distance_agu_use (regno0, insn);
18690 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18692 /* If there is no non-AGU operand definition, no AGU
18693 operand usage and the split cost is 0, then both the lea
18694 and non-lea variants have the same priority. Currently
18695 we prefer lea for 64-bit code and non-lea for 32-bit
18696 code. */
18697 if (dist_use < 0 && split_cost == 0)
18698 return TARGET_64BIT || IX86_LEA_PRIORITY;
18699 else
18700 return true;
18703 /* With a longer definition distance, lea is preferable.
18704 Here we adjust the distance to take into account the splitting
18705 cost and the lea priority. */
18706 dist_define += split_cost + IX86_LEA_PRIORITY;
18708 /* If there is no use in a memory address then we just check
18709 that the split cost exceeds the AGU stall. */
18710 if (dist_use < 0)
18711 return dist_define > LEA_MAX_STALL;
18713 /* If this insn has both a backward non-AGU dependence and a forward
18714 AGU dependence, the one with the shorter distance takes effect. */
18715 return dist_define >= dist_use;
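/* A rough sketch of the decision above, with hypothetical distances and
   assuming dist_define stays below LEA_MAX_STALL: with dist_define = 2,
   split_cost = 1 and dist_use = 3, the test 2 + 1 >= 3 holds and the
   lea is kept; with dist_use = 5 instead, 2 + 1 < 5, so splitting into
   simpler ALU instructions is preferred. */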
18718 /* Return true if it is legal to clobber flags by INSN and
18719 false otherwise. */
18721 static bool
18722 ix86_ok_to_clobber_flags (rtx_insn *insn)
18724 basic_block bb = BLOCK_FOR_INSN (insn);
18725 df_ref use;
18726 bitmap live;
18728 while (insn)
18730 if (NONDEBUG_INSN_P (insn))
18732 FOR_EACH_INSN_USE (use, insn)
18733 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18734 return false;
18736 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18737 return true;
18740 if (insn == BB_END (bb))
18741 break;
18743 insn = NEXT_INSN (insn);
18746 live = df_get_live_out(bb);
18747 return !REGNO_REG_SET_P (live, FLAGS_REG);
18750 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18751 move and add to avoid AGU stalls. */
18753 bool
18754 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18756 unsigned int regno0, regno1, regno2;
18758 /* Check if we need to optimize. */
18759 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18760 return false;
18762 /* Check it is correct to split here. */
18763 if (!ix86_ok_to_clobber_flags (insn))
18764 return false;
18766 regno0 = true_regnum (operands[0]);
18767 regno1 = true_regnum (operands[1]);
18768 regno2 = true_regnum (operands[2]);
18770 /* We need to split only adds with a non-destructive
18771 destination operand. */
18772 if (regno0 == regno1 || regno0 == regno2)
18773 return false;
18774 else
18775 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18778 /* Return true if we should emit lea instruction instead of mov
18779 instruction. */
18781 bool
18782 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18784 unsigned int regno0, regno1;
18786 /* Check if we need to optimize. */
18787 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18788 return false;
18790 /* Use lea for reg to reg moves only. */
18791 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18792 return false;
18794 regno0 = true_regnum (operands[0]);
18795 regno1 = true_regnum (operands[1]);
18797 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18800 /* Return true if we need to split lea into a sequence of
18801 instructions to avoid AGU stalls. */
18803 bool
18804 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18806 unsigned int regno0, regno1, regno2;
18807 int split_cost;
18808 struct ix86_address parts;
18809 int ok;
18811 /* Check we need to optimize. */
18812 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18813 return false;
18815 /* The "at least two components" test below might not catch simple
18816 move or zero extension insns if parts.base is non-NULL and parts.disp
18817 is const0_rtx as the only components in the address, e.g. if the
18818 register is %rbp or %r13. As this test is much cheaper and moves or
18819 zero extensions are the common case, do this check first. */
18820 if (REG_P (operands[1])
18821 || (SImode_address_operand (operands[1], VOIDmode)
18822 && REG_P (XEXP (operands[1], 0))))
18823 return false;
18825 /* Check if it is OK to split here. */
18826 if (!ix86_ok_to_clobber_flags (insn))
18827 return false;
18829 ok = ix86_decompose_address (operands[1], &parts);
18830 gcc_assert (ok);
18832 /* There should be at least two components in the address. */
18833 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18834 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18835 return false;
18837 /* We should not split into add if a non-legitimate PIC
18838 operand is used as the displacement. */
18839 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18840 return false;
18842 regno0 = true_regnum (operands[0]);
18843 regno1 = INVALID_REGNUM;
18844 regno2 = INVALID_REGNUM;
18846 if (parts.base)
18847 regno1 = true_regnum (parts.base);
18848 if (parts.index)
18849 regno2 = true_regnum (parts.index);
18851 split_cost = 0;
18853 /* Compute how many cycles we will add to the execution time
18854 if we split the lea into a sequence of instructions. */
18855 if (parts.base || parts.index)
18857 /* Have to use a mov instruction if the non-destructive
18858 destination form is used. */
18859 if (regno1 != regno0 && regno2 != regno0)
18860 split_cost += 1;
18862 /* Have to add index to base if both exist. */
18863 if (parts.base && parts.index)
18864 split_cost += 1;
18866 /* Have to use shift and adds if scale is 2 or greater. */
18867 if (parts.scale > 1)
18869 if (regno0 != regno1)
18870 split_cost += 1;
18871 else if (regno2 == regno0)
18872 split_cost += 4;
18873 else
18874 split_cost += parts.scale;
18877 /* Have to use an add instruction with an immediate if
18878 disp is nonzero. */
18879 if (parts.disp && parts.disp != const0_rtx)
18880 split_cost += 1;
18882 /* Subtract the price of lea. */
18883 split_cost -= 1;
18886 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18887 parts.scale > 1);
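/* Worked example with hypothetical operands: for a lea of the form
   4(%rbx,%rcx,4) into %rax, where %rax differs from both %rbx and %rcx,
   the cost computation above charges 1 for the initial mov, 1 for
   adding the base, 1 for the shift implementing scale 4 and 1 for the
   displacement, then subtracts 1 for the lea itself, giving a
   split_cost of 3 extra cycles handed to ix86_lea_outperforms. */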
18890 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18891 matches destination. RTX includes clobber of FLAGS_REG. */
18893 static void
18894 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18895 rtx dst, rtx src)
18897 rtx op, clob;
18899 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18900 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18902 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
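/* For illustration: a call such as ix86_emit_binop (PLUS, SImode, dst, src)
   emits a single insn of roughly the shape
     (parallel [(set (reg:SI dst) (plus:SI (reg:SI dst) (reg:SI src)))
                (clobber (reg:CC flags))])
   i.e. the destructive two-operand form that also clobbers the flags,
   the same shape used by the integer add and shift patterns. */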
18905 /* Return true if regno1 def is nearest to the insn. */
18907 static bool
18908 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18910 rtx_insn *prev = insn;
18911 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18913 if (insn == start)
18914 return false;
18915 while (prev && prev != start)
18917 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18919 prev = PREV_INSN (prev);
18920 continue;
18922 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18923 return true;
18924 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18925 return false;
18926 prev = PREV_INSN (prev);
18929 /* None of the regs is defined in the bb. */
18930 return false;
18933 /* Split lea instructions into a sequence of instructions
18934 which are executed on ALU to avoid AGU stalls.
18935 It is assumed that it is allowed to clobber flags register
18936 at lea position. */
18938 void
18939 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18941 unsigned int regno0, regno1, regno2;
18942 struct ix86_address parts;
18943 rtx target, tmp;
18944 int ok, adds;
18946 ok = ix86_decompose_address (operands[1], &parts);
18947 gcc_assert (ok);
18949 target = gen_lowpart (mode, operands[0]);
18951 regno0 = true_regnum (target);
18952 regno1 = INVALID_REGNUM;
18953 regno2 = INVALID_REGNUM;
18955 if (parts.base)
18957 parts.base = gen_lowpart (mode, parts.base);
18958 regno1 = true_regnum (parts.base);
18961 if (parts.index)
18963 parts.index = gen_lowpart (mode, parts.index);
18964 regno2 = true_regnum (parts.index);
18967 if (parts.disp)
18968 parts.disp = gen_lowpart (mode, parts.disp);
18970 if (parts.scale > 1)
18972 /* Case r1 = r1 + ... */
18973 if (regno1 == regno0)
18975 /* If we have the case r1 = r1 + C * r2 then we
18976 would have to use multiplication, which is very
18977 expensive. Assume the cost model is wrong if we
18978 encounter such a case here. */
18979 gcc_assert (regno2 != regno0);
18981 for (adds = parts.scale; adds > 0; adds--)
18982 ix86_emit_binop (PLUS, mode, target, parts.index);
18984 else
18986 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18987 if (regno0 != regno2)
18988 emit_insn (gen_rtx_SET (target, parts.index));
18990 /* Use shift for scaling. */
18991 ix86_emit_binop (ASHIFT, mode, target,
18992 GEN_INT (exact_log2 (parts.scale)));
18994 if (parts.base)
18995 ix86_emit_binop (PLUS, mode, target, parts.base);
18997 if (parts.disp && parts.disp != const0_rtx)
18998 ix86_emit_binop (PLUS, mode, target, parts.disp);
19001 else if (!parts.base && !parts.index)
19003 gcc_assert (parts.disp);
19004 emit_insn (gen_rtx_SET (target, parts.disp));
19006 else
19008 if (!parts.base)
19010 if (regno0 != regno2)
19011 emit_insn (gen_rtx_SET (target, parts.index));
19013 else if (!parts.index)
19015 if (regno0 != regno1)
19016 emit_insn (gen_rtx_SET (target, parts.base));
19018 else
19020 if (regno0 == regno1)
19021 tmp = parts.index;
19022 else if (regno0 == regno2)
19023 tmp = parts.base;
19024 else
19026 rtx tmp1;
19028 /* Find better operand for SET instruction, depending
19029 on which definition is farther from the insn. */
19030 if (find_nearest_reg_def (insn, regno1, regno2))
19031 tmp = parts.index, tmp1 = parts.base;
19032 else
19033 tmp = parts.base, tmp1 = parts.index;
19035 emit_insn (gen_rtx_SET (target, tmp));
19037 if (parts.disp && parts.disp != const0_rtx)
19038 ix86_emit_binop (PLUS, mode, target, parts.disp);
19040 ix86_emit_binop (PLUS, mode, target, tmp1);
19041 return;
19044 ix86_emit_binop (PLUS, mode, target, tmp);
19047 if (parts.disp && parts.disp != const0_rtx)
19048 ix86_emit_binop (PLUS, mode, target, parts.disp);
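/* A sketch of the splitting above with hypothetical registers: a lea
   such as 4(%rbx,%rcx,4), %rax (base %rbx, index %rcx, scale 4,
   displacement 4, destination distinct from both inputs) would come
   out roughly as
     mov %rcx, %rax
     shl $2, %rax
     add %rbx, %rax
     add $4, %rax
   all plain ALU instructions, so no AGU is involved until the result
   is actually used in an address. */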
19052 /* Return true if it is OK to optimize an ADD operation into an LEA
19053 operation to avoid flag register consumption. For most processors,
19054 ADD is faster than LEA. For processors like BONNELL, if the
19055 destination register of the LEA holds an actual address which will be
19056 used soon, LEA is better; otherwise ADD is better. */
19058 bool
19059 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19061 unsigned int regno0 = true_regnum (operands[0]);
19062 unsigned int regno1 = true_regnum (operands[1]);
19063 unsigned int regno2 = true_regnum (operands[2]);
19065 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19066 if (regno0 != regno1 && regno0 != regno2)
19067 return true;
19069 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19070 return false;
19072 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19075 /* Return true if destination reg of SET_BODY is shift count of
19076 USE_BODY. */
19078 static bool
19079 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19081 rtx set_dest;
19082 rtx shift_rtx;
19083 int i;
19085 /* Retrieve destination of SET_BODY. */
19086 switch (GET_CODE (set_body))
19088 case SET:
19089 set_dest = SET_DEST (set_body);
19090 if (!set_dest || !REG_P (set_dest))
19091 return false;
19092 break;
19093 case PARALLEL:
19094 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19095 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19096 use_body))
19097 return true;
19098 default:
19099 return false;
19100 break;
19103 /* Retrieve shift count of USE_BODY. */
19104 switch (GET_CODE (use_body))
19106 case SET:
19107 shift_rtx = XEXP (use_body, 1);
19108 break;
19109 case PARALLEL:
19110 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19111 if (ix86_dep_by_shift_count_body (set_body,
19112 XVECEXP (use_body, 0, i)))
19113 return true;
19114 default:
19115 return false;
19116 break;
19119 if (shift_rtx
19120 && (GET_CODE (shift_rtx) == ASHIFT
19121 || GET_CODE (shift_rtx) == LSHIFTRT
19122 || GET_CODE (shift_rtx) == ASHIFTRT
19123 || GET_CODE (shift_rtx) == ROTATE
19124 || GET_CODE (shift_rtx) == ROTATERT))
19126 rtx shift_count = XEXP (shift_rtx, 1);
19128 /* Return true if shift count is dest of SET_BODY. */
19129 if (REG_P (shift_count))
19131 /* Add this check since this code can be invoked before register
19132 allocation by the pre-reload scheduler. */
19133 if (reload_completed
19134 && true_regnum (set_dest) == true_regnum (shift_count))
19135 return true;
19136 else if (REGNO(set_dest) == REGNO(shift_count))
19137 return true;
19141 return false;
19144 /* Return true if destination reg of SET_INSN is shift count of
19145 USE_INSN. */
19147 bool
19148 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19150 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19151 PATTERN (use_insn));
19154 /* Return TRUE or FALSE depending on whether the unary operator meets the
19155 appropriate constraints. */
19157 bool
19158 ix86_unary_operator_ok (enum rtx_code,
19159 machine_mode,
19160 rtx operands[2])
19162 /* If one of the operands is memory, source and destination must match. */
19163 if ((MEM_P (operands[0])
19164 || MEM_P (operands[1]))
19165 && ! rtx_equal_p (operands[0], operands[1]))
19166 return false;
19167 return true;
19170 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19171 are ok, keeping in mind the possible movddup alternative. */
19173 bool
19174 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19176 if (MEM_P (operands[0]))
19177 return rtx_equal_p (operands[0], operands[1 + high]);
19178 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19179 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19180 return true;
19183 /* Post-reload splitter for converting an SF or DFmode value in an
19184 SSE register into an unsigned SImode. */
19186 void
19187 ix86_split_convert_uns_si_sse (rtx operands[])
19189 machine_mode vecmode;
19190 rtx value, large, zero_or_two31, input, two31, x;
19192 large = operands[1];
19193 zero_or_two31 = operands[2];
19194 input = operands[3];
19195 two31 = operands[4];
19196 vecmode = GET_MODE (large);
19197 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19199 /* Load up the value into the low element. We must ensure that the other
19200 elements are valid floats -- zero is the easiest such value. */
19201 if (MEM_P (input))
19203 if (vecmode == V4SFmode)
19204 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19205 else
19206 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19208 else
19210 input = gen_rtx_REG (vecmode, REGNO (input));
19211 emit_move_insn (value, CONST0_RTX (vecmode));
19212 if (vecmode == V4SFmode)
19213 emit_insn (gen_sse_movss (value, value, input));
19214 else
19215 emit_insn (gen_sse2_movsd (value, value, input));
19218 emit_move_insn (large, two31);
19219 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19221 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19222 emit_insn (gen_rtx_SET (large, x));
19224 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19225 emit_insn (gen_rtx_SET (zero_or_two31, x));
19227 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19228 emit_insn (gen_rtx_SET (value, x));
19230 large = gen_rtx_REG (V4SImode, REGNO (large));
19231 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19233 x = gen_rtx_REG (V4SImode, REGNO (value));
19234 if (vecmode == V4SFmode)
19235 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19236 else
19237 emit_insn (gen_sse2_cvttpd2dq (x, value));
19238 value = x;
19240 emit_insn (gen_xorv4si3 (value, value, large));
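/* Sketch of the idea above with a hypothetical input of 3e9 in a float:
   3e9 >= 0x1p31, so 0x1p31 is subtracted before the signed truncation
   (giving 852516352) and 0x80000000 is xored back in afterwards,
   yielding the unsigned result 3000000000; values below 0x1p31
   subtract zero and are xored with zero, i.e. converted directly. */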
19243 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19244 Expects the 64-bit DImode to be supplied in a pair of integral
19245 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19246 -mfpmath=sse, !optimize_size only. */
19248 void
19249 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19251 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19252 rtx int_xmm, fp_xmm;
19253 rtx biases, exponents;
19254 rtx x;
19256 int_xmm = gen_reg_rtx (V4SImode);
19257 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19258 emit_insn (gen_movdi_to_sse (int_xmm, input));
19259 else if (TARGET_SSE_SPLIT_REGS)
19261 emit_clobber (int_xmm);
19262 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19264 else
19266 x = gen_reg_rtx (V2DImode);
19267 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19268 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19271 x = gen_rtx_CONST_VECTOR (V4SImode,
19272 gen_rtvec (4, GEN_INT (0x43300000UL),
19273 GEN_INT (0x45300000UL),
19274 const0_rtx, const0_rtx));
19275 exponents = validize_mem (force_const_mem (V4SImode, x));
19277 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19278 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19280 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19281 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19282 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19283 (0x1.0p84 + double(fp_value_hi_xmm)).
19284 Note these exponents differ by 32. */
19286 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19288 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19289 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19290 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19291 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19292 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19293 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19294 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19295 biases = validize_mem (force_const_mem (V2DFmode, biases));
19296 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19298 /* Add the upper and lower DFmode values together. */
19299 if (TARGET_SSE3)
19300 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19301 else
19303 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19304 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19305 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19308 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19311 /* Not used, but eases macroization of patterns. */
19312 void
19313 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19315 gcc_unreachable ();
19318 /* Convert an unsigned SImode value into a DFmode. Only currently used
19319 for SSE, but applicable anywhere. */
19321 void
19322 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19324 REAL_VALUE_TYPE TWO31r;
19325 rtx x, fp;
19327 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19328 NULL, 1, OPTAB_DIRECT);
19330 fp = gen_reg_rtx (DFmode);
19331 emit_insn (gen_floatsidf2 (fp, x));
19333 real_ldexp (&TWO31r, &dconst1, 31);
19334 x = const_double_from_real_value (TWO31r, DFmode);
19336 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19337 if (x != target)
19338 emit_move_insn (target, x);
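/* Small numeric sketch of the trick above: for the hypothetical input
   0xffffffff, adding -0x80000000 in SImode wraps to 0x7fffffff, the
   signed conversion gives 2147483647.0, and adding 0x1p31 back
   produces 4294967295.0, the desired unsigned value. */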
19341 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19342 32-bit mode; otherwise we have a direct convert instruction. */
19344 void
19345 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19347 REAL_VALUE_TYPE TWO32r;
19348 rtx fp_lo, fp_hi, x;
19350 fp_lo = gen_reg_rtx (DFmode);
19351 fp_hi = gen_reg_rtx (DFmode);
19353 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19355 real_ldexp (&TWO32r, &dconst1, 32);
19356 x = const_double_from_real_value (TWO32r, DFmode);
19357 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19359 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19361 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19362 0, OPTAB_DIRECT);
19363 if (x != target)
19364 emit_move_insn (target, x);
19367 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19368 For x86_32, -mfpmath=sse, !optimize_size only. */
19369 void
19370 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19372 REAL_VALUE_TYPE ONE16r;
19373 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19375 real_ldexp (&ONE16r, &dconst1, 16);
19376 x = const_double_from_real_value (ONE16r, SFmode);
19377 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19378 NULL, 0, OPTAB_DIRECT);
19379 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19380 NULL, 0, OPTAB_DIRECT);
19381 fp_hi = gen_reg_rtx (SFmode);
19382 fp_lo = gen_reg_rtx (SFmode);
19383 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19384 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19385 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19386 0, OPTAB_DIRECT);
19387 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19388 0, OPTAB_DIRECT);
19389 if (!rtx_equal_p (target, fp_hi))
19390 emit_move_insn (target, fp_hi);
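/* In other words, the value is rebuilt as (input >> 16) * 0x1p16 +
   (input & 0xffff), computed in SFmode; e.g. a hypothetical input
   0x12345678 splits into the halves 4660 and 22136, each small enough
   to convert exactly through the signed path, and the final
   multiply-add is then only subject to ordinary float rounding. */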
19393 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19394 a vector of unsigned ints VAL to vector of floats TARGET. */
19396 void
19397 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19399 rtx tmp[8];
19400 REAL_VALUE_TYPE TWO16r;
19401 machine_mode intmode = GET_MODE (val);
19402 machine_mode fltmode = GET_MODE (target);
19403 rtx (*cvt) (rtx, rtx);
19405 if (intmode == V4SImode)
19406 cvt = gen_floatv4siv4sf2;
19407 else
19408 cvt = gen_floatv8siv8sf2;
19409 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19410 tmp[0] = force_reg (intmode, tmp[0]);
19411 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19412 OPTAB_DIRECT);
19413 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19414 NULL_RTX, 1, OPTAB_DIRECT);
19415 tmp[3] = gen_reg_rtx (fltmode);
19416 emit_insn (cvt (tmp[3], tmp[1]));
19417 tmp[4] = gen_reg_rtx (fltmode);
19418 emit_insn (cvt (tmp[4], tmp[2]));
19419 real_ldexp (&TWO16r, &dconst1, 16);
19420 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19421 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19422 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19423 OPTAB_DIRECT);
19424 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19425 OPTAB_DIRECT);
19426 if (tmp[7] != target)
19427 emit_move_insn (target, tmp[7]);
19430 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19431 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19432 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19433 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19436 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19438 REAL_VALUE_TYPE TWO31r;
19439 rtx two31r, tmp[4];
19440 machine_mode mode = GET_MODE (val);
19441 machine_mode scalarmode = GET_MODE_INNER (mode);
19442 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19443 rtx (*cmp) (rtx, rtx, rtx, rtx);
19444 int i;
19446 for (i = 0; i < 3; i++)
19447 tmp[i] = gen_reg_rtx (mode);
19448 real_ldexp (&TWO31r, &dconst1, 31);
19449 two31r = const_double_from_real_value (TWO31r, scalarmode);
19450 two31r = ix86_build_const_vector (mode, 1, two31r);
19451 two31r = force_reg (mode, two31r);
19452 switch (mode)
19454 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19455 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19456 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19457 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19458 default: gcc_unreachable ();
19460 tmp[3] = gen_rtx_LE (mode, two31r, val);
19461 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19462 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19463 0, OPTAB_DIRECT);
19464 if (intmode == V4SImode || TARGET_AVX2)
19465 *xorp = expand_simple_binop (intmode, ASHIFT,
19466 gen_lowpart (intmode, tmp[0]),
19467 GEN_INT (31), NULL_RTX, 0,
19468 OPTAB_DIRECT);
19469 else
19471 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19472 two31 = ix86_build_const_vector (intmode, 1, two31);
19473 *xorp = expand_simple_binop (intmode, AND,
19474 gen_lowpart (intmode, tmp[0]),
19475 two31, NULL_RTX, 0,
19476 OPTAB_DIRECT);
19478 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19479 0, OPTAB_DIRECT);
19482 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19483 then replicate the value for all elements of the vector
19484 register. */
19487 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19489 int i, n_elt;
19490 rtvec v;
19491 machine_mode scalar_mode;
19493 switch (mode)
19495 case V64QImode:
19496 case V32QImode:
19497 case V16QImode:
19498 case V32HImode:
19499 case V16HImode:
19500 case V8HImode:
19501 case V16SImode:
19502 case V8SImode:
19503 case V4SImode:
19504 case V8DImode:
19505 case V4DImode:
19506 case V2DImode:
19507 gcc_assert (vect);
19508 case V16SFmode:
19509 case V8SFmode:
19510 case V4SFmode:
19511 case V8DFmode:
19512 case V4DFmode:
19513 case V2DFmode:
19514 n_elt = GET_MODE_NUNITS (mode);
19515 v = rtvec_alloc (n_elt);
19516 scalar_mode = GET_MODE_INNER (mode);
19518 RTVEC_ELT (v, 0) = value;
19520 for (i = 1; i < n_elt; ++i)
19521 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19523 return gen_rtx_CONST_VECTOR (mode, v);
19525 default:
19526 gcc_unreachable ();
19530 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19531 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19532 for an SSE register. If VECT is true, then replicate the mask for
19533 all elements of the vector register. If INVERT is true, then create
19534 a mask excluding the sign bit. */
19537 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19539 machine_mode vec_mode, imode;
19540 wide_int w;
19541 rtx mask, v;
19543 switch (mode)
19545 case V16SImode:
19546 case V16SFmode:
19547 case V8SImode:
19548 case V4SImode:
19549 case V8SFmode:
19550 case V4SFmode:
19551 vec_mode = mode;
19552 mode = GET_MODE_INNER (mode);
19553 imode = SImode;
19554 break;
19556 case V8DImode:
19557 case V4DImode:
19558 case V2DImode:
19559 case V8DFmode:
19560 case V4DFmode:
19561 case V2DFmode:
19562 vec_mode = mode;
19563 mode = GET_MODE_INNER (mode);
19564 imode = DImode;
19565 break;
19567 case TImode:
19568 case TFmode:
19569 vec_mode = VOIDmode;
19570 imode = TImode;
19571 break;
19573 default:
19574 gcc_unreachable ();
19577 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19578 GET_MODE_BITSIZE (mode));
19579 if (invert)
19580 w = wi::bit_not (w);
19582 /* Force this value into the low part of a fp vector constant. */
19583 mask = immed_wide_int_const (w, imode);
19584 mask = gen_lowpart (mode, mask);
19586 if (vec_mode == VOIDmode)
19587 return force_reg (mode, mask);
19589 v = ix86_build_const_vector (vec_mode, vect, mask);
19590 return force_reg (vec_mode, v);
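/* For instance, a V4SFmode request with VECT set yields the constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } reinterpreted as
   floats (0x7fffffff per element when INVERT is set), forced into an
   SSE register; with VECT clear only the low element carries the mask
   and the remaining elements are zero. */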
19593 /* Generate code for floating point ABS or NEG. */
19595 void
19596 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19597 rtx operands[])
19599 rtx mask, set, dst, src;
19600 bool use_sse = false;
19601 bool vector_mode = VECTOR_MODE_P (mode);
19602 machine_mode vmode = mode;
19604 if (vector_mode)
19605 use_sse = true;
19606 else if (mode == TFmode)
19607 use_sse = true;
19608 else if (TARGET_SSE_MATH)
19610 use_sse = SSE_FLOAT_MODE_P (mode);
19611 if (mode == SFmode)
19612 vmode = V4SFmode;
19613 else if (mode == DFmode)
19614 vmode = V2DFmode;
19617 /* NEG and ABS performed with SSE use bitwise mask operations.
19618 Create the appropriate mask now. */
19619 if (use_sse)
19620 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19621 else
19622 mask = NULL_RTX;
19624 dst = operands[0];
19625 src = operands[1];
19627 set = gen_rtx_fmt_e (code, mode, src);
19628 set = gen_rtx_SET (dst, set);
19630 if (mask)
19632 rtx use, clob;
19633 rtvec par;
19635 use = gen_rtx_USE (VOIDmode, mask);
19636 if (vector_mode)
19637 par = gen_rtvec (2, set, use);
19638 else
19640 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19641 par = gen_rtvec (3, set, use, clob);
19643 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19645 else
19646 emit_insn (set);
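/* The mask built above is what the splitters for these patterns
   consume: conceptually NEG becomes an XOR with the sign-bit mask and
   ABS an AND with the inverted mask, e.g. for SFmode roughly
     x ^ 0x80000000   (NEG)
     x & 0x7fffffff   (ABS)
   applied to the whole SSE vector when the mode is a vector mode. */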
19649 /* Expand a copysign operation. Special case operand 0 being a constant. */
19651 void
19652 ix86_expand_copysign (rtx operands[])
19654 machine_mode mode, vmode;
19655 rtx dest, op0, op1, mask, nmask;
19657 dest = operands[0];
19658 op0 = operands[1];
19659 op1 = operands[2];
19661 mode = GET_MODE (dest);
19663 if (mode == SFmode)
19664 vmode = V4SFmode;
19665 else if (mode == DFmode)
19666 vmode = V2DFmode;
19667 else
19668 vmode = mode;
19670 if (CONST_DOUBLE_P (op0))
19672 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19674 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19675 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19677 if (mode == SFmode || mode == DFmode)
19679 if (op0 == CONST0_RTX (mode))
19680 op0 = CONST0_RTX (vmode);
19681 else
19683 rtx v = ix86_build_const_vector (vmode, false, op0);
19685 op0 = force_reg (vmode, v);
19688 else if (op0 != CONST0_RTX (mode))
19689 op0 = force_reg (mode, op0);
19691 mask = ix86_build_signbit_mask (vmode, 0, 0);
19693 if (mode == SFmode)
19694 copysign_insn = gen_copysignsf3_const;
19695 else if (mode == DFmode)
19696 copysign_insn = gen_copysigndf3_const;
19697 else
19698 copysign_insn = gen_copysigntf3_const;
19700 emit_insn (copysign_insn (dest, op0, op1, mask));
19702 else
19704 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19706 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19707 mask = ix86_build_signbit_mask (vmode, 0, 0);
19709 if (mode == SFmode)
19710 copysign_insn = gen_copysignsf3_var;
19711 else if (mode == DFmode)
19712 copysign_insn = gen_copysigndf3_var;
19713 else
19714 copysign_insn = gen_copysigntf3_var;
19716 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19720 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19721 be a constant, and so has already been expanded into a vector constant. */
19723 void
19724 ix86_split_copysign_const (rtx operands[])
19726 machine_mode mode, vmode;
19727 rtx dest, op0, mask, x;
19729 dest = operands[0];
19730 op0 = operands[1];
19731 mask = operands[3];
19733 mode = GET_MODE (dest);
19734 vmode = GET_MODE (mask);
19736 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19737 x = gen_rtx_AND (vmode, dest, mask);
19738 emit_insn (gen_rtx_SET (dest, x));
19740 if (op0 != CONST0_RTX (vmode))
19742 x = gen_rtx_IOR (vmode, dest, op0);
19743 emit_insn (gen_rtx_SET (dest, x));
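/* Put differently, for the constant case the usual
     copysign (x, y) = (abs_mask & x) | (sign_mask & y)
   collapses to the two instructions above: DEST is assumed to start
   out holding the sign-providing operand, its sign bit is isolated
   with the AND, and the precomputed nonnegative magnitude is then
   ORed in (the OR is skipped entirely when that magnitude is +0.0). */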
19747 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19748 so we have to do two masks. */
19750 void
19751 ix86_split_copysign_var (rtx operands[])
19753 machine_mode mode, vmode;
19754 rtx dest, scratch, op0, op1, mask, nmask, x;
19756 dest = operands[0];
19757 scratch = operands[1];
19758 op0 = operands[2];
19759 op1 = operands[3];
19760 nmask = operands[4];
19761 mask = operands[5];
19763 mode = GET_MODE (dest);
19764 vmode = GET_MODE (mask);
19766 if (rtx_equal_p (op0, op1))
19768 /* Shouldn't happen often (it's useless, obviously), but when it does
19769 we'd generate incorrect code if we continue below. */
19770 emit_move_insn (dest, op0);
19771 return;
19774 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19776 gcc_assert (REGNO (op1) == REGNO (scratch));
19778 x = gen_rtx_AND (vmode, scratch, mask);
19779 emit_insn (gen_rtx_SET (scratch, x));
19781 dest = mask;
19782 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19783 x = gen_rtx_NOT (vmode, dest);
19784 x = gen_rtx_AND (vmode, x, op0);
19785 emit_insn (gen_rtx_SET (dest, x));
19787 else
19789 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19791 x = gen_rtx_AND (vmode, scratch, mask);
19793 else /* alternative 2,4 */
19795 gcc_assert (REGNO (mask) == REGNO (scratch));
19796 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19797 x = gen_rtx_AND (vmode, scratch, op1);
19799 emit_insn (gen_rtx_SET (scratch, x));
19801 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19803 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19804 x = gen_rtx_AND (vmode, dest, nmask);
19806 else /* alternative 3,4 */
19808 gcc_assert (REGNO (nmask) == REGNO (dest));
19809 dest = nmask;
19810 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19811 x = gen_rtx_AND (vmode, dest, op0);
19813 emit_insn (gen_rtx_SET (dest, x));
19816 x = gen_rtx_IOR (vmode, dest, scratch);
19817 emit_insn (gen_rtx_SET (dest, x));
19820 /* Return TRUE or FALSE depending on whether the first SET in INSN
19821 has source and destination with matching CC modes, and that the
19822 CC mode is at least as constrained as REQ_MODE. */
19824 bool
19825 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19827 rtx set;
19828 machine_mode set_mode;
19830 set = PATTERN (insn);
19831 if (GET_CODE (set) == PARALLEL)
19832 set = XVECEXP (set, 0, 0);
19833 gcc_assert (GET_CODE (set) == SET);
19834 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19836 set_mode = GET_MODE (SET_DEST (set));
19837 switch (set_mode)
19839 case CCNOmode:
19840 if (req_mode != CCNOmode
19841 && (req_mode != CCmode
19842 || XEXP (SET_SRC (set), 1) != const0_rtx))
19843 return false;
19844 break;
19845 case CCmode:
19846 if (req_mode == CCGCmode)
19847 return false;
19848 /* FALLTHRU */
19849 case CCGCmode:
19850 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19851 return false;
19852 /* FALLTHRU */
19853 case CCGOCmode:
19854 if (req_mode == CCZmode)
19855 return false;
19856 /* FALLTHRU */
19857 case CCZmode:
19858 break;
19860 case CCAmode:
19861 case CCCmode:
19862 case CCOmode:
19863 case CCPmode:
19864 case CCSmode:
19865 if (set_mode != req_mode)
19866 return false;
19867 break;
19869 default:
19870 gcc_unreachable ();
19873 return GET_MODE (SET_SRC (set)) == set_mode;
19876 /* Generate insn patterns to do an integer compare of OPERANDS. */
19878 static rtx
19879 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19881 machine_mode cmpmode;
19882 rtx tmp, flags;
19884 cmpmode = SELECT_CC_MODE (code, op0, op1);
19885 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19887 /* This is very simple, but making the interface the same as in the
19888 FP case makes the rest of the code easier. */
19889 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19890 emit_insn (gen_rtx_SET (flags, tmp));
19892 /* Return the test that should be put into the flags user, i.e.
19893 the bcc, scc, or cmov instruction. */
19894 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19897 /* Figure out whether to use ordered or unordered fp comparisons.
19898 Return the appropriate mode to use. */
19900 machine_mode
19901 ix86_fp_compare_mode (enum rtx_code)
19903 /* ??? In order to make all comparisons reversible, we do all comparisons
19904 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19905 all forms trapping and nontrapping comparisons, we can make inequality
19906 comparisons trapping again, since it results in better code when using
19907 FCOM based compares. */
19908 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19911 machine_mode
19912 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19914 machine_mode mode = GET_MODE (op0);
19916 if (SCALAR_FLOAT_MODE_P (mode))
19918 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19919 return ix86_fp_compare_mode (code);
19922 switch (code)
19924 /* Only zero flag is needed. */
19925 case EQ: /* ZF=0 */
19926 case NE: /* ZF!=0 */
19927 return CCZmode;
19928 /* Codes needing carry flag. */
19929 case GEU: /* CF=0 */
19930 case LTU: /* CF=1 */
19931 /* Detect overflow checks. They need just the carry flag. */
19932 if (GET_CODE (op0) == PLUS
19933 && rtx_equal_p (op1, XEXP (op0, 0)))
19934 return CCCmode;
19935 else
19936 return CCmode;
19937 case GTU: /* CF=0 & ZF=0 */
19938 case LEU: /* CF=1 | ZF=1 */
19939 return CCmode;
19940 /* Codes possibly doable only with sign flag when
19941 comparing against zero. */
19942 case GE: /* SF=OF or SF=0 */
19943 case LT: /* SF<>OF or SF=1 */
19944 if (op1 == const0_rtx)
19945 return CCGOCmode;
19946 else
19947 /* For other cases Carry flag is not required. */
19948 return CCGCmode;
19949 /* Codes doable only with the sign flag when comparing
19950 against zero, but we lack a jump instruction for it,
19951 so we need to use relational tests against overflow
19952 that thus needs to be zero. */
19953 case GT: /* ZF=0 & SF=OF */
19954 case LE: /* ZF=1 | SF<>OF */
19955 if (op1 == const0_rtx)
19956 return CCNOmode;
19957 else
19958 return CCGCmode;
19959 /* The strcmp pattern does a (use flags), and combine may ask us for
19960 the proper mode. */
19961 case USE:
19962 return CCmode;
19963 default:
19964 gcc_unreachable ();
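/* A few hypothetical examples of the mapping above: comparing a == b
   only needs the zero flag, so CCZmode is chosen; an unsigned a < b
   needs the carry flag and gets CCmode (or CCCmode when the compare is
   really an overflow check of the form a + b < a); a signed a < 0 can
   be decided from the sign flag alone and gets CCGOCmode. */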
19968 /* Return the fixed registers used for condition codes. */
19970 static bool
19971 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19973 *p1 = FLAGS_REG;
19974 *p2 = FPSR_REG;
19975 return true;
19978 /* If two condition code modes are compatible, return a condition code
19979 mode which is compatible with both. Otherwise, return
19980 VOIDmode. */
19982 static machine_mode
19983 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19985 if (m1 == m2)
19986 return m1;
19988 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19989 return VOIDmode;
19991 if ((m1 == CCGCmode && m2 == CCGOCmode)
19992 || (m1 == CCGOCmode && m2 == CCGCmode))
19993 return CCGCmode;
19995 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19996 return m2;
19997 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19998 return m1;
20000 switch (m1)
20002 default:
20003 gcc_unreachable ();
20005 case CCmode:
20006 case CCGCmode:
20007 case CCGOCmode:
20008 case CCNOmode:
20009 case CCAmode:
20010 case CCCmode:
20011 case CCOmode:
20012 case CCPmode:
20013 case CCSmode:
20014 case CCZmode:
20015 switch (m2)
20017 default:
20018 return VOIDmode;
20020 case CCmode:
20021 case CCGCmode:
20022 case CCGOCmode:
20023 case CCNOmode:
20024 case CCAmode:
20025 case CCCmode:
20026 case CCOmode:
20027 case CCPmode:
20028 case CCSmode:
20029 case CCZmode:
20030 return CCmode;
20033 case CCFPmode:
20034 case CCFPUmode:
20035 /* These are only compatible with themselves, which we already
20036 checked above. */
20037 return VOIDmode;
20042 /* Return a comparison we can do and that it is equivalent to
20043 swap_condition (code) apart possibly from orderedness.
20044 But, never change orderedness if TARGET_IEEE_FP, returning
20045 UNKNOWN in that case if necessary. */
20047 static enum rtx_code
20048 ix86_fp_swap_condition (enum rtx_code code)
20050 switch (code)
20052 case GT: /* GTU - CF=0 & ZF=0 */
20053 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20054 case GE: /* GEU - CF=0 */
20055 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20056 case UNLT: /* LTU - CF=1 */
20057 return TARGET_IEEE_FP ? UNKNOWN : GT;
20058 case UNLE: /* LEU - CF=1 | ZF=1 */
20059 return TARGET_IEEE_FP ? UNKNOWN : GE;
20060 default:
20061 return swap_condition (code);
20065 /* Return the cost of comparison CODE using the best strategy for performance.
20066 All following functions use the number of instructions as the cost metric.
20067 In the future this should be tweaked to compute bytes for optimize_size and
20068 take into account the performance of various instructions on various CPUs. */
20070 static int
20071 ix86_fp_comparison_cost (enum rtx_code code)
20073 int arith_cost;
20075 /* The cost of code using bit-twiddling on %ah. */
20076 switch (code)
20078 case UNLE:
20079 case UNLT:
20080 case LTGT:
20081 case GT:
20082 case GE:
20083 case UNORDERED:
20084 case ORDERED:
20085 case UNEQ:
20086 arith_cost = 4;
20087 break;
20088 case LT:
20089 case NE:
20090 case EQ:
20091 case UNGE:
20092 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20093 break;
20094 case LE:
20095 case UNGT:
20096 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20097 break;
20098 default:
20099 gcc_unreachable ();
20102 switch (ix86_fp_comparison_strategy (code))
20104 case IX86_FPCMP_COMI:
20105 return arith_cost > 4 ? 3 : 2;
20106 case IX86_FPCMP_SAHF:
20107 return arith_cost > 4 ? 4 : 3;
20108 default:
20109 return arith_cost;
20113 /* Return the strategy to use for floating-point compares. We assume that
20114 fcomi is always preferable where available, since that is also true when
20115 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20117 enum ix86_fpcmp_strategy
20118 ix86_fp_comparison_strategy (enum rtx_code)
20120 /* Do fcomi/sahf based test when profitable. */
20122 if (TARGET_CMOVE)
20123 return IX86_FPCMP_COMI;
20125 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20126 return IX86_FPCMP_SAHF;
20128 return IX86_FPCMP_ARITH;
20131 /* Swap, force into registers, or otherwise massage the two operands
20132 to a fp comparison. The operands are updated in place; the new
20133 comparison code is returned. */
20135 static enum rtx_code
20136 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20138 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20139 rtx op0 = *pop0, op1 = *pop1;
20140 machine_mode op_mode = GET_MODE (op0);
20141 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20143 /* All of the unordered compare instructions only work on registers.
20144 The same is true of the fcomi compare instructions. The XFmode
20145 compare instructions require registers except when comparing
20146 against zero or when converting operand 1 from fixed point to
20147 floating point. */
20149 if (!is_sse
20150 && (fpcmp_mode == CCFPUmode
20151 || (op_mode == XFmode
20152 && ! (standard_80387_constant_p (op0) == 1
20153 || standard_80387_constant_p (op1) == 1)
20154 && GET_CODE (op1) != FLOAT)
20155 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20157 op0 = force_reg (op_mode, op0);
20158 op1 = force_reg (op_mode, op1);
20160 else
20162 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20163 things around if they appear profitable, otherwise force op0
20164 into a register. */
20166 if (standard_80387_constant_p (op0) == 0
20167 || (MEM_P (op0)
20168 && ! (standard_80387_constant_p (op1) == 0
20169 || MEM_P (op1))))
20171 enum rtx_code new_code = ix86_fp_swap_condition (code);
20172 if (new_code != UNKNOWN)
20174 std::swap (op0, op1);
20175 code = new_code;
20179 if (!REG_P (op0))
20180 op0 = force_reg (op_mode, op0);
20182 if (CONSTANT_P (op1))
20184 int tmp = standard_80387_constant_p (op1);
20185 if (tmp == 0)
20186 op1 = validize_mem (force_const_mem (op_mode, op1));
20187 else if (tmp == 1)
20189 if (TARGET_CMOVE)
20190 op1 = force_reg (op_mode, op1);
20192 else
20193 op1 = force_reg (op_mode, op1);
20197 /* Try to rearrange the comparison to make it cheaper. */
20198 if (ix86_fp_comparison_cost (code)
20199 > ix86_fp_comparison_cost (swap_condition (code))
20200 && (REG_P (op1) || can_create_pseudo_p ()))
20202 std::swap (op0, op1);
20203 code = swap_condition (code);
20204 if (!REG_P (op0))
20205 op0 = force_reg (op_mode, op0);
20208 *pop0 = op0;
20209 *pop1 = op1;
20210 return code;
20213 /* Convert comparison codes we use to represent FP comparison to integer
20214 code that will result in proper branch. Return UNKNOWN if no such code
20215 is available. */
20217 enum rtx_code
20218 ix86_fp_compare_code_to_integer (enum rtx_code code)
20220 switch (code)
20222 case GT:
20223 return GTU;
20224 case GE:
20225 return GEU;
20226 case ORDERED:
20227 case UNORDERED:
20228 return code;
20229 break;
20230 case UNEQ:
20231 return EQ;
20232 break;
20233 case UNLT:
20234 return LTU;
20235 break;
20236 case UNLE:
20237 return LEU;
20238 break;
20239 case LTGT:
20240 return NE;
20241 break;
20242 default:
20243 return UNKNOWN;
20247 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20249 static rtx
20250 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20252 machine_mode fpcmp_mode, intcmp_mode;
20253 rtx tmp, tmp2;
20255 fpcmp_mode = ix86_fp_compare_mode (code);
20256 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20258 /* Do fcomi/sahf based test when profitable. */
20259 switch (ix86_fp_comparison_strategy (code))
20261 case IX86_FPCMP_COMI:
20262 intcmp_mode = fpcmp_mode;
20263 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20264 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20265 emit_insn (tmp);
20266 break;
20268 case IX86_FPCMP_SAHF:
20269 intcmp_mode = fpcmp_mode;
20270 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20271 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20273 if (!scratch)
20274 scratch = gen_reg_rtx (HImode);
20275 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20277 break;
20279 case IX86_FPCMP_ARITH:
20280 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20281 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20282 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20283 if (!scratch)
20284 scratch = gen_reg_rtx (HImode);
20285 emit_insn (gen_rtx_SET (scratch, tmp2));
20287 /* In the unordered case, we have to check C2 for NaN's, which
20288 doesn't happen to work out to anything nice combination-wise.
20289 So do some bit twiddling on the value we've got in AH to come
20290 up with an appropriate set of condition codes. */
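/* Assuming the standard x87 status-word layout: after fnstsw the
   condition bits land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so
   the constant 0x45 below tests C3|C2|C0 and 0x40 tests C3 alone;
   fcom-style compares set C0 when ST(0) is below the operand, C3 when
   they are equal, and C3, C2 and C0 together for an unordered result. */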
20292 intcmp_mode = CCNOmode;
20293 switch (code)
20295 case GT:
20296 case UNGT:
20297 if (code == GT || !TARGET_IEEE_FP)
20299 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20300 code = EQ;
20302 else
20304 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20305 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20306 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20307 intcmp_mode = CCmode;
20308 code = GEU;
20310 break;
20311 case LT:
20312 case UNLT:
20313 if (code == LT && TARGET_IEEE_FP)
20315 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20316 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20317 intcmp_mode = CCmode;
20318 code = EQ;
20320 else
20322 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20323 code = NE;
20325 break;
20326 case GE:
20327 case UNGE:
20328 if (code == GE || !TARGET_IEEE_FP)
20330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20331 code = EQ;
20333 else
20335 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20336 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20337 code = NE;
20339 break;
20340 case LE:
20341 case UNLE:
20342 if (code == LE && TARGET_IEEE_FP)
20344 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20345 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20346 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20347 intcmp_mode = CCmode;
20348 code = LTU;
20350 else
20352 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20353 code = NE;
20355 break;
20356 case EQ:
20357 case UNEQ:
20358 if (code == EQ && TARGET_IEEE_FP)
20360 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20361 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20362 intcmp_mode = CCmode;
20363 code = EQ;
20365 else
20367 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20368 code = NE;
20370 break;
20371 case NE:
20372 case LTGT:
20373 if (code == NE && TARGET_IEEE_FP)
20375 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20376 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20377 GEN_INT (0x40)));
20378 code = NE;
20380 else
20382 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20383 code = EQ;
20385 break;
20387 case UNORDERED:
20388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20389 code = NE;
20390 break;
20391 case ORDERED:
20392 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20393 code = EQ;
20394 break;
20396 default:
20397 gcc_unreachable ();
20399 break;
20401 default:
20402 gcc_unreachable ();
20405 /* Return the test that should be put into the flags user, i.e.
20406 the bcc, scc, or cmov instruction. */
20407 return gen_rtx_fmt_ee (code, VOIDmode,
20408 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20409 const0_rtx);
20412 static rtx
20413 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20415 rtx ret;
20417 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20418 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20420 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20422 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20423 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20425 else
20426 ret = ix86_expand_int_compare (code, op0, op1);
20428 return ret;
20431 void
20432 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20434 machine_mode mode = GET_MODE (op0);
20435 rtx tmp;
20437 switch (mode)
20439 case SFmode:
20440 case DFmode:
20441 case XFmode:
20442 case QImode:
20443 case HImode:
20444 case SImode:
20445 simple:
20446 tmp = ix86_expand_compare (code, op0, op1);
20447 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20448 gen_rtx_LABEL_REF (VOIDmode, label),
20449 pc_rtx);
20450 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20451 return;
20453 case DImode:
20454 if (TARGET_64BIT)
20455 goto simple;
20456 case TImode:
20457 /* Expand DImode branch into multiple compare+branch. */
20459 rtx lo[2], hi[2];
20460 rtx_code_label *label2;
20461 enum rtx_code code1, code2, code3;
20462 machine_mode submode;
20464 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20466 std::swap (op0, op1);
20467 code = swap_condition (code);
20470 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20471 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20473 submode = mode == DImode ? SImode : DImode;
20475 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20476 avoid two branches. This costs one extra insn, so disable when
20477 optimizing for size. */
20479 if ((code == EQ || code == NE)
20480 && (!optimize_insn_for_size_p ()
20481 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20483 rtx xor0, xor1;
20485 xor1 = hi[0];
20486 if (hi[1] != const0_rtx)
20487 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20488 NULL_RTX, 0, OPTAB_WIDEN);
20490 xor0 = lo[0];
20491 if (lo[1] != const0_rtx)
20492 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20493 NULL_RTX, 0, OPTAB_WIDEN);
20495 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20496 NULL_RTX, 0, OPTAB_WIDEN);
20498 ix86_expand_branch (code, tmp, const0_rtx, label);
20499 return;
20502 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20503 op1 is a constant and its low word is zero, then we can just
20504 examine the high word. Similarly for a low word of -1 and
20505 less-than-or-equal or greater-than. */
20507 if (CONST_INT_P (hi[1]))
20508 switch (code)
20510 case LT: case LTU: case GE: case GEU:
20511 if (lo[1] == const0_rtx)
20513 ix86_expand_branch (code, hi[0], hi[1], label);
20514 return;
20516 break;
20517 case LE: case LEU: case GT: case GTU:
20518 if (lo[1] == constm1_rtx)
20520 ix86_expand_branch (code, hi[0], hi[1], label);
20521 return;
20523 break;
20524 default:
20525 break;
20528 /* Otherwise, we need two or three jumps. */
20530 label2 = gen_label_rtx ();
20532 code1 = code;
20533 code2 = swap_condition (code);
20534 code3 = unsigned_condition (code);
20536 switch (code)
20538 case LT: case GT: case LTU: case GTU:
20539 break;
20541 case LE: code1 = LT; code2 = GT; break;
20542 case GE: code1 = GT; code2 = LT; break;
20543 case LEU: code1 = LTU; code2 = GTU; break;
20544 case GEU: code1 = GTU; code2 = LTU; break;
20546 case EQ: code1 = UNKNOWN; code2 = NE; break;
20547 case NE: code2 = UNKNOWN; break;
20549 default:
20550 gcc_unreachable ();
20554 * a < b =>
20555 * if (hi(a) < hi(b)) goto true;
20556 * if (hi(a) > hi(b)) goto false;
20557 * if (lo(a) < lo(b)) goto true;
20558 * false:
20561 if (code1 != UNKNOWN)
20562 ix86_expand_branch (code1, hi[0], hi[1], label);
20563 if (code2 != UNKNOWN)
20564 ix86_expand_branch (code2, hi[0], hi[1], label2);
20566 ix86_expand_branch (code3, lo[0], lo[1], label);
20568 if (code2 != UNKNOWN)
20569 emit_label (label2);
20570 return;
20573 default:
20574 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20575 goto simple;
20579 /* Split branch based on floating point condition. */
20580 void
20581 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20582 rtx target1, rtx target2, rtx tmp)
20584 rtx condition;
20585 rtx i;
20587 if (target2 != pc_rtx)
20589 std::swap (target1, target2);
20590 code = reverse_condition_maybe_unordered (code);
20593 condition = ix86_expand_fp_compare (code, op1, op2,
20594 tmp);
20596 i = emit_jump_insn (gen_rtx_SET
20597 (pc_rtx,
20598 gen_rtx_IF_THEN_ELSE (VOIDmode,
20599 condition, target1, target2)));
20600 if (split_branch_probability >= 0)
20601 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20604 void
20605 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20607 rtx ret;
20609 gcc_assert (GET_MODE (dest) == QImode);
20611 ret = ix86_expand_compare (code, op0, op1);
20612 PUT_MODE (ret, QImode);
20613 emit_insn (gen_rtx_SET (dest, ret));
20616 /* Expand comparison setting or clearing carry flag. Return true when
20617 successful and set pop for the operation. */
20618 static bool
20619 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20621 machine_mode mode =
20622 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20624 /* Do not handle double-mode compares that go through special path. */
20625 if (mode == (TARGET_64BIT ? TImode : DImode))
20626 return false;
20628 if (SCALAR_FLOAT_MODE_P (mode))
20630 rtx compare_op;
20631 rtx_insn *compare_seq;
20633 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20635 /* Shortcut: the following common codes never translate
20636 into carry flag compares. */
20637 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20638 || code == ORDERED || code == UNORDERED)
20639 return false;
20641 /* These comparisons require zero flag; swap operands so they won't. */
20642 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20643 && !TARGET_IEEE_FP)
20645 std::swap (op0, op1);
20646 code = swap_condition (code);
20649 /* Try to expand the comparison and verify that we end up with
20650 a carry-flag-based comparison.  This fails to be true only when
20651 we decide to expand the comparison using arithmetic, which is
20652 not a common scenario. */
20653 start_sequence ();
20654 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20655 compare_seq = get_insns ();
20656 end_sequence ();
20658 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20659 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20660 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20661 else
20662 code = GET_CODE (compare_op);
20664 if (code != LTU && code != GEU)
20665 return false;
20667 emit_insn (compare_seq);
20668 *pop = compare_op;
20669 return true;
20672 if (!INTEGRAL_MODE_P (mode))
20673 return false;
20675 switch (code)
20677 case LTU:
20678 case GEU:
20679 break;
20681 /* Convert a==0 into (unsigned)a<1. */
20682 case EQ:
20683 case NE:
20684 if (op1 != const0_rtx)
20685 return false;
20686 op1 = const1_rtx;
20687 code = (code == EQ ? LTU : GEU);
20688 break;
20690 /* Convert a>b into b<a or a>=b+1. */
20691 case GTU:
20692 case LEU:
20693 if (CONST_INT_P (op1))
20695 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20696 /* Bail out on overflow.  We could still swap the operands, but that
20697 would force loading the constant into a register. */
20698 if (op1 == const0_rtx
20699 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20700 return false;
20701 code = (code == GTU ? GEU : LTU);
20703 else
20705 std::swap (op0, op1);
20706 code = (code == GTU ? LTU : GEU);
20708 break;
20710 /* Convert a>=0 into (unsigned)a<0x80000000. */
20711 case LT:
20712 case GE:
20713 if (mode == DImode || op1 != const0_rtx)
20714 return false;
20715 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20716 code = (code == LT ? GEU : LTU);
20717 break;
20718 case LE:
20719 case GT:
20720 if (mode == DImode || op1 != constm1_rtx)
20721 return false;
20722 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20723 code = (code == LE ? GEU : LTU);
20724 break;
20726 default:
20727 return false;
20729 /* Swapping operands may cause a constant to appear as the first operand. */
20730 if (!nonimmediate_operand (op0, VOIDmode))
20732 if (!can_create_pseudo_p ())
20733 return false;
20734 op0 = force_reg (mode, op0);
20736 *pop = ix86_expand_compare (code, op0, op1);
20737 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20738 return true;
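/* Illustrative use of the conversions above: "a == 0" is rewritten as
   "(unsigned) a < 1", which sets the carry flag exactly when a is zero,
   so the caller can consume the result with adc/sbb. */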
20741 bool
20742 ix86_expand_int_movcc (rtx operands[])
20744 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20745 rtx_insn *compare_seq;
20746 rtx compare_op;
20747 machine_mode mode = GET_MODE (operands[0]);
20748 bool sign_bit_compare_p = false;
20749 rtx op0 = XEXP (operands[1], 0);
20750 rtx op1 = XEXP (operands[1], 1);
20752 if (GET_MODE (op0) == TImode
20753 || (GET_MODE (op0) == DImode
20754 && !TARGET_64BIT))
20755 return false;
20757 start_sequence ();
20758 compare_op = ix86_expand_compare (code, op0, op1);
20759 compare_seq = get_insns ();
20760 end_sequence ();
20762 compare_code = GET_CODE (compare_op);
20764 if ((op1 == const0_rtx && (code == GE || code == LT))
20765 || (op1 == constm1_rtx && (code == GT || code == LE)))
20766 sign_bit_compare_p = true;
20768 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20769 HImode insns, we'd be swallowed in word prefix ops. */
20771 if ((mode != HImode || TARGET_FAST_PREFIX)
20772 && (mode != (TARGET_64BIT ? TImode : DImode))
20773 && CONST_INT_P (operands[2])
20774 && CONST_INT_P (operands[3]))
20776 rtx out = operands[0];
20777 HOST_WIDE_INT ct = INTVAL (operands[2]);
20778 HOST_WIDE_INT cf = INTVAL (operands[3]);
20779 HOST_WIDE_INT diff;
20781 diff = ct - cf;
20782 /* Sign bit compares are better done using shifts than by using
20783 sbb. */
20784 if (sign_bit_compare_p
20785 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20787 /* Detect overlap between destination and compare sources. */
20788 rtx tmp = out;
20790 if (!sign_bit_compare_p)
20792 rtx flags;
20793 bool fpcmp = false;
20795 compare_code = GET_CODE (compare_op);
20797 flags = XEXP (compare_op, 0);
20799 if (GET_MODE (flags) == CCFPmode
20800 || GET_MODE (flags) == CCFPUmode)
20802 fpcmp = true;
20803 compare_code
20804 = ix86_fp_compare_code_to_integer (compare_code);
20807 /* To simplify rest of code, restrict to the GEU case. */
20808 if (compare_code == LTU)
20810 std::swap (ct, cf);
20811 compare_code = reverse_condition (compare_code);
20812 code = reverse_condition (code);
20814 else
20816 if (fpcmp)
20817 PUT_CODE (compare_op,
20818 reverse_condition_maybe_unordered
20819 (GET_CODE (compare_op)));
20820 else
20821 PUT_CODE (compare_op,
20822 reverse_condition (GET_CODE (compare_op)));
20824 diff = ct - cf;
20826 if (reg_overlap_mentioned_p (out, op0)
20827 || reg_overlap_mentioned_p (out, op1))
20828 tmp = gen_reg_rtx (mode);
20830 if (mode == DImode)
20831 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20832 else
20833 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20834 flags, compare_op));
20836 else
20838 if (code == GT || code == GE)
20839 code = reverse_condition (code);
20840 else
20842 std::swap (ct, cf);
20843 diff = ct - cf;
20845 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20848 if (diff == 1)
20851 * cmpl op0,op1
20852 * sbbl dest,dest
20853 * [addl dest, ct]
20855 * Size 5 - 8.
20857 if (ct)
20858 tmp = expand_simple_binop (mode, PLUS,
20859 tmp, GEN_INT (ct),
20860 copy_rtx (tmp), 1, OPTAB_DIRECT);
20862 else if (cf == -1)
20865 * cmpl op0,op1
20866 * sbbl dest,dest
20867 * orl $ct, dest
20869 * Size 8.
20871 tmp = expand_simple_binop (mode, IOR,
20872 tmp, GEN_INT (ct),
20873 copy_rtx (tmp), 1, OPTAB_DIRECT);
20875 else if (diff == -1 && ct)
20878 * cmpl op0,op1
20879 * sbbl dest,dest
20880 * notl dest
20881 * [addl dest, cf]
20883 * Size 8 - 11.
20885 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20886 if (cf)
20887 tmp = expand_simple_binop (mode, PLUS,
20888 copy_rtx (tmp), GEN_INT (cf),
20889 copy_rtx (tmp), 1, OPTAB_DIRECT);
20891 else
20894 * cmpl op0,op1
20895 * sbbl dest,dest
20896 * [notl dest]
20897 * andl cf - ct, dest
20898 * [addl dest, ct]
20900 * Size 8 - 11.
20903 if (cf == 0)
20905 cf = ct;
20906 ct = 0;
20907 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20910 tmp = expand_simple_binop (mode, AND,
20911 copy_rtx (tmp),
20912 gen_int_mode (cf - ct, mode),
20913 copy_rtx (tmp), 1, OPTAB_DIRECT);
20914 if (ct)
20915 tmp = expand_simple_binop (mode, PLUS,
20916 copy_rtx (tmp), GEN_INT (ct),
20917 copy_rtx (tmp), 1, OPTAB_DIRECT);
20920 if (!rtx_equal_p (tmp, out))
20921 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20923 return true;
20926 if (diff < 0)
20928 machine_mode cmp_mode = GET_MODE (op0);
20929 enum rtx_code new_code;
20931 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20933 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20935 /* We may be reversing an unordered compare to a normal compare, which
20936 is not valid in general (we may convert a non-trapping condition
20937 to a trapping one); however, on i386 we currently emit all
20938 comparisons unordered. */
20939 new_code = reverse_condition_maybe_unordered (code);
20941 else
20942 new_code = ix86_reverse_condition (code, cmp_mode);
20943 if (new_code != UNKNOWN)
20945 std::swap (ct, cf);
20946 diff = -diff;
20947 code = new_code;
20951 compare_code = UNKNOWN;
20952 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20953 && CONST_INT_P (op1))
20955 if (op1 == const0_rtx
20956 && (code == LT || code == GE))
20957 compare_code = code;
20958 else if (op1 == constm1_rtx)
20960 if (code == LE)
20961 compare_code = LT;
20962 else if (code == GT)
20963 compare_code = GE;
20967 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20968 if (compare_code != UNKNOWN
20969 && GET_MODE (op0) == GET_MODE (out)
20970 && (cf == -1 || ct == -1))
20972 /* If lea code below could be used, only optimize
20973 if it results in a 2 insn sequence. */
20975 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20976 || diff == 3 || diff == 5 || diff == 9)
20977 || (compare_code == LT && ct == -1)
20978 || (compare_code == GE && cf == -1))
20981 * notl op1 (if necessary)
20982 * sarl $31, op1
20983 * orl cf, op1
20985 if (ct != -1)
20987 cf = ct;
20988 ct = -1;
20989 code = reverse_condition (code);
20992 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20994 out = expand_simple_binop (mode, IOR,
20995 out, GEN_INT (cf),
20996 out, 1, OPTAB_DIRECT);
20997 if (out != operands[0])
20998 emit_move_insn (operands[0], out);
21000 return true;
21005 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21006 || diff == 3 || diff == 5 || diff == 9)
21007 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21008 && (mode != DImode
21009 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21012 * xorl dest,dest
21013 * cmpl op1,op2
21014 * setcc dest
21015 * lea cf(dest*(ct-cf)),dest
21017 * Size 14.
21019 * This also catches the degenerate setcc-only case.
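 *
 * E.g. with ct = 7 and cf = 3, diff is 4 and the result is roughly
 * "setcc dest; lea 3(,dest,4), dest" (an illustrative sketch; the exact
 * addressing form depends on the mode).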
21022 rtx tmp;
21023 int nops;
21025 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21027 nops = 0;
21028 /* On x86_64 the lea instruction operates on Pmode, so we need
21029 to get the arithmetic done in the proper mode to match. */
21030 if (diff == 1)
21031 tmp = copy_rtx (out);
21032 else
21034 rtx out1;
21035 out1 = copy_rtx (out);
21036 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21037 nops++;
21038 if (diff & 1)
21040 tmp = gen_rtx_PLUS (mode, tmp, out1);
21041 nops++;
21044 if (cf != 0)
21046 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21047 nops++;
21049 if (!rtx_equal_p (tmp, out))
21051 if (nops == 1)
21052 out = force_operand (tmp, copy_rtx (out));
21053 else
21054 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21056 if (!rtx_equal_p (out, operands[0]))
21057 emit_move_insn (operands[0], copy_rtx (out));
21059 return true;
21063 * General case: Jumpful:
21064 * xorl dest,dest cmpl op1, op2
21065 * cmpl op1, op2 movl ct, dest
21066 * setcc dest jcc 1f
21067 * decl dest movl cf, dest
21068 * andl (cf-ct),dest 1:
21069 * addl ct,dest
21071 * Size 20. Size 14.
21073 * This is reasonably steep, but branch mispredict costs are
21074 * high on modern cpus, so consider failing only if optimizing
21075 * for space.
21078 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21079 && BRANCH_COST (optimize_insn_for_speed_p (),
21080 false) >= 2)
21082 if (cf == 0)
21084 machine_mode cmp_mode = GET_MODE (op0);
21085 enum rtx_code new_code;
21087 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21089 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21091 /* We may be reversing an unordered compare to a normal compare,
21092 which is not valid in general (we may convert a non-trapping
21093 condition to a trapping one); however, on i386 we currently
21094 emit all comparisons unordered. */
21095 new_code = reverse_condition_maybe_unordered (code);
21097 else
21099 new_code = ix86_reverse_condition (code, cmp_mode);
21100 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21101 compare_code = reverse_condition (compare_code);
21104 if (new_code != UNKNOWN)
21106 cf = ct;
21107 ct = 0;
21108 code = new_code;
21112 if (compare_code != UNKNOWN)
21114 /* notl op1 (if needed)
21115 sarl $31, op1
21116 andl (cf-ct), op1
21117 addl ct, op1
21119 For x < 0 (resp. x <= -1) there will be no notl,
21120 so if possible swap the constants to get rid of the
21121 complement.
21122 True/false will be -1/0 while code below (store flag
21123 followed by decrement) is 0/-1, so the constants need
21124 to be exchanged once more. */
21126 if (compare_code == GE || !cf)
21128 code = reverse_condition (code);
21129 compare_code = LT;
21131 else
21132 std::swap (ct, cf);
21134 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21136 else
21138 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21140 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21141 constm1_rtx,
21142 copy_rtx (out), 1, OPTAB_DIRECT);
21145 out = expand_simple_binop (mode, AND, copy_rtx (out),
21146 gen_int_mode (cf - ct, mode),
21147 copy_rtx (out), 1, OPTAB_DIRECT);
21148 if (ct)
21149 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21150 copy_rtx (out), 1, OPTAB_DIRECT);
21151 if (!rtx_equal_p (out, operands[0]))
21152 emit_move_insn (operands[0], copy_rtx (out));
21154 return true;
21158 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21160 /* Try a few things more with specific constants and a variable. */
21162 optab op;
21163 rtx var, orig_out, out, tmp;
21165 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21166 return false;
21168 /* If one of the two operands is an interesting constant, load a
21169 constant with the above and mask it in with a logical operation. */
21171 if (CONST_INT_P (operands[2]))
21173 var = operands[3];
21174 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21175 operands[3] = constm1_rtx, op = and_optab;
21176 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21177 operands[3] = const0_rtx, op = ior_optab;
21178 else
21179 return false;
21181 else if (CONST_INT_P (operands[3]))
21183 var = operands[2];
21184 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21185 operands[2] = constm1_rtx, op = and_optab;
21186 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21187 operands[2] = const0_rtx, op = ior_optab;
21188 else
21189 return false;
21191 else
21192 return false;
21194 orig_out = operands[0];
21195 tmp = gen_reg_rtx (mode);
21196 operands[0] = tmp;
21198 /* Recurse to get the constant loaded. */
21199 if (ix86_expand_int_movcc (operands) == 0)
21200 return false;
21202 /* Mask in the interesting variable. */
21203 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21204 OPTAB_WIDEN);
21205 if (!rtx_equal_p (out, orig_out))
21206 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21208 return true;
21212 * For comparison with above,
21214 * movl cf,dest
21215 * movl ct,tmp
21216 * cmpl op1,op2
21217 * cmovcc tmp,dest
21219 * Size 15.
21222 if (! nonimmediate_operand (operands[2], mode))
21223 operands[2] = force_reg (mode, operands[2]);
21224 if (! nonimmediate_operand (operands[3], mode))
21225 operands[3] = force_reg (mode, operands[3]);
21227 if (! register_operand (operands[2], VOIDmode)
21228 && (mode == QImode
21229 || ! register_operand (operands[3], VOIDmode)))
21230 operands[2] = force_reg (mode, operands[2]);
21232 if (mode == QImode
21233 && ! register_operand (operands[3], VOIDmode))
21234 operands[3] = force_reg (mode, operands[3]);
21236 emit_insn (compare_seq);
21237 emit_insn (gen_rtx_SET (operands[0],
21238 gen_rtx_IF_THEN_ELSE (mode,
21239 compare_op, operands[2],
21240 operands[3])));
21241 return true;
21244 /* Swap, force into registers, or otherwise massage the two operands
21245 to an sse comparison with a mask result. Thus we differ a bit from
21246 ix86_prepare_fp_compare_args which expects to produce a flags result.
21248 The DEST operand exists to help determine whether to commute commutative
21249 operators. The POP0/POP1 operands are updated in place. The new
21250 comparison code is returned, or UNKNOWN if not implementable. */
21252 static enum rtx_code
21253 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21254 rtx *pop0, rtx *pop1)
21256 switch (code)
21258 case LTGT:
21259 case UNEQ:
21260 /* AVX supports all the needed comparisons. */
21261 if (TARGET_AVX)
21262 break;
21263 /* We have no LTGT as an operator. We could implement it with
21264 NE & ORDERED, but this requires an extra temporary. It's
21265 not clear that it's worth it. */
21266 return UNKNOWN;
21268 case LT:
21269 case LE:
21270 case UNGT:
21271 case UNGE:
21272 /* These are supported directly. */
21273 break;
21275 case EQ:
21276 case NE:
21277 case UNORDERED:
21278 case ORDERED:
21279 /* AVX has 3 operand comparisons, no need to swap anything. */
21280 if (TARGET_AVX)
21281 break;
21282 /* For commutative operators, try to canonicalize the destination
21283 operand to be first in the comparison - this helps reload to
21284 avoid extra moves. */
21285 if (!dest || !rtx_equal_p (dest, *pop1))
21286 break;
21287 /* FALLTHRU */
21289 case GE:
21290 case GT:
21291 case UNLE:
21292 case UNLT:
21293 /* These are not supported directly before AVX, and furthermore
21294 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21295 comparison operands to transform into something that is
21296 supported. */
21297 std::swap (*pop0, *pop1);
21298 code = swap_condition (code);
21299 break;
21301 default:
21302 gcc_unreachable ();
21305 return code;
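/* For example, without AVX a scalar GT comparison is not available
   directly, so the operands are swapped and the code becomes LT, which
   maps onto the cmpltss/cmpltps forms (an illustrative sketch). */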
21308 /* Detect conditional moves that exactly match min/max operational
21309 semantics. Note that this is IEEE safe, as long as we don't
21310 interchange the operands.
21312 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21313 and TRUE if the operation is successful and instructions are emitted. */
21315 static bool
21316 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21317 rtx cmp_op1, rtx if_true, rtx if_false)
21319 machine_mode mode;
21320 bool is_min;
21321 rtx tmp;
21323 if (code == LT)
21325 else if (code == UNGE)
21326 std::swap (if_true, if_false);
21327 else
21328 return false;
21330 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21331 is_min = true;
21332 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21333 is_min = false;
21334 else
21335 return false;
21337 mode = GET_MODE (dest);
21339 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21340 but MODE may be a vector mode and thus not appropriate. */
21341 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21343 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21344 rtvec v;
21346 if_true = force_reg (mode, if_true);
21347 v = gen_rtvec (2, if_true, if_false);
21348 tmp = gen_rtx_UNSPEC (mode, v, u);
21350 else
21352 code = is_min ? SMIN : SMAX;
21353 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21356 emit_insn (gen_rtx_SET (dest, tmp));
21357 return true;
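/* Illustrative example: "x < y ? x : y" matches the is_min case and,
   with both finite-math and unsafe-math optimizations enabled, is emitted
   as a plain SMIN (minss/minps style); otherwise the UNSPEC_IEEE_MIN form
   is used, which keeps the operand order fixed for IEEE-correct NaN and
   signed-zero behaviour. */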
21360 /* Expand an sse vector comparison. Return the register with the result. */
21362 static rtx
21363 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21364 rtx op_true, rtx op_false)
21366 machine_mode mode = GET_MODE (dest);
21367 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21369 /* In the general case the result of the comparison can differ from the operands' type. */
21370 machine_mode cmp_mode;
21372 /* In AVX512F the result of comparison is an integer mask. */
21373 bool maskcmp = false;
21374 rtx x;
21376 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21378 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21379 gcc_assert (cmp_mode != BLKmode);
21381 maskcmp = true;
21383 else
21384 cmp_mode = cmp_ops_mode;
21387 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21388 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21389 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21391 if (optimize
21392 || reg_overlap_mentioned_p (dest, op_true)
21393 || reg_overlap_mentioned_p (dest, op_false))
21394 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21396 /* Compare patterns for int modes are unspec in AVX512F only. */
21397 if (maskcmp && (code == GT || code == EQ))
21399 rtx (*gen)(rtx, rtx, rtx);
21401 switch (cmp_ops_mode)
21403 case V64QImode:
21404 gcc_assert (TARGET_AVX512BW);
21405 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21406 break;
21407 case V32HImode:
21408 gcc_assert (TARGET_AVX512BW);
21409 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21410 break;
21411 case V16SImode:
21412 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21413 break;
21414 case V8DImode:
21415 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21416 break;
21417 default:
21418 gen = NULL;
21421 if (gen)
21423 emit_insn (gen (dest, cmp_op0, cmp_op1));
21424 return dest;
21427 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21429 if (cmp_mode != mode && !maskcmp)
21431 x = force_reg (cmp_ops_mode, x);
21432 convert_move (dest, x, false);
21434 else
21435 emit_insn (gen_rtx_SET (dest, x));
21437 return dest;
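/* The register returned above holds the comparison result as a
   per-element mask of all-ones/all-zeros, except for 64-byte AVX-512
   operands where it is an integer mask mode with one bit per element. */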
21440 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21441 operations. This is used for both scalar and vector conditional moves. */
21443 static void
21444 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21446 machine_mode mode = GET_MODE (dest);
21447 machine_mode cmpmode = GET_MODE (cmp);
21449 /* In AVX512F the result of comparison is an integer mask. */
21450 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21452 rtx t2, t3, x;
21454 if (vector_all_ones_operand (op_true, mode)
21455 && rtx_equal_p (op_false, CONST0_RTX (mode))
21456 && !maskcmp)
21458 emit_insn (gen_rtx_SET (dest, cmp));
21460 else if (op_false == CONST0_RTX (mode)
21461 && !maskcmp)
21463 op_true = force_reg (mode, op_true);
21464 x = gen_rtx_AND (mode, cmp, op_true);
21465 emit_insn (gen_rtx_SET (dest, x));
21467 else if (op_true == CONST0_RTX (mode)
21468 && !maskcmp)
21470 op_false = force_reg (mode, op_false);
21471 x = gen_rtx_NOT (mode, cmp);
21472 x = gen_rtx_AND (mode, x, op_false);
21473 emit_insn (gen_rtx_SET (dest, x));
21475 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21476 && !maskcmp)
21478 op_false = force_reg (mode, op_false);
21479 x = gen_rtx_IOR (mode, cmp, op_false);
21480 emit_insn (gen_rtx_SET (dest, x));
21482 else if (TARGET_XOP
21483 && !maskcmp)
21485 op_true = force_reg (mode, op_true);
21487 if (!nonimmediate_operand (op_false, mode))
21488 op_false = force_reg (mode, op_false);
21490 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21491 op_true,
21492 op_false)));
21494 else
21496 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21497 rtx d = dest;
21499 if (!nonimmediate_operand (op_true, mode))
21500 op_true = force_reg (mode, op_true);
21502 op_false = force_reg (mode, op_false);
21504 switch (mode)
21506 case V4SFmode:
21507 if (TARGET_SSE4_1)
21508 gen = gen_sse4_1_blendvps;
21509 break;
21510 case V2DFmode:
21511 if (TARGET_SSE4_1)
21512 gen = gen_sse4_1_blendvpd;
21513 break;
21514 case V16QImode:
21515 case V8HImode:
21516 case V4SImode:
21517 case V2DImode:
21518 if (TARGET_SSE4_1)
21520 gen = gen_sse4_1_pblendvb;
21521 if (mode != V16QImode)
21522 d = gen_reg_rtx (V16QImode);
21523 op_false = gen_lowpart (V16QImode, op_false);
21524 op_true = gen_lowpart (V16QImode, op_true);
21525 cmp = gen_lowpart (V16QImode, cmp);
21527 break;
21528 case V8SFmode:
21529 if (TARGET_AVX)
21530 gen = gen_avx_blendvps256;
21531 break;
21532 case V4DFmode:
21533 if (TARGET_AVX)
21534 gen = gen_avx_blendvpd256;
21535 break;
21536 case V32QImode:
21537 case V16HImode:
21538 case V8SImode:
21539 case V4DImode:
21540 if (TARGET_AVX2)
21542 gen = gen_avx2_pblendvb;
21543 if (mode != V32QImode)
21544 d = gen_reg_rtx (V32QImode);
21545 op_false = gen_lowpart (V32QImode, op_false);
21546 op_true = gen_lowpart (V32QImode, op_true);
21547 cmp = gen_lowpart (V32QImode, cmp);
21549 break;
21551 case V64QImode:
21552 gen = gen_avx512bw_blendmv64qi;
21553 break;
21554 case V32HImode:
21555 gen = gen_avx512bw_blendmv32hi;
21556 break;
21557 case V16SImode:
21558 gen = gen_avx512f_blendmv16si;
21559 break;
21560 case V8DImode:
21561 gen = gen_avx512f_blendmv8di;
21562 break;
21563 case V8DFmode:
21564 gen = gen_avx512f_blendmv8df;
21565 break;
21566 case V16SFmode:
21567 gen = gen_avx512f_blendmv16sf;
21568 break;
21570 default:
21571 break;
21574 if (gen != NULL)
21576 emit_insn (gen (d, op_false, op_true, cmp));
21577 if (d != dest)
21578 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21580 else
21582 op_true = force_reg (mode, op_true);
21584 t2 = gen_reg_rtx (mode);
21585 if (optimize)
21586 t3 = gen_reg_rtx (mode);
21587 else
21588 t3 = dest;
21590 x = gen_rtx_AND (mode, op_true, cmp);
21591 emit_insn (gen_rtx_SET (t2, x));
21593 x = gen_rtx_NOT (mode, cmp);
21594 x = gen_rtx_AND (mode, x, op_false);
21595 emit_insn (gen_rtx_SET (t3, x));
21597 x = gen_rtx_IOR (mode, t3, t2);
21598 emit_insn (gen_rtx_SET (dest, x));
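/* The fallback above computes dest = (cmp & op_true) | (~cmp & op_false)
   with plain logical operations when no blend instruction is available. */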
21603 /* Expand a floating-point conditional move. Return true if successful. */
21605 bool
21606 ix86_expand_fp_movcc (rtx operands[])
21608 machine_mode mode = GET_MODE (operands[0]);
21609 enum rtx_code code = GET_CODE (operands[1]);
21610 rtx tmp, compare_op;
21611 rtx op0 = XEXP (operands[1], 0);
21612 rtx op1 = XEXP (operands[1], 1);
21614 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21616 machine_mode cmode;
21618 /* Since we've no cmove for sse registers, don't force bad register
21619 allocation just to gain access to it. Deny movcc when the
21620 comparison mode doesn't match the move mode. */
21621 cmode = GET_MODE (op0);
21622 if (cmode == VOIDmode)
21623 cmode = GET_MODE (op1);
21624 if (cmode != mode)
21625 return false;
21627 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21628 if (code == UNKNOWN)
21629 return false;
21631 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21632 operands[2], operands[3]))
21633 return true;
21635 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21636 operands[2], operands[3]);
21637 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21638 return true;
21641 if (GET_MODE (op0) == TImode
21642 || (GET_MODE (op0) == DImode
21643 && !TARGET_64BIT))
21644 return false;
21646 /* The floating point conditional move instructions don't directly
21647 support conditions resulting from a signed integer comparison. */
21649 compare_op = ix86_expand_compare (code, op0, op1);
21650 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21652 tmp = gen_reg_rtx (QImode);
21653 ix86_expand_setcc (tmp, code, op0, op1);
21655 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21658 emit_insn (gen_rtx_SET (operands[0],
21659 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21660 operands[2], operands[3])));
21662 return true;
21665 /* Expand a floating-point vector conditional move; a vcond operation
21666 rather than a movcc operation. */
21668 bool
21669 ix86_expand_fp_vcond (rtx operands[])
21671 enum rtx_code code = GET_CODE (operands[3]);
21672 rtx cmp;
21674 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21675 &operands[4], &operands[5]);
21676 if (code == UNKNOWN)
21678 rtx temp;
21679 switch (GET_CODE (operands[3]))
21681 case LTGT:
21682 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21683 operands[5], operands[0], operands[0]);
21684 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21685 operands[5], operands[1], operands[2]);
21686 code = AND;
21687 break;
21688 case UNEQ:
21689 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21690 operands[5], operands[0], operands[0]);
21691 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21692 operands[5], operands[1], operands[2]);
21693 code = IOR;
21694 break;
21695 default:
21696 gcc_unreachable ();
21698 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21699 OPTAB_DIRECT);
21700 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21701 return true;
21704 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21705 operands[5], operands[1], operands[2]))
21706 return true;
21708 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21709 operands[1], operands[2]);
21710 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21711 return true;
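/* When the comparison cannot be prepared directly, LTGT is synthesized
   above as (ORDERED mask) AND (NE mask), and UNEQ as (UNORDERED mask)
   IOR (EQ mask), before the final SSE conditional move. */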
21714 /* Expand a signed/unsigned integral vector conditional move. */
21716 bool
21717 ix86_expand_int_vcond (rtx operands[])
21719 machine_mode data_mode = GET_MODE (operands[0]);
21720 machine_mode mode = GET_MODE (operands[4]);
21721 enum rtx_code code = GET_CODE (operands[3]);
21722 bool negate = false;
21723 rtx x, cop0, cop1;
21725 cop0 = operands[4];
21726 cop1 = operands[5];
21728 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21729 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21730 if ((code == LT || code == GE)
21731 && data_mode == mode
21732 && cop1 == CONST0_RTX (mode)
21733 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21734 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21735 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21736 && (GET_MODE_SIZE (data_mode) == 16
21737 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21739 rtx negop = operands[2 - (code == LT)];
21740 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21741 if (negop == CONST1_RTX (data_mode))
21743 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21744 operands[0], 1, OPTAB_DIRECT);
21745 if (res != operands[0])
21746 emit_move_insn (operands[0], res);
21747 return true;
21749 else if (GET_MODE_INNER (data_mode) != DImode
21750 && vector_all_ones_operand (negop, data_mode))
21752 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21753 operands[0], 0, OPTAB_DIRECT);
21754 if (res != operands[0])
21755 emit_move_insn (operands[0], res);
21756 return true;
21760 if (!nonimmediate_operand (cop1, mode))
21761 cop1 = force_reg (mode, cop1);
21762 if (!general_operand (operands[1], data_mode))
21763 operands[1] = force_reg (data_mode, operands[1]);
21764 if (!general_operand (operands[2], data_mode))
21765 operands[2] = force_reg (data_mode, operands[2]);
21767 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21768 if (TARGET_XOP
21769 && (mode == V16QImode || mode == V8HImode
21770 || mode == V4SImode || mode == V2DImode))
21772 else
21774 /* Canonicalize the comparison to EQ, GT, GTU. */
21775 switch (code)
21777 case EQ:
21778 case GT:
21779 case GTU:
21780 break;
21782 case NE:
21783 case LE:
21784 case LEU:
21785 code = reverse_condition (code);
21786 negate = true;
21787 break;
21789 case GE:
21790 case GEU:
21791 code = reverse_condition (code);
21792 negate = true;
21793 /* FALLTHRU */
21795 case LT:
21796 case LTU:
21797 std::swap (cop0, cop1);
21798 code = swap_condition (code);
21799 break;
21801 default:
21802 gcc_unreachable ();
21805 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21806 if (mode == V2DImode)
21808 switch (code)
21810 case EQ:
21811 /* SSE4.1 supports EQ. */
21812 if (!TARGET_SSE4_1)
21813 return false;
21814 break;
21816 case GT:
21817 case GTU:
21818 /* SSE4.2 supports GT/GTU. */
21819 if (!TARGET_SSE4_2)
21820 return false;
21821 break;
21823 default:
21824 gcc_unreachable ();
21828 /* Unsigned parallel compare is not supported by the hardware.
21829 Play some tricks to turn this into a signed comparison
21830 against 0. */
21831 if (code == GTU)
21833 cop0 = force_reg (mode, cop0);
21835 switch (mode)
21837 case V16SImode:
21838 case V8DImode:
21839 case V8SImode:
21840 case V4DImode:
21841 case V4SImode:
21842 case V2DImode:
21844 rtx t1, t2, mask;
21845 rtx (*gen_sub3) (rtx, rtx, rtx);
21847 switch (mode)
21849 case V16SImode: gen_sub3 = gen_subv16si3; break;
21850 case V8DImode: gen_sub3 = gen_subv8di3; break;
21851 case V8SImode: gen_sub3 = gen_subv8si3; break;
21852 case V4DImode: gen_sub3 = gen_subv4di3; break;
21853 case V4SImode: gen_sub3 = gen_subv4si3; break;
21854 case V2DImode: gen_sub3 = gen_subv2di3; break;
21855 default:
21856 gcc_unreachable ();
21858 /* Subtract (-(INT MAX) - 1) from both operands to make
21859 them signed. */
21860 mask = ix86_build_signbit_mask (mode, true, false);
21861 t1 = gen_reg_rtx (mode);
21862 emit_insn (gen_sub3 (t1, cop0, mask));
21864 t2 = gen_reg_rtx (mode);
21865 emit_insn (gen_sub3 (t2, cop1, mask));
21867 cop0 = t1;
21868 cop1 = t2;
21869 code = GT;
21871 break;
21873 case V64QImode:
21874 case V32HImode:
21875 case V32QImode:
21876 case V16HImode:
21877 case V16QImode:
21878 case V8HImode:
21879 /* Perform a parallel unsigned saturating subtraction. */
21880 x = gen_reg_rtx (mode);
21881 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21883 cop0 = x;
21884 cop1 = CONST0_RTX (mode);
21885 code = EQ;
21886 negate = !negate;
21887 break;
21889 default:
21890 gcc_unreachable ();
21895 /* Allow the comparison to be done in one mode, but the movcc to
21896 happen in another mode. */
21897 if (data_mode == mode)
21899 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21900 operands[1+negate], operands[2-negate]);
21902 else
21904 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21905 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21906 operands[1+negate], operands[2-negate]);
21907 if (GET_MODE (x) == mode)
21908 x = gen_lowpart (data_mode, x);
21911 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21912 operands[2-negate]);
21913 return true;
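/* Illustrative example: an unsigned V4SImode "a > b" comparison has no
   direct SSE instruction, so both operands are biased by the per-element
   sign-bit mask (0x80000000) and the comparison proceeds as a signed GT,
   per the GTU handling above. */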
21916 /* AVX512F does support 64-byte integer vector operations,
21917 thus the longest vector we are faced with is V64QImode. */
21918 #define MAX_VECT_LEN 64
21920 struct expand_vec_perm_d
21922 rtx target, op0, op1;
21923 unsigned char perm[MAX_VECT_LEN];
21924 machine_mode vmode;
21925 unsigned char nelt;
21926 bool one_operand_p;
21927 bool testing_p;
21930 static bool
21931 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21932 struct expand_vec_perm_d *d)
21934 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21935 expanders, so args are either in d, or in op0, op1, etc. */
21936 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21937 machine_mode maskmode = mode;
21938 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21940 switch (mode)
21942 case V8HImode:
21943 if (TARGET_AVX512VL && TARGET_AVX512BW)
21944 gen = gen_avx512vl_vpermi2varv8hi3;
21945 break;
21946 case V16HImode:
21947 if (TARGET_AVX512VL && TARGET_AVX512BW)
21948 gen = gen_avx512vl_vpermi2varv16hi3;
21949 break;
21950 case V64QImode:
21951 if (TARGET_AVX512VBMI)
21952 gen = gen_avx512bw_vpermi2varv64qi3;
21953 break;
21954 case V32HImode:
21955 if (TARGET_AVX512BW)
21956 gen = gen_avx512bw_vpermi2varv32hi3;
21957 break;
21958 case V4SImode:
21959 if (TARGET_AVX512VL)
21960 gen = gen_avx512vl_vpermi2varv4si3;
21961 break;
21962 case V8SImode:
21963 if (TARGET_AVX512VL)
21964 gen = gen_avx512vl_vpermi2varv8si3;
21965 break;
21966 case V16SImode:
21967 if (TARGET_AVX512F)
21968 gen = gen_avx512f_vpermi2varv16si3;
21969 break;
21970 case V4SFmode:
21971 if (TARGET_AVX512VL)
21973 gen = gen_avx512vl_vpermi2varv4sf3;
21974 maskmode = V4SImode;
21976 break;
21977 case V8SFmode:
21978 if (TARGET_AVX512VL)
21980 gen = gen_avx512vl_vpermi2varv8sf3;
21981 maskmode = V8SImode;
21983 break;
21984 case V16SFmode:
21985 if (TARGET_AVX512F)
21987 gen = gen_avx512f_vpermi2varv16sf3;
21988 maskmode = V16SImode;
21990 break;
21991 case V2DImode:
21992 if (TARGET_AVX512VL)
21993 gen = gen_avx512vl_vpermi2varv2di3;
21994 break;
21995 case V4DImode:
21996 if (TARGET_AVX512VL)
21997 gen = gen_avx512vl_vpermi2varv4di3;
21998 break;
21999 case V8DImode:
22000 if (TARGET_AVX512F)
22001 gen = gen_avx512f_vpermi2varv8di3;
22002 break;
22003 case V2DFmode:
22004 if (TARGET_AVX512VL)
22006 gen = gen_avx512vl_vpermi2varv2df3;
22007 maskmode = V2DImode;
22009 break;
22010 case V4DFmode:
22011 if (TARGET_AVX512VL)
22013 gen = gen_avx512vl_vpermi2varv4df3;
22014 maskmode = V4DImode;
22016 break;
22017 case V8DFmode:
22018 if (TARGET_AVX512F)
22020 gen = gen_avx512f_vpermi2varv8df3;
22021 maskmode = V8DImode;
22023 break;
22024 default:
22025 break;
22028 if (gen == NULL)
22029 return false;
22031 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22032 expanders, so args are either in d, or in op0, op1, etc. */
22033 if (d)
22035 rtx vec[64];
22036 target = d->target;
22037 op0 = d->op0;
22038 op1 = d->op1;
22039 for (int i = 0; i < d->nelt; ++i)
22040 vec[i] = GEN_INT (d->perm[i]);
22041 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22044 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22045 return true;
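/* When a suitable gen function exists, this emits a single vpermi2
   variable-permute instruction that selects each result element from
   either OP0 or OP1 according to the corresponding MASK index. */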
22048 /* Expand a variable vector permutation. */
22050 void
22051 ix86_expand_vec_perm (rtx operands[])
22053 rtx target = operands[0];
22054 rtx op0 = operands[1];
22055 rtx op1 = operands[2];
22056 rtx mask = operands[3];
22057 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22058 machine_mode mode = GET_MODE (op0);
22059 machine_mode maskmode = GET_MODE (mask);
22060 int w, e, i;
22061 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22063 /* Number of elements in the vector. */
22064 w = GET_MODE_NUNITS (mode);
22065 e = GET_MODE_UNIT_SIZE (mode);
22066 gcc_assert (w <= 64);
22068 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22069 return;
22071 if (TARGET_AVX2)
22073 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22075 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22076 a constant shuffle operand.  With a tiny bit of effort we can
22077 use VPERMD instead.  A re-interpretation stall for V4DFmode is
22078 unfortunate but there's no avoiding it.
22079 Similarly for V16HImode we don't have instructions for variable
22080 shuffling, while for V32QImode we can, after preparing suitable
22081 masks, use vpshufb; vpshufb; vpermq; vpor. */
22083 if (mode == V16HImode)
22085 maskmode = mode = V32QImode;
22086 w = 32;
22087 e = 1;
22089 else
22091 maskmode = mode = V8SImode;
22092 w = 8;
22093 e = 4;
22095 t1 = gen_reg_rtx (maskmode);
22097 /* Replicate the low bits of the V4DImode mask into V8SImode:
22098 mask = { A B C D }
22099 t1 = { A A B B C C D D }. */
22100 for (i = 0; i < w / 2; ++i)
22101 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22102 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22103 vt = force_reg (maskmode, vt);
22104 mask = gen_lowpart (maskmode, mask);
22105 if (maskmode == V8SImode)
22106 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22107 else
22108 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22110 /* Multiply the shuffle indices by two. */
22111 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22112 OPTAB_DIRECT);
22114 /* Add one to the odd shuffle indices:
22115 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22116 for (i = 0; i < w / 2; ++i)
22118 vec[i * 2] = const0_rtx;
22119 vec[i * 2 + 1] = const1_rtx;
22121 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22122 vt = validize_mem (force_const_mem (maskmode, vt));
22123 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22124 OPTAB_DIRECT);
22126 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22127 operands[3] = mask = t1;
22128 target = gen_reg_rtx (mode);
22129 op0 = gen_lowpart (mode, op0);
22130 op1 = gen_lowpart (mode, op1);
22133 switch (mode)
22135 case V8SImode:
22136 /* The VPERMD and VPERMPS instructions already properly ignore
22137 the high bits of the shuffle elements. No need for us to
22138 perform an AND ourselves. */
22139 if (one_operand_shuffle)
22141 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22142 if (target != operands[0])
22143 emit_move_insn (operands[0],
22144 gen_lowpart (GET_MODE (operands[0]), target));
22146 else
22148 t1 = gen_reg_rtx (V8SImode);
22149 t2 = gen_reg_rtx (V8SImode);
22150 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22151 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22152 goto merge_two;
22154 return;
22156 case V8SFmode:
22157 mask = gen_lowpart (V8SImode, mask);
22158 if (one_operand_shuffle)
22159 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22160 else
22162 t1 = gen_reg_rtx (V8SFmode);
22163 t2 = gen_reg_rtx (V8SFmode);
22164 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22165 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22166 goto merge_two;
22168 return;
22170 case V4SImode:
22171 /* By combining the two 128-bit input vectors into one 256-bit
22172 input vector, we can use VPERMD and VPERMPS for the full
22173 two-operand shuffle. */
22174 t1 = gen_reg_rtx (V8SImode);
22175 t2 = gen_reg_rtx (V8SImode);
22176 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22177 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22178 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22179 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22180 return;
22182 case V4SFmode:
22183 t1 = gen_reg_rtx (V8SFmode);
22184 t2 = gen_reg_rtx (V8SImode);
22185 mask = gen_lowpart (V4SImode, mask);
22186 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22187 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22188 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22189 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22190 return;
22192 case V32QImode:
22193 t1 = gen_reg_rtx (V32QImode);
22194 t2 = gen_reg_rtx (V32QImode);
22195 t3 = gen_reg_rtx (V32QImode);
22196 vt2 = GEN_INT (-128);
22197 for (i = 0; i < 32; i++)
22198 vec[i] = vt2;
22199 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22200 vt = force_reg (V32QImode, vt);
22201 for (i = 0; i < 32; i++)
22202 vec[i] = i < 16 ? vt2 : const0_rtx;
22203 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22204 vt2 = force_reg (V32QImode, vt2);
22205 /* From mask create two adjusted masks, which contain the same
22206 bits as mask in the low 7 bits of each vector element.
22207 The first mask will have the most significant bit clear
22208 if it requests element from the same 128-bit lane
22209 and MSB set if it requests element from the other 128-bit lane.
22210 The second mask will have the opposite values of the MSB,
22211 and additionally will have its 128-bit lanes swapped.
22212 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22213 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22214 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22215 stands for the other 12 bytes. */
22216 /* Whether an element comes from the same lane or the other lane is
22217 indicated by bit 4, so shift it up by 3 to the MSB position. */
22218 t5 = gen_reg_rtx (V4DImode);
22219 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22220 GEN_INT (3)));
22221 /* Clear MSB bits from the mask just in case it had them set. */
22222 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22223 /* After this t1 will have MSB set for elements from other lane. */
22224 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22225 /* Clear bits other than MSB. */
22226 emit_insn (gen_andv32qi3 (t1, t1, vt));
22227 /* Or in the lower bits from mask into t3. */
22228 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22229 /* And invert MSB bits in t1, so MSB is set for elements from the same
22230 lane. */
22231 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22232 /* Swap 128-bit lanes in t3. */
22233 t6 = gen_reg_rtx (V4DImode);
22234 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22235 const2_rtx, GEN_INT (3),
22236 const0_rtx, const1_rtx));
22237 /* And or in the lower bits from mask into t1. */
22238 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22239 if (one_operand_shuffle)
22241 /* Each of these shuffles will put 0s in places where an
22242 element from the other 128-bit lane is needed; otherwise
22243 it will shuffle in the requested value. */
22244 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22245 gen_lowpart (V32QImode, t6)));
22246 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22247 /* For t3 the 128-bit lanes are swapped again. */
22248 t7 = gen_reg_rtx (V4DImode);
22249 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22250 const2_rtx, GEN_INT (3),
22251 const0_rtx, const1_rtx));
22252 /* And ORing both together yields the result. */
22253 emit_insn (gen_iorv32qi3 (target, t1,
22254 gen_lowpart (V32QImode, t7)));
22255 if (target != operands[0])
22256 emit_move_insn (operands[0],
22257 gen_lowpart (GET_MODE (operands[0]), target));
22258 return;
22261 t4 = gen_reg_rtx (V32QImode);
22262 /* Similar to the one_operand_shuffle code above, just repeated
22263 twice, once for each operand.  The merge_two: code below will
22264 merge the two results together. */
22265 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22266 gen_lowpart (V32QImode, t6)));
22267 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22268 gen_lowpart (V32QImode, t6)));
22269 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22270 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22271 t7 = gen_reg_rtx (V4DImode);
22272 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22273 const2_rtx, GEN_INT (3),
22274 const0_rtx, const1_rtx));
22275 t8 = gen_reg_rtx (V4DImode);
22276 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22277 const2_rtx, GEN_INT (3),
22278 const0_rtx, const1_rtx));
22279 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22280 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22281 t1 = t4;
22282 t2 = t3;
22283 goto merge_two;
22285 default:
22286 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22287 break;
22291 if (TARGET_XOP)
22293 /* The XOP VPPERM insn supports three inputs. By ignoring the
22294 one_operand_shuffle special case, we avoid creating another
22295 set of constant vectors in memory. */
22296 one_operand_shuffle = false;
22298 /* mask = mask & {2*w-1, ...} */
22299 vt = GEN_INT (2*w - 1);
22301 else
22303 /* mask = mask & {w-1, ...} */
22304 vt = GEN_INT (w - 1);
22307 for (i = 0; i < w; i++)
22308 vec[i] = vt;
22309 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22310 mask = expand_simple_binop (maskmode, AND, mask, vt,
22311 NULL_RTX, 0, OPTAB_DIRECT);
22313 /* For non-QImode operations, convert the word permutation control
22314 into a byte permutation control. */
22315 if (mode != V16QImode)
22317 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22318 GEN_INT (exact_log2 (e)),
22319 NULL_RTX, 0, OPTAB_DIRECT);
22321 /* Convert mask to vector of chars. */
22322 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22324 /* Replicate each of the input bytes into byte positions:
22325 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22326 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22327 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22328 for (i = 0; i < 16; ++i)
22329 vec[i] = GEN_INT (i/e * e);
22330 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22331 vt = validize_mem (force_const_mem (V16QImode, vt));
22332 if (TARGET_XOP)
22333 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22334 else
22335 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22337 /* Convert it into the byte positions by doing
22338 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22339 for (i = 0; i < 16; ++i)
22340 vec[i] = GEN_INT (i % e);
22341 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22342 vt = validize_mem (force_const_mem (V16QImode, vt));
22343 emit_insn (gen_addv16qi3 (mask, mask, vt));
22346 /* The actual shuffle operations all operate on V16QImode. */
22347 op0 = gen_lowpart (V16QImode, op0);
22348 op1 = gen_lowpart (V16QImode, op1);
22350 if (TARGET_XOP)
22352 if (GET_MODE (target) != V16QImode)
22353 target = gen_reg_rtx (V16QImode);
22354 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22355 if (target != operands[0])
22356 emit_move_insn (operands[0],
22357 gen_lowpart (GET_MODE (operands[0]), target));
22359 else if (one_operand_shuffle)
22361 if (GET_MODE (target) != V16QImode)
22362 target = gen_reg_rtx (V16QImode);
22363 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22364 if (target != operands[0])
22365 emit_move_insn (operands[0],
22366 gen_lowpart (GET_MODE (operands[0]), target));
22368 else
22370 rtx xops[6];
22371 bool ok;
22373 /* Shuffle the two input vectors independently. */
22374 t1 = gen_reg_rtx (V16QImode);
22375 t2 = gen_reg_rtx (V16QImode);
22376 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22377 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22379 merge_two:
22380 /* Then merge them together. The key is whether any given control
22381 element contained a bit set that indicates the second word. */
22382 mask = operands[3];
22383 vt = GEN_INT (w);
22384 if (maskmode == V2DImode && !TARGET_SSE4_1)
22386 /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22387 more shuffle to convert the V2DI input mask into a V4SI
22388 input mask.  At that point the masking that expand_int_vcond
22389 performs will work as desired. */
22390 rtx t3 = gen_reg_rtx (V4SImode);
22391 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22392 const0_rtx, const0_rtx,
22393 const2_rtx, const2_rtx));
22394 mask = t3;
22395 maskmode = V4SImode;
22396 e = w = 4;
22399 for (i = 0; i < w; i++)
22400 vec[i] = vt;
22401 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22402 vt = force_reg (maskmode, vt);
22403 mask = expand_simple_binop (maskmode, AND, mask, vt,
22404 NULL_RTX, 0, OPTAB_DIRECT);
22406 if (GET_MODE (target) != mode)
22407 target = gen_reg_rtx (mode);
22408 xops[0] = target;
22409 xops[1] = gen_lowpart (mode, t2);
22410 xops[2] = gen_lowpart (mode, t1);
22411 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22412 xops[4] = mask;
22413 xops[5] = vt;
22414 ok = ix86_expand_int_vcond (xops);
22415 gcc_assert (ok);
22416 if (target != operands[0])
22417 emit_move_insn (operands[0],
22418 gen_lowpart (GET_MODE (operands[0]), target));
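/* Illustrative sketch of the pshufb index construction above, e.g. for a
   V4SImode shuffle handled by this path (e == 4, w == 4): a word index j
   is first masked, scaled to 4*j, replicated into all four byte positions
   of its element, and then biased by {0,1,2,3}, giving the byte indexes
   {4j, 4j+1, 4j+2, 4j+3} that pshufb expects. */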
22422 /* Unpack SRC into the next wider integer vector type.  UNSIGNED_P is
22423 true if we should do zero extension, else sign extension. HIGH_P is
22424 true if we want the N/2 high elements, else the low elements. */
22426 void
22427 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22429 machine_mode imode = GET_MODE (src);
22430 rtx tmp;
22432 if (TARGET_SSE4_1)
22434 rtx (*unpack)(rtx, rtx);
22435 rtx (*extract)(rtx, rtx) = NULL;
22436 machine_mode halfmode = BLKmode;
22438 switch (imode)
22440 case V64QImode:
22441 if (unsigned_p)
22442 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22443 else
22444 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22445 halfmode = V32QImode;
22446 extract
22447 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22448 break;
22449 case V32QImode:
22450 if (unsigned_p)
22451 unpack = gen_avx2_zero_extendv16qiv16hi2;
22452 else
22453 unpack = gen_avx2_sign_extendv16qiv16hi2;
22454 halfmode = V16QImode;
22455 extract
22456 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22457 break;
22458 case V32HImode:
22459 if (unsigned_p)
22460 unpack = gen_avx512f_zero_extendv16hiv16si2;
22461 else
22462 unpack = gen_avx512f_sign_extendv16hiv16si2;
22463 halfmode = V16HImode;
22464 extract
22465 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22466 break;
22467 case V16HImode:
22468 if (unsigned_p)
22469 unpack = gen_avx2_zero_extendv8hiv8si2;
22470 else
22471 unpack = gen_avx2_sign_extendv8hiv8si2;
22472 halfmode = V8HImode;
22473 extract
22474 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22475 break;
22476 case V16SImode:
22477 if (unsigned_p)
22478 unpack = gen_avx512f_zero_extendv8siv8di2;
22479 else
22480 unpack = gen_avx512f_sign_extendv8siv8di2;
22481 halfmode = V8SImode;
22482 extract
22483 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22484 break;
22485 case V8SImode:
22486 if (unsigned_p)
22487 unpack = gen_avx2_zero_extendv4siv4di2;
22488 else
22489 unpack = gen_avx2_sign_extendv4siv4di2;
22490 halfmode = V4SImode;
22491 extract
22492 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22493 break;
22494 case V16QImode:
22495 if (unsigned_p)
22496 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22497 else
22498 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22499 break;
22500 case V8HImode:
22501 if (unsigned_p)
22502 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22503 else
22504 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22505 break;
22506 case V4SImode:
22507 if (unsigned_p)
22508 unpack = gen_sse4_1_zero_extendv2siv2di2;
22509 else
22510 unpack = gen_sse4_1_sign_extendv2siv2di2;
22511 break;
22512 default:
22513 gcc_unreachable ();
22516 if (GET_MODE_SIZE (imode) >= 32)
22518 tmp = gen_reg_rtx (halfmode);
22519 emit_insn (extract (tmp, src));
22521 else if (high_p)
22523 /* Shift higher 8 bytes to lower 8 bytes. */
22524 tmp = gen_reg_rtx (V1TImode);
22525 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22526 GEN_INT (64)));
22527 tmp = gen_lowpart (imode, tmp);
22529 else
22530 tmp = src;
22532 emit_insn (unpack (dest, tmp));
22534 else
22536 rtx (*unpack)(rtx, rtx, rtx);
22538 switch (imode)
22540 case V16QImode:
22541 if (high_p)
22542 unpack = gen_vec_interleave_highv16qi;
22543 else
22544 unpack = gen_vec_interleave_lowv16qi;
22545 break;
22546 case V8HImode:
22547 if (high_p)
22548 unpack = gen_vec_interleave_highv8hi;
22549 else
22550 unpack = gen_vec_interleave_lowv8hi;
22551 break;
22552 case V4SImode:
22553 if (high_p)
22554 unpack = gen_vec_interleave_highv4si;
22555 else
22556 unpack = gen_vec_interleave_lowv4si;
22557 break;
22558 default:
22559 gcc_unreachable ();
22562 if (unsigned_p)
22563 tmp = force_reg (imode, CONST0_RTX (imode));
22564 else
22565 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22566 src, pc_rtx, pc_rtx);
22568 rtx tmp2 = gen_reg_rtx (imode);
22569 emit_insn (unpack (tmp2, src, tmp));
22570 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
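/* Illustrative example: unpacking the low half of a V8HImode SRC to
   V4SImode uses pmovzxwd/pmovsxwd when SSE4.1 is available; otherwise it
   interleaves SRC with zero (unsigned) or with a sign mask obtained from
   the 0 > SRC comparison above (signed). */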
22574 /* Expand conditional increment or decrement using adc/sbb instructions.
22575 The default case using setcc followed by a conditional move can be
22576 done by generic code. */
22577 bool
22578 ix86_expand_int_addcc (rtx operands[])
22580 enum rtx_code code = GET_CODE (operands[1]);
22581 rtx flags;
22582 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22583 rtx compare_op;
22584 rtx val = const0_rtx;
22585 bool fpcmp = false;
22586 machine_mode mode;
22587 rtx op0 = XEXP (operands[1], 0);
22588 rtx op1 = XEXP (operands[1], 1);
22590 if (operands[3] != const1_rtx
22591 && operands[3] != constm1_rtx)
22592 return false;
22593 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22594 return false;
22595 code = GET_CODE (compare_op);
22597 flags = XEXP (compare_op, 0);
22599 if (GET_MODE (flags) == CCFPmode
22600 || GET_MODE (flags) == CCFPUmode)
22602 fpcmp = true;
22603 code = ix86_fp_compare_code_to_integer (code);
22606 if (code != LTU)
22608 val = constm1_rtx;
22609 if (fpcmp)
22610 PUT_CODE (compare_op,
22611 reverse_condition_maybe_unordered
22612 (GET_CODE (compare_op)));
22613 else
22614 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22617 mode = GET_MODE (operands[0]);
22619 /* Construct either adc or sbb insn. */
22620 if ((code == LTU) == (operands[3] == constm1_rtx))
22622 switch (mode)
22624 case QImode:
22625 insn = gen_subqi3_carry;
22626 break;
22627 case HImode:
22628 insn = gen_subhi3_carry;
22629 break;
22630 case SImode:
22631 insn = gen_subsi3_carry;
22632 break;
22633 case DImode:
22634 insn = gen_subdi3_carry;
22635 break;
22636 default:
22637 gcc_unreachable ();
22640 else
22642 switch (mode)
22644 case QImode:
22645 insn = gen_addqi3_carry;
22646 break;
22647 case HImode:
22648 insn = gen_addhi3_carry;
22649 break;
22650 case SImode:
22651 insn = gen_addsi3_carry;
22652 break;
22653 case DImode:
22654 insn = gen_adddi3_carry;
22655 break;
22656 default:
22657 gcc_unreachable ();
22660 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22662 return true;
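/* Illustrative example: for unsigned operands, "x + (a < b)" satisfies the
   LTU/const1_rtx case above and is emitted as a compare of a and b followed
   by an add-with-carry of zero into x (a cmp/adc sketch). */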
22666 /* Split OPERAND into half-mode parts.  Similar to split_double_mode,
22667 but works for floating point parameters and non-offsettable memories.
22668 For pushes, it returns just stack offsets; the values will be saved
22669 in the right order.  At most four parts are generated. */
22671 static int
22672 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22674 int size;
22676 if (!TARGET_64BIT)
22677 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22678 else
22679 size = (GET_MODE_SIZE (mode) + 4) / 8;
22681 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22682 gcc_assert (size >= 2 && size <= 4);
22684 /* Optimize constant pool references to immediates.  This is used by fp
22685 moves, which force all constants to memory to allow combining. */
22686 if (MEM_P (operand) && MEM_READONLY_P (operand))
22688 rtx tmp = maybe_get_pool_constant (operand);
22689 if (tmp)
22690 operand = tmp;
22693 if (MEM_P (operand) && !offsettable_memref_p (operand))
22695 /* The only non-offsettable memories we handle are pushes. */
22696 int ok = push_operand (operand, VOIDmode);
22698 gcc_assert (ok);
22700 operand = copy_rtx (operand);
22701 PUT_MODE (operand, word_mode);
22702 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22703 return size;
22706 if (GET_CODE (operand) == CONST_VECTOR)
22708 machine_mode imode = int_mode_for_mode (mode);
22709 /* Caution: if we looked through a constant pool memory above,
22710 the operand may actually have a different mode now. That's
22711 ok, since we want to pun this all the way back to an integer. */
22712 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22713 gcc_assert (operand != NULL);
22714 mode = imode;
22717 if (!TARGET_64BIT)
22719 if (mode == DImode)
22720 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22721 else
22723 int i;
22725 if (REG_P (operand))
22727 gcc_assert (reload_completed);
22728 for (i = 0; i < size; i++)
22729 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22731 else if (offsettable_memref_p (operand))
22733 operand = adjust_address (operand, SImode, 0);
22734 parts[0] = operand;
22735 for (i = 1; i < size; i++)
22736 parts[i] = adjust_address (operand, SImode, 4 * i);
22738 else if (CONST_DOUBLE_P (operand))
22740 REAL_VALUE_TYPE r;
22741 long l[4];
22743 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22744 switch (mode)
22746 case TFmode:
22747 real_to_target (l, &r, mode);
22748 parts[3] = gen_int_mode (l[3], SImode);
22749 parts[2] = gen_int_mode (l[2], SImode);
22750 break;
22751 case XFmode:
22752 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22753 long double may not be 80-bit. */
22754 real_to_target (l, &r, mode);
22755 parts[2] = gen_int_mode (l[2], SImode);
22756 break;
22757 case DFmode:
22758 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22759 break;
22760 default:
22761 gcc_unreachable ();
22763 parts[1] = gen_int_mode (l[1], SImode);
22764 parts[0] = gen_int_mode (l[0], SImode);
22766 else
22767 gcc_unreachable ();
22770 else
22772 if (mode == TImode)
22773 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22774 if (mode == XFmode || mode == TFmode)
22776 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22777 if (REG_P (operand))
22779 gcc_assert (reload_completed);
22780 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22781 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22783 else if (offsettable_memref_p (operand))
22785 operand = adjust_address (operand, DImode, 0);
22786 parts[0] = operand;
22787 parts[1] = adjust_address (operand, upper_mode, 8);
22789 else if (CONST_DOUBLE_P (operand))
22791 REAL_VALUE_TYPE r;
22792 long l[4];
22794 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22795 real_to_target (l, &r, mode);
22797 /* real_to_target puts 32-bit pieces in each long. */
22798 parts[0] =
22799 gen_int_mode
22800 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22801 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22802 DImode);
22804 if (upper_mode == SImode)
22805 parts[1] = gen_int_mode (l[2], SImode);
22806 else
22807 parts[1] =
22808 gen_int_mode
22809 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22810 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22811 DImode);
22813 else
22814 gcc_unreachable ();
22818 return size;
22821 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22822 All required insns are emitted by this function. Operands 2-5 are
22823 used internally to hold the destination parts and operands 6-9 the
22824 source parts, in the correct order. */
22826 void
22827 ix86_split_long_move (rtx operands[])
22829 rtx part[2][4];
22830 int nparts, i, j;
22831 int push = 0;
22832 int collisions = 0;
22833 machine_mode mode = GET_MODE (operands[0]);
22834 bool collisionparts[4];
22836 /* The DFmode expanders may ask us to move a double.
22837 For a 64-bit target this is a single move. By hiding the fact
22838 here we simplify the i386.md splitters. */
22839 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22841 /* Optimize constant pool reference to immediates. This is used by
22842 fp moves that force all constants to memory to allow combining. */
22844 if (MEM_P (operands[1])
22845 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22846 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22847 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22848 if (push_operand (operands[0], VOIDmode))
22850 operands[0] = copy_rtx (operands[0]);
22851 PUT_MODE (operands[0], word_mode);
22853 else
22854 operands[0] = gen_lowpart (DImode, operands[0]);
22855 operands[1] = gen_lowpart (DImode, operands[1]);
22856 emit_move_insn (operands[0], operands[1]);
22857 return;
22860 /* The only non-offsettable memory we handle is push. */
22861 if (push_operand (operands[0], VOIDmode))
22862 push = 1;
22863 else
22864 gcc_assert (!MEM_P (operands[0])
22865 || offsettable_memref_p (operands[0]));
22867 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22868 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22870 /* When emitting a push, be careful with source operands on the stack. */
22871 if (push && MEM_P (operands[1])
22872 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22874 rtx src_base = XEXP (part[1][nparts - 1], 0);
22876 /* Compensate for the stack decrement by 4. */
22877 if (!TARGET_64BIT && nparts == 3
22878 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22879 src_base = plus_constant (Pmode, src_base, 4);
22881 /* src_base refers to the stack pointer and is
22882 automatically decreased by emitted push. */
22883 for (i = 0; i < nparts; i++)
22884 part[1][i] = change_address (part[1][i],
22885 GET_MODE (part[1][i]), src_base);
22888 /* We need to do the copy in the right order in case an address register
22889 of the source overlaps the destination. */
22890 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22892 rtx tmp;
22894 for (i = 0; i < nparts; i++)
22896 collisionparts[i]
22897 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22898 if (collisionparts[i])
22899 collisions++;
22902 /* Collision in the middle part can be handled by reordering. */
22903 if (collisions == 1 && nparts == 3 && collisionparts [1])
22905 std::swap (part[0][1], part[0][2]);
22906 std::swap (part[1][1], part[1][2]);
22908 else if (collisions == 1
22909 && nparts == 4
22910 && (collisionparts [1] || collisionparts [2]))
22912 if (collisionparts [1])
22914 std::swap (part[0][1], part[0][2]);
22915 std::swap (part[1][1], part[1][2]);
22917 else
22919 std::swap (part[0][2], part[0][3]);
22920 std::swap (part[1][2], part[1][3]);
22924 /* If there are more collisions, we can't handle it by reordering.
22925 Do an lea to the last part and use only one colliding move. */
22926 else if (collisions > 1)
22928 rtx base, addr, tls_base = NULL_RTX;
22930 collisions = 1;
22932 base = part[0][nparts - 1];
22934 /* Handle the case when the last part isn't valid for lea.
22935 Happens in 64-bit mode storing the 12-byte XFmode. */
22936 if (GET_MODE (base) != Pmode)
22937 base = gen_rtx_REG (Pmode, REGNO (base));
22939 addr = XEXP (part[1][0], 0);
22940 if (TARGET_TLS_DIRECT_SEG_REFS)
22942 struct ix86_address parts;
22943 int ok = ix86_decompose_address (addr, &parts);
22944 gcc_assert (ok);
22945 if (parts.seg == DEFAULT_TLS_SEG_REG)
22947 /* It is not valid to use %gs: or %fs: in
22948 lea though, so we need to remove it from the
22949 address used for lea and add it to each individual
22950 memory load instead. */
22951 addr = copy_rtx (addr);
22952 rtx *x = &addr;
22953 while (GET_CODE (*x) == PLUS)
22955 for (i = 0; i < 2; i++)
22957 rtx u = XEXP (*x, i);
22958 if (GET_CODE (u) == ZERO_EXTEND)
22959 u = XEXP (u, 0);
22960 if (GET_CODE (u) == UNSPEC
22961 && XINT (u, 1) == UNSPEC_TP)
22963 tls_base = XEXP (*x, i);
22964 *x = XEXP (*x, 1 - i);
22965 break;
22968 if (tls_base)
22969 break;
22970 x = &XEXP (*x, 0);
22972 gcc_assert (tls_base);
22975 emit_insn (gen_rtx_SET (base, addr));
22976 if (tls_base)
22977 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
22978 part[1][0] = replace_equiv_address (part[1][0], base);
22979 for (i = 1; i < nparts; i++)
22981 if (tls_base)
22982 base = copy_rtx (base);
22983 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22984 part[1][i] = replace_equiv_address (part[1][i], tmp);
22989 if (push)
22991 if (!TARGET_64BIT)
22993 if (nparts == 3)
22995 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22996 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22997 stack_pointer_rtx, GEN_INT (-4)));
22998 emit_move_insn (part[0][2], part[1][2]);
23000 else if (nparts == 4)
23002 emit_move_insn (part[0][3], part[1][3]);
23003 emit_move_insn (part[0][2], part[1][2]);
23006 else
23008 /* In 64bit mode we don't have 32bit push available. In case this is a
23009 register, it is OK - we will just use the larger counterpart. We also
23010 retype memory - these come from an attempt to avoid a REX prefix on
23011 moving the second half of a TFmode value. */
23012 if (GET_MODE (part[1][1]) == SImode)
23014 switch (GET_CODE (part[1][1]))
23016 case MEM:
23017 part[1][1] = adjust_address (part[1][1], DImode, 0);
23018 break;
23020 case REG:
23021 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23022 break;
23024 default:
23025 gcc_unreachable ();
23028 if (GET_MODE (part[1][0]) == SImode)
23029 part[1][0] = part[1][1];
23032 emit_move_insn (part[0][1], part[1][1]);
23033 emit_move_insn (part[0][0], part[1][0]);
23034 return;
23037 /* Choose the correct order so as not to overwrite the source before it is copied. */
23038 if ((REG_P (part[0][0])
23039 && REG_P (part[1][1])
23040 && (REGNO (part[0][0]) == REGNO (part[1][1])
23041 || (nparts == 3
23042 && REGNO (part[0][0]) == REGNO (part[1][2]))
23043 || (nparts == 4
23044 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23045 || (collisions > 0
23046 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23048 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23050 operands[2 + i] = part[0][j];
23051 operands[6 + i] = part[1][j];
23054 else
23056 for (i = 0; i < nparts; i++)
23058 operands[2 + i] = part[0][i];
23059 operands[6 + i] = part[1][i];
23063 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23064 if (optimize_insn_for_size_p ())
23066 for (j = 0; j < nparts - 1; j++)
23067 if (CONST_INT_P (operands[6 + j])
23068 && operands[6 + j] != const0_rtx
23069 && REG_P (operands[2 + j]))
23070 for (i = j; i < nparts - 1; i++)
23071 if (CONST_INT_P (operands[7 + i])
23072 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23073 operands[7 + i] = operands[2 + j];
23076 for (i = 0; i < nparts; i++)
23077 emit_move_insn (operands[2 + i], operands[6 + i]);
23079 return;
23082 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23083 left shift by a constant, either using a single shift or
23084 a sequence of add instructions. */
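/* Illustrative sketch only: a left shift by a small constant may be
   replaced by repeated doubling when additions are cheaper, e.g.

     x <<= 2;          roughly becomes      add x, x
                                            add x, x

   The cost test below (count * add cost vs. constant-shift cost)
   decides which form is emitted.  */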
23086 static void
23087 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23089 rtx (*insn)(rtx, rtx, rtx);
23091 if (count == 1
23092 || (count * ix86_cost->add <= ix86_cost->shift_const
23093 && !optimize_insn_for_size_p ()))
23095 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23096 while (count-- > 0)
23097 emit_insn (insn (operand, operand, operand));
23099 else
23101 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23102 emit_insn (insn (operand, operand, GEN_INT (count)));
23106 void
23107 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23109 rtx (*gen_ashl3)(rtx, rtx, rtx);
23110 rtx (*gen_shld)(rtx, rtx, rtx);
23111 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23113 rtx low[2], high[2];
23114 int count;
23116 if (CONST_INT_P (operands[2]))
23118 split_double_mode (mode, operands, 2, low, high);
23119 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23121 if (count >= half_width)
23123 emit_move_insn (high[0], low[1]);
23124 emit_move_insn (low[0], const0_rtx);
23126 if (count > half_width)
23127 ix86_expand_ashl_const (high[0], count - half_width, mode);
23129 else
23131 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23133 if (!rtx_equal_p (operands[0], operands[1]))
23134 emit_move_insn (operands[0], operands[1]);
23136 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23137 ix86_expand_ashl_const (low[0], count, mode);
23139 return;
23142 split_double_mode (mode, operands, 1, low, high);
23144 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23146 if (operands[1] == const1_rtx)
23148 /* Assuming we've chosen QImode-capable registers, then 1 << N
23149 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23150 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23152 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23154 ix86_expand_clear (low[0]);
23155 ix86_expand_clear (high[0]);
23156 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23158 d = gen_lowpart (QImode, low[0]);
23159 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23160 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23161 emit_insn (gen_rtx_SET (d, s));
23163 d = gen_lowpart (QImode, high[0]);
23164 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23165 s = gen_rtx_NE (QImode, flags, const0_rtx);
23166 emit_insn (gen_rtx_SET (d, s));
23169 /* Otherwise, we can get the same results by manually performing
23170 a bit extract operation on bit 5/6, and then performing the two
23171 shifts. The two methods of getting 0/1 into low/high are exactly
23172 the same size. Avoiding the shift in the bit extract case helps
23173 pentium4 a bit; no one else seems to care much either way. */
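/* A rough C-level sketch of this bit-extract variant (illustration
   only), for a DImode "1 << n" split into 32-bit halves:

     hi = (n >> 5) & 1;          nonzero when n >= 32
     lo = hi ^ 1;
     lo <<= (n & 31);
     hi <<= (n & 31);

   so exactly one half ends up holding 1 << (n & 31) and the other is
   zero.  */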
23174 else
23176 machine_mode half_mode;
23177 rtx (*gen_lshr3)(rtx, rtx, rtx);
23178 rtx (*gen_and3)(rtx, rtx, rtx);
23179 rtx (*gen_xor3)(rtx, rtx, rtx);
23180 HOST_WIDE_INT bits;
23181 rtx x;
23183 if (mode == DImode)
23185 half_mode = SImode;
23186 gen_lshr3 = gen_lshrsi3;
23187 gen_and3 = gen_andsi3;
23188 gen_xor3 = gen_xorsi3;
23189 bits = 5;
23191 else
23193 half_mode = DImode;
23194 gen_lshr3 = gen_lshrdi3;
23195 gen_and3 = gen_anddi3;
23196 gen_xor3 = gen_xordi3;
23197 bits = 6;
23200 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23201 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23202 else
23203 x = gen_lowpart (half_mode, operands[2]);
23204 emit_insn (gen_rtx_SET (high[0], x));
23206 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23207 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23208 emit_move_insn (low[0], high[0]);
23209 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23212 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23213 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23214 return;
23217 if (operands[1] == constm1_rtx)
23219 /* For -1 << N, we can avoid the shld instruction, because we
23220 know that we're shifting 0...31/63 ones into a -1. */
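/* Illustration only: with 32-bit halves and 0 <= n < 32,

     low word of (-1LL << n)  == -1 << n
     high word of (-1LL << n) == -1

   so the high half can simply stay -1 and no shld is needed; the
   adjustment code further below still handles n >= 32.  */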
23221 emit_move_insn (low[0], constm1_rtx);
23222 if (optimize_insn_for_size_p ())
23223 emit_move_insn (high[0], low[0]);
23224 else
23225 emit_move_insn (high[0], constm1_rtx);
23227 else
23229 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23231 if (!rtx_equal_p (operands[0], operands[1]))
23232 emit_move_insn (operands[0], operands[1]);
23234 split_double_mode (mode, operands, 1, low, high);
23235 emit_insn (gen_shld (high[0], low[0], operands[2]));
23238 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23240 if (TARGET_CMOVE && scratch)
23242 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23243 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23245 ix86_expand_clear (scratch);
23246 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23248 else
23250 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23251 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23253 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23257 void
23258 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23260 rtx (*gen_ashr3)(rtx, rtx, rtx)
23261 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23262 rtx (*gen_shrd)(rtx, rtx, rtx);
23263 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23265 rtx low[2], high[2];
23266 int count;
23268 if (CONST_INT_P (operands[2]))
23270 split_double_mode (mode, operands, 2, low, high);
23271 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23273 if (count == GET_MODE_BITSIZE (mode) - 1)
23275 emit_move_insn (high[0], high[1]);
23276 emit_insn (gen_ashr3 (high[0], high[0],
23277 GEN_INT (half_width - 1)));
23278 emit_move_insn (low[0], high[0]);
23281 else if (count >= half_width)
23283 emit_move_insn (low[0], high[1]);
23284 emit_move_insn (high[0], low[0]);
23285 emit_insn (gen_ashr3 (high[0], high[0],
23286 GEN_INT (half_width - 1)));
23288 if (count > half_width)
23289 emit_insn (gen_ashr3 (low[0], low[0],
23290 GEN_INT (count - half_width)));
23292 else
23294 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23296 if (!rtx_equal_p (operands[0], operands[1]))
23297 emit_move_insn (operands[0], operands[1]);
23299 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23300 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23303 else
23305 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23307 if (!rtx_equal_p (operands[0], operands[1]))
23308 emit_move_insn (operands[0], operands[1]);
23310 split_double_mode (mode, operands, 1, low, high);
23312 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23313 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23315 if (TARGET_CMOVE && scratch)
23317 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23318 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23320 emit_move_insn (scratch, high[0]);
23321 emit_insn (gen_ashr3 (scratch, scratch,
23322 GEN_INT (half_width - 1)));
23323 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23324 scratch));
23326 else
23328 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23329 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23331 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23336 void
23337 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23339 rtx (*gen_lshr3)(rtx, rtx, rtx)
23340 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23341 rtx (*gen_shrd)(rtx, rtx, rtx);
23342 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23344 rtx low[2], high[2];
23345 int count;
23347 if (CONST_INT_P (operands[2]))
23349 split_double_mode (mode, operands, 2, low, high);
23350 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23352 if (count >= half_width)
23354 emit_move_insn (low[0], high[1]);
23355 ix86_expand_clear (high[0]);
23357 if (count > half_width)
23358 emit_insn (gen_lshr3 (low[0], low[0],
23359 GEN_INT (count - half_width)));
23361 else
23363 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23365 if (!rtx_equal_p (operands[0], operands[1]))
23366 emit_move_insn (operands[0], operands[1]);
23368 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23369 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23372 else
23374 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23376 if (!rtx_equal_p (operands[0], operands[1]))
23377 emit_move_insn (operands[0], operands[1]);
23379 split_double_mode (mode, operands, 1, low, high);
23381 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23382 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23384 if (TARGET_CMOVE && scratch)
23386 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23387 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23389 ix86_expand_clear (scratch);
23390 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23391 scratch));
23393 else
23395 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23396 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23398 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23403 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23404 static void
23405 predict_jump (int prob)
23407 rtx insn = get_last_insn ();
23408 gcc_assert (JUMP_P (insn));
23409 add_int_reg_note (insn, REG_BR_PROB, prob);
23412 /* Helper function for the string operations below. Test VARIABLE whether
23413 it is aligned to VALUE bytes. If true, jump to the label. */
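/* Rough equivalent of the emitted test (illustration only):

     if ((variable & value) == 0)
       goto label;

   i.e. the label is reached when the tested low bits of VARIABLE are
   clear.  */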
23414 static rtx_code_label *
23415 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23417 rtx_code_label *label = gen_label_rtx ();
23418 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23419 if (GET_MODE (variable) == DImode)
23420 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23421 else
23422 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23423 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23424 1, label);
23425 if (epilogue)
23426 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23427 else
23428 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23429 return label;
23432 /* Adjust COUNTER by the VALUE. */
23433 static void
23434 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23436 rtx (*gen_add)(rtx, rtx, rtx)
23437 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23439 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23442 /* Zero extend possibly SImode EXP to Pmode register. */
23444 ix86_zero_extend_to_Pmode (rtx exp)
23446 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23449 /* Divide COUNTREG by SCALE. */
23450 static rtx
23451 scale_counter (rtx countreg, int scale)
23453 rtx sc;
23455 if (scale == 1)
23456 return countreg;
23457 if (CONST_INT_P (countreg))
23458 return GEN_INT (INTVAL (countreg) / scale);
23459 gcc_assert (REG_P (countreg));
23461 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23462 GEN_INT (exact_log2 (scale)),
23463 NULL, 1, OPTAB_DIRECT);
23464 return sc;
23467 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23468 DImode for constant loop counts. */
23470 static machine_mode
23471 counter_mode (rtx count_exp)
23473 if (GET_MODE (count_exp) != VOIDmode)
23474 return GET_MODE (count_exp);
23475 if (!CONST_INT_P (count_exp))
23476 return Pmode;
23477 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23478 return DImode;
23479 return SImode;
23482 /* Copy the address to a Pmode register. This is used for x32 to
23483 truncate DImode TLS address to a SImode register. */
23485 static rtx
23486 ix86_copy_addr_to_reg (rtx addr)
23488 rtx reg;
23489 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23491 reg = copy_addr_to_reg (addr);
23492 REG_POINTER (reg) = 1;
23493 return reg;
23495 else
23497 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23498 reg = copy_to_mode_reg (DImode, addr);
23499 REG_POINTER (reg) = 1;
23500 return gen_rtx_SUBREG (SImode, reg, 0);
23504 /* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to by SRCPTR
23505 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
23506 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23507 memory by VALUE (supposed to be in MODE).
23509 The size is rounded down to a whole number of chunks moved at once.
23510 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
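/* A hedged C-level sketch of the emitted code (identifiers here are
   illustrative, not actual GCC names); for the copy case with
   UNROLL == 2 and chunk size CHUNK = GET_MODE_SIZE (MODE):

     size = count & ~(2 * CHUNK - 1);
     for (iter = 0; iter < size; iter += 2 * CHUNK)
       {
         copy CHUNK bytes from src + iter to dest + iter;
         copy CHUNK bytes from src + iter + CHUNK to dest + iter + CHUNK;
       }
     dest += iter;
     src += iter;

   The remaining count % (2 * CHUNK) bytes are handled by the epilogue
   code emitted by the callers.  */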
23513 static void
23514 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23515 rtx destptr, rtx srcptr, rtx value,
23516 rtx count, machine_mode mode, int unroll,
23517 int expected_size, bool issetmem)
23519 rtx_code_label *out_label, *top_label;
23520 rtx iter, tmp;
23521 machine_mode iter_mode = counter_mode (count);
23522 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23523 rtx piece_size = GEN_INT (piece_size_n);
23524 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23525 rtx size;
23526 int i;
23528 top_label = gen_label_rtx ();
23529 out_label = gen_label_rtx ();
23530 iter = gen_reg_rtx (iter_mode);
23532 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23533 NULL, 1, OPTAB_DIRECT);
23534 /* Those two should combine. */
23535 if (piece_size == const1_rtx)
23537 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23538 true, out_label);
23539 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23541 emit_move_insn (iter, const0_rtx);
23543 emit_label (top_label);
23545 tmp = convert_modes (Pmode, iter_mode, iter, true);
23547 /* This assert could be relaxed - in this case we'll need to compute
23548 the smallest power of two containing PIECE_SIZE_N and pass it to
23549 offset_address. */
23550 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23551 destmem = offset_address (destmem, tmp, piece_size_n);
23552 destmem = adjust_address (destmem, mode, 0);
23554 if (!issetmem)
23556 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23557 srcmem = adjust_address (srcmem, mode, 0);
23559 /* When unrolling for chips that reorder memory reads and writes,
23560 we can save registers by using a single temporary.
23561 Also using 4 temporaries is overkill in 32-bit mode. */
23562 if (!TARGET_64BIT && 0)
23564 for (i = 0; i < unroll; i++)
23566 if (i)
23568 destmem =
23569 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23570 srcmem =
23571 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23573 emit_move_insn (destmem, srcmem);
23576 else
23578 rtx tmpreg[4];
23579 gcc_assert (unroll <= 4);
23580 for (i = 0; i < unroll; i++)
23582 tmpreg[i] = gen_reg_rtx (mode);
23583 if (i)
23585 srcmem =
23586 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23588 emit_move_insn (tmpreg[i], srcmem);
23590 for (i = 0; i < unroll; i++)
23592 if (i)
23594 destmem =
23595 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23597 emit_move_insn (destmem, tmpreg[i]);
23601 else
23602 for (i = 0; i < unroll; i++)
23604 if (i)
23605 destmem =
23606 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23607 emit_move_insn (destmem, value);
23610 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23611 true, OPTAB_LIB_WIDEN);
23612 if (tmp != iter)
23613 emit_move_insn (iter, tmp);
23615 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23616 true, top_label);
23617 if (expected_size != -1)
23619 expected_size /= GET_MODE_SIZE (mode) * unroll;
23620 if (expected_size == 0)
23621 predict_jump (0);
23622 else if (expected_size > REG_BR_PROB_BASE)
23623 predict_jump (REG_BR_PROB_BASE - 1);
23624 else
23625 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23627 else
23628 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23629 iter = ix86_zero_extend_to_Pmode (iter);
23630 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23631 true, OPTAB_LIB_WIDEN);
23632 if (tmp != destptr)
23633 emit_move_insn (destptr, tmp);
23634 if (!issetmem)
23636 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23637 true, OPTAB_LIB_WIDEN);
23638 if (tmp != srcptr)
23639 emit_move_insn (srcptr, tmp);
23641 emit_label (out_label);
23644 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23645 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23646 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23647 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23648 ORIG_VALUE is the original value passed to memset to fill the memory with.
23649 Other arguments have the same meaning as for the previous function. */
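/* Illustration only: for a copy of COUNT bytes with MODE == SImode the
   emitted sequence is roughly equivalent to

     mov ecx, COUNT / 4
     rep movsd

   with esi/edi (or rsi/rdi) holding the source and destination
   pointers, while the QImode variant uses "rep movsb" with the raw
   byte count.  */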
23651 static void
23652 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23653 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23654 rtx count,
23655 machine_mode mode, bool issetmem)
23657 rtx destexp;
23658 rtx srcexp;
23659 rtx countreg;
23660 HOST_WIDE_INT rounded_count;
23662 /* If possible, it is shorter to use rep movs.
23663 TODO: Maybe it is better to move this logic to decide_alg. */
23664 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23665 && (!issetmem || orig_value == const0_rtx))
23666 mode = SImode;
23668 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23669 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23671 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23672 GET_MODE_SIZE (mode)));
23673 if (mode != QImode)
23675 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23676 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23677 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23679 else
23680 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23681 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23683 rounded_count = (INTVAL (count)
23684 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23685 destmem = shallow_copy_rtx (destmem);
23686 set_mem_size (destmem, rounded_count);
23688 else if (MEM_SIZE_KNOWN_P (destmem))
23689 clear_mem_size (destmem);
23691 if (issetmem)
23693 value = force_reg (mode, gen_lowpart (mode, value));
23694 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23696 else
23698 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23699 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23700 if (mode != QImode)
23702 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23703 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23704 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23706 else
23707 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23708 if (CONST_INT_P (count))
23710 rounded_count = (INTVAL (count)
23711 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23712 srcmem = shallow_copy_rtx (srcmem);
23713 set_mem_size (srcmem, rounded_count);
23715 else
23717 if (MEM_SIZE_KNOWN_P (srcmem))
23718 clear_mem_size (srcmem);
23720 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23721 destexp, srcexp));
23725 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23726 DESTMEM.
23727 SRCMEM is passed by pointer to be updated on return.
23728 Return value is the updated DESTMEM. */
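/* A hedged worked example: for SIZE_TO_MOVE == 16 the starting
   candidate is a 16-byte integer mode; if no 16-byte (vector) move is
   available the code below falls back to word_mode, so two 8-byte
   moves are emitted on a 64-bit target (four 4-byte moves on a 32-bit
   one).  */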
23729 static rtx
23730 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23731 HOST_WIDE_INT size_to_move)
23733 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23734 enum insn_code code;
23735 machine_mode move_mode;
23736 int piece_size, i;
23738 /* Find the widest mode in which we could perform moves.
23739 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23740 it until a move of such size is supported. */
23741 piece_size = 1 << floor_log2 (size_to_move);
23742 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23743 code = optab_handler (mov_optab, move_mode);
23744 while (code == CODE_FOR_nothing && piece_size > 1)
23746 piece_size >>= 1;
23747 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23748 code = optab_handler (mov_optab, move_mode);
23751 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23752 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23753 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23755 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23756 move_mode = mode_for_vector (word_mode, nunits);
23757 code = optab_handler (mov_optab, move_mode);
23758 if (code == CODE_FOR_nothing)
23760 move_mode = word_mode;
23761 piece_size = GET_MODE_SIZE (move_mode);
23762 code = optab_handler (mov_optab, move_mode);
23765 gcc_assert (code != CODE_FOR_nothing);
23767 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23768 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23770 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23771 gcc_assert (size_to_move % piece_size == 0);
23772 adjust = GEN_INT (piece_size);
23773 for (i = 0; i < size_to_move; i += piece_size)
23775 /* We move from memory to memory, so we'll need to do it via
23776 a temporary register. */
23777 tempreg = gen_reg_rtx (move_mode);
23778 emit_insn (GEN_FCN (code) (tempreg, src));
23779 emit_insn (GEN_FCN (code) (dst, tempreg));
23781 emit_move_insn (destptr,
23782 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23783 emit_move_insn (srcptr,
23784 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23786 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23787 piece_size);
23788 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23789 piece_size);
23792 /* Update DST and SRC rtx. */
23793 *srcmem = src;
23794 return dst;
23797 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23798 static void
23799 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23800 rtx destptr, rtx srcptr, rtx count, int max_size)
23802 rtx src, dest;
23803 if (CONST_INT_P (count))
23805 HOST_WIDE_INT countval = INTVAL (count);
23806 HOST_WIDE_INT epilogue_size = countval % max_size;
23807 int i;
23809 /* For now MAX_SIZE should be a power of 2. This assert could be
23810 relaxed, but it'll require a bit more complicated epilogue
23811 expanding. */
23812 gcc_assert ((max_size & (max_size - 1)) == 0);
23813 for (i = max_size; i >= 1; i >>= 1)
23815 if (epilogue_size & i)
23816 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23818 return;
23820 if (max_size > 8)
23822 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23823 count, 1, OPTAB_DIRECT);
23824 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23825 count, QImode, 1, 4, false);
23826 return;
23829 /* When there are stringops, we can cheaply increase dest and src pointers.
23830 Otherwise we save code size by maintaining an offset (zero is readily
23831 available from the preceding rep operation) and using x86 addressing modes.
23833 if (TARGET_SINGLE_STRINGOP)
23835 if (max_size > 4)
23837 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23838 src = change_address (srcmem, SImode, srcptr);
23839 dest = change_address (destmem, SImode, destptr);
23840 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23841 emit_label (label);
23842 LABEL_NUSES (label) = 1;
23844 if (max_size > 2)
23846 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23847 src = change_address (srcmem, HImode, srcptr);
23848 dest = change_address (destmem, HImode, destptr);
23849 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23850 emit_label (label);
23851 LABEL_NUSES (label) = 1;
23853 if (max_size > 1)
23855 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23856 src = change_address (srcmem, QImode, srcptr);
23857 dest = change_address (destmem, QImode, destptr);
23858 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23859 emit_label (label);
23860 LABEL_NUSES (label) = 1;
23863 else
23865 rtx offset = force_reg (Pmode, const0_rtx);
23866 rtx tmp;
23868 if (max_size > 4)
23870 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23871 src = change_address (srcmem, SImode, srcptr);
23872 dest = change_address (destmem, SImode, destptr);
23873 emit_move_insn (dest, src);
23874 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23875 true, OPTAB_LIB_WIDEN);
23876 if (tmp != offset)
23877 emit_move_insn (offset, tmp);
23878 emit_label (label);
23879 LABEL_NUSES (label) = 1;
23881 if (max_size > 2)
23883 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23884 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23885 src = change_address (srcmem, HImode, tmp);
23886 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23887 dest = change_address (destmem, HImode, tmp);
23888 emit_move_insn (dest, src);
23889 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23890 true, OPTAB_LIB_WIDEN);
23891 if (tmp != offset)
23892 emit_move_insn (offset, tmp);
23893 emit_label (label);
23894 LABEL_NUSES (label) = 1;
23896 if (max_size > 1)
23898 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23899 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23900 src = change_address (srcmem, QImode, tmp);
23901 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23902 dest = change_address (destmem, QImode, tmp);
23903 emit_move_insn (dest, src);
23904 emit_label (label);
23905 LABEL_NUSES (label) = 1;
23910 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23911 with value PROMOTED_VAL.
23912 Return value is the updated DESTMEM. */
23914 static rtx
23915 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23916 HOST_WIDE_INT size_to_move)
23918 rtx dst = destmem, adjust;
23919 enum insn_code code;
23920 machine_mode move_mode;
23921 int piece_size, i;
23923 /* Find the widest mode in which we could perform moves.
23924 Start from the mode of PROMOTED_VAL and narrow it if SIZE_TO_MOVE
23925 is smaller than that mode's size. */
23926 move_mode = GET_MODE (promoted_val);
23927 if (move_mode == VOIDmode)
23928 move_mode = QImode;
23929 if (size_to_move < GET_MODE_SIZE (move_mode))
23931 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23932 promoted_val = gen_lowpart (move_mode, promoted_val);
23934 piece_size = GET_MODE_SIZE (move_mode);
23935 code = optab_handler (mov_optab, move_mode);
23936 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23938 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23940 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23941 gcc_assert (size_to_move % piece_size == 0);
23942 adjust = GEN_INT (piece_size);
23943 for (i = 0; i < size_to_move; i += piece_size)
23945 if (piece_size <= GET_MODE_SIZE (word_mode))
23947 emit_insn (gen_strset (destptr, dst, promoted_val));
23948 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23949 piece_size);
23950 continue;
23953 emit_insn (GEN_FCN (code) (dst, promoted_val));
23955 emit_move_insn (destptr,
23956 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23958 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23959 piece_size);
23962 /* Update DST rtx. */
23963 return dst;
23965 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23966 static void
23967 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23968 rtx count, int max_size)
23970 count =
23971 expand_simple_binop (counter_mode (count), AND, count,
23972 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23973 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23974 gen_lowpart (QImode, value), count, QImode,
23975 1, max_size / 2, true);
23978 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23979 static void
23980 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23981 rtx count, int max_size)
23983 rtx dest;
23985 if (CONST_INT_P (count))
23987 HOST_WIDE_INT countval = INTVAL (count);
23988 HOST_WIDE_INT epilogue_size = countval % max_size;
23989 int i;
23991 /* For now MAX_SIZE should be a power of 2. This assert could be
23992 relaxed, but it'll require a bit more complicated epilogue
23993 expanding. */
23994 gcc_assert ((max_size & (max_size - 1)) == 0);
23995 for (i = max_size; i >= 1; i >>= 1)
23997 if (epilogue_size & i)
23999 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24000 destmem = emit_memset (destmem, destptr, vec_value, i);
24001 else
24002 destmem = emit_memset (destmem, destptr, value, i);
24005 return;
24007 if (max_size > 32)
24009 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24010 return;
24012 if (max_size > 16)
24014 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24015 if (TARGET_64BIT)
24017 dest = change_address (destmem, DImode, destptr);
24018 emit_insn (gen_strset (destptr, dest, value));
24019 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24020 emit_insn (gen_strset (destptr, dest, value));
24022 else
24024 dest = change_address (destmem, SImode, destptr);
24025 emit_insn (gen_strset (destptr, dest, value));
24026 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24027 emit_insn (gen_strset (destptr, dest, value));
24028 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24029 emit_insn (gen_strset (destptr, dest, value));
24030 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24031 emit_insn (gen_strset (destptr, dest, value));
24033 emit_label (label);
24034 LABEL_NUSES (label) = 1;
24036 if (max_size > 8)
24038 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24039 if (TARGET_64BIT)
24041 dest = change_address (destmem, DImode, destptr);
24042 emit_insn (gen_strset (destptr, dest, value));
24044 else
24046 dest = change_address (destmem, SImode, destptr);
24047 emit_insn (gen_strset (destptr, dest, value));
24048 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24049 emit_insn (gen_strset (destptr, dest, value));
24051 emit_label (label);
24052 LABEL_NUSES (label) = 1;
24054 if (max_size > 4)
24056 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24057 dest = change_address (destmem, SImode, destptr);
24058 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24059 emit_label (label);
24060 LABEL_NUSES (label) = 1;
24062 if (max_size > 2)
24064 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24065 dest = change_address (destmem, HImode, destptr);
24066 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24067 emit_label (label);
24068 LABEL_NUSES (label) = 1;
24070 if (max_size > 1)
24072 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24073 dest = change_address (destmem, QImode, destptr);
24074 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24075 emit_label (label);
24076 LABEL_NUSES (label) = 1;
24080 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24081 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24082 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24083 ignored.
24084 Return value is updated DESTMEM. */
24085 static rtx
24086 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24087 rtx destptr, rtx srcptr, rtx value,
24088 rtx vec_value, rtx count, int align,
24089 int desired_alignment, bool issetmem)
24091 int i;
24092 for (i = 1; i < desired_alignment; i <<= 1)
24094 if (align <= i)
24096 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24097 if (issetmem)
24099 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24100 destmem = emit_memset (destmem, destptr, vec_value, i);
24101 else
24102 destmem = emit_memset (destmem, destptr, value, i);
24104 else
24105 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24106 ix86_adjust_counter (count, i);
24107 emit_label (label);
24108 LABEL_NUSES (label) = 1;
24109 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24112 return destmem;
24115 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
24116 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24117 and jump to DONE_LABEL. */
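/* Sketch of the trick used here (illustration only): for SIZE == 4 and
   any COUNT in the range 4..7, copying

     4 bytes from SRCPTR             to DESTPTR, and
     4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4

   covers every byte exactly, because the two chunks overlap; no loop
   or byte-granular tail is needed.  */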
24118 static void
24119 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24120 rtx destptr, rtx srcptr,
24121 rtx value, rtx vec_value,
24122 rtx count, int size,
24123 rtx done_label, bool issetmem)
24125 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24126 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24127 rtx modesize;
24128 int n;
24130 /* If we do not have vector value to copy, we must reduce size. */
24131 if (issetmem)
24133 if (!vec_value)
24135 if (GET_MODE (value) == VOIDmode && size > 8)
24136 mode = Pmode;
24137 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24138 mode = GET_MODE (value);
24140 else
24141 mode = GET_MODE (vec_value), value = vec_value;
24143 else
24145 /* Choose appropriate vector mode. */
24146 if (size >= 32)
24147 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24148 else if (size >= 16)
24149 mode = TARGET_SSE ? V16QImode : DImode;
24150 srcmem = change_address (srcmem, mode, srcptr);
24152 destmem = change_address (destmem, mode, destptr);
24153 modesize = GEN_INT (GET_MODE_SIZE (mode));
24154 gcc_assert (GET_MODE_SIZE (mode) <= size);
24155 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24157 if (issetmem)
24158 emit_move_insn (destmem, gen_lowpart (mode, value));
24159 else
24161 emit_move_insn (destmem, srcmem);
24162 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24164 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24167 destmem = offset_address (destmem, count, 1);
24168 destmem = offset_address (destmem, GEN_INT (-2 * size),
24169 GET_MODE_SIZE (mode));
24170 if (!issetmem)
24172 srcmem = offset_address (srcmem, count, 1);
24173 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24174 GET_MODE_SIZE (mode));
24176 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24178 if (issetmem)
24179 emit_move_insn (destmem, gen_lowpart (mode, value));
24180 else
24182 emit_move_insn (destmem, srcmem);
24183 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24185 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24187 emit_jump_insn (gen_jump (done_label));
24188 emit_barrier ();
24190 emit_label (label);
24191 LABEL_NUSES (label) = 1;
24194 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24195 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24196 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24197 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24198 DONE_LABEL is a label after the whole copying sequence. The label is created
24199 on demand if *DONE_LABEL is NULL.
24200 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
24201 bounds after the initial copies.
24203 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24204 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24205 we will dispatch to a library call for large blocks.
24207 In pseudocode we do:
24209 if (COUNT < SIZE)
24211 Assume that SIZE is 4. Bigger sizes are handled analogously
24212 if (COUNT & 4)
24214 copy 4 bytes from SRCPTR to DESTPTR
24215 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24216 goto done_label
24218 if (!COUNT)
24219 goto done_label;
24220 copy 1 byte from SRCPTR to DESTPTR
24221 if (COUNT & 2)
24223 copy 2 bytes from SRCPTR to DESTPTR
24224 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24227 else
24229 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24230 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24232 OLD_DESTPTR = DESTPTR;
24233 Align DESTPTR up to DESIRED_ALIGN
24234 SRCPTR += DESTPTR - OLD_DESTPTR
24235 COUNT -= DESTPTR - OLD_DESTPTR
24236 if (DYNAMIC_CHECK)
24237 Round COUNT down to multiple of SIZE
24238 << optional caller supplied zero size guard is here >>
24239 << optional caller supplied dynamic check is here >>
24240 << caller supplied main copy loop is here >>
24242 done_label:
24244 static void
24245 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24246 rtx *destptr, rtx *srcptr,
24247 machine_mode mode,
24248 rtx value, rtx vec_value,
24249 rtx *count,
24250 rtx_code_label **done_label,
24251 int size,
24252 int desired_align,
24253 int align,
24254 unsigned HOST_WIDE_INT *min_size,
24255 bool dynamic_check,
24256 bool issetmem)
24258 rtx_code_label *loop_label = NULL, *label;
24259 int n;
24260 rtx modesize;
24261 int prolog_size = 0;
24262 rtx mode_value;
24264 /* Choose the proper value to copy. */
24265 if (issetmem && VECTOR_MODE_P (mode))
24266 mode_value = vec_value;
24267 else
24268 mode_value = value;
24269 gcc_assert (GET_MODE_SIZE (mode) <= size);
24271 /* See if block is big or small, handle small blocks. */
24272 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24274 int size2 = size;
24275 loop_label = gen_label_rtx ();
24277 if (!*done_label)
24278 *done_label = gen_label_rtx ();
24280 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24281 1, loop_label);
24282 size2 >>= 1;
24284 /* Handle sizes > 3. */
24285 for (;size2 > 2; size2 >>= 1)
24286 expand_small_movmem_or_setmem (destmem, srcmem,
24287 *destptr, *srcptr,
24288 value, vec_value,
24289 *count,
24290 size2, *done_label, issetmem);
24291 /* Nothing to copy? Jump to DONE_LABEL if so */
24292 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24293 1, *done_label);
24295 /* Do a byte copy. */
24296 destmem = change_address (destmem, QImode, *destptr);
24297 if (issetmem)
24298 emit_move_insn (destmem, gen_lowpart (QImode, value));
24299 else
24301 srcmem = change_address (srcmem, QImode, *srcptr);
24302 emit_move_insn (destmem, srcmem);
24305 /* Handle sizes 2 and 3. */
24306 label = ix86_expand_aligntest (*count, 2, false);
24307 destmem = change_address (destmem, HImode, *destptr);
24308 destmem = offset_address (destmem, *count, 1);
24309 destmem = offset_address (destmem, GEN_INT (-2), 2);
24310 if (issetmem)
24311 emit_move_insn (destmem, gen_lowpart (HImode, value));
24312 else
24314 srcmem = change_address (srcmem, HImode, *srcptr);
24315 srcmem = offset_address (srcmem, *count, 1);
24316 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24317 emit_move_insn (destmem, srcmem);
24320 emit_label (label);
24321 LABEL_NUSES (label) = 1;
24322 emit_jump_insn (gen_jump (*done_label));
24323 emit_barrier ();
24325 else
24326 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24327 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24329 /* Start memcpy for COUNT >= SIZE. */
24330 if (loop_label)
24332 emit_label (loop_label);
24333 LABEL_NUSES (loop_label) = 1;
24336 /* Copy first desired_align bytes. */
24337 if (!issetmem)
24338 srcmem = change_address (srcmem, mode, *srcptr);
24339 destmem = change_address (destmem, mode, *destptr);
24340 modesize = GEN_INT (GET_MODE_SIZE (mode));
24341 for (n = 0; prolog_size < desired_align - align; n++)
24343 if (issetmem)
24344 emit_move_insn (destmem, mode_value);
24345 else
24347 emit_move_insn (destmem, srcmem);
24348 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24350 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24351 prolog_size += GET_MODE_SIZE (mode);
24355 /* Copy last SIZE bytes. */
24356 destmem = offset_address (destmem, *count, 1);
24357 destmem = offset_address (destmem,
24358 GEN_INT (-size - prolog_size),
24360 if (issetmem)
24361 emit_move_insn (destmem, mode_value);
24362 else
24364 srcmem = offset_address (srcmem, *count, 1);
24365 srcmem = offset_address (srcmem,
24366 GEN_INT (-size - prolog_size),
24368 emit_move_insn (destmem, srcmem);
24370 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24372 destmem = offset_address (destmem, modesize, 1);
24373 if (issetmem)
24374 emit_move_insn (destmem, mode_value);
24375 else
24377 srcmem = offset_address (srcmem, modesize, 1);
24378 emit_move_insn (destmem, srcmem);
24382 /* Align destination. */
24383 if (desired_align > 1 && desired_align > align)
24385 rtx saveddest = *destptr;
24387 gcc_assert (desired_align <= size);
24388 /* Align destptr up, place it to new register. */
24389 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24390 GEN_INT (prolog_size),
24391 NULL_RTX, 1, OPTAB_DIRECT);
24392 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24393 REG_POINTER (*destptr) = 1;
24394 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24395 GEN_INT (-desired_align),
24396 *destptr, 1, OPTAB_DIRECT);
24397 /* See how many bytes we skipped. */
24398 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24399 *destptr,
24400 saveddest, 1, OPTAB_DIRECT);
24401 /* Adjust srcptr and count. */
24402 if (!issetmem)
24403 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24404 saveddest, *srcptr, 1, OPTAB_DIRECT);
24405 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24406 saveddest, *count, 1, OPTAB_DIRECT);
24407 /* We copied at most size + prolog_size. */
24408 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24409 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24410 else
24411 *min_size = 0;
24413 /* Our loops always round down the block size, but for dispatch to the library
24414 we need the precise value. */
24415 if (dynamic_check)
24416 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24417 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24419 else
24421 gcc_assert (prolog_size == 0);
24422 /* Decrease count, so we won't end up copying last word twice. */
24423 if (!CONST_INT_P (*count))
24424 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24425 constm1_rtx, *count, 1, OPTAB_DIRECT);
24426 else
24427 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24428 if (*min_size)
24429 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24434 /* This function is like the previous one, except here we know how many bytes
24435 need to be copied. That allows us to update alignment not only of DST, which
24436 is returned, but also of SRC, which is passed as a pointer for that
24437 reason. */
24438 static rtx
24439 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24440 rtx srcreg, rtx value, rtx vec_value,
24441 int desired_align, int align_bytes,
24442 bool issetmem)
24444 rtx src = NULL;
24445 rtx orig_dst = dst;
24446 rtx orig_src = NULL;
24447 int piece_size = 1;
24448 int copied_bytes = 0;
24450 if (!issetmem)
24452 gcc_assert (srcp != NULL);
24453 src = *srcp;
24454 orig_src = src;
24457 for (piece_size = 1;
24458 piece_size <= desired_align && copied_bytes < align_bytes;
24459 piece_size <<= 1)
24461 if (align_bytes & piece_size)
24463 if (issetmem)
24465 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24466 dst = emit_memset (dst, destreg, vec_value, piece_size);
24467 else
24468 dst = emit_memset (dst, destreg, value, piece_size);
24470 else
24471 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24472 copied_bytes += piece_size;
24475 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24476 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24477 if (MEM_SIZE_KNOWN_P (orig_dst))
24478 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24480 if (!issetmem)
24482 int src_align_bytes = get_mem_align_offset (src, desired_align
24483 * BITS_PER_UNIT);
24484 if (src_align_bytes >= 0)
24485 src_align_bytes = desired_align - src_align_bytes;
24486 if (src_align_bytes >= 0)
24488 unsigned int src_align;
24489 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24491 if ((src_align_bytes & (src_align - 1))
24492 == (align_bytes & (src_align - 1)))
24493 break;
24495 if (src_align > (unsigned int) desired_align)
24496 src_align = desired_align;
24497 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24498 set_mem_align (src, src_align * BITS_PER_UNIT);
24500 if (MEM_SIZE_KNOWN_P (orig_src))
24501 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24502 *srcp = src;
24505 return dst;
24508 /* Return true if ALG can be used in current context.
24509 Assume we expand memset if MEMSET is true. */
24510 static bool
24511 alg_usable_p (enum stringop_alg alg, bool memset)
24513 if (alg == no_stringop)
24514 return false;
24515 if (alg == vector_loop)
24516 return TARGET_SSE || TARGET_AVX;
24517 /* Algorithms using the rep prefix want at least edi and ecx;
24518 additionally, memset wants eax and memcpy wants esi. Don't
24519 consider such algorithms if the user has appropriated those
24520 registers for their own purposes. */
24521 if (alg == rep_prefix_1_byte
24522 || alg == rep_prefix_4_byte
24523 || alg == rep_prefix_8_byte)
24524 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24525 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24526 return true;
24529 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
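/* Rough decision order implemented below (a summary, not normative):
   honor a user-specified -mstringop-strategy first, use the smallest
   rep-prefix variant when optimizing for size, a byte loop for very
   tiny blocks, the per-size cost table when an expected size is known,
   and finally the unknown-size entry or a recursive guess when
   inlining is forced.  */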
24530 static enum stringop_alg
24531 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24532 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24533 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24535 const struct stringop_algs * algs;
24536 bool optimize_for_speed;
24537 int max = 0;
24538 const struct processor_costs *cost;
24539 int i;
24540 bool any_alg_usable_p = false;
24542 *noalign = false;
24543 *dynamic_check = -1;
24545 /* Even if the string operation call is cold, we still might spend a lot
24546 of time processing large blocks. */
24547 if (optimize_function_for_size_p (cfun)
24548 || (optimize_insn_for_size_p ()
24549 && (max_size < 256
24550 || (expected_size != -1 && expected_size < 256))))
24551 optimize_for_speed = false;
24552 else
24553 optimize_for_speed = true;
24555 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24556 if (memset)
24557 algs = &cost->memset[TARGET_64BIT != 0];
24558 else
24559 algs = &cost->memcpy[TARGET_64BIT != 0];
24561 /* See maximal size for user defined algorithm. */
24562 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24564 enum stringop_alg candidate = algs->size[i].alg;
24565 bool usable = alg_usable_p (candidate, memset);
24566 any_alg_usable_p |= usable;
24568 if (candidate != libcall && candidate && usable)
24569 max = algs->size[i].max;
24572 /* If the expected size is not known but the max size is small enough
24573 that the inline version is a win, set the expected size into
24574 the range. */
24575 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24576 && expected_size == -1)
24577 expected_size = min_size / 2 + max_size / 2;
24579 /* If the user specified the algorithm, honor it if possible. */
24580 if (ix86_stringop_alg != no_stringop
24581 && alg_usable_p (ix86_stringop_alg, memset))
24582 return ix86_stringop_alg;
24583 /* rep; movq or rep; movl is the smallest variant. */
24584 else if (!optimize_for_speed)
24586 *noalign = true;
24587 if (!count || (count & 3) || (memset && !zero_memset))
24588 return alg_usable_p (rep_prefix_1_byte, memset)
24589 ? rep_prefix_1_byte : loop_1_byte;
24590 else
24591 return alg_usable_p (rep_prefix_4_byte, memset)
24592 ? rep_prefix_4_byte : loop;
24594 /* Very tiny blocks are best handled via the loop; REP is expensive to
24595 set up. */
24596 else if (expected_size != -1 && expected_size < 4)
24597 return loop_1_byte;
24598 else if (expected_size != -1)
24600 enum stringop_alg alg = libcall;
24601 bool alg_noalign = false;
24602 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24604 /* We get here if the algorithms that were not libcall-based
24605 were rep-prefix based and we are unable to use rep prefixes
24606 based on global register usage. Break out of the loop and
24607 use the heuristic below. */
24608 if (algs->size[i].max == 0)
24609 break;
24610 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24612 enum stringop_alg candidate = algs->size[i].alg;
24614 if (candidate != libcall && alg_usable_p (candidate, memset))
24616 alg = candidate;
24617 alg_noalign = algs->size[i].noalign;
24619 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24620 last non-libcall inline algorithm. */
24621 if (TARGET_INLINE_ALL_STRINGOPS)
24623 /* When the current size is best copied by a libcall,
24624 but we are still forced to inline, run the heuristic below
24625 that will pick code for medium-sized blocks. */
24626 if (alg != libcall)
24628 *noalign = alg_noalign;
24629 return alg;
24631 else if (!any_alg_usable_p)
24632 break;
24634 else if (alg_usable_p (candidate, memset))
24636 *noalign = algs->size[i].noalign;
24637 return candidate;
24642 /* When asked to inline the call anyway, try to pick a meaningful choice.
24643 We look for the maximal size of block that is faster to copy by hand and
24644 take blocks of at most that size, guessing that the average size will
24645 be roughly half of the block.
24647 If this turns out to be bad, we might simply specify the preferred
24648 choice in ix86_costs. */
24649 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24650 && (algs->unknown_size == libcall
24651 || !alg_usable_p (algs->unknown_size, memset)))
24653 enum stringop_alg alg;
24655 /* If there aren't any usable algorithms, then recursing on
24656 smaller sizes isn't going to find anything. Just return the
24657 simple byte-at-a-time copy loop. */
24658 if (!any_alg_usable_p)
24660 /* Pick something reasonable. */
24661 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24662 *dynamic_check = 128;
24663 return loop_1_byte;
24665 if (max <= 0)
24666 max = 4096;
24667 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24668 zero_memset, dynamic_check, noalign);
24669 gcc_assert (*dynamic_check == -1);
24670 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24671 *dynamic_check = max;
24672 else
24673 gcc_assert (alg != libcall);
24674 return alg;
24676 return (alg_usable_p (algs->unknown_size, memset)
24677 ? algs->unknown_size : libcall);
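/* Worked example (illustrative only; exact numbers depend on the active cost
   table and the noalign flags, which are omitted here): assume a memcpy with
   COUNT unknown, EXPECTED_SIZE == 200 and a size table of
   {{256, rep_prefix_4_byte}, {-1, libcall}}.  The first entry with
   max >= 200 names rep_prefix_4_byte, so that algorithm is returned,
   provided alg_usable_p says the rep registers are free.  Had EXPECTED_SIZE
   been 3, the "very tiny blocks" case above would have returned loop_1_byte
   without consulting the table at all.  */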
24680 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24681 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24682 static int
24683 decide_alignment (int align,
24684 enum stringop_alg alg,
24685 int expected_size,
24686 machine_mode move_mode)
24688 int desired_align = 0;
24690 gcc_assert (alg != no_stringop);
24692 if (alg == libcall)
24693 return 0;
24694 if (move_mode == VOIDmode)
24695 return 0;
24697 desired_align = GET_MODE_SIZE (move_mode);
24698 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24699 copying a whole cache line at once. */
24700 if (TARGET_PENTIUMPRO
24701 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24702 desired_align = 8;
24704 if (optimize_size)
24705 desired_align = 1;
24706 if (desired_align < align)
24707 desired_align = align;
24708 if (expected_size != -1 && expected_size < 4)
24709 desired_align = align;
24711 return desired_align;
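/* Example (illustrative): for alg == vector_loop with move_mode == V4SImode,
   GET_MODE_SIZE gives 16, so DESIRED_ALIGN starts at 16; with -Os it is
   forced back to 1, it is never allowed to drop below the ALIGN the caller
   already guarantees, and for blocks expected to be smaller than 4 bytes it
   is collapsed back to ALIGN.  */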
24715 /* Helper function for memset. For the QImode value 0xXY produce
24716 0xXYXYXYXY of the width specified by MODE. This is essentially
24717 a * 0x01010101, but we can do slightly better than
24718 synth_mult by unwinding the sequence by hand on CPUs with
24719 slow multiply. */
24720 static rtx
24721 promote_duplicated_reg (machine_mode mode, rtx val)
24723 machine_mode valmode = GET_MODE (val);
24724 rtx tmp;
24725 int nops = mode == DImode ? 3 : 2;
24727 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24728 if (val == const0_rtx)
24729 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24730 if (CONST_INT_P (val))
24732 HOST_WIDE_INT v = INTVAL (val) & 255;
24734 v |= v << 8;
24735 v |= v << 16;
24736 if (mode == DImode)
24737 v |= (v << 16) << 16;
24738 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24741 if (valmode == VOIDmode)
24742 valmode = QImode;
24743 if (valmode != QImode)
24744 val = gen_lowpart (QImode, val);
24745 if (mode == QImode)
24746 return val;
24747 if (!TARGET_PARTIAL_REG_STALL)
24748 nops--;
24749 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24750 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24751 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24752 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24754 rtx reg = convert_modes (mode, QImode, val, true);
24755 tmp = promote_duplicated_reg (mode, const1_rtx);
24756 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24757 OPTAB_DIRECT);
24759 else
24761 rtx reg = convert_modes (mode, QImode, val, true);
24763 if (!TARGET_PARTIAL_REG_STALL)
24764 if (mode == SImode)
24765 emit_insn (gen_movsi_insv_1 (reg, reg));
24766 else
24767 emit_insn (gen_movdi_insv_1 (reg, reg));
24768 else
24770 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24771 NULL, 1, OPTAB_DIRECT);
24772 reg =
24773 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24775 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24776 NULL, 1, OPTAB_DIRECT);
24777 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24778 if (mode == SImode)
24779 return reg;
24780 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24781 NULL, 1, OPTAB_DIRECT);
24782 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24783 return reg;
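/* Worked example (illustrative): promoting the QImode constant 0xAB to
   SImode goes through the CONST_INT path above:

     v = 0xAB;  v |= v << 8;   ->  0xABAB
                v |= v << 16;  ->  0xABABABAB

   For a non-constant byte already in a register, the sequence emitted above
   has the same effect as  r |= r << 8;  r |= r << 16;  (and r |= r << 32 for
   DImode), unless the multiply by the 0x01010101 splat of one is judged
   cheaper by the cost comparison.  */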
24787 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24788 will be needed by the main loop copying SIZE_NEEDED chunks and by the
24789 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
24790 static rtx
24791 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24792 int align)
24794 rtx promoted_val;
24796 if (TARGET_64BIT
24797 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24798 promoted_val = promote_duplicated_reg (DImode, val);
24799 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24800 promoted_val = promote_duplicated_reg (SImode, val);
24801 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24802 promoted_val = promote_duplicated_reg (HImode, val);
24803 else
24804 promoted_val = val;
24806 return promoted_val;
24809 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24810 operations when profitable. The code depends upon architecture, block size
24811 and alignment, but always has one of the following overall structures:
24813 Aligned move sequence:
24815 1) Prologue guard: Conditional that jumps up to epilogues for small
24816 blocks that can be handled by the epilogue alone. This is faster
24817 but also needed for correctness, since the prologue assumes the block
24818 is larger than the desired alignment.
24820 Optional dynamic check for size and libcall for large
24821 blocks is emitted here too, with -minline-stringops-dynamically.
24823 2) Prologue: copy first few bytes in order to get destination
24824 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24825 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24826 copied. We emit either a jump tree on power of two sized
24827 blocks, or a byte loop.
24829 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24830 with specified algorithm.
24832 4) Epilogue: code copying tail of the block that is too small to be
24833 handled by main body (or up to size guarded by prologue guard).
24835 Misaligned move sequence
24837 1) misaligned move prologue/epilogue containing:
24838 a) Prologue handling small memory blocks and jumping to done_label
24839 (skipped if blocks are known to be large enough)
24840 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24841 needed, done by a single possibly misaligned move
24842 (skipped if alignment is not needed)
24843 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24845 2) Zero size guard dispatching to done_label, if needed
24847 3) dispatch to a library call, if needed,
24849 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24850 with the specified algorithm. */
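/* Illustrative shape of the aligned sequence for a memset with unknown size
   N (a sketch, not literal output; register choices and branch probabilities
   vary with the target and the chosen algorithm):

     if (N < epilogue_size_needed) goto epilogue;    -- prologue guard
     store a few bytes until dst % desired_align == 0;  -- prologue
     main loop storing size_needed bytes per iteration;
   epilogue:
     store the remaining N & (epilogue_size_needed - 1) bytes.  */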
24851 bool
24852 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24853 rtx align_exp, rtx expected_align_exp,
24854 rtx expected_size_exp, rtx min_size_exp,
24855 rtx max_size_exp, rtx probable_max_size_exp,
24856 bool issetmem)
24858 rtx destreg;
24859 rtx srcreg = NULL;
24860 rtx_code_label *label = NULL;
24861 rtx tmp;
24862 rtx_code_label *jump_around_label = NULL;
24863 HOST_WIDE_INT align = 1;
24864 unsigned HOST_WIDE_INT count = 0;
24865 HOST_WIDE_INT expected_size = -1;
24866 int size_needed = 0, epilogue_size_needed;
24867 int desired_align = 0, align_bytes = 0;
24868 enum stringop_alg alg;
24869 rtx promoted_val = NULL;
24870 rtx vec_promoted_val = NULL;
24871 bool force_loopy_epilogue = false;
24872 int dynamic_check;
24873 bool need_zero_guard = false;
24874 bool noalign;
24875 machine_mode move_mode = VOIDmode;
24876 int unroll_factor = 1;
24877 /* TODO: Once value ranges are available, fill in proper data. */
24878 unsigned HOST_WIDE_INT min_size = 0;
24879 unsigned HOST_WIDE_INT max_size = -1;
24880 unsigned HOST_WIDE_INT probable_max_size = -1;
24881 bool misaligned_prologue_used = false;
24883 if (CONST_INT_P (align_exp))
24884 align = INTVAL (align_exp);
24885 /* i386 can do misaligned accesses at a reasonable extra cost. */
24886 if (CONST_INT_P (expected_align_exp)
24887 && INTVAL (expected_align_exp) > align)
24888 align = INTVAL (expected_align_exp);
24889 /* ALIGN is the minimum of destination and source alignment, but we care here
24890 just about destination alignment. */
24891 else if (!issetmem
24892 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24893 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24895 if (CONST_INT_P (count_exp))
24897 min_size = max_size = probable_max_size = count = expected_size
24898 = INTVAL (count_exp);
24899 /* When COUNT is 0, there is nothing to do. */
24900 if (!count)
24901 return true;
24903 else
24905 if (min_size_exp)
24906 min_size = INTVAL (min_size_exp);
24907 if (max_size_exp)
24908 max_size = INTVAL (max_size_exp);
24909 if (probable_max_size_exp)
24910 probable_max_size = INTVAL (probable_max_size_exp);
24911 if (CONST_INT_P (expected_size_exp))
24912 expected_size = INTVAL (expected_size_exp);
24915 /* Make sure we don't need to care about overflow later on. */
24916 if (count > (HOST_WIDE_INT_1U << 30))
24917 return false;
24919 /* Step 0: Decide on preferred algorithm, desired alignment and
24920 size of chunks to be copied by main loop. */
24921 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24922 issetmem,
24923 issetmem && val_exp == const0_rtx,
24924 &dynamic_check, &noalign);
24925 if (alg == libcall)
24926 return false;
24927 gcc_assert (alg != no_stringop);
24929 /* For now the vector version of memset is generated only for memory zeroing,
24930 as creating the promoted vector value is very cheap in this case. */
24931 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24932 alg = unrolled_loop;
24934 if (!count)
24935 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24936 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24937 if (!issetmem)
24938 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24940 unroll_factor = 1;
24941 move_mode = word_mode;
24942 switch (alg)
24944 case libcall:
24945 case no_stringop:
24946 case last_alg:
24947 gcc_unreachable ();
24948 case loop_1_byte:
24949 need_zero_guard = true;
24950 move_mode = QImode;
24951 break;
24952 case loop:
24953 need_zero_guard = true;
24954 break;
24955 case unrolled_loop:
24956 need_zero_guard = true;
24957 unroll_factor = (TARGET_64BIT ? 4 : 2);
24958 break;
24959 case vector_loop:
24960 need_zero_guard = true;
24961 unroll_factor = 4;
24962 /* Find the widest supported mode. */
24963 move_mode = word_mode;
24964 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24965 != CODE_FOR_nothing)
24966 move_mode = GET_MODE_WIDER_MODE (move_mode);
24968 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24969 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24970 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24972 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24973 move_mode = mode_for_vector (word_mode, nunits);
24974 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24975 move_mode = word_mode;
24977 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24978 break;
24979 case rep_prefix_8_byte:
24980 move_mode = DImode;
24981 break;
24982 case rep_prefix_4_byte:
24983 move_mode = SImode;
24984 break;
24985 case rep_prefix_1_byte:
24986 move_mode = QImode;
24987 break;
24989 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24990 epilogue_size_needed = size_needed;
24992 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24993 if (!TARGET_ALIGN_STRINGOPS || noalign)
24994 align = desired_align;
24996 /* Step 1: Prologue guard. */
24998 /* Alignment code needs count to be in register. */
24999 if (CONST_INT_P (count_exp) && desired_align > align)
25001 if (INTVAL (count_exp) > desired_align
25002 && INTVAL (count_exp) > size_needed)
25004 align_bytes
25005 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25006 if (align_bytes <= 0)
25007 align_bytes = 0;
25008 else
25009 align_bytes = desired_align - align_bytes;
25011 if (align_bytes == 0)
25012 count_exp = force_reg (counter_mode (count_exp), count_exp);
25014 gcc_assert (desired_align >= 1 && align >= 1);
25016 /* Misaligned move sequences handle both prologue and epilogue at once.
25017 Default code generation results in smaller code for large alignments
25018 and also avoids redundant work when sizes are known precisely. */
25019 misaligned_prologue_used
25020 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25021 && MAX (desired_align, epilogue_size_needed) <= 32
25022 && desired_align <= epilogue_size_needed
25023 && ((desired_align > align && !align_bytes)
25024 || (!count && epilogue_size_needed > 1)));
25026 /* Do the cheap promotion to allow better CSE across the
25027 main loop and epilogue (i.e. one load of the big constant in
25028 front of all the code).
25029 For now the misaligned move sequences do not have a fast path
25030 without broadcasting. */
25031 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25033 if (alg == vector_loop)
25035 gcc_assert (val_exp == const0_rtx);
25036 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25037 promoted_val = promote_duplicated_reg_to_size (val_exp,
25038 GET_MODE_SIZE (word_mode),
25039 desired_align, align);
25041 else
25043 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25044 desired_align, align);
25047 /* Misaligned move sequences handle both prologues and epilogues at once.
25048 Default code generation results in smaller code for large alignments and
25049 also avoids redundant work when sizes are known precisely. */
25050 if (misaligned_prologue_used)
25052 /* The misaligned move prologue handles small blocks by itself. */
25053 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25054 (dst, src, &destreg, &srcreg,
25055 move_mode, promoted_val, vec_promoted_val,
25056 &count_exp,
25057 &jump_around_label,
25058 desired_align < align
25059 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25060 desired_align, align, &min_size, dynamic_check, issetmem);
25061 if (!issetmem)
25062 src = change_address (src, BLKmode, srcreg);
25063 dst = change_address (dst, BLKmode, destreg);
25064 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25065 epilogue_size_needed = 0;
25066 if (need_zero_guard && !min_size)
25068 /* It is possible that we copied enough so the main loop will not
25069 execute. */
25070 gcc_assert (size_needed > 1);
25071 if (jump_around_label == NULL_RTX)
25072 jump_around_label = gen_label_rtx ();
25073 emit_cmp_and_jump_insns (count_exp,
25074 GEN_INT (size_needed),
25075 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25076 if (expected_size == -1
25077 || expected_size < (desired_align - align) / 2 + size_needed)
25078 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25079 else
25080 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25083 /* Ensure that alignment prologue won't copy past end of block. */
25084 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25086 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25087 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25088 Make sure it is power of 2. */
25089 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25091 /* To improve performance for small blocks, we jump around the VAL
25092 promoting code. This means that if the promoted VAL is not constant,
25093 we might not use it in the epilogue and have to use the byte
25094 loop variant. */
25095 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25096 force_loopy_epilogue = true;
25097 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25098 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25100 /* If main algorithm works on QImode, no epilogue is needed.
25101 For small sizes just don't align anything. */
25102 if (size_needed == 1)
25103 desired_align = align;
25104 else
25105 goto epilogue;
25107 else if (!count
25108 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25110 label = gen_label_rtx ();
25111 emit_cmp_and_jump_insns (count_exp,
25112 GEN_INT (epilogue_size_needed),
25113 LTU, 0, counter_mode (count_exp), 1, label);
25114 if (expected_size == -1 || expected_size < epilogue_size_needed)
25115 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25116 else
25117 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25121 /* Emit code to decide on runtime whether library call or inline should be
25122 used. */
25123 if (dynamic_check != -1)
25125 if (!issetmem && CONST_INT_P (count_exp))
25127 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25129 emit_block_move_via_libcall (dst, src, count_exp, false);
25130 count_exp = const0_rtx;
25131 goto epilogue;
25134 else
25136 rtx_code_label *hot_label = gen_label_rtx ();
25137 if (jump_around_label == NULL_RTX)
25138 jump_around_label = gen_label_rtx ();
25139 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25140 LEU, 0, counter_mode (count_exp),
25141 1, hot_label);
25142 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25143 if (issetmem)
25144 set_storage_via_libcall (dst, count_exp, val_exp, false);
25145 else
25146 emit_block_move_via_libcall (dst, src, count_exp, false);
25147 emit_jump (jump_around_label);
25148 emit_label (hot_label);
25152 /* Step 2: Alignment prologue. */
25153 /* Do the expensive promotion once we branched off the small blocks. */
25154 if (issetmem && !promoted_val)
25155 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25156 desired_align, align);
25158 if (desired_align > align && !misaligned_prologue_used)
25160 if (align_bytes == 0)
25162 /* Except for the first move in the prologue, we no longer know
25163 the constant offset in the aliasing info. It does not seem worth
25164 the pain to maintain it for the first move, so throw away
25165 the info early. */
25166 dst = change_address (dst, BLKmode, destreg);
25167 if (!issetmem)
25168 src = change_address (src, BLKmode, srcreg);
25169 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25170 promoted_val, vec_promoted_val,
25171 count_exp, align, desired_align,
25172 issetmem);
25173 /* At most desired_align - align bytes are copied. */
25174 if (min_size < (unsigned)(desired_align - align))
25175 min_size = 0;
25176 else
25177 min_size -= desired_align - align;
25179 else
25181 /* If we know how many bytes need to be stored before dst is
25182 sufficiently aligned, maintain aliasing info accurately. */
25183 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25184 srcreg,
25185 promoted_val,
25186 vec_promoted_val,
25187 desired_align,
25188 align_bytes,
25189 issetmem);
25191 count_exp = plus_constant (counter_mode (count_exp),
25192 count_exp, -align_bytes);
25193 count -= align_bytes;
25194 min_size -= align_bytes;
25195 max_size -= align_bytes;
25197 if (need_zero_guard
25198 && !min_size
25199 && (count < (unsigned HOST_WIDE_INT) size_needed
25200 || (align_bytes == 0
25201 && count < ((unsigned HOST_WIDE_INT) size_needed
25202 + desired_align - align))))
25204 /* It is possible that we copied enough so the main loop will not
25205 execute. */
25206 gcc_assert (size_needed > 1);
25207 if (label == NULL_RTX)
25208 label = gen_label_rtx ();
25209 emit_cmp_and_jump_insns (count_exp,
25210 GEN_INT (size_needed),
25211 LTU, 0, counter_mode (count_exp), 1, label);
25212 if (expected_size == -1
25213 || expected_size < (desired_align - align) / 2 + size_needed)
25214 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25215 else
25216 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25219 if (label && size_needed == 1)
25221 emit_label (label);
25222 LABEL_NUSES (label) = 1;
25223 label = NULL;
25224 epilogue_size_needed = 1;
25225 if (issetmem)
25226 promoted_val = val_exp;
25228 else if (label == NULL_RTX && !misaligned_prologue_used)
25229 epilogue_size_needed = size_needed;
25231 /* Step 3: Main loop. */
25233 switch (alg)
25235 case libcall:
25236 case no_stringop:
25237 case last_alg:
25238 gcc_unreachable ();
25239 case loop_1_byte:
25240 case loop:
25241 case unrolled_loop:
25242 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25243 count_exp, move_mode, unroll_factor,
25244 expected_size, issetmem);
25245 break;
25246 case vector_loop:
25247 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25248 vec_promoted_val, count_exp, move_mode,
25249 unroll_factor, expected_size, issetmem);
25250 break;
25251 case rep_prefix_8_byte:
25252 case rep_prefix_4_byte:
25253 case rep_prefix_1_byte:
25254 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25255 val_exp, count_exp, move_mode, issetmem);
25256 break;
25258 /* Properly adjust the offsets of the src and dest memory for aliasing. */
25259 if (CONST_INT_P (count_exp))
25261 if (!issetmem)
25262 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25263 (count / size_needed) * size_needed);
25264 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25265 (count / size_needed) * size_needed);
25267 else
25269 if (!issetmem)
25270 src = change_address (src, BLKmode, srcreg);
25271 dst = change_address (dst, BLKmode, destreg);
25274 /* Step 4: Epilogue to copy the remaining bytes. */
25275 epilogue:
25276 if (label)
25278 /* When the main loop is done, COUNT_EXP might hold original count,
25279 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25280 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25281 bytes. Compensate if needed. */
25283 if (size_needed < epilogue_size_needed)
25285 tmp =
25286 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25287 GEN_INT (size_needed - 1), count_exp, 1,
25288 OPTAB_DIRECT);
25289 if (tmp != count_exp)
25290 emit_move_insn (count_exp, tmp);
25292 emit_label (label);
25293 LABEL_NUSES (label) = 1;
25296 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25298 if (force_loopy_epilogue)
25299 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25300 epilogue_size_needed);
25301 else
25303 if (issetmem)
25304 expand_setmem_epilogue (dst, destreg, promoted_val,
25305 vec_promoted_val, count_exp,
25306 epilogue_size_needed);
25307 else
25308 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25309 epilogue_size_needed);
25312 if (jump_around_label)
25313 emit_label (jump_around_label);
25314 return true;
25318 /* Expand the appropriate insns for doing strlen if not just doing
25319 repnz; scasb
25321 out = result, initialized with the start address
25322 align_rtx = alignment of the address.
25323 scratch = scratch register, initialized with the start address when
25324 not aligned, otherwise undefined
25326 This is just the body. It needs the initializations mentioned above and
25327 some address computing at the end. These things are done in i386.md. */
25329 static void
25330 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25332 int align;
25333 rtx tmp;
25334 rtx_code_label *align_2_label = NULL;
25335 rtx_code_label *align_3_label = NULL;
25336 rtx_code_label *align_4_label = gen_label_rtx ();
25337 rtx_code_label *end_0_label = gen_label_rtx ();
25338 rtx mem;
25339 rtx tmpreg = gen_reg_rtx (SImode);
25340 rtx scratch = gen_reg_rtx (SImode);
25341 rtx cmp;
25343 align = 0;
25344 if (CONST_INT_P (align_rtx))
25345 align = INTVAL (align_rtx);
25347 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25349 /* Is there a known alignment and is it less than 4? */
25350 if (align < 4)
25352 rtx scratch1 = gen_reg_rtx (Pmode);
25353 emit_move_insn (scratch1, out);
25354 /* Is there a known alignment and is it not 2? */
25355 if (align != 2)
25357 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25358 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25360 /* Leave just the 3 lower bits. */
25361 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25362 NULL_RTX, 0, OPTAB_WIDEN);
25364 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25365 Pmode, 1, align_4_label);
25366 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25367 Pmode, 1, align_2_label);
25368 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25369 Pmode, 1, align_3_label);
25371 else
25373 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25374 check if it is aligned to a 4-byte boundary. */
25376 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25377 NULL_RTX, 0, OPTAB_WIDEN);
25379 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25380 Pmode, 1, align_4_label);
25383 mem = change_address (src, QImode, out);
25385 /* Now compare the bytes. */
25387 /* Compare the first n unaligned byte on a byte per byte basis. */
25388 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25389 QImode, 1, end_0_label);
25391 /* Increment the address. */
25392 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25394 /* Not needed with an alignment of 2 */
25395 if (align != 2)
25397 emit_label (align_2_label);
25399 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25400 end_0_label);
25402 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25404 emit_label (align_3_label);
25407 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25408 end_0_label);
25410 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25413 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25414 align this loop: it only makes programs bigger and does not help to
25415 speed them up. */
25416 emit_label (align_4_label);
25418 mem = change_address (src, SImode, out);
25419 emit_move_insn (scratch, mem);
25420 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25422 /* This formula yields a nonzero result iff one of the bytes is zero.
25423 This saves three branches inside the loop and many cycles. */
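/* Worked example (illustrative): for the loaded word x = 0x41410041, whose
   byte 1 is zero, the computation (x - 0x01010101) & ~x & 0x80808080 emitted
   below leaves bit 15 set:

     x - 0x01010101 = 0x403FFF40
     ~x             = 0xBEBEFFBE
     AND            = 0x003EFF00
     & 0x80808080   = 0x00008000   (nonzero -> a zero byte was found)

   whereas any word with no zero byte yields 0 and the loop continues.  */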
25425 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25426 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25427 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25428 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25429 gen_int_mode (0x80808080, SImode)));
25430 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25431 align_4_label);
25433 if (TARGET_CMOVE)
25435 rtx reg = gen_reg_rtx (SImode);
25436 rtx reg2 = gen_reg_rtx (Pmode);
25437 emit_move_insn (reg, tmpreg);
25438 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25440 /* If zero is not in the first two bytes, move two bytes forward. */
25441 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25442 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25443 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25444 emit_insn (gen_rtx_SET (tmpreg,
25445 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25446 reg,
25447 tmpreg)));
25448 /* Emit lea manually to avoid clobbering of flags. */
25449 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25451 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25452 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25453 emit_insn (gen_rtx_SET (out,
25454 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25455 reg2,
25456 out)));
25458 else
25460 rtx_code_label *end_2_label = gen_label_rtx ();
25461 /* Is zero in the first two bytes? */
25463 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25464 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25465 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25466 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25467 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25468 pc_rtx);
25469 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25470 JUMP_LABEL (tmp) = end_2_label;
25472 /* Not in the first two. Move two bytes forward. */
25473 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25474 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25476 emit_label (end_2_label);
25480 /* Avoid branch in fixing the byte. */
25481 tmpreg = gen_lowpart (QImode, tmpreg);
25482 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25483 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25484 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25485 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25487 emit_label (end_0_label);
25490 /* Expand strlen. */
25492 bool
25493 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25495 rtx addr, scratch1, scratch2, scratch3, scratch4;
25497 /* The generic case of the strlen expander is long. Avoid
25498 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25500 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25501 && !TARGET_INLINE_ALL_STRINGOPS
25502 && !optimize_insn_for_size_p ()
25503 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25504 return false;
25506 addr = force_reg (Pmode, XEXP (src, 0));
25507 scratch1 = gen_reg_rtx (Pmode);
25509 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25510 && !optimize_insn_for_size_p ())
25512 /* Well, it seems that some optimizer does not combine a call like
25513 foo(strlen(bar), strlen(bar));
25514 when the move and the subtraction are done here. It does calculate
25515 the length just once when these instructions are emitted inside
25516 output_strlen_unroll(). But since &bar[strlen(bar)] is
25517 often used and one fewer register is live for the lifetime of
25518 output_strlen_unroll(), this is better. */
25520 emit_move_insn (out, addr);
25522 ix86_expand_strlensi_unroll_1 (out, src, align);
25524 /* strlensi_unroll_1 returns the address of the zero at the end of
25525 the string, like memchr(), so compute the length by subtracting
25526 the start address. */
25527 emit_insn (ix86_gen_sub3 (out, out, addr));
25529 else
25531 rtx unspec;
25533 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25534 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25535 return false;
25537 scratch2 = gen_reg_rtx (Pmode);
25538 scratch3 = gen_reg_rtx (Pmode);
25539 scratch4 = force_reg (Pmode, constm1_rtx);
25541 emit_move_insn (scratch3, addr);
25542 eoschar = force_reg (QImode, eoschar);
25544 src = replace_equiv_address_nv (src, scratch3);
25546 /* If .md starts supporting :P, this can be done in .md. */
25547 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25548 scratch4), UNSPEC_SCAS);
25549 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25550 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25551 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25553 return true;
25556 /* For a given symbol (function), construct code to compute the address of its
25557 PLT entry in the large x86-64 PIC model. */
25558 static rtx
25559 construct_plt_address (rtx symbol)
25561 rtx tmp, unspec;
25563 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25564 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25565 gcc_assert (Pmode == DImode);
25567 tmp = gen_reg_rtx (Pmode);
25568 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25570 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25571 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25572 return tmp;
25576 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25577 rtx callarg2,
25578 rtx pop, bool sibcall)
25580 rtx vec[3];
25581 rtx use = NULL, call;
25582 unsigned int vec_len = 0;
25584 if (pop == const0_rtx)
25585 pop = NULL;
25586 gcc_assert (!TARGET_64BIT || !pop);
25588 if (TARGET_MACHO && !TARGET_64BIT)
25590 #if TARGET_MACHO
25591 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25592 fnaddr = machopic_indirect_call_target (fnaddr);
25593 #endif
25595 else
25597 /* Static functions and indirect calls don't need the PIC register. Also,
25598 check if the PLT was explicitly avoided via -fno-plt or the "noplt" attribute,
25599 making it an indirect call. */
25600 if (flag_pic
25601 && (!TARGET_64BIT
25602 || (ix86_cmodel == CM_LARGE_PIC
25603 && DEFAULT_ABI != MS_ABI))
25604 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25605 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25606 && flag_plt
25607 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25608 || !lookup_attribute ("noplt",
25609 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25611 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25612 if (ix86_use_pseudo_pic_reg ())
25613 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25614 pic_offset_table_rtx);
25618 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25619 parameters passed in vector registers. */
25620 if (TARGET_64BIT
25621 && (INTVAL (callarg2) > 0
25622 || (INTVAL (callarg2) == 0
25623 && (TARGET_SSE || !flag_skip_rax_setup))))
25625 rtx al = gen_rtx_REG (QImode, AX_REG);
25626 emit_move_insn (al, callarg2);
25627 use_reg (&use, al);
25630 if (ix86_cmodel == CM_LARGE_PIC
25631 && !TARGET_PECOFF
25632 && MEM_P (fnaddr)
25633 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25634 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25635 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25636 else if (sibcall
25637 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25638 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25640 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25641 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25644 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25646 if (retval)
25648 /* We should add the bounds registers as destinations in case
25649 a pointer with bounds may be returned. */
25650 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25652 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25653 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25654 if (GET_CODE (retval) == PARALLEL)
25656 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25657 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25658 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25659 retval = chkp_join_splitted_slot (retval, par);
25661 else
25663 retval = gen_rtx_PARALLEL (VOIDmode,
25664 gen_rtvec (3, retval, b0, b1));
25665 chkp_put_regs_to_expr_list (retval);
25669 call = gen_rtx_SET (retval, call);
25671 vec[vec_len++] = call;
25673 if (pop)
25675 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25676 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25677 vec[vec_len++] = pop;
25680 if (TARGET_64BIT_MS_ABI
25681 && (!callarg2 || INTVAL (callarg2) != -2))
25683 int const cregs_size
25684 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25685 int i;
25687 for (i = 0; i < cregs_size; i++)
25689 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25690 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25692 clobber_reg (&use, gen_rtx_REG (mode, regno));
25696 if (vec_len > 1)
25697 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25698 call = emit_call_insn (call);
25699 if (use)
25700 CALL_INSN_FUNCTION_USAGE (call) = use;
25702 return call;
25705 /* Return true if the function being called was marked with attribute "noplt"
25706 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25707 handle the non-PIC case in the backend because there is no easy interface
25708 for the front-end to force non-PLT calls to use the GOT. This is currently
25709 used only with 64-bit ELF targets to call the function marked "noplt"
25710 indirectly. */
25712 static bool
25713 ix86_nopic_noplt_attribute_p (rtx call_op)
25715 if (flag_pic || ix86_cmodel == CM_LARGE
25716 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25717 || SYMBOL_REF_LOCAL_P (call_op))
25718 return false;
25720 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25722 if (!flag_plt
25723 || (symbol_decl != NULL_TREE
25724 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25725 return true;
25727 return false;
25730 /* Output the assembly for a call instruction. */
25732 const char *
25733 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25735 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25736 bool seh_nop_p = false;
25737 const char *xasm;
25739 if (SIBLING_CALL_P (insn))
25741 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25742 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25743 else if (direct_p)
25744 xasm = "%!jmp\t%P0";
25745 /* SEH epilogue detection requires the indirect branch case
25746 to include REX.W. */
25747 else if (TARGET_SEH)
25748 xasm = "%!rex.W jmp %A0";
25749 else
25750 xasm = "%!jmp\t%A0";
25752 output_asm_insn (xasm, &call_op);
25753 return "";
25756 /* SEH unwinding can require an extra nop to be emitted in several
25757 circumstances. Determine if we have one of those. */
25758 if (TARGET_SEH)
25760 rtx_insn *i;
25762 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25764 /* If we get to another real insn, we don't need the nop. */
25765 if (INSN_P (i))
25766 break;
25768 /* If we get to the epilogue note, prevent a catch region from
25769 being adjacent to the standard epilogue sequence. If non-
25770 call-exceptions, we'll have done this during epilogue emission. */
25771 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25772 && !flag_non_call_exceptions
25773 && !can_throw_internal (insn))
25775 seh_nop_p = true;
25776 break;
25780 /* If we didn't find a real insn following the call, prevent the
25781 unwinder from looking into the next function. */
25782 if (i == NULL)
25783 seh_nop_p = true;
25786 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25787 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25788 else if (direct_p)
25789 xasm = "%!call\t%P0";
25790 else
25791 xasm = "%!call\t%A0";
25793 output_asm_insn (xasm, &call_op);
25795 if (seh_nop_p)
25796 return "nop";
25798 return "";
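/* Example (illustrative): with -fno-plt on x86-64 ELF, a direct call to an
   external function "foo" is emitted through the GOT as

     call *foo@GOTPCREL(%rip)

   while the ordinary direct case produces a direct "call foo".  The "%!" in
   these templates expands to the "bnd " prefix when MPX-instrumented code is
   being generated.  */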
25801 /* Clear stack slot assignments remembered from previous functions.
25802 This is called from INIT_EXPANDERS once before RTL is emitted for each
25803 function. */
25805 static struct machine_function *
25806 ix86_init_machine_status (void)
25808 struct machine_function *f;
25810 f = ggc_cleared_alloc<machine_function> ();
25811 f->use_fast_prologue_epilogue_nregs = -1;
25812 f->call_abi = ix86_abi;
25814 return f;
25817 /* Return a MEM corresponding to a stack slot with mode MODE.
25818 Allocate a new slot if necessary.
25820 The RTL for a function can have several slots available: N is
25821 which slot to use. */
25824 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25826 struct stack_local_entry *s;
25828 gcc_assert (n < MAX_386_STACK_LOCALS);
25830 for (s = ix86_stack_locals; s; s = s->next)
25831 if (s->mode == mode && s->n == n)
25832 return validize_mem (copy_rtx (s->rtl));
25834 s = ggc_alloc<stack_local_entry> ();
25835 s->n = n;
25836 s->mode = mode;
25837 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25839 s->next = ix86_stack_locals;
25840 ix86_stack_locals = s;
25841 return validize_mem (copy_rtx (s->rtl));
25844 static void
25845 ix86_instantiate_decls (void)
25847 struct stack_local_entry *s;
25849 for (s = ix86_stack_locals; s; s = s->next)
25850 if (s->rtl != NULL_RTX)
25851 instantiate_decl_rtl (s->rtl);
25854 /* Check whether x86 address PARTS is a pc-relative address. */
25856 static bool
25857 rip_relative_addr_p (struct ix86_address *parts)
25859 rtx base, index, disp;
25861 base = parts->base;
25862 index = parts->index;
25863 disp = parts->disp;
25865 if (disp && !base && !index)
25867 if (TARGET_64BIT)
25869 rtx symbol = disp;
25871 if (GET_CODE (disp) == CONST)
25872 symbol = XEXP (disp, 0);
25873 if (GET_CODE (symbol) == PLUS
25874 && CONST_INT_P (XEXP (symbol, 1)))
25875 symbol = XEXP (symbol, 0);
25877 if (GET_CODE (symbol) == LABEL_REF
25878 || (GET_CODE (symbol) == SYMBOL_REF
25879 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25880 || (GET_CODE (symbol) == UNSPEC
25881 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25882 || XINT (symbol, 1) == UNSPEC_PCREL
25883 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25884 return true;
25887 return false;
25890 /* Calculate the length of the memory address in the instruction encoding.
25891 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25892 or other prefixes. We never generate addr32 prefix for LEA insn. */
25895 memory_address_length (rtx addr, bool lea)
25897 struct ix86_address parts;
25898 rtx base, index, disp;
25899 int len;
25900 int ok;
25902 if (GET_CODE (addr) == PRE_DEC
25903 || GET_CODE (addr) == POST_INC
25904 || GET_CODE (addr) == PRE_MODIFY
25905 || GET_CODE (addr) == POST_MODIFY)
25906 return 0;
25908 ok = ix86_decompose_address (addr, &parts);
25909 gcc_assert (ok);
25911 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25913 /* If this is not LEA instruction, add the length of addr32 prefix. */
25914 if (TARGET_64BIT && !lea
25915 && (SImode_address_operand (addr, VOIDmode)
25916 || (parts.base && GET_MODE (parts.base) == SImode)
25917 || (parts.index && GET_MODE (parts.index) == SImode)))
25918 len++;
25920 base = parts.base;
25921 index = parts.index;
25922 disp = parts.disp;
25924 if (base && GET_CODE (base) == SUBREG)
25925 base = SUBREG_REG (base);
25926 if (index && GET_CODE (index) == SUBREG)
25927 index = SUBREG_REG (index);
25929 gcc_assert (base == NULL_RTX || REG_P (base));
25930 gcc_assert (index == NULL_RTX || REG_P (index));
25932 /* Rule of thumb:
25933 - esp as the base always wants an index,
25934 - ebp as the base always wants a displacement,
25935 - r12 as the base always wants an index,
25936 - r13 as the base always wants a displacement. */
25938 /* Register Indirect. */
25939 if (base && !index && !disp)
25941 /* esp (for its index) and ebp (for its displacement) need
25942 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25943 code. */
25944 if (base == arg_pointer_rtx
25945 || base == frame_pointer_rtx
25946 || REGNO (base) == SP_REG
25947 || REGNO (base) == BP_REG
25948 || REGNO (base) == R12_REG
25949 || REGNO (base) == R13_REG)
25950 len++;
25953 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25954 is not disp32, but disp32(%rip), so for disp32
25955 SIB byte is needed, unless print_operand_address
25956 optimizes it into disp32(%rip) or (%rip) is implied
25957 by UNSPEC. */
25958 else if (disp && !base && !index)
25960 len += 4;
25961 if (rip_relative_addr_p (&parts))
25962 len++;
25964 else
25966 /* Find the length of the displacement constant. */
25967 if (disp)
25969 if (base && satisfies_constraint_K (disp))
25970 len += 1;
25971 else
25972 len += 4;
25974 /* ebp always wants a displacement. Similarly r13. */
25975 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25976 len++;
25978 /* An index requires the two-byte modrm form.... */
25979 if (index
25980 /* ...like esp (or r12), which always wants an index. */
25981 || base == arg_pointer_rtx
25982 || base == frame_pointer_rtx
25983 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25984 len++;
25987 return len;
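/* Illustrative length examples (assuming the default segment and no addr32
   prefix; the one-byte modrm and the opcode are not counted here):

     (%eax)         -> 0   plain register indirect
     (%esp)         -> 1   needs a SIB byte
     (%ebp)         -> 1   needs a disp8 of zero
     8(%eax)        -> 1   disp8 (satisfies constraint K)
     foo(,%eax,4)   -> 5   disp32 plus the SIB byte for the index
     foo(%rip)      -> 5   disp32 plus the extra byte counted for
                           rip-relative addresses in 64-bit mode  */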
25990 /* Compute the default value for the "length_immediate" attribute. When
25991 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
25993 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25995 int len = 0;
25996 int i;
25997 extract_insn_cached (insn);
25998 for (i = recog_data.n_operands - 1; i >= 0; --i)
25999 if (CONSTANT_P (recog_data.operand[i]))
26001 enum attr_mode mode = get_attr_mode (insn);
26003 gcc_assert (!len);
26004 if (shortform && CONST_INT_P (recog_data.operand[i]))
26006 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26007 switch (mode)
26009 case MODE_QI:
26010 len = 1;
26011 continue;
26012 case MODE_HI:
26013 ival = trunc_int_for_mode (ival, HImode);
26014 break;
26015 case MODE_SI:
26016 ival = trunc_int_for_mode (ival, SImode);
26017 break;
26018 default:
26019 break;
26021 if (IN_RANGE (ival, -128, 127))
26023 len = 1;
26024 continue;
26027 switch (mode)
26029 case MODE_QI:
26030 len = 1;
26031 break;
26032 case MODE_HI:
26033 len = 2;
26034 break;
26035 case MODE_SI:
26036 len = 4;
26037 break;
26038 /* Immediates for DImode instructions are encoded
26039 as 32bit sign extended values. */
26040 case MODE_DI:
26041 len = 4;
26042 break;
26043 default:
26044 fatal_insn ("unknown insn mode", insn);
26047 return len;
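/* Example (illustrative): for "addl $5, %eax" with SHORTFORM set, the
   constant 5 fits in [-128, 127], so the imm8 form is assumed and the
   attribute value is 1; "addl $1000, %eax" falls through to the MODE_SI
   case and yields 4.  */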
26050 /* Compute default value for "length_address" attribute. */
26052 ix86_attr_length_address_default (rtx_insn *insn)
26054 int i;
26056 if (get_attr_type (insn) == TYPE_LEA)
26058 rtx set = PATTERN (insn), addr;
26060 if (GET_CODE (set) == PARALLEL)
26061 set = XVECEXP (set, 0, 0);
26063 gcc_assert (GET_CODE (set) == SET);
26065 addr = SET_SRC (set);
26067 return memory_address_length (addr, true);
26070 extract_insn_cached (insn);
26071 for (i = recog_data.n_operands - 1; i >= 0; --i)
26072 if (MEM_P (recog_data.operand[i]))
26074 constrain_operands_cached (insn, reload_completed);
26075 if (which_alternative != -1)
26077 const char *constraints = recog_data.constraints[i];
26078 int alt = which_alternative;
26080 while (*constraints == '=' || *constraints == '+')
26081 constraints++;
26082 while (alt-- > 0)
26083 while (*constraints++ != ',')
26085 /* Skip ignored operands. */
26086 if (*constraints == 'X')
26087 continue;
26089 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26091 return 0;
26094 /* Compute default value for "length_vex" attribute. It includes
26095 2 or 3 byte VEX prefix and 1 opcode byte. */
26098 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26099 bool has_vex_w)
26101 int i;
26103 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
26104 requires the 3-byte VEX prefix. */
26105 if (!has_0f_opcode || has_vex_w)
26106 return 3 + 1;
26108 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26109 if (!TARGET_64BIT)
26110 return 2 + 1;
26112 extract_insn_cached (insn);
26114 for (i = recog_data.n_operands - 1; i >= 0; --i)
26115 if (REG_P (recog_data.operand[i]))
26117 /* REX.W bit uses 3 byte VEX prefix. */
26118 if (GET_MODE (recog_data.operand[i]) == DImode
26119 && GENERAL_REG_P (recog_data.operand[i]))
26120 return 3 + 1;
26122 else
26124 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26125 if (MEM_P (recog_data.operand[i])
26126 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26127 return 3 + 1;
26130 return 2 + 1;
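/* Example (illustrative): a VEX-encoded insn whose memory operand mentions
   one of %r8-%r15 (so REX.X or REX.B would be needed) is counted as 3 + 1
   bytes, while the same insn restricted to the low eight registers gets the
   2-byte VEX prefix, i.e. 2 + 1.  */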
26133 /* Return the maximum number of instructions a cpu can issue. */
26135 static int
26136 ix86_issue_rate (void)
26138 switch (ix86_tune)
26140 case PROCESSOR_PENTIUM:
26141 case PROCESSOR_BONNELL:
26142 case PROCESSOR_SILVERMONT:
26143 case PROCESSOR_KNL:
26144 case PROCESSOR_INTEL:
26145 case PROCESSOR_K6:
26146 case PROCESSOR_BTVER2:
26147 case PROCESSOR_PENTIUM4:
26148 case PROCESSOR_NOCONA:
26149 return 2;
26151 case PROCESSOR_PENTIUMPRO:
26152 case PROCESSOR_ATHLON:
26153 case PROCESSOR_K8:
26154 case PROCESSOR_AMDFAM10:
26155 case PROCESSOR_GENERIC:
26156 case PROCESSOR_BTVER1:
26157 return 3;
26159 case PROCESSOR_BDVER1:
26160 case PROCESSOR_BDVER2:
26161 case PROCESSOR_BDVER3:
26162 case PROCESSOR_BDVER4:
26163 case PROCESSOR_CORE2:
26164 case PROCESSOR_NEHALEM:
26165 case PROCESSOR_SANDYBRIDGE:
26166 case PROCESSOR_HASWELL:
26167 return 4;
26169 default:
26170 return 1;
26174 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
26175 by DEP_INSN and nothing else set by DEP_INSN. */
26177 static bool
26178 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26180 rtx set, set2;
26182 /* Simplify the test for uninteresting insns. */
26183 if (insn_type != TYPE_SETCC
26184 && insn_type != TYPE_ICMOV
26185 && insn_type != TYPE_FCMOV
26186 && insn_type != TYPE_IBR)
26187 return false;
26189 if ((set = single_set (dep_insn)) != 0)
26191 set = SET_DEST (set);
26192 set2 = NULL_RTX;
26194 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26195 && XVECLEN (PATTERN (dep_insn), 0) == 2
26196 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26197 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26199 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26200 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26202 else
26203 return false;
26205 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26206 return false;
26208 /* This test is true if the dependent insn reads the flags but
26209 not any other potentially set register. */
26210 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26211 return false;
26213 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26214 return false;
26216 return true;
26219 /* Return true iff USE_INSN has a memory address with operands set by
26220 SET_INSN. */
26222 bool
26223 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26225 int i;
26226 extract_insn_cached (use_insn);
26227 for (i = recog_data.n_operands - 1; i >= 0; --i)
26228 if (MEM_P (recog_data.operand[i]))
26230 rtx addr = XEXP (recog_data.operand[i], 0);
26231 return modified_in_p (addr, set_insn) != 0;
26233 return false;
26236 /* Helper function for exact_store_load_dependency.
26237 Return true if addr is found in insn. */
26238 static bool
26239 exact_dependency_1 (rtx addr, rtx insn)
26241 enum rtx_code code;
26242 const char *format_ptr;
26243 int i, j;
26245 code = GET_CODE (insn);
26246 switch (code)
26248 case MEM:
26249 if (rtx_equal_p (addr, insn))
26250 return true;
26251 break;
26252 case REG:
26253 CASE_CONST_ANY:
26254 case SYMBOL_REF:
26255 case CODE_LABEL:
26256 case PC:
26257 case CC0:
26258 case EXPR_LIST:
26259 return false;
26260 default:
26261 break;
26264 format_ptr = GET_RTX_FORMAT (code);
26265 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26267 switch (*format_ptr++)
26269 case 'e':
26270 if (exact_dependency_1 (addr, XEXP (insn, i)))
26271 return true;
26272 break;
26273 case 'E':
26274 for (j = 0; j < XVECLEN (insn, i); j++)
26275 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26276 return true;
26277 break;
26280 return false;
26283 /* Return true if there exists an exact dependency between a store and a load,
26284 i.e. the same memory address is used in them. */
26285 static bool
26286 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26288 rtx set1, set2;
26290 set1 = single_set (store);
26291 if (!set1)
26292 return false;
26293 if (!MEM_P (SET_DEST (set1)))
26294 return false;
26295 set2 = single_set (load);
26296 if (!set2)
26297 return false;
26298 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26299 return true;
26300 return false;
26303 static int
26304 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26306 enum attr_type insn_type, dep_insn_type;
26307 enum attr_memory memory;
26308 rtx set, set2;
26309 int dep_insn_code_number;
26311 /* Anti and output dependencies have zero cost on all CPUs. */
26312 if (REG_NOTE_KIND (link) != 0)
26313 return 0;
26315 dep_insn_code_number = recog_memoized (dep_insn);
26317 /* If we can't recognize the insns, we can't really do anything. */
26318 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26319 return cost;
26321 insn_type = get_attr_type (insn);
26322 dep_insn_type = get_attr_type (dep_insn);
26324 switch (ix86_tune)
26326 case PROCESSOR_PENTIUM:
26327 /* Address Generation Interlock adds a cycle of latency. */
26328 if (insn_type == TYPE_LEA)
26330 rtx addr = PATTERN (insn);
26332 if (GET_CODE (addr) == PARALLEL)
26333 addr = XVECEXP (addr, 0, 0);
26335 gcc_assert (GET_CODE (addr) == SET);
26337 addr = SET_SRC (addr);
26338 if (modified_in_p (addr, dep_insn))
26339 cost += 1;
26341 else if (ix86_agi_dependent (dep_insn, insn))
26342 cost += 1;
26344 /* ??? Compares pair with jump/setcc. */
26345 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26346 cost = 0;
26348 /* Floating point stores require value to be ready one cycle earlier. */
26349 if (insn_type == TYPE_FMOV
26350 && get_attr_memory (insn) == MEMORY_STORE
26351 && !ix86_agi_dependent (dep_insn, insn))
26352 cost += 1;
26353 break;
26355 case PROCESSOR_PENTIUMPRO:
26356 /* INT->FP conversion is expensive. */
26357 if (get_attr_fp_int_src (dep_insn))
26358 cost += 5;
26360 /* There is one cycle extra latency between an FP op and a store. */
26361 if (insn_type == TYPE_FMOV
26362 && (set = single_set (dep_insn)) != NULL_RTX
26363 && (set2 = single_set (insn)) != NULL_RTX
26364 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26365 && MEM_P (SET_DEST (set2)))
26366 cost += 1;
26368 memory = get_attr_memory (insn);
26370 /* Show the ability of the reorder buffer to hide the latency of a load by
26371 executing it in parallel with the previous instruction when the
26372 previous instruction is not needed to compute the address. */
26373 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26374 && !ix86_agi_dependent (dep_insn, insn))
26376 /* Claim moves to take one cycle, as the core can issue one load
26377 at a time and the next load can start a cycle later. */
26378 if (dep_insn_type == TYPE_IMOV
26379 || dep_insn_type == TYPE_FMOV)
26380 cost = 1;
26381 else if (cost > 1)
26382 cost--;
26384 break;
26386 case PROCESSOR_K6:
26387 /* The esp dependency is resolved before
26388 the instruction is really finished. */
26389 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26390 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26391 return 1;
26393 /* INT->FP conversion is expensive. */
26394 if (get_attr_fp_int_src (dep_insn))
26395 cost += 5;
26397 memory = get_attr_memory (insn);
26399 /* Show the ability of the reorder buffer to hide the latency of a load by
26400 executing it in parallel with the previous instruction when the
26401 previous instruction is not needed to compute the address. */
26402 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26403 && !ix86_agi_dependent (dep_insn, insn))
26405 /* Claim moves to take one cycle, as the core can issue one load
26406 at a time and the next load can start a cycle later. */
26407 if (dep_insn_type == TYPE_IMOV
26408 || dep_insn_type == TYPE_FMOV)
26409 cost = 1;
26410 else if (cost > 2)
26411 cost -= 2;
26412 else
26413 cost = 1;
26415 break;
26417 case PROCESSOR_AMDFAM10:
26418 case PROCESSOR_BDVER1:
26419 case PROCESSOR_BDVER2:
26420 case PROCESSOR_BDVER3:
26421 case PROCESSOR_BDVER4:
26422 case PROCESSOR_BTVER1:
26423 case PROCESSOR_BTVER2:
26424 case PROCESSOR_GENERIC:
26425 /* The stack engine allows push and pop instructions to execute in parallel. */
26426 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26427 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26428 return 0;
26429 /* FALLTHRU */
26431 case PROCESSOR_ATHLON:
26432 case PROCESSOR_K8:
26433 memory = get_attr_memory (insn);
26435 /* Show the ability of the reorder buffer to hide the latency of a load
26436 by executing it in parallel with the previous instruction when the
26437 previous instruction is not needed to compute the address. */
26438 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26439 && !ix86_agi_dependent (dep_insn, insn))
26441 enum attr_unit unit = get_attr_unit (insn);
26442 int loadcost = 3;
26444 /* Because of the difference between the lengths of the integer and
26445 floating point unit pipeline preparation stages, the memory operands
26446 for floating point are cheaper.
26448 ??? For Athlon the difference is most probably 2. */
26449 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26450 loadcost = 3;
26451 else
26452 loadcost = TARGET_ATHLON ? 2 : 0;
26454 if (cost >= loadcost)
26455 cost -= loadcost;
26456 else
26457 cost = 0;
26459 break;
26461 case PROCESSOR_CORE2:
26462 case PROCESSOR_NEHALEM:
26463 case PROCESSOR_SANDYBRIDGE:
26464 case PROCESSOR_HASWELL:
26465 /* The stack engine allows push and pop instructions to execute in parallel. */
26466 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26467 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26468 return 0;
26470 memory = get_attr_memory (insn);
26472 /* Show the ability of the reorder buffer to hide the latency of a load
26473 by executing it in parallel with the previous instruction when the
26474 previous instruction is not needed to compute the address. */
26475 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26476 && !ix86_agi_dependent (dep_insn, insn))
26478 if (cost >= 4)
26479 cost -= 4;
26480 else
26481 cost = 0;
26483 break;
26485 case PROCESSOR_SILVERMONT:
26486 case PROCESSOR_KNL:
26487 case PROCESSOR_INTEL:
26488 if (!reload_completed)
26489 return cost;
26491 /* Increase cost of integer loads. */
26492 memory = get_attr_memory (dep_insn);
26493 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26495 enum attr_unit unit = get_attr_unit (dep_insn);
26496 if (unit == UNIT_INTEGER && cost == 1)
26498 if (memory == MEMORY_LOAD)
26499 cost = 3;
26500 else
26502 /* Increase the cost of ld/st for short int types only,
26503 because of the store forwarding issue. */
26504 rtx set = single_set (dep_insn);
26505 if (set && (GET_MODE (SET_DEST (set)) == QImode
26506 || GET_MODE (SET_DEST (set)) == HImode))
26508 /* Increase the cost of a store/load insn if an exact
26509 dependence exists and it is a load insn. */
26510 enum attr_memory insn_memory = get_attr_memory (insn);
26511 if (insn_memory == MEMORY_LOAD
26512 && exact_store_load_dependency (dep_insn, insn))
26513 cost = 3;
26519 default:
26520 break;
26523 return cost;
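/* Illustrative sketch (not part of the original source): on Pentium, the
   Address Generation Interlock penalty modelled above fires when a load's
   address is produced by the immediately preceding insn, e.g.

       addl  $4, %ecx          # writes %ecx
       movl  (%ecx), %eax      # uses %ecx for address generation -> +1 cycle

   whereas a load whose address registers are not touched by the previous
   insn incurs no extra latency here.  */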
26526 /* How many alternative schedules to try. This should be as wide as the
26527 scheduling freedom in the DFA, but no wider. Making this value too
26528 large results in extra work for the scheduler. */
26530 static int
26531 ia32_multipass_dfa_lookahead (void)
26533 switch (ix86_tune)
26535 case PROCESSOR_PENTIUM:
26536 return 2;
26538 case PROCESSOR_PENTIUMPRO:
26539 case PROCESSOR_K6:
26540 return 1;
26542 case PROCESSOR_BDVER1:
26543 case PROCESSOR_BDVER2:
26544 case PROCESSOR_BDVER3:
26545 case PROCESSOR_BDVER4:
26546 /* We use lookahead value 4 for BD both before and after reload
26547 schedules. The plan is to use value 8 for -O3. */
26548 return 4;
26550 case PROCESSOR_CORE2:
26551 case PROCESSOR_NEHALEM:
26552 case PROCESSOR_SANDYBRIDGE:
26553 case PROCESSOR_HASWELL:
26554 case PROCESSOR_BONNELL:
26555 case PROCESSOR_SILVERMONT:
26556 case PROCESSOR_KNL:
26557 case PROCESSOR_INTEL:
26558 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26559 as the number of instructions that can be executed in a cycle, i.e.,
26560 issue_rate. I wonder why tuning for many CPUs does not do this. */
26561 if (reload_completed)
26562 return ix86_issue_rate ();
26563 /* Don't use lookahead for pre-reload schedule to save compile time. */
26564 return 0;
26566 default:
26567 return 0;
26571 /* Return true if target platform supports macro-fusion. */
26573 static bool
26574 ix86_macro_fusion_p ()
26576 return TARGET_FUSE_CMP_AND_BRANCH;
26579 /* Check whether the current microarchitecture supports macro fusion
26580 for the insn pair "CONDGEN + CONDJMP". Refer to the
26581 "Intel Architectures Optimization Reference Manual". */
26583 static bool
26584 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26586 rtx src, dest;
26587 enum rtx_code ccode;
26588 rtx compare_set = NULL_RTX, test_if, cond;
26589 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26591 if (!any_condjump_p (condjmp))
26592 return false;
26594 if (get_attr_type (condgen) != TYPE_TEST
26595 && get_attr_type (condgen) != TYPE_ICMP
26596 && get_attr_type (condgen) != TYPE_INCDEC
26597 && get_attr_type (condgen) != TYPE_ALU)
26598 return false;
26600 compare_set = single_set (condgen);
26601 if (compare_set == NULL_RTX
26602 && !TARGET_FUSE_ALU_AND_BRANCH)
26603 return false;
26605 if (compare_set == NULL_RTX)
26607 int i;
26608 rtx pat = PATTERN (condgen);
26609 for (i = 0; i < XVECLEN (pat, 0); i++)
26610 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26612 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26613 if (GET_CODE (set_src) == COMPARE)
26614 compare_set = XVECEXP (pat, 0, i);
26615 else
26616 alu_set = XVECEXP (pat, 0, i);
26619 if (compare_set == NULL_RTX)
26620 return false;
26621 src = SET_SRC (compare_set);
26622 if (GET_CODE (src) != COMPARE)
26623 return false;
26625 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26626 supported. */
26627 if ((MEM_P (XEXP (src, 0))
26628 && CONST_INT_P (XEXP (src, 1)))
26629 || (MEM_P (XEXP (src, 1))
26630 && CONST_INT_P (XEXP (src, 0))))
26631 return false;
26633 /* No fusion for RIP-relative address. */
26634 if (MEM_P (XEXP (src, 0)))
26635 addr = XEXP (XEXP (src, 0), 0);
26636 else if (MEM_P (XEXP (src, 1)))
26637 addr = XEXP (XEXP (src, 1), 0);
26639 if (addr) {
26640 ix86_address parts;
26641 int ok = ix86_decompose_address (addr, &parts);
26642 gcc_assert (ok);
26644 if (rip_relative_addr_p (&parts))
26645 return false;
26648 test_if = SET_SRC (pc_set (condjmp));
26649 cond = XEXP (test_if, 0);
26650 ccode = GET_CODE (cond);
26651 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26652 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26653 && (ccode == GE
26654 || ccode == GT
26655 || ccode == LE
26656 || ccode == LT))
26657 return false;
26659 /* Return true for TYPE_TEST and TYPE_ICMP. */
26660 if (get_attr_type (condgen) == TYPE_TEST
26661 || get_attr_type (condgen) == TYPE_ICMP)
26662 return true;
26664 /* The following handles the macro-fusion case of ALU + jmp. */
26665 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26666 return false;
26668 /* No fusion for alu op with memory destination operand. */
26669 dest = SET_DEST (alu_set);
26670 if (MEM_P (dest))
26671 return false;
26673 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26674 supported. */
26675 if (get_attr_type (condgen) == TYPE_INCDEC
26676 && (ccode == GEU
26677 || ccode == GTU
26678 || ccode == LEU
26679 || ccode == LTU))
26680 return false;
26682 return true;
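/* Hedged example (illustrative only, not part of the original file): a pair
   such as

       cmpl  $0, %eax
       je    .L2

   passes the checks above and can be macro-fused on these cores, while

       cmpl  $1, 8(%esp)       # cmp MEM-IMM
       je    .L2

   is rejected by the MEM-IMM test.  */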
26685 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26686 execution. It is applied if
26687 (1) An IMUL instruction is at the top of the list;
26688 (2) There is exactly one producer of an independent IMUL instruction in
26689 the ready list.
26690 Return index of IMUL producer if it was found and -1 otherwise. */
26691 static int
26692 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26694 rtx_insn *insn;
26695 rtx set, insn1, insn2;
26696 sd_iterator_def sd_it;
26697 dep_t dep;
26698 int index = -1;
26699 int i;
26701 if (!TARGET_BONNELL)
26702 return index;
26704 /* Check that IMUL instruction is on the top of ready list. */
26705 insn = ready[n_ready - 1];
26706 set = single_set (insn);
26707 if (!set)
26708 return index;
26709 if (!(GET_CODE (SET_SRC (set)) == MULT
26710 && GET_MODE (SET_SRC (set)) == SImode))
26711 return index;
26713 /* Search for producer of independent IMUL instruction. */
26714 for (i = n_ready - 2; i >= 0; i--)
26716 insn = ready[i];
26717 if (!NONDEBUG_INSN_P (insn))
26718 continue;
26719 /* Skip IMUL instruction. */
26720 insn2 = PATTERN (insn);
26721 if (GET_CODE (insn2) == PARALLEL)
26722 insn2 = XVECEXP (insn2, 0, 0);
26723 if (GET_CODE (insn2) == SET
26724 && GET_CODE (SET_SRC (insn2)) == MULT
26725 && GET_MODE (SET_SRC (insn2)) == SImode)
26726 continue;
26728 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26730 rtx con;
26731 con = DEP_CON (dep);
26732 if (!NONDEBUG_INSN_P (con))
26733 continue;
26734 insn1 = PATTERN (con);
26735 if (GET_CODE (insn1) == PARALLEL)
26736 insn1 = XVECEXP (insn1, 0, 0);
26738 if (GET_CODE (insn1) == SET
26739 && GET_CODE (SET_SRC (insn1)) == MULT
26740 && GET_MODE (SET_SRC (insn1)) == SImode)
26742 sd_iterator_def sd_it1;
26743 dep_t dep1;
26744 /* Check if there is no other dependee for IMUL. */
26745 index = i;
26746 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26748 rtx pro;
26749 pro = DEP_PRO (dep1);
26750 if (!NONDEBUG_INSN_P (pro))
26751 continue;
26752 if (pro != insn)
26753 index = -1;
26755 if (index >= 0)
26756 break;
26759 if (index >= 0)
26760 break;
26762 return index;
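/* Sketch of the reordering driven by the helper above (assumed example, not
   from the original source): if the ready list holds

       ..., mov_B (feeds imul_B), imul_A        <- imul_A on top

   and imul_A does not depend on mov_B, then mov_B is placed on top of the
   list so that imul_B becomes ready soon enough to follow imul_A into
   Atom's pipelined multiplier.  */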
26765 /* Try to find the best candidate on the top of ready list if two insns
26766 have the same priority; the best candidate is the one whose dependees were
26767 scheduled earlier. Applied for Silvermont only.
26768 Return true if top 2 insns must be interchanged. */
26769 static bool
26770 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26772 rtx_insn *top = ready[n_ready - 1];
26773 rtx_insn *next = ready[n_ready - 2];
26774 rtx set;
26775 sd_iterator_def sd_it;
26776 dep_t dep;
26777 int clock1 = -1;
26778 int clock2 = -1;
26779 #define INSN_TICK(INSN) (HID (INSN)->tick)
26781 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26782 return false;
26784 if (!NONDEBUG_INSN_P (top))
26785 return false;
26786 if (!NONJUMP_INSN_P (top))
26787 return false;
26788 if (!NONDEBUG_INSN_P (next))
26789 return false;
26790 if (!NONJUMP_INSN_P (next))
26791 return false;
26792 set = single_set (top);
26793 if (!set)
26794 return false;
26795 set = single_set (next);
26796 if (!set)
26797 return false;
26799 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26801 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26802 return false;
26803 /* Determine the winner more precisely. */
26804 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26806 rtx pro;
26807 pro = DEP_PRO (dep);
26808 if (!NONDEBUG_INSN_P (pro))
26809 continue;
26810 if (INSN_TICK (pro) > clock1)
26811 clock1 = INSN_TICK (pro);
26813 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26815 rtx pro;
26816 pro = DEP_PRO (dep);
26817 if (!NONDEBUG_INSN_P (pro))
26818 continue;
26819 if (INSN_TICK (pro) > clock2)
26820 clock2 = INSN_TICK (pro);
26823 if (clock1 == clock2)
26825 /* Determine winner - load must win. */
26826 enum attr_memory memory1, memory2;
26827 memory1 = get_attr_memory (top);
26828 memory2 = get_attr_memory (next);
26829 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26830 return true;
26832 return (bool) (clock2 < clock1);
26834 return false;
26835 #undef INSN_TICK
26838 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26839 Return issue rate. */
26840 static int
26841 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26842 int *pn_ready, int clock_var)
26844 int issue_rate = -1;
26845 int n_ready = *pn_ready;
26846 int i;
26847 rtx_insn *insn;
26848 int index = -1;
26850 /* Set up issue rate. */
26851 issue_rate = ix86_issue_rate ();
26853 /* Do reordering for BONNELL/SILVERMONT only. */
26854 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26855 return issue_rate;
26857 /* Nothing to do if ready list contains only 1 instruction. */
26858 if (n_ready <= 1)
26859 return issue_rate;
26861 /* Do reordering for the post-reload scheduler only. */
26862 if (!reload_completed)
26863 return issue_rate;
26865 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26867 if (sched_verbose > 1)
26868 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26869 INSN_UID (ready[index]));
26871 /* Put IMUL producer (ready[index]) at the top of ready list. */
26872 insn = ready[index];
26873 for (i = index; i < n_ready - 1; i++)
26874 ready[i] = ready[i + 1];
26875 ready[n_ready - 1] = insn;
26876 return issue_rate;
26879 /* Skip selective scheduling since HID is not populated in it. */
26880 if (clock_var != 0
26881 && !sel_sched_p ()
26882 && swap_top_of_ready_list (ready, n_ready))
26884 if (sched_verbose > 1)
26885 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26886 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26887 /* Swap 2 top elements of ready list. */
26888 insn = ready[n_ready - 1];
26889 ready[n_ready - 1] = ready[n_ready - 2];
26890 ready[n_ready - 2] = insn;
26892 return issue_rate;
26895 static bool
26896 ix86_class_likely_spilled_p (reg_class_t);
26898 /* Return true if the lhs of INSN is a HW function argument register, and set
26899 IS_SPILLED to true if it is a likely-spilled HW register. */
26900 static bool
26901 insn_is_function_arg (rtx insn, bool* is_spilled)
26903 rtx dst;
26905 if (!NONDEBUG_INSN_P (insn))
26906 return false;
26907 /* Call instructions are not movable; ignore them. */
26908 if (CALL_P (insn))
26909 return false;
26910 insn = PATTERN (insn);
26911 if (GET_CODE (insn) == PARALLEL)
26912 insn = XVECEXP (insn, 0, 0);
26913 if (GET_CODE (insn) != SET)
26914 return false;
26915 dst = SET_DEST (insn);
26916 if (REG_P (dst) && HARD_REGISTER_P (dst)
26917 && ix86_function_arg_regno_p (REGNO (dst)))
26919 /* Is it likely spilled HW register? */
26920 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26921 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26922 *is_spilled = true;
26923 return true;
26925 return false;
26928 /* Add output dependencies for a chain of adjacent function arguments, but only
26929 if there is a move to a likely-spilled HW register. Return the first argument
26930 if at least one dependence was added, or NULL otherwise. */
26931 static rtx_insn *
26932 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26934 rtx_insn *insn;
26935 rtx_insn *last = call;
26936 rtx_insn *first_arg = NULL;
26937 bool is_spilled = false;
26939 head = PREV_INSN (head);
26941 /* Find the argument-passing instruction nearest to the call. */
26942 while (true)
26944 last = PREV_INSN (last);
26945 if (last == head)
26946 return NULL;
26947 if (!NONDEBUG_INSN_P (last))
26948 continue;
26949 if (insn_is_function_arg (last, &is_spilled))
26950 break;
26951 return NULL;
26954 first_arg = last;
26955 while (true)
26957 insn = PREV_INSN (last);
26958 if (!INSN_P (insn))
26959 break;
26960 if (insn == head)
26961 break;
26962 if (!NONDEBUG_INSN_P (insn))
26964 last = insn;
26965 continue;
26967 if (insn_is_function_arg (insn, &is_spilled))
26969 /* Add an output dependence between two function arguments if the chain
26970 of output arguments contains likely-spilled HW registers. */
26971 if (is_spilled)
26972 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26973 first_arg = last = insn;
26975 else
26976 break;
26978 if (!is_spilled)
26979 return NULL;
26980 return first_arg;
26983 /* Add output or anti dependency from insn to first_arg to restrict its code
26984 motion. */
26985 static void
26986 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26988 rtx set;
26989 rtx tmp;
26991 /* Add anti dependencies for bounds stores. */
26992 if (INSN_P (insn)
26993 && GET_CODE (PATTERN (insn)) == PARALLEL
26994 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26995 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26997 add_dependence (first_arg, insn, REG_DEP_ANTI);
26998 return;
27001 set = single_set (insn);
27002 if (!set)
27003 return;
27004 tmp = SET_DEST (set);
27005 if (REG_P (tmp))
27007 /* Add output dependency to the first function argument. */
27008 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27009 return;
27011 /* Add anti dependency. */
27012 add_dependence (first_arg, insn, REG_DEP_ANTI);
27015 /* Avoid cross-block motion of a function argument by adding a dependency
27016 from the first non-jump instruction in bb. */
27017 static void
27018 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27020 rtx_insn *insn = BB_END (bb);
27022 while (insn)
27024 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27026 rtx set = single_set (insn);
27027 if (set)
27029 avoid_func_arg_motion (arg, insn);
27030 return;
27033 if (insn == BB_HEAD (bb))
27034 return;
27035 insn = PREV_INSN (insn);
27039 /* Hook for pre-reload schedule - avoid motion of function arguments
27040 passed in likely spilled HW registers. */
27041 static void
27042 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27044 rtx_insn *insn;
27045 rtx_insn *first_arg = NULL;
27046 if (reload_completed)
27047 return;
27048 while (head != tail && DEBUG_INSN_P (head))
27049 head = NEXT_INSN (head);
27050 for (insn = tail; insn != head; insn = PREV_INSN (insn))
27051 if (INSN_P (insn) && CALL_P (insn))
27053 first_arg = add_parameter_dependencies (insn, head);
27054 if (first_arg)
27056 /* Add a dependee for the first argument to predecessors, but only if the
27057 region contains more than one block. */
27058 basic_block bb = BLOCK_FOR_INSN (insn);
27059 int rgn = CONTAINING_RGN (bb->index);
27060 int nr_blks = RGN_NR_BLOCKS (rgn);
27061 /* Skip trivial regions and region head blocks that can have
27062 predecessors outside of region. */
27063 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27065 edge e;
27066 edge_iterator ei;
27068 /* Regions are SCCs with the exception of selective
27069 scheduling with pipelining of outer blocks enabled.
27070 So also check that immediate predecessors of a non-head
27071 block are in the same region. */
27072 FOR_EACH_EDGE (e, ei, bb->preds)
27074 /* Avoid creating loop-carried dependencies by using the
27075 topological ordering in the region. */
27076 if (rgn == CONTAINING_RGN (e->src->index)
27077 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27078 add_dependee_for_func_arg (first_arg, e->src);
27081 insn = first_arg;
27082 if (insn == head)
27083 break;
27086 else if (first_arg)
27087 avoid_func_arg_motion (first_arg, insn);
27090 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27091 HW registers to maximum, to schedule them as soon as possible. These are
27092 moves from function argument registers at the top of the function entry
27093 and moves from function return value registers after call. */
27094 static int
27095 ix86_adjust_priority (rtx_insn *insn, int priority)
27097 rtx set;
27099 if (reload_completed)
27100 return priority;
27102 if (!NONDEBUG_INSN_P (insn))
27103 return priority;
27105 set = single_set (insn);
27106 if (set)
27108 rtx tmp = SET_SRC (set);
27109 if (REG_P (tmp)
27110 && HARD_REGISTER_P (tmp)
27111 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27112 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27113 return current_sched_info->sched_max_insns_priority;
27116 return priority;
27119 /* Model the decoder of Core 2/i7.
27120 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
27121 track the instruction fetch block boundaries and make sure that long
27122 (9+ byte) instructions are assigned to D0. */
27124 /* Maximum length of an insn that can be handled by
27125 a secondary decoder unit. '8' for Core 2/i7. */
27126 static int core2i7_secondary_decoder_max_insn_size;
27128 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27129 '16' for Core 2/i7. */
27130 static int core2i7_ifetch_block_size;
27132 /* Maximum number of instructions decoder can handle per cycle.
27133 '6' for Core 2/i7. */
27134 static int core2i7_ifetch_block_max_insns;
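/* Worked example under the parameter values installed below (8 / 16 / 6;
   illustrative only): a 9-byte insn is masked out for the secondary decoders
   and can only be taken as the first insn of a decode group (D0), and insns
   stop being taken once the 16-byte ifetch block or the 6-insn per-cycle
   limit is exhausted.  */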
27136 typedef struct ix86_first_cycle_multipass_data_ *
27137 ix86_first_cycle_multipass_data_t;
27138 typedef const struct ix86_first_cycle_multipass_data_ *
27139 const_ix86_first_cycle_multipass_data_t;
27141 /* A variable to store target state across calls to max_issue within
27142 one cycle. */
27143 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27144 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27146 /* Initialize DATA. */
27147 static void
27148 core2i7_first_cycle_multipass_init (void *_data)
27150 ix86_first_cycle_multipass_data_t data
27151 = (ix86_first_cycle_multipass_data_t) _data;
27153 data->ifetch_block_len = 0;
27154 data->ifetch_block_n_insns = 0;
27155 data->ready_try_change = NULL;
27156 data->ready_try_change_size = 0;
27159 /* Advancing the cycle; reset ifetch block counts. */
27160 static void
27161 core2i7_dfa_post_advance_cycle (void)
27163 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27165 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27167 data->ifetch_block_len = 0;
27168 data->ifetch_block_n_insns = 0;
27171 static int min_insn_size (rtx_insn *);
27173 /* Filter out insns from ready_try that the core will not be able to issue
27174 on current cycle due to decoder. */
27175 static void
27176 core2i7_first_cycle_multipass_filter_ready_try
27177 (const_ix86_first_cycle_multipass_data_t data,
27178 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27180 while (n_ready--)
27182 rtx_insn *insn;
27183 int insn_size;
27185 if (ready_try[n_ready])
27186 continue;
27188 insn = get_ready_element (n_ready);
27189 insn_size = min_insn_size (insn);
27191 if (/* If this insn is too long for a secondary decoder ... */
27192 (!first_cycle_insn_p
27193 && insn_size > core2i7_secondary_decoder_max_insn_size)
27194 /* ... or it would not fit into the ifetch block ... */
27195 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27196 /* ... or the decoder is full already ... */
27197 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27198 /* ... mask the insn out. */
27200 ready_try[n_ready] = 1;
27202 if (data->ready_try_change)
27203 bitmap_set_bit (data->ready_try_change, n_ready);
27208 /* Prepare for a new round of multipass lookahead scheduling. */
27209 static void
27210 core2i7_first_cycle_multipass_begin (void *_data,
27211 signed char *ready_try, int n_ready,
27212 bool first_cycle_insn_p)
27214 ix86_first_cycle_multipass_data_t data
27215 = (ix86_first_cycle_multipass_data_t) _data;
27216 const_ix86_first_cycle_multipass_data_t prev_data
27217 = ix86_first_cycle_multipass_data;
27219 /* Restore the state from the end of the previous round. */
27220 data->ifetch_block_len = prev_data->ifetch_block_len;
27221 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27223 /* Filter instructions that cannot be issued on current cycle due to
27224 decoder restrictions. */
27225 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27226 first_cycle_insn_p);
27229 /* INSN is being issued in current solution. Account for its impact on
27230 the decoder model. */
27231 static void
27232 core2i7_first_cycle_multipass_issue (void *_data,
27233 signed char *ready_try, int n_ready,
27234 rtx_insn *insn, const void *_prev_data)
27236 ix86_first_cycle_multipass_data_t data
27237 = (ix86_first_cycle_multipass_data_t) _data;
27238 const_ix86_first_cycle_multipass_data_t prev_data
27239 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27241 int insn_size = min_insn_size (insn);
27243 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27244 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27245 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27246 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27248 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27249 if (!data->ready_try_change)
27251 data->ready_try_change = sbitmap_alloc (n_ready);
27252 data->ready_try_change_size = n_ready;
27254 else if (data->ready_try_change_size < n_ready)
27256 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27257 n_ready, 0);
27258 data->ready_try_change_size = n_ready;
27260 bitmap_clear (data->ready_try_change);
27262 /* Filter out insns from ready_try that the core will not be able to issue
27263 on current cycle due to decoder. */
27264 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27265 false);
27268 /* Revert the effect on ready_try. */
27269 static void
27270 core2i7_first_cycle_multipass_backtrack (const void *_data,
27271 signed char *ready_try,
27272 int n_ready ATTRIBUTE_UNUSED)
27274 const_ix86_first_cycle_multipass_data_t data
27275 = (const_ix86_first_cycle_multipass_data_t) _data;
27276 unsigned int i = 0;
27277 sbitmap_iterator sbi;
27279 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27280 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27282 ready_try[i] = 0;
27286 /* Save the result of multipass lookahead scheduling for the next round. */
27287 static void
27288 core2i7_first_cycle_multipass_end (const void *_data)
27290 const_ix86_first_cycle_multipass_data_t data
27291 = (const_ix86_first_cycle_multipass_data_t) _data;
27292 ix86_first_cycle_multipass_data_t next_data
27293 = ix86_first_cycle_multipass_data;
27295 if (data != NULL)
27297 next_data->ifetch_block_len = data->ifetch_block_len;
27298 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27302 /* Deallocate target data. */
27303 static void
27304 core2i7_first_cycle_multipass_fini (void *_data)
27306 ix86_first_cycle_multipass_data_t data
27307 = (ix86_first_cycle_multipass_data_t) _data;
27309 if (data->ready_try_change)
27311 sbitmap_free (data->ready_try_change);
27312 data->ready_try_change = NULL;
27313 data->ready_try_change_size = 0;
27317 /* Prepare for scheduling pass. */
27318 static void
27319 ix86_sched_init_global (FILE *, int, int)
27321 /* Install scheduling hooks for current CPU. Some of these hooks are used
27322 in time-critical parts of the scheduler, so we only set them up when
27323 they are actually used. */
27324 switch (ix86_tune)
27326 case PROCESSOR_CORE2:
27327 case PROCESSOR_NEHALEM:
27328 case PROCESSOR_SANDYBRIDGE:
27329 case PROCESSOR_HASWELL:
27330 /* Do not perform multipass scheduling for pre-reload schedule
27331 to save compile time. */
27332 if (reload_completed)
27334 targetm.sched.dfa_post_advance_cycle
27335 = core2i7_dfa_post_advance_cycle;
27336 targetm.sched.first_cycle_multipass_init
27337 = core2i7_first_cycle_multipass_init;
27338 targetm.sched.first_cycle_multipass_begin
27339 = core2i7_first_cycle_multipass_begin;
27340 targetm.sched.first_cycle_multipass_issue
27341 = core2i7_first_cycle_multipass_issue;
27342 targetm.sched.first_cycle_multipass_backtrack
27343 = core2i7_first_cycle_multipass_backtrack;
27344 targetm.sched.first_cycle_multipass_end
27345 = core2i7_first_cycle_multipass_end;
27346 targetm.sched.first_cycle_multipass_fini
27347 = core2i7_first_cycle_multipass_fini;
27349 /* Set decoder parameters. */
27350 core2i7_secondary_decoder_max_insn_size = 8;
27351 core2i7_ifetch_block_size = 16;
27352 core2i7_ifetch_block_max_insns = 6;
27353 break;
27355 /* ... Fall through ... */
27356 default:
27357 targetm.sched.dfa_post_advance_cycle = NULL;
27358 targetm.sched.first_cycle_multipass_init = NULL;
27359 targetm.sched.first_cycle_multipass_begin = NULL;
27360 targetm.sched.first_cycle_multipass_issue = NULL;
27361 targetm.sched.first_cycle_multipass_backtrack = NULL;
27362 targetm.sched.first_cycle_multipass_end = NULL;
27363 targetm.sched.first_cycle_multipass_fini = NULL;
27364 break;
27369 /* Compute the alignment given to a constant that is being placed in memory.
27370 EXP is the constant and ALIGN is the alignment that the object would
27371 ordinarily have.
27372 The value of this function is used instead of that alignment to align
27373 the object. */
27376 ix86_constant_alignment (tree exp, int align)
27378 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27379 || TREE_CODE (exp) == INTEGER_CST)
27381 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27382 return 64;
27383 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27384 return 128;
27386 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27387 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27388 return BITS_PER_WORD;
27390 return align;
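/* Hedged example (illustrative, not from the original source): with the rules
   above, a DFmode constant such as 3.14159 placed in memory is aligned to
   64 bits even if it would ordinarily get less, and a string constant of
   31 or more bytes is word-aligned when not optimizing for size.  */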
27393 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
27394 the data type, and ALIGN is the alignment that the object would
27395 ordinarily have. */
27397 static int
27398 iamcu_alignment (tree type, int align)
27400 enum machine_mode mode;
27402 if (align < 32 || TYPE_USER_ALIGN (type))
27403 return align;
27405 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
27406 aligned to 4 bytes. */
27407 mode = TYPE_MODE (strip_array_types (type));
27408 switch (GET_MODE_CLASS (mode))
27410 case MODE_INT:
27411 case MODE_COMPLEX_INT:
27412 case MODE_COMPLEX_FLOAT:
27413 case MODE_FLOAT:
27414 case MODE_DECIMAL_FLOAT:
27415 return 32;
27416 default:
27417 return align;
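/* Illustrative example (assumed, not part of the original source): under the
   IA MCU psABI a "double", which would otherwise get 64-bit alignment, is
   capped at 32 bits by the function above, while a type declared with
   __attribute__ ((aligned (8))) keeps its user-specified alignment because
   TYPE_USER_ALIGN is honoured first.  */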
27421 /* Compute the alignment for a static variable.
27422 TYPE is the data type, and ALIGN is the alignment that
27423 the object would ordinarily have. The value of this function is used
27424 instead of that alignment to align the object. */
27427 ix86_data_alignment (tree type, int align, bool opt)
27429 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27430 for symbols from other compilation units or symbols that don't need
27431 to bind locally. In order to preserve some ABI compatibility with
27432 those compilers, ensure we don't decrease alignment from what we
27433 used to assume. */
27435 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27437 /* A data structure equal to or greater than the size of a cache line
27438 (64 bytes in the Pentium 4 and other recent Intel processors, including
27439 processors based on the Intel Core microarchitecture) should be aligned
27440 so that its base address is a multiple of the cache line size. */
27442 int max_align
27443 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27445 if (max_align < BITS_PER_WORD)
27446 max_align = BITS_PER_WORD;
27448 switch (ix86_align_data_type)
27450 case ix86_align_data_type_abi: opt = false; break;
27451 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27452 case ix86_align_data_type_cacheline: break;
27455 if (TARGET_IAMCU)
27456 align = iamcu_alignment (type, align);
27458 if (opt
27459 && AGGREGATE_TYPE_P (type)
27460 && TYPE_SIZE (type)
27461 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27463 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27464 && align < max_align_compat)
27465 align = max_align_compat;
27466 if (wi::geu_p (TYPE_SIZE (type), max_align)
27467 && align < max_align)
27468 align = max_align;
27471 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27472 to a 16-byte boundary. */
27473 if (TARGET_64BIT)
27475 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27476 && TYPE_SIZE (type)
27477 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27478 && wi::geu_p (TYPE_SIZE (type), 128)
27479 && align < 128)
27480 return 128;
27483 if (!opt)
27484 return align;
27486 if (TREE_CODE (type) == ARRAY_TYPE)
27488 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27489 return 64;
27490 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27491 return 128;
27493 else if (TREE_CODE (type) == COMPLEX_TYPE)
27496 if (TYPE_MODE (type) == DCmode && align < 64)
27497 return 64;
27498 if ((TYPE_MODE (type) == XCmode
27499 || TYPE_MODE (type) == TCmode) && align < 128)
27500 return 128;
27502 else if ((TREE_CODE (type) == RECORD_TYPE
27503 || TREE_CODE (type) == UNION_TYPE
27504 || TREE_CODE (type) == QUAL_UNION_TYPE)
27505 && TYPE_FIELDS (type))
27507 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27508 return 64;
27509 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27510 return 128;
27512 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27513 || TREE_CODE (type) == INTEGER_TYPE)
27515 if (TYPE_MODE (type) == DFmode && align < 64)
27516 return 64;
27517 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27518 return 128;
27521 return align;
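/* Hedged example (illustrative only): given

       static char buf[256];

   the checks above raise the alignment to at least max_align_compat
   (256 bits) and, when optimizing, to the cache-line-derived max_align
   (for example 512 bits when prefetch_block is 64 bytes); on 64-bit targets
   the 16-byte array alignment required by the psABI is also guaranteed,
   while a small scalar keeps the alignment it came in with.  */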
27524 /* Compute the alignment for a local variable or a stack slot. EXP is
27525 the data type or decl itself, MODE is the widest mode available and
27526 ALIGN is the alignment that the object would ordinarily have. The
27527 value of this macro is used instead of that alignment to align the
27528 object. */
27530 unsigned int
27531 ix86_local_alignment (tree exp, machine_mode mode,
27532 unsigned int align)
27534 tree type, decl;
27536 if (exp && DECL_P (exp))
27538 type = TREE_TYPE (exp);
27539 decl = exp;
27541 else
27543 type = exp;
27544 decl = NULL;
27547 /* Don't do dynamic stack realignment for long long objects with
27548 -mpreferred-stack-boundary=2. */
27549 if (!TARGET_64BIT
27550 && align == 64
27551 && ix86_preferred_stack_boundary < 64
27552 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27553 && (!type || !TYPE_USER_ALIGN (type))
27554 && (!decl || !DECL_USER_ALIGN (decl)))
27555 align = 32;
27557 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27558 register in MODE. We will return the largest alignment of XF
27559 and DF. */
27560 if (!type)
27562 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27563 align = GET_MODE_ALIGNMENT (DFmode);
27564 return align;
27567 /* Don't increase alignment for Intel MCU psABI. */
27568 if (TARGET_IAMCU)
27569 return align;
27571 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27572 to a 16-byte boundary. The exact wording is:
27574 An array uses the same alignment as its elements, except that a local or
27575 global array variable of length at least 16 bytes or
27576 a C99 variable-length array variable always has alignment of at least 16 bytes.
27578 This was added to allow use of aligned SSE instructions on arrays. The
27579 rule is meant for static storage (where the compiler cannot do the analysis
27580 by itself). We follow it for automatic variables only when convenient.
27581 We fully control everything in the function being compiled, and functions
27582 from other units cannot rely on the alignment.
27584 Exclude the va_list type. It is the common case of a local array where
27585 we cannot benefit from the alignment.
27587 TODO: Probably we should optimize for size only when the variable is not escaping. */
27588 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27589 && TARGET_SSE)
27591 if (AGGREGATE_TYPE_P (type)
27592 && (va_list_type_node == NULL_TREE
27593 || (TYPE_MAIN_VARIANT (type)
27594 != TYPE_MAIN_VARIANT (va_list_type_node)))
27595 && TYPE_SIZE (type)
27596 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27597 && wi::geu_p (TYPE_SIZE (type), 16)
27598 && align < 128)
27599 return 128;
27601 if (TREE_CODE (type) == ARRAY_TYPE)
27603 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27604 return 64;
27605 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27606 return 128;
27608 else if (TREE_CODE (type) == COMPLEX_TYPE)
27610 if (TYPE_MODE (type) == DCmode && align < 64)
27611 return 64;
27612 if ((TYPE_MODE (type) == XCmode
27613 || TYPE_MODE (type) == TCmode) && align < 128)
27614 return 128;
27616 else if ((TREE_CODE (type) == RECORD_TYPE
27617 || TREE_CODE (type) == UNION_TYPE
27618 || TREE_CODE (type) == QUAL_UNION_TYPE)
27619 && TYPE_FIELDS (type))
27621 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27622 return 64;
27623 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27624 return 128;
27626 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27627 || TREE_CODE (type) == INTEGER_TYPE)
27630 if (TYPE_MODE (type) == DFmode && align < 64)
27631 return 64;
27632 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27633 return 128;
27635 return align;
27638 /* Compute the minimum required alignment for dynamic stack realignment
27639 purposes for a local variable, parameter or a stack slot. EXP is
27640 the data type or decl itself, MODE is its mode and ALIGN is the
27641 alignment that the object would ordinarily have. */
27643 unsigned int
27644 ix86_minimum_alignment (tree exp, machine_mode mode,
27645 unsigned int align)
27647 tree type, decl;
27649 if (exp && DECL_P (exp))
27651 type = TREE_TYPE (exp);
27652 decl = exp;
27654 else
27656 type = exp;
27657 decl = NULL;
27660 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27661 return align;
27663 /* Don't do dynamic stack realignment for long long objects with
27664 -mpreferred-stack-boundary=2. */
27665 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27666 && (!type || !TYPE_USER_ALIGN (type))
27667 && (!decl || !DECL_USER_ALIGN (decl)))
27668 return 32;
27670 return align;
27673 /* Find a location for the static chain incoming to a nested function.
27674 This is a register, unless all free registers are used by arguments. */
27676 static rtx
27677 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27679 unsigned regno;
27681 /* While this function won't be called by the middle-end when a static
27682 chain isn't needed, it's also used throughout the backend so it's
27683 easiest to keep this check centralized. */
27684 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27685 return NULL;
27687 if (TARGET_64BIT)
27689 /* We always use R10 in 64-bit mode. */
27690 regno = R10_REG;
27692 else
27694 const_tree fntype, fndecl;
27695 unsigned int ccvt;
27697 /* By default in 32-bit mode we use ECX to pass the static chain. */
27698 regno = CX_REG;
27700 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27702 fntype = TREE_TYPE (fndecl_or_type);
27703 fndecl = fndecl_or_type;
27705 else
27707 fntype = fndecl_or_type;
27708 fndecl = NULL;
27711 ccvt = ix86_get_callcvt (fntype);
27712 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27714 /* Fastcall functions use ecx/edx for arguments, which leaves
27715 us with EAX for the static chain.
27716 Thiscall functions use ecx for arguments, which also
27717 leaves us with EAX for the static chain. */
27718 regno = AX_REG;
27720 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27722 /* Thiscall functions use ecx for arguments, which leaves
27723 us with EAX and EDX for the static chain.
27724 For ABI compatibility we use EAX. */
27725 regno = AX_REG;
27727 else if (ix86_function_regparm (fntype, fndecl) == 3)
27729 /* For regparm 3, we have no free call-clobbered registers in
27730 which to store the static chain. In order to implement this,
27731 we have the trampoline push the static chain to the stack.
27732 However, we can't push a value below the return address when
27733 we call the nested function directly, so we have to use an
27734 alternate entry point. For this we use ESI, and have the
27735 alternate entry point push ESI, so that things appear the
27736 same once we're executing the nested function. */
27737 if (incoming_p)
27739 if (fndecl == current_function_decl)
27740 ix86_static_chain_on_stack = true;
27741 return gen_frame_mem (SImode,
27742 plus_constant (Pmode,
27743 arg_pointer_rtx, -8));
27745 regno = SI_REG;
27749 return gen_rtx_REG (Pmode, regno);
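/* Illustrative usage (assumed example, not from the original source): for a
   nested function such as

       int outer (int x) { int inner (void) { return x; } return inner (); }

   the static chain pointer is passed in R10 on x86-64 and normally in ECX
   for ia32, with EAX used for fastcall/thiscall and a stack slot for
   regparm (3), as selected above.  */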
27752 /* Emit RTL insns to initialize the variable parts of a trampoline.
27753 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27754 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27755 to be passed to the target function. */
27757 static void
27758 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27760 rtx mem, fnaddr;
27761 int opcode;
27762 int offset = 0;
27764 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27766 if (TARGET_64BIT)
27768 int size;
27770 /* Load the function address into r11. Try to load the address using
27771 the shorter movl instead of movabs. We may want to support
27772 movq for kernel mode, but the kernel does not use trampolines at
27773 the moment. FNADDR is a 32-bit address and may not be in
27774 DImode when ptr_mode == SImode. Always use movl in this
27775 case. */
27776 if (ptr_mode == SImode
27777 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27779 fnaddr = copy_addr_to_reg (fnaddr);
27781 mem = adjust_address (m_tramp, HImode, offset);
27782 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27784 mem = adjust_address (m_tramp, SImode, offset + 2);
27785 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27786 offset += 6;
27788 else
27790 mem = adjust_address (m_tramp, HImode, offset);
27791 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27793 mem = adjust_address (m_tramp, DImode, offset + 2);
27794 emit_move_insn (mem, fnaddr);
27795 offset += 10;
27798 /* Load static chain using movabs to r10. Use the shorter movl
27799 instead of movabs when ptr_mode == SImode. */
27800 if (ptr_mode == SImode)
27802 opcode = 0xba41;
27803 size = 6;
27805 else
27807 opcode = 0xba49;
27808 size = 10;
27811 mem = adjust_address (m_tramp, HImode, offset);
27812 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27814 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27815 emit_move_insn (mem, chain_value);
27816 offset += size;
27818 /* Jump to r11; the last (unused) byte is a nop, only there to
27819 pad the write out to a single 32-bit store. */
27820 mem = adjust_address (m_tramp, SImode, offset);
27821 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27822 offset += 4;
27824 else
27826 rtx disp, chain;
27828 /* Depending on the static chain location, either load a register
27829 with a constant, or push the constant to the stack. All of the
27830 instructions are the same size. */
27831 chain = ix86_static_chain (fndecl, true);
27832 if (REG_P (chain))
27834 switch (REGNO (chain))
27836 case AX_REG:
27837 opcode = 0xb8; break;
27838 case CX_REG:
27839 opcode = 0xb9; break;
27840 default:
27841 gcc_unreachable ();
27844 else
27845 opcode = 0x68;
27847 mem = adjust_address (m_tramp, QImode, offset);
27848 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27850 mem = adjust_address (m_tramp, SImode, offset + 1);
27851 emit_move_insn (mem, chain_value);
27852 offset += 5;
27854 mem = adjust_address (m_tramp, QImode, offset);
27855 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27857 mem = adjust_address (m_tramp, SImode, offset + 1);
27859 /* Compute offset from the end of the jmp to the target function.
27860 In the case in which the trampoline stores the static chain on
27861 the stack, we need to skip the first insn which pushes the
27862 (call-saved) register static chain; this push is 1 byte. */
27863 offset += 5;
27864 disp = expand_binop (SImode, sub_optab, fnaddr,
27865 plus_constant (Pmode, XEXP (m_tramp, 0),
27866 offset - (MEM_P (chain) ? 1 : 0)),
27867 NULL_RTX, 1, OPTAB_DIRECT);
27868 emit_move_insn (mem, disp);
27871 gcc_assert (offset <= TRAMPOLINE_SIZE);
27873 #ifdef HAVE_ENABLE_EXECUTE_STACK
27874 #ifdef CHECK_EXECUTE_STACK_ENABLED
27875 if (CHECK_EXECUTE_STACK_ENABLED)
27876 #endif
27877 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27878 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27879 #endif
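/* Sketch of the 64-bit trampoline emitted above (illustrative; the exact
   encoding depends on ptr_mode):

       49 bb <imm64>    movabsq $fnaddr, %r11
       49 ba <imm64>    movabsq $chain,  %r10
       49 ff e3 90      jmpq    *%r11 ; trailing nop pads the 32-bit store

   The 32-bit variant instead emits a mov (or push) of the static chain
   followed by a relative jmp to the target function.  */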
27882 /* The following file contains several enumerations and data structures
27883 built from the definitions in i386-builtin-types.def. */
27885 #include "i386-builtin-types.inc"
27887 /* Table for the ix86 builtin non-function types. */
27888 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27890 /* Retrieve an element from the above table, building some of
27891 the types lazily. */
27893 static tree
27894 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27896 unsigned int index;
27897 tree type, itype;
27899 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27901 type = ix86_builtin_type_tab[(int) tcode];
27902 if (type != NULL)
27903 return type;
27905 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27906 if (tcode <= IX86_BT_LAST_VECT)
27908 machine_mode mode;
27910 index = tcode - IX86_BT_LAST_PRIM - 1;
27911 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27912 mode = ix86_builtin_type_vect_mode[index];
27914 type = build_vector_type_for_mode (itype, mode);
27916 else
27918 int quals;
27920 index = tcode - IX86_BT_LAST_VECT - 1;
27921 if (tcode <= IX86_BT_LAST_PTR)
27922 quals = TYPE_UNQUALIFIED;
27923 else
27924 quals = TYPE_QUAL_CONST;
27926 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27927 if (quals != TYPE_UNQUALIFIED)
27928 itype = build_qualified_type (itype, quals);
27930 type = build_pointer_type (itype);
27933 ix86_builtin_type_tab[(int) tcode] = type;
27934 return type;
27937 /* Table for the ix86 builtin function types. */
27938 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27940 /* Retrieve an element from the above table, building some of
27941 the types lazily. */
27943 static tree
27944 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27946 tree type;
27948 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27950 type = ix86_builtin_func_type_tab[(int) tcode];
27951 if (type != NULL)
27952 return type;
27954 if (tcode <= IX86_BT_LAST_FUNC)
27956 unsigned start = ix86_builtin_func_start[(int) tcode];
27957 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27958 tree rtype, atype, args = void_list_node;
27959 unsigned i;
27961 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27962 for (i = after - 1; i > start; --i)
27964 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27965 args = tree_cons (NULL, atype, args);
27968 type = build_function_type (rtype, args);
27970 else
27972 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27973 enum ix86_builtin_func_type icode;
27975 icode = ix86_builtin_func_alias_base[index];
27976 type = ix86_get_builtin_func_type (icode);
27979 ix86_builtin_func_type_tab[(int) tcode] = type;
27980 return type;
27984 /* Codes for all the SSE/MMX builtins. */
27985 enum ix86_builtins
27987 IX86_BUILTIN_ADDPS,
27988 IX86_BUILTIN_ADDSS,
27989 IX86_BUILTIN_DIVPS,
27990 IX86_BUILTIN_DIVSS,
27991 IX86_BUILTIN_MULPS,
27992 IX86_BUILTIN_MULSS,
27993 IX86_BUILTIN_SUBPS,
27994 IX86_BUILTIN_SUBSS,
27996 IX86_BUILTIN_CMPEQPS,
27997 IX86_BUILTIN_CMPLTPS,
27998 IX86_BUILTIN_CMPLEPS,
27999 IX86_BUILTIN_CMPGTPS,
28000 IX86_BUILTIN_CMPGEPS,
28001 IX86_BUILTIN_CMPNEQPS,
28002 IX86_BUILTIN_CMPNLTPS,
28003 IX86_BUILTIN_CMPNLEPS,
28004 IX86_BUILTIN_CMPNGTPS,
28005 IX86_BUILTIN_CMPNGEPS,
28006 IX86_BUILTIN_CMPORDPS,
28007 IX86_BUILTIN_CMPUNORDPS,
28008 IX86_BUILTIN_CMPEQSS,
28009 IX86_BUILTIN_CMPLTSS,
28010 IX86_BUILTIN_CMPLESS,
28011 IX86_BUILTIN_CMPNEQSS,
28012 IX86_BUILTIN_CMPNLTSS,
28013 IX86_BUILTIN_CMPNLESS,
28014 IX86_BUILTIN_CMPORDSS,
28015 IX86_BUILTIN_CMPUNORDSS,
28017 IX86_BUILTIN_COMIEQSS,
28018 IX86_BUILTIN_COMILTSS,
28019 IX86_BUILTIN_COMILESS,
28020 IX86_BUILTIN_COMIGTSS,
28021 IX86_BUILTIN_COMIGESS,
28022 IX86_BUILTIN_COMINEQSS,
28023 IX86_BUILTIN_UCOMIEQSS,
28024 IX86_BUILTIN_UCOMILTSS,
28025 IX86_BUILTIN_UCOMILESS,
28026 IX86_BUILTIN_UCOMIGTSS,
28027 IX86_BUILTIN_UCOMIGESS,
28028 IX86_BUILTIN_UCOMINEQSS,
28030 IX86_BUILTIN_CVTPI2PS,
28031 IX86_BUILTIN_CVTPS2PI,
28032 IX86_BUILTIN_CVTSI2SS,
28033 IX86_BUILTIN_CVTSI642SS,
28034 IX86_BUILTIN_CVTSS2SI,
28035 IX86_BUILTIN_CVTSS2SI64,
28036 IX86_BUILTIN_CVTTPS2PI,
28037 IX86_BUILTIN_CVTTSS2SI,
28038 IX86_BUILTIN_CVTTSS2SI64,
28040 IX86_BUILTIN_MAXPS,
28041 IX86_BUILTIN_MAXSS,
28042 IX86_BUILTIN_MINPS,
28043 IX86_BUILTIN_MINSS,
28045 IX86_BUILTIN_LOADUPS,
28046 IX86_BUILTIN_STOREUPS,
28047 IX86_BUILTIN_MOVSS,
28049 IX86_BUILTIN_MOVHLPS,
28050 IX86_BUILTIN_MOVLHPS,
28051 IX86_BUILTIN_LOADHPS,
28052 IX86_BUILTIN_LOADLPS,
28053 IX86_BUILTIN_STOREHPS,
28054 IX86_BUILTIN_STORELPS,
28056 IX86_BUILTIN_MASKMOVQ,
28057 IX86_BUILTIN_MOVMSKPS,
28058 IX86_BUILTIN_PMOVMSKB,
28060 IX86_BUILTIN_MOVNTPS,
28061 IX86_BUILTIN_MOVNTQ,
28063 IX86_BUILTIN_LOADDQU,
28064 IX86_BUILTIN_STOREDQU,
28066 IX86_BUILTIN_PACKSSWB,
28067 IX86_BUILTIN_PACKSSDW,
28068 IX86_BUILTIN_PACKUSWB,
28070 IX86_BUILTIN_PADDB,
28071 IX86_BUILTIN_PADDW,
28072 IX86_BUILTIN_PADDD,
28073 IX86_BUILTIN_PADDQ,
28074 IX86_BUILTIN_PADDSB,
28075 IX86_BUILTIN_PADDSW,
28076 IX86_BUILTIN_PADDUSB,
28077 IX86_BUILTIN_PADDUSW,
28078 IX86_BUILTIN_PSUBB,
28079 IX86_BUILTIN_PSUBW,
28080 IX86_BUILTIN_PSUBD,
28081 IX86_BUILTIN_PSUBQ,
28082 IX86_BUILTIN_PSUBSB,
28083 IX86_BUILTIN_PSUBSW,
28084 IX86_BUILTIN_PSUBUSB,
28085 IX86_BUILTIN_PSUBUSW,
28087 IX86_BUILTIN_PAND,
28088 IX86_BUILTIN_PANDN,
28089 IX86_BUILTIN_POR,
28090 IX86_BUILTIN_PXOR,
28092 IX86_BUILTIN_PAVGB,
28093 IX86_BUILTIN_PAVGW,
28095 IX86_BUILTIN_PCMPEQB,
28096 IX86_BUILTIN_PCMPEQW,
28097 IX86_BUILTIN_PCMPEQD,
28098 IX86_BUILTIN_PCMPGTB,
28099 IX86_BUILTIN_PCMPGTW,
28100 IX86_BUILTIN_PCMPGTD,
28102 IX86_BUILTIN_PMADDWD,
28104 IX86_BUILTIN_PMAXSW,
28105 IX86_BUILTIN_PMAXUB,
28106 IX86_BUILTIN_PMINSW,
28107 IX86_BUILTIN_PMINUB,
28109 IX86_BUILTIN_PMULHUW,
28110 IX86_BUILTIN_PMULHW,
28111 IX86_BUILTIN_PMULLW,
28113 IX86_BUILTIN_PSADBW,
28114 IX86_BUILTIN_PSHUFW,
28116 IX86_BUILTIN_PSLLW,
28117 IX86_BUILTIN_PSLLD,
28118 IX86_BUILTIN_PSLLQ,
28119 IX86_BUILTIN_PSRAW,
28120 IX86_BUILTIN_PSRAD,
28121 IX86_BUILTIN_PSRLW,
28122 IX86_BUILTIN_PSRLD,
28123 IX86_BUILTIN_PSRLQ,
28124 IX86_BUILTIN_PSLLWI,
28125 IX86_BUILTIN_PSLLDI,
28126 IX86_BUILTIN_PSLLQI,
28127 IX86_BUILTIN_PSRAWI,
28128 IX86_BUILTIN_PSRADI,
28129 IX86_BUILTIN_PSRLWI,
28130 IX86_BUILTIN_PSRLDI,
28131 IX86_BUILTIN_PSRLQI,
28133 IX86_BUILTIN_PUNPCKHBW,
28134 IX86_BUILTIN_PUNPCKHWD,
28135 IX86_BUILTIN_PUNPCKHDQ,
28136 IX86_BUILTIN_PUNPCKLBW,
28137 IX86_BUILTIN_PUNPCKLWD,
28138 IX86_BUILTIN_PUNPCKLDQ,
28140 IX86_BUILTIN_SHUFPS,
28142 IX86_BUILTIN_RCPPS,
28143 IX86_BUILTIN_RCPSS,
28144 IX86_BUILTIN_RSQRTPS,
28145 IX86_BUILTIN_RSQRTPS_NR,
28146 IX86_BUILTIN_RSQRTSS,
28147 IX86_BUILTIN_RSQRTF,
28148 IX86_BUILTIN_SQRTPS,
28149 IX86_BUILTIN_SQRTPS_NR,
28150 IX86_BUILTIN_SQRTSS,
28152 IX86_BUILTIN_UNPCKHPS,
28153 IX86_BUILTIN_UNPCKLPS,
28155 IX86_BUILTIN_ANDPS,
28156 IX86_BUILTIN_ANDNPS,
28157 IX86_BUILTIN_ORPS,
28158 IX86_BUILTIN_XORPS,
28160 IX86_BUILTIN_EMMS,
28161 IX86_BUILTIN_LDMXCSR,
28162 IX86_BUILTIN_STMXCSR,
28163 IX86_BUILTIN_SFENCE,
28165 IX86_BUILTIN_FXSAVE,
28166 IX86_BUILTIN_FXRSTOR,
28167 IX86_BUILTIN_FXSAVE64,
28168 IX86_BUILTIN_FXRSTOR64,
28170 IX86_BUILTIN_XSAVE,
28171 IX86_BUILTIN_XRSTOR,
28172 IX86_BUILTIN_XSAVE64,
28173 IX86_BUILTIN_XRSTOR64,
28175 IX86_BUILTIN_XSAVEOPT,
28176 IX86_BUILTIN_XSAVEOPT64,
28178 IX86_BUILTIN_XSAVEC,
28179 IX86_BUILTIN_XSAVEC64,
28181 IX86_BUILTIN_XSAVES,
28182 IX86_BUILTIN_XRSTORS,
28183 IX86_BUILTIN_XSAVES64,
28184 IX86_BUILTIN_XRSTORS64,
28186 /* 3DNow! Original */
28187 IX86_BUILTIN_FEMMS,
28188 IX86_BUILTIN_PAVGUSB,
28189 IX86_BUILTIN_PF2ID,
28190 IX86_BUILTIN_PFACC,
28191 IX86_BUILTIN_PFADD,
28192 IX86_BUILTIN_PFCMPEQ,
28193 IX86_BUILTIN_PFCMPGE,
28194 IX86_BUILTIN_PFCMPGT,
28195 IX86_BUILTIN_PFMAX,
28196 IX86_BUILTIN_PFMIN,
28197 IX86_BUILTIN_PFMUL,
28198 IX86_BUILTIN_PFRCP,
28199 IX86_BUILTIN_PFRCPIT1,
28200 IX86_BUILTIN_PFRCPIT2,
28201 IX86_BUILTIN_PFRSQIT1,
28202 IX86_BUILTIN_PFRSQRT,
28203 IX86_BUILTIN_PFSUB,
28204 IX86_BUILTIN_PFSUBR,
28205 IX86_BUILTIN_PI2FD,
28206 IX86_BUILTIN_PMULHRW,
28208 /* 3DNow! Athlon Extensions */
28209 IX86_BUILTIN_PF2IW,
28210 IX86_BUILTIN_PFNACC,
28211 IX86_BUILTIN_PFPNACC,
28212 IX86_BUILTIN_PI2FW,
28213 IX86_BUILTIN_PSWAPDSI,
28214 IX86_BUILTIN_PSWAPDSF,
28216 /* SSE2 */
28217 IX86_BUILTIN_ADDPD,
28218 IX86_BUILTIN_ADDSD,
28219 IX86_BUILTIN_DIVPD,
28220 IX86_BUILTIN_DIVSD,
28221 IX86_BUILTIN_MULPD,
28222 IX86_BUILTIN_MULSD,
28223 IX86_BUILTIN_SUBPD,
28224 IX86_BUILTIN_SUBSD,
28226 IX86_BUILTIN_CMPEQPD,
28227 IX86_BUILTIN_CMPLTPD,
28228 IX86_BUILTIN_CMPLEPD,
28229 IX86_BUILTIN_CMPGTPD,
28230 IX86_BUILTIN_CMPGEPD,
28231 IX86_BUILTIN_CMPNEQPD,
28232 IX86_BUILTIN_CMPNLTPD,
28233 IX86_BUILTIN_CMPNLEPD,
28234 IX86_BUILTIN_CMPNGTPD,
28235 IX86_BUILTIN_CMPNGEPD,
28236 IX86_BUILTIN_CMPORDPD,
28237 IX86_BUILTIN_CMPUNORDPD,
28238 IX86_BUILTIN_CMPEQSD,
28239 IX86_BUILTIN_CMPLTSD,
28240 IX86_BUILTIN_CMPLESD,
28241 IX86_BUILTIN_CMPNEQSD,
28242 IX86_BUILTIN_CMPNLTSD,
28243 IX86_BUILTIN_CMPNLESD,
28244 IX86_BUILTIN_CMPORDSD,
28245 IX86_BUILTIN_CMPUNORDSD,
28247 IX86_BUILTIN_COMIEQSD,
28248 IX86_BUILTIN_COMILTSD,
28249 IX86_BUILTIN_COMILESD,
28250 IX86_BUILTIN_COMIGTSD,
28251 IX86_BUILTIN_COMIGESD,
28252 IX86_BUILTIN_COMINEQSD,
28253 IX86_BUILTIN_UCOMIEQSD,
28254 IX86_BUILTIN_UCOMILTSD,
28255 IX86_BUILTIN_UCOMILESD,
28256 IX86_BUILTIN_UCOMIGTSD,
28257 IX86_BUILTIN_UCOMIGESD,
28258 IX86_BUILTIN_UCOMINEQSD,
28260 IX86_BUILTIN_MAXPD,
28261 IX86_BUILTIN_MAXSD,
28262 IX86_BUILTIN_MINPD,
28263 IX86_BUILTIN_MINSD,
28265 IX86_BUILTIN_ANDPD,
28266 IX86_BUILTIN_ANDNPD,
28267 IX86_BUILTIN_ORPD,
28268 IX86_BUILTIN_XORPD,
28270 IX86_BUILTIN_SQRTPD,
28271 IX86_BUILTIN_SQRTSD,
28273 IX86_BUILTIN_UNPCKHPD,
28274 IX86_BUILTIN_UNPCKLPD,
28276 IX86_BUILTIN_SHUFPD,
28278 IX86_BUILTIN_LOADUPD,
28279 IX86_BUILTIN_STOREUPD,
28280 IX86_BUILTIN_MOVSD,
28282 IX86_BUILTIN_LOADHPD,
28283 IX86_BUILTIN_LOADLPD,
28285 IX86_BUILTIN_CVTDQ2PD,
28286 IX86_BUILTIN_CVTDQ2PS,
28288 IX86_BUILTIN_CVTPD2DQ,
28289 IX86_BUILTIN_CVTPD2PI,
28290 IX86_BUILTIN_CVTPD2PS,
28291 IX86_BUILTIN_CVTTPD2DQ,
28292 IX86_BUILTIN_CVTTPD2PI,
28294 IX86_BUILTIN_CVTPI2PD,
28295 IX86_BUILTIN_CVTSI2SD,
28296 IX86_BUILTIN_CVTSI642SD,
28298 IX86_BUILTIN_CVTSD2SI,
28299 IX86_BUILTIN_CVTSD2SI64,
28300 IX86_BUILTIN_CVTSD2SS,
28301 IX86_BUILTIN_CVTSS2SD,
28302 IX86_BUILTIN_CVTTSD2SI,
28303 IX86_BUILTIN_CVTTSD2SI64,
28305 IX86_BUILTIN_CVTPS2DQ,
28306 IX86_BUILTIN_CVTPS2PD,
28307 IX86_BUILTIN_CVTTPS2DQ,
28309 IX86_BUILTIN_MOVNTI,
28310 IX86_BUILTIN_MOVNTI64,
28311 IX86_BUILTIN_MOVNTPD,
28312 IX86_BUILTIN_MOVNTDQ,
28314 IX86_BUILTIN_MOVQ128,
28316 /* SSE2 MMX */
28317 IX86_BUILTIN_MASKMOVDQU,
28318 IX86_BUILTIN_MOVMSKPD,
28319 IX86_BUILTIN_PMOVMSKB128,
28321 IX86_BUILTIN_PACKSSWB128,
28322 IX86_BUILTIN_PACKSSDW128,
28323 IX86_BUILTIN_PACKUSWB128,
28325 IX86_BUILTIN_PADDB128,
28326 IX86_BUILTIN_PADDW128,
28327 IX86_BUILTIN_PADDD128,
28328 IX86_BUILTIN_PADDQ128,
28329 IX86_BUILTIN_PADDSB128,
28330 IX86_BUILTIN_PADDSW128,
28331 IX86_BUILTIN_PADDUSB128,
28332 IX86_BUILTIN_PADDUSW128,
28333 IX86_BUILTIN_PSUBB128,
28334 IX86_BUILTIN_PSUBW128,
28335 IX86_BUILTIN_PSUBD128,
28336 IX86_BUILTIN_PSUBQ128,
28337 IX86_BUILTIN_PSUBSB128,
28338 IX86_BUILTIN_PSUBSW128,
28339 IX86_BUILTIN_PSUBUSB128,
28340 IX86_BUILTIN_PSUBUSW128,
28342 IX86_BUILTIN_PAND128,
28343 IX86_BUILTIN_PANDN128,
28344 IX86_BUILTIN_POR128,
28345 IX86_BUILTIN_PXOR128,
28347 IX86_BUILTIN_PAVGB128,
28348 IX86_BUILTIN_PAVGW128,
28350 IX86_BUILTIN_PCMPEQB128,
28351 IX86_BUILTIN_PCMPEQW128,
28352 IX86_BUILTIN_PCMPEQD128,
28353 IX86_BUILTIN_PCMPGTB128,
28354 IX86_BUILTIN_PCMPGTW128,
28355 IX86_BUILTIN_PCMPGTD128,
28357 IX86_BUILTIN_PMADDWD128,
28359 IX86_BUILTIN_PMAXSW128,
28360 IX86_BUILTIN_PMAXUB128,
28361 IX86_BUILTIN_PMINSW128,
28362 IX86_BUILTIN_PMINUB128,
28364 IX86_BUILTIN_PMULUDQ,
28365 IX86_BUILTIN_PMULUDQ128,
28366 IX86_BUILTIN_PMULHUW128,
28367 IX86_BUILTIN_PMULHW128,
28368 IX86_BUILTIN_PMULLW128,
28370 IX86_BUILTIN_PSADBW128,
28371 IX86_BUILTIN_PSHUFHW,
28372 IX86_BUILTIN_PSHUFLW,
28373 IX86_BUILTIN_PSHUFD,
28375 IX86_BUILTIN_PSLLDQI128,
28376 IX86_BUILTIN_PSLLWI128,
28377 IX86_BUILTIN_PSLLDI128,
28378 IX86_BUILTIN_PSLLQI128,
28379 IX86_BUILTIN_PSRAWI128,
28380 IX86_BUILTIN_PSRADI128,
28381 IX86_BUILTIN_PSRLDQI128,
28382 IX86_BUILTIN_PSRLWI128,
28383 IX86_BUILTIN_PSRLDI128,
28384 IX86_BUILTIN_PSRLQI128,
28386 IX86_BUILTIN_PSLLDQ128,
28387 IX86_BUILTIN_PSLLW128,
28388 IX86_BUILTIN_PSLLD128,
28389 IX86_BUILTIN_PSLLQ128,
28390 IX86_BUILTIN_PSRAW128,
28391 IX86_BUILTIN_PSRAD128,
28392 IX86_BUILTIN_PSRLW128,
28393 IX86_BUILTIN_PSRLD128,
28394 IX86_BUILTIN_PSRLQ128,
28396 IX86_BUILTIN_PUNPCKHBW128,
28397 IX86_BUILTIN_PUNPCKHWD128,
28398 IX86_BUILTIN_PUNPCKHDQ128,
28399 IX86_BUILTIN_PUNPCKHQDQ128,
28400 IX86_BUILTIN_PUNPCKLBW128,
28401 IX86_BUILTIN_PUNPCKLWD128,
28402 IX86_BUILTIN_PUNPCKLDQ128,
28403 IX86_BUILTIN_PUNPCKLQDQ128,
28405 IX86_BUILTIN_CLFLUSH,
28406 IX86_BUILTIN_MFENCE,
28407 IX86_BUILTIN_LFENCE,
28408 IX86_BUILTIN_PAUSE,
28410 IX86_BUILTIN_FNSTENV,
28411 IX86_BUILTIN_FLDENV,
28412 IX86_BUILTIN_FNSTSW,
28413 IX86_BUILTIN_FNCLEX,
28415 IX86_BUILTIN_BSRSI,
28416 IX86_BUILTIN_BSRDI,
28417 IX86_BUILTIN_RDPMC,
28418 IX86_BUILTIN_RDTSC,
28419 IX86_BUILTIN_RDTSCP,
28420 IX86_BUILTIN_ROLQI,
28421 IX86_BUILTIN_ROLHI,
28422 IX86_BUILTIN_RORQI,
28423 IX86_BUILTIN_RORHI,
28425 /* SSE3. */
28426 IX86_BUILTIN_ADDSUBPS,
28427 IX86_BUILTIN_HADDPS,
28428 IX86_BUILTIN_HSUBPS,
28429 IX86_BUILTIN_MOVSHDUP,
28430 IX86_BUILTIN_MOVSLDUP,
28431 IX86_BUILTIN_ADDSUBPD,
28432 IX86_BUILTIN_HADDPD,
28433 IX86_BUILTIN_HSUBPD,
28434 IX86_BUILTIN_LDDQU,
28436 IX86_BUILTIN_MONITOR,
28437 IX86_BUILTIN_MWAIT,
28439 /* SSSE3. */
28440 IX86_BUILTIN_PHADDW,
28441 IX86_BUILTIN_PHADDD,
28442 IX86_BUILTIN_PHADDSW,
28443 IX86_BUILTIN_PHSUBW,
28444 IX86_BUILTIN_PHSUBD,
28445 IX86_BUILTIN_PHSUBSW,
28446 IX86_BUILTIN_PMADDUBSW,
28447 IX86_BUILTIN_PMULHRSW,
28448 IX86_BUILTIN_PSHUFB,
28449 IX86_BUILTIN_PSIGNB,
28450 IX86_BUILTIN_PSIGNW,
28451 IX86_BUILTIN_PSIGND,
28452 IX86_BUILTIN_PALIGNR,
28453 IX86_BUILTIN_PABSB,
28454 IX86_BUILTIN_PABSW,
28455 IX86_BUILTIN_PABSD,
28457 IX86_BUILTIN_PHADDW128,
28458 IX86_BUILTIN_PHADDD128,
28459 IX86_BUILTIN_PHADDSW128,
28460 IX86_BUILTIN_PHSUBW128,
28461 IX86_BUILTIN_PHSUBD128,
28462 IX86_BUILTIN_PHSUBSW128,
28463 IX86_BUILTIN_PMADDUBSW128,
28464 IX86_BUILTIN_PMULHRSW128,
28465 IX86_BUILTIN_PSHUFB128,
28466 IX86_BUILTIN_PSIGNB128,
28467 IX86_BUILTIN_PSIGNW128,
28468 IX86_BUILTIN_PSIGND128,
28469 IX86_BUILTIN_PALIGNR128,
28470 IX86_BUILTIN_PABSB128,
28471 IX86_BUILTIN_PABSW128,
28472 IX86_BUILTIN_PABSD128,
28474 /* AMDFAM10 - SSE4A New Instructions. */
28475 IX86_BUILTIN_MOVNTSD,
28476 IX86_BUILTIN_MOVNTSS,
28477 IX86_BUILTIN_EXTRQI,
28478 IX86_BUILTIN_EXTRQ,
28479 IX86_BUILTIN_INSERTQI,
28480 IX86_BUILTIN_INSERTQ,
28482 /* SSE4.1. */
28483 IX86_BUILTIN_BLENDPD,
28484 IX86_BUILTIN_BLENDPS,
28485 IX86_BUILTIN_BLENDVPD,
28486 IX86_BUILTIN_BLENDVPS,
28487 IX86_BUILTIN_PBLENDVB128,
28488 IX86_BUILTIN_PBLENDW128,
28490 IX86_BUILTIN_DPPD,
28491 IX86_BUILTIN_DPPS,
28493 IX86_BUILTIN_INSERTPS128,
28495 IX86_BUILTIN_MOVNTDQA,
28496 IX86_BUILTIN_MPSADBW128,
28497 IX86_BUILTIN_PACKUSDW128,
28498 IX86_BUILTIN_PCMPEQQ,
28499 IX86_BUILTIN_PHMINPOSUW128,
28501 IX86_BUILTIN_PMAXSB128,
28502 IX86_BUILTIN_PMAXSD128,
28503 IX86_BUILTIN_PMAXUD128,
28504 IX86_BUILTIN_PMAXUW128,
28506 IX86_BUILTIN_PMINSB128,
28507 IX86_BUILTIN_PMINSD128,
28508 IX86_BUILTIN_PMINUD128,
28509 IX86_BUILTIN_PMINUW128,
28511 IX86_BUILTIN_PMOVSXBW128,
28512 IX86_BUILTIN_PMOVSXBD128,
28513 IX86_BUILTIN_PMOVSXBQ128,
28514 IX86_BUILTIN_PMOVSXWD128,
28515 IX86_BUILTIN_PMOVSXWQ128,
28516 IX86_BUILTIN_PMOVSXDQ128,
28518 IX86_BUILTIN_PMOVZXBW128,
28519 IX86_BUILTIN_PMOVZXBD128,
28520 IX86_BUILTIN_PMOVZXBQ128,
28521 IX86_BUILTIN_PMOVZXWD128,
28522 IX86_BUILTIN_PMOVZXWQ128,
28523 IX86_BUILTIN_PMOVZXDQ128,
28525 IX86_BUILTIN_PMULDQ128,
28526 IX86_BUILTIN_PMULLD128,
28528 IX86_BUILTIN_ROUNDSD,
28529 IX86_BUILTIN_ROUNDSS,
28531 IX86_BUILTIN_ROUNDPD,
28532 IX86_BUILTIN_ROUNDPS,
28534 IX86_BUILTIN_FLOORPD,
28535 IX86_BUILTIN_CEILPD,
28536 IX86_BUILTIN_TRUNCPD,
28537 IX86_BUILTIN_RINTPD,
28538 IX86_BUILTIN_ROUNDPD_AZ,
28540 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28541 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28542 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28544 IX86_BUILTIN_FLOORPS,
28545 IX86_BUILTIN_CEILPS,
28546 IX86_BUILTIN_TRUNCPS,
28547 IX86_BUILTIN_RINTPS,
28548 IX86_BUILTIN_ROUNDPS_AZ,
28550 IX86_BUILTIN_FLOORPS_SFIX,
28551 IX86_BUILTIN_CEILPS_SFIX,
28552 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28554 IX86_BUILTIN_PTESTZ,
28555 IX86_BUILTIN_PTESTC,
28556 IX86_BUILTIN_PTESTNZC,
28558 IX86_BUILTIN_VEC_INIT_V2SI,
28559 IX86_BUILTIN_VEC_INIT_V4HI,
28560 IX86_BUILTIN_VEC_INIT_V8QI,
28561 IX86_BUILTIN_VEC_EXT_V2DF,
28562 IX86_BUILTIN_VEC_EXT_V2DI,
28563 IX86_BUILTIN_VEC_EXT_V4SF,
28564 IX86_BUILTIN_VEC_EXT_V4SI,
28565 IX86_BUILTIN_VEC_EXT_V8HI,
28566 IX86_BUILTIN_VEC_EXT_V2SI,
28567 IX86_BUILTIN_VEC_EXT_V4HI,
28568 IX86_BUILTIN_VEC_EXT_V16QI,
28569 IX86_BUILTIN_VEC_SET_V2DI,
28570 IX86_BUILTIN_VEC_SET_V4SF,
28571 IX86_BUILTIN_VEC_SET_V4SI,
28572 IX86_BUILTIN_VEC_SET_V8HI,
28573 IX86_BUILTIN_VEC_SET_V4HI,
28574 IX86_BUILTIN_VEC_SET_V16QI,
28576 IX86_BUILTIN_VEC_PACK_SFIX,
28577 IX86_BUILTIN_VEC_PACK_SFIX256,
28579 /* SSE4.2. */
28580 IX86_BUILTIN_CRC32QI,
28581 IX86_BUILTIN_CRC32HI,
28582 IX86_BUILTIN_CRC32SI,
28583 IX86_BUILTIN_CRC32DI,
28585 IX86_BUILTIN_PCMPESTRI128,
28586 IX86_BUILTIN_PCMPESTRM128,
28587 IX86_BUILTIN_PCMPESTRA128,
28588 IX86_BUILTIN_PCMPESTRC128,
28589 IX86_BUILTIN_PCMPESTRO128,
28590 IX86_BUILTIN_PCMPESTRS128,
28591 IX86_BUILTIN_PCMPESTRZ128,
28592 IX86_BUILTIN_PCMPISTRI128,
28593 IX86_BUILTIN_PCMPISTRM128,
28594 IX86_BUILTIN_PCMPISTRA128,
28595 IX86_BUILTIN_PCMPISTRC128,
28596 IX86_BUILTIN_PCMPISTRO128,
28597 IX86_BUILTIN_PCMPISTRS128,
28598 IX86_BUILTIN_PCMPISTRZ128,
28600 IX86_BUILTIN_PCMPGTQ,
28602 /* AES instructions */
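  /* Exposed to user code as __builtin_ia32_* functions; for example,
     IX86_BUILTIN_AESENC128 presumably backs __builtin_ia32_aesenc128,
     which <wmmintrin.h> wraps as _mm_aesenc_si128.  */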
28603 IX86_BUILTIN_AESENC128,
28604 IX86_BUILTIN_AESENCLAST128,
28605 IX86_BUILTIN_AESDEC128,
28606 IX86_BUILTIN_AESDECLAST128,
28607 IX86_BUILTIN_AESIMC128,
28608 IX86_BUILTIN_AESKEYGENASSIST128,
28610 /* PCLMUL instruction */
28611 IX86_BUILTIN_PCLMULQDQ128,
28613 /* AVX */
28614 IX86_BUILTIN_ADDPD256,
28615 IX86_BUILTIN_ADDPS256,
28616 IX86_BUILTIN_ADDSUBPD256,
28617 IX86_BUILTIN_ADDSUBPS256,
28618 IX86_BUILTIN_ANDPD256,
28619 IX86_BUILTIN_ANDPS256,
28620 IX86_BUILTIN_ANDNPD256,
28621 IX86_BUILTIN_ANDNPS256,
28622 IX86_BUILTIN_BLENDPD256,
28623 IX86_BUILTIN_BLENDPS256,
28624 IX86_BUILTIN_BLENDVPD256,
28625 IX86_BUILTIN_BLENDVPS256,
28626 IX86_BUILTIN_DIVPD256,
28627 IX86_BUILTIN_DIVPS256,
28628 IX86_BUILTIN_DPPS256,
28629 IX86_BUILTIN_HADDPD256,
28630 IX86_BUILTIN_HADDPS256,
28631 IX86_BUILTIN_HSUBPD256,
28632 IX86_BUILTIN_HSUBPS256,
28633 IX86_BUILTIN_MAXPD256,
28634 IX86_BUILTIN_MAXPS256,
28635 IX86_BUILTIN_MINPD256,
28636 IX86_BUILTIN_MINPS256,
28637 IX86_BUILTIN_MULPD256,
28638 IX86_BUILTIN_MULPS256,
28639 IX86_BUILTIN_ORPD256,
28640 IX86_BUILTIN_ORPS256,
28641 IX86_BUILTIN_SHUFPD256,
28642 IX86_BUILTIN_SHUFPS256,
28643 IX86_BUILTIN_SUBPD256,
28644 IX86_BUILTIN_SUBPS256,
28645 IX86_BUILTIN_XORPD256,
28646 IX86_BUILTIN_XORPS256,
28647 IX86_BUILTIN_CMPSD,
28648 IX86_BUILTIN_CMPSS,
28649 IX86_BUILTIN_CMPPD,
28650 IX86_BUILTIN_CMPPS,
28651 IX86_BUILTIN_CMPPD256,
28652 IX86_BUILTIN_CMPPS256,
28653 IX86_BUILTIN_CVTDQ2PD256,
28654 IX86_BUILTIN_CVTDQ2PS256,
28655 IX86_BUILTIN_CVTPD2PS256,
28656 IX86_BUILTIN_CVTPS2DQ256,
28657 IX86_BUILTIN_CVTPS2PD256,
28658 IX86_BUILTIN_CVTTPD2DQ256,
28659 IX86_BUILTIN_CVTPD2DQ256,
28660 IX86_BUILTIN_CVTTPS2DQ256,
28661 IX86_BUILTIN_EXTRACTF128PD256,
28662 IX86_BUILTIN_EXTRACTF128PS256,
28663 IX86_BUILTIN_EXTRACTF128SI256,
28664 IX86_BUILTIN_VZEROALL,
28665 IX86_BUILTIN_VZEROUPPER,
28666 IX86_BUILTIN_VPERMILVARPD,
28667 IX86_BUILTIN_VPERMILVARPS,
28668 IX86_BUILTIN_VPERMILVARPD256,
28669 IX86_BUILTIN_VPERMILVARPS256,
28670 IX86_BUILTIN_VPERMILPD,
28671 IX86_BUILTIN_VPERMILPS,
28672 IX86_BUILTIN_VPERMILPD256,
28673 IX86_BUILTIN_VPERMILPS256,
28674 IX86_BUILTIN_VPERMIL2PD,
28675 IX86_BUILTIN_VPERMIL2PS,
28676 IX86_BUILTIN_VPERMIL2PD256,
28677 IX86_BUILTIN_VPERMIL2PS256,
28678 IX86_BUILTIN_VPERM2F128PD256,
28679 IX86_BUILTIN_VPERM2F128PS256,
28680 IX86_BUILTIN_VPERM2F128SI256,
28681 IX86_BUILTIN_VBROADCASTSS,
28682 IX86_BUILTIN_VBROADCASTSD256,
28683 IX86_BUILTIN_VBROADCASTSS256,
28684 IX86_BUILTIN_VBROADCASTPD256,
28685 IX86_BUILTIN_VBROADCASTPS256,
28686 IX86_BUILTIN_VINSERTF128PD256,
28687 IX86_BUILTIN_VINSERTF128PS256,
28688 IX86_BUILTIN_VINSERTF128SI256,
28689 IX86_BUILTIN_LOADUPD256,
28690 IX86_BUILTIN_LOADUPS256,
28691 IX86_BUILTIN_STOREUPD256,
28692 IX86_BUILTIN_STOREUPS256,
28693 IX86_BUILTIN_LDDQU256,
28694 IX86_BUILTIN_MOVNTDQ256,
28695 IX86_BUILTIN_MOVNTPD256,
28696 IX86_BUILTIN_MOVNTPS256,
28697 IX86_BUILTIN_LOADDQU256,
28698 IX86_BUILTIN_STOREDQU256,
28699 IX86_BUILTIN_MASKLOADPD,
28700 IX86_BUILTIN_MASKLOADPS,
28701 IX86_BUILTIN_MASKSTOREPD,
28702 IX86_BUILTIN_MASKSTOREPS,
28703 IX86_BUILTIN_MASKLOADPD256,
28704 IX86_BUILTIN_MASKLOADPS256,
28705 IX86_BUILTIN_MASKSTOREPD256,
28706 IX86_BUILTIN_MASKSTOREPS256,
28707 IX86_BUILTIN_MOVSHDUP256,
28708 IX86_BUILTIN_MOVSLDUP256,
28709 IX86_BUILTIN_MOVDDUP256,
28711 IX86_BUILTIN_SQRTPD256,
28712 IX86_BUILTIN_SQRTPS256,
28713 IX86_BUILTIN_SQRTPS_NR256,
28714 IX86_BUILTIN_RSQRTPS256,
28715 IX86_BUILTIN_RSQRTPS_NR256,
28717 IX86_BUILTIN_RCPPS256,
28719 IX86_BUILTIN_ROUNDPD256,
28720 IX86_BUILTIN_ROUNDPS256,
28722 IX86_BUILTIN_FLOORPD256,
28723 IX86_BUILTIN_CEILPD256,
28724 IX86_BUILTIN_TRUNCPD256,
28725 IX86_BUILTIN_RINTPD256,
28726 IX86_BUILTIN_ROUNDPD_AZ256,
28728 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28729 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28730 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28732 IX86_BUILTIN_FLOORPS256,
28733 IX86_BUILTIN_CEILPS256,
28734 IX86_BUILTIN_TRUNCPS256,
28735 IX86_BUILTIN_RINTPS256,
28736 IX86_BUILTIN_ROUNDPS_AZ256,
28738 IX86_BUILTIN_FLOORPS_SFIX256,
28739 IX86_BUILTIN_CEILPS_SFIX256,
28740 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28742 IX86_BUILTIN_UNPCKHPD256,
28743 IX86_BUILTIN_UNPCKLPD256,
28744 IX86_BUILTIN_UNPCKHPS256,
28745 IX86_BUILTIN_UNPCKLPS256,
28747 IX86_BUILTIN_SI256_SI,
28748 IX86_BUILTIN_PS256_PS,
28749 IX86_BUILTIN_PD256_PD,
28750 IX86_BUILTIN_SI_SI256,
28751 IX86_BUILTIN_PS_PS256,
28752 IX86_BUILTIN_PD_PD256,
28754 IX86_BUILTIN_VTESTZPD,
28755 IX86_BUILTIN_VTESTCPD,
28756 IX86_BUILTIN_VTESTNZCPD,
28757 IX86_BUILTIN_VTESTZPS,
28758 IX86_BUILTIN_VTESTCPS,
28759 IX86_BUILTIN_VTESTNZCPS,
28760 IX86_BUILTIN_VTESTZPD256,
28761 IX86_BUILTIN_VTESTCPD256,
28762 IX86_BUILTIN_VTESTNZCPD256,
28763 IX86_BUILTIN_VTESTZPS256,
28764 IX86_BUILTIN_VTESTCPS256,
28765 IX86_BUILTIN_VTESTNZCPS256,
28766 IX86_BUILTIN_PTESTZ256,
28767 IX86_BUILTIN_PTESTC256,
28768 IX86_BUILTIN_PTESTNZC256,
28770 IX86_BUILTIN_MOVMSKPD256,
28771 IX86_BUILTIN_MOVMSKPS256,
28773 /* AVX2 */
28774 IX86_BUILTIN_MPSADBW256,
28775 IX86_BUILTIN_PABSB256,
28776 IX86_BUILTIN_PABSW256,
28777 IX86_BUILTIN_PABSD256,
28778 IX86_BUILTIN_PACKSSDW256,
28779 IX86_BUILTIN_PACKSSWB256,
28780 IX86_BUILTIN_PACKUSDW256,
28781 IX86_BUILTIN_PACKUSWB256,
28782 IX86_BUILTIN_PADDB256,
28783 IX86_BUILTIN_PADDW256,
28784 IX86_BUILTIN_PADDD256,
28785 IX86_BUILTIN_PADDQ256,
28786 IX86_BUILTIN_PADDSB256,
28787 IX86_BUILTIN_PADDSW256,
28788 IX86_BUILTIN_PADDUSB256,
28789 IX86_BUILTIN_PADDUSW256,
28790 IX86_BUILTIN_PALIGNR256,
28791 IX86_BUILTIN_AND256I,
28792 IX86_BUILTIN_ANDNOT256I,
28793 IX86_BUILTIN_PAVGB256,
28794 IX86_BUILTIN_PAVGW256,
28795 IX86_BUILTIN_PBLENDVB256,
28796 IX86_BUILTIN_PBLENDVW256,
28797 IX86_BUILTIN_PCMPEQB256,
28798 IX86_BUILTIN_PCMPEQW256,
28799 IX86_BUILTIN_PCMPEQD256,
28800 IX86_BUILTIN_PCMPEQQ256,
28801 IX86_BUILTIN_PCMPGTB256,
28802 IX86_BUILTIN_PCMPGTW256,
28803 IX86_BUILTIN_PCMPGTD256,
28804 IX86_BUILTIN_PCMPGTQ256,
28805 IX86_BUILTIN_PHADDW256,
28806 IX86_BUILTIN_PHADDD256,
28807 IX86_BUILTIN_PHADDSW256,
28808 IX86_BUILTIN_PHSUBW256,
28809 IX86_BUILTIN_PHSUBD256,
28810 IX86_BUILTIN_PHSUBSW256,
28811 IX86_BUILTIN_PMADDUBSW256,
28812 IX86_BUILTIN_PMADDWD256,
28813 IX86_BUILTIN_PMAXSB256,
28814 IX86_BUILTIN_PMAXSW256,
28815 IX86_BUILTIN_PMAXSD256,
28816 IX86_BUILTIN_PMAXUB256,
28817 IX86_BUILTIN_PMAXUW256,
28818 IX86_BUILTIN_PMAXUD256,
28819 IX86_BUILTIN_PMINSB256,
28820 IX86_BUILTIN_PMINSW256,
28821 IX86_BUILTIN_PMINSD256,
28822 IX86_BUILTIN_PMINUB256,
28823 IX86_BUILTIN_PMINUW256,
28824 IX86_BUILTIN_PMINUD256,
28825 IX86_BUILTIN_PMOVMSKB256,
28826 IX86_BUILTIN_PMOVSXBW256,
28827 IX86_BUILTIN_PMOVSXBD256,
28828 IX86_BUILTIN_PMOVSXBQ256,
28829 IX86_BUILTIN_PMOVSXWD256,
28830 IX86_BUILTIN_PMOVSXWQ256,
28831 IX86_BUILTIN_PMOVSXDQ256,
28832 IX86_BUILTIN_PMOVZXBW256,
28833 IX86_BUILTIN_PMOVZXBD256,
28834 IX86_BUILTIN_PMOVZXBQ256,
28835 IX86_BUILTIN_PMOVZXWD256,
28836 IX86_BUILTIN_PMOVZXWQ256,
28837 IX86_BUILTIN_PMOVZXDQ256,
28838 IX86_BUILTIN_PMULDQ256,
28839 IX86_BUILTIN_PMULHRSW256,
28840 IX86_BUILTIN_PMULHUW256,
28841 IX86_BUILTIN_PMULHW256,
28842 IX86_BUILTIN_PMULLW256,
28843 IX86_BUILTIN_PMULLD256,
28844 IX86_BUILTIN_PMULUDQ256,
28845 IX86_BUILTIN_POR256,
28846 IX86_BUILTIN_PSADBW256,
28847 IX86_BUILTIN_PSHUFB256,
28848 IX86_BUILTIN_PSHUFD256,
28849 IX86_BUILTIN_PSHUFHW256,
28850 IX86_BUILTIN_PSHUFLW256,
28851 IX86_BUILTIN_PSIGNB256,
28852 IX86_BUILTIN_PSIGNW256,
28853 IX86_BUILTIN_PSIGND256,
28854 IX86_BUILTIN_PSLLDQI256,
28855 IX86_BUILTIN_PSLLWI256,
28856 IX86_BUILTIN_PSLLW256,
28857 IX86_BUILTIN_PSLLDI256,
28858 IX86_BUILTIN_PSLLD256,
28859 IX86_BUILTIN_PSLLQI256,
28860 IX86_BUILTIN_PSLLQ256,
28861 IX86_BUILTIN_PSRAWI256,
28862 IX86_BUILTIN_PSRAW256,
28863 IX86_BUILTIN_PSRADI256,
28864 IX86_BUILTIN_PSRAD256,
28865 IX86_BUILTIN_PSRLDQI256,
28866 IX86_BUILTIN_PSRLWI256,
28867 IX86_BUILTIN_PSRLW256,
28868 IX86_BUILTIN_PSRLDI256,
28869 IX86_BUILTIN_PSRLD256,
28870 IX86_BUILTIN_PSRLQI256,
28871 IX86_BUILTIN_PSRLQ256,
28872 IX86_BUILTIN_PSUBB256,
28873 IX86_BUILTIN_PSUBW256,
28874 IX86_BUILTIN_PSUBD256,
28875 IX86_BUILTIN_PSUBQ256,
28876 IX86_BUILTIN_PSUBSB256,
28877 IX86_BUILTIN_PSUBSW256,
28878 IX86_BUILTIN_PSUBUSB256,
28879 IX86_BUILTIN_PSUBUSW256,
28880 IX86_BUILTIN_PUNPCKHBW256,
28881 IX86_BUILTIN_PUNPCKHWD256,
28882 IX86_BUILTIN_PUNPCKHDQ256,
28883 IX86_BUILTIN_PUNPCKHQDQ256,
28884 IX86_BUILTIN_PUNPCKLBW256,
28885 IX86_BUILTIN_PUNPCKLWD256,
28886 IX86_BUILTIN_PUNPCKLDQ256,
28887 IX86_BUILTIN_PUNPCKLQDQ256,
28888 IX86_BUILTIN_PXOR256,
28889 IX86_BUILTIN_MOVNTDQA256,
28890 IX86_BUILTIN_VBROADCASTSS_PS,
28891 IX86_BUILTIN_VBROADCASTSS_PS256,
28892 IX86_BUILTIN_VBROADCASTSD_PD256,
28893 IX86_BUILTIN_VBROADCASTSI256,
28894 IX86_BUILTIN_PBLENDD256,
28895 IX86_BUILTIN_PBLENDD128,
28896 IX86_BUILTIN_PBROADCASTB256,
28897 IX86_BUILTIN_PBROADCASTW256,
28898 IX86_BUILTIN_PBROADCASTD256,
28899 IX86_BUILTIN_PBROADCASTQ256,
28900 IX86_BUILTIN_PBROADCASTB128,
28901 IX86_BUILTIN_PBROADCASTW128,
28902 IX86_BUILTIN_PBROADCASTD128,
28903 IX86_BUILTIN_PBROADCASTQ128,
28904 IX86_BUILTIN_VPERMVARSI256,
28905 IX86_BUILTIN_VPERMDF256,
28906 IX86_BUILTIN_VPERMVARSF256,
28907 IX86_BUILTIN_VPERMDI256,
28908 IX86_BUILTIN_VPERMTI256,
28909 IX86_BUILTIN_VEXTRACT128I256,
28910 IX86_BUILTIN_VINSERT128I256,
28911 IX86_BUILTIN_MASKLOADD,
28912 IX86_BUILTIN_MASKLOADQ,
28913 IX86_BUILTIN_MASKLOADD256,
28914 IX86_BUILTIN_MASKLOADQ256,
28915 IX86_BUILTIN_MASKSTORED,
28916 IX86_BUILTIN_MASKSTOREQ,
28917 IX86_BUILTIN_MASKSTORED256,
28918 IX86_BUILTIN_MASKSTOREQ256,
28919 IX86_BUILTIN_PSLLVV4DI,
28920 IX86_BUILTIN_PSLLVV2DI,
28921 IX86_BUILTIN_PSLLVV8SI,
28922 IX86_BUILTIN_PSLLVV4SI,
28923 IX86_BUILTIN_PSRAVV8SI,
28924 IX86_BUILTIN_PSRAVV4SI,
28925 IX86_BUILTIN_PSRLVV4DI,
28926 IX86_BUILTIN_PSRLVV2DI,
28927 IX86_BUILTIN_PSRLVV8SI,
28928 IX86_BUILTIN_PSRLVV4SI,
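  /* Gather builtins: SIV denotes a 32-bit (SImode) index vector, DIV a
     64-bit (DImode) index vector; the trailing mode gives the element
     type of the gathered data.  */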
28930 IX86_BUILTIN_GATHERSIV2DF,
28931 IX86_BUILTIN_GATHERSIV4DF,
28932 IX86_BUILTIN_GATHERDIV2DF,
28933 IX86_BUILTIN_GATHERDIV4DF,
28934 IX86_BUILTIN_GATHERSIV4SF,
28935 IX86_BUILTIN_GATHERSIV8SF,
28936 IX86_BUILTIN_GATHERDIV4SF,
28937 IX86_BUILTIN_GATHERDIV8SF,
28938 IX86_BUILTIN_GATHERSIV2DI,
28939 IX86_BUILTIN_GATHERSIV4DI,
28940 IX86_BUILTIN_GATHERDIV2DI,
28941 IX86_BUILTIN_GATHERDIV4DI,
28942 IX86_BUILTIN_GATHERSIV4SI,
28943 IX86_BUILTIN_GATHERSIV8SI,
28944 IX86_BUILTIN_GATHERDIV4SI,
28945 IX86_BUILTIN_GATHERDIV8SI,
28947 /* AVX512F */
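  /* Suffix conventions used by the AVX-512 entries below: _MASK marks a
     merge-masking form, _MASKZ a zero-masking form, _MASK3 an FMA form
     whose mask pass-through is the third operand, _ROUND a form taking
     an explicit rounding-mode immediate, and _GPR a broadcast sourced
     from a general-purpose register.  */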
28948 IX86_BUILTIN_SI512_SI256,
28949 IX86_BUILTIN_PD512_PD256,
28950 IX86_BUILTIN_PS512_PS256,
28951 IX86_BUILTIN_SI512_SI,
28952 IX86_BUILTIN_PD512_PD,
28953 IX86_BUILTIN_PS512_PS,
28954 IX86_BUILTIN_ADDPD512,
28955 IX86_BUILTIN_ADDPS512,
28956 IX86_BUILTIN_ADDSD_ROUND,
28957 IX86_BUILTIN_ADDSS_ROUND,
28958 IX86_BUILTIN_ALIGND512,
28959 IX86_BUILTIN_ALIGNQ512,
28960 IX86_BUILTIN_BLENDMD512,
28961 IX86_BUILTIN_BLENDMPD512,
28962 IX86_BUILTIN_BLENDMPS512,
28963 IX86_BUILTIN_BLENDMQ512,
28964 IX86_BUILTIN_BROADCASTF32X4_512,
28965 IX86_BUILTIN_BROADCASTF64X4_512,
28966 IX86_BUILTIN_BROADCASTI32X4_512,
28967 IX86_BUILTIN_BROADCASTI64X4_512,
28968 IX86_BUILTIN_BROADCASTSD512,
28969 IX86_BUILTIN_BROADCASTSS512,
28970 IX86_BUILTIN_CMPD512,
28971 IX86_BUILTIN_CMPPD512,
28972 IX86_BUILTIN_CMPPS512,
28973 IX86_BUILTIN_CMPQ512,
28974 IX86_BUILTIN_CMPSD_MASK,
28975 IX86_BUILTIN_CMPSS_MASK,
28976 IX86_BUILTIN_COMIDF,
28977 IX86_BUILTIN_COMISF,
28978 IX86_BUILTIN_COMPRESSPD512,
28979 IX86_BUILTIN_COMPRESSPDSTORE512,
28980 IX86_BUILTIN_COMPRESSPS512,
28981 IX86_BUILTIN_COMPRESSPSSTORE512,
28982 IX86_BUILTIN_CVTDQ2PD512,
28983 IX86_BUILTIN_CVTDQ2PS512,
28984 IX86_BUILTIN_CVTPD2DQ512,
28985 IX86_BUILTIN_CVTPD2PS512,
28986 IX86_BUILTIN_CVTPD2UDQ512,
28987 IX86_BUILTIN_CVTPH2PS512,
28988 IX86_BUILTIN_CVTPS2DQ512,
28989 IX86_BUILTIN_CVTPS2PD512,
28990 IX86_BUILTIN_CVTPS2PH512,
28991 IX86_BUILTIN_CVTPS2UDQ512,
28992 IX86_BUILTIN_CVTSD2SS_ROUND,
28993 IX86_BUILTIN_CVTSI2SD64,
28994 IX86_BUILTIN_CVTSI2SS32,
28995 IX86_BUILTIN_CVTSI2SS64,
28996 IX86_BUILTIN_CVTSS2SD_ROUND,
28997 IX86_BUILTIN_CVTTPD2DQ512,
28998 IX86_BUILTIN_CVTTPD2UDQ512,
28999 IX86_BUILTIN_CVTTPS2DQ512,
29000 IX86_BUILTIN_CVTTPS2UDQ512,
29001 IX86_BUILTIN_CVTUDQ2PD512,
29002 IX86_BUILTIN_CVTUDQ2PS512,
29003 IX86_BUILTIN_CVTUSI2SD32,
29004 IX86_BUILTIN_CVTUSI2SD64,
29005 IX86_BUILTIN_CVTUSI2SS32,
29006 IX86_BUILTIN_CVTUSI2SS64,
29007 IX86_BUILTIN_DIVPD512,
29008 IX86_BUILTIN_DIVPS512,
29009 IX86_BUILTIN_DIVSD_ROUND,
29010 IX86_BUILTIN_DIVSS_ROUND,
29011 IX86_BUILTIN_EXPANDPD512,
29012 IX86_BUILTIN_EXPANDPD512Z,
29013 IX86_BUILTIN_EXPANDPDLOAD512,
29014 IX86_BUILTIN_EXPANDPDLOAD512Z,
29015 IX86_BUILTIN_EXPANDPS512,
29016 IX86_BUILTIN_EXPANDPS512Z,
29017 IX86_BUILTIN_EXPANDPSLOAD512,
29018 IX86_BUILTIN_EXPANDPSLOAD512Z,
29019 IX86_BUILTIN_EXTRACTF32X4,
29020 IX86_BUILTIN_EXTRACTF64X4,
29021 IX86_BUILTIN_EXTRACTI32X4,
29022 IX86_BUILTIN_EXTRACTI64X4,
29023 IX86_BUILTIN_FIXUPIMMPD512_MASK,
29024 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29025 IX86_BUILTIN_FIXUPIMMPS512_MASK,
29026 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29027 IX86_BUILTIN_FIXUPIMMSD128_MASK,
29028 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29029 IX86_BUILTIN_FIXUPIMMSS128_MASK,
29030 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29031 IX86_BUILTIN_GETEXPPD512,
29032 IX86_BUILTIN_GETEXPPS512,
29033 IX86_BUILTIN_GETEXPSD128,
29034 IX86_BUILTIN_GETEXPSS128,
29035 IX86_BUILTIN_GETMANTPD512,
29036 IX86_BUILTIN_GETMANTPS512,
29037 IX86_BUILTIN_GETMANTSD128,
29038 IX86_BUILTIN_GETMANTSS128,
29039 IX86_BUILTIN_INSERTF32X4,
29040 IX86_BUILTIN_INSERTF64X4,
29041 IX86_BUILTIN_INSERTI32X4,
29042 IX86_BUILTIN_INSERTI64X4,
29043 IX86_BUILTIN_LOADAPD512,
29044 IX86_BUILTIN_LOADAPS512,
29045 IX86_BUILTIN_LOADDQUDI512,
29046 IX86_BUILTIN_LOADDQUSI512,
29047 IX86_BUILTIN_LOADUPD512,
29048 IX86_BUILTIN_LOADUPS512,
29049 IX86_BUILTIN_MAXPD512,
29050 IX86_BUILTIN_MAXPS512,
29051 IX86_BUILTIN_MAXSD_ROUND,
29052 IX86_BUILTIN_MAXSS_ROUND,
29053 IX86_BUILTIN_MINPD512,
29054 IX86_BUILTIN_MINPS512,
29055 IX86_BUILTIN_MINSD_ROUND,
29056 IX86_BUILTIN_MINSS_ROUND,
29057 IX86_BUILTIN_MOVAPD512,
29058 IX86_BUILTIN_MOVAPS512,
29059 IX86_BUILTIN_MOVDDUP512,
29060 IX86_BUILTIN_MOVDQA32LOAD512,
29061 IX86_BUILTIN_MOVDQA32STORE512,
29062 IX86_BUILTIN_MOVDQA32_512,
29063 IX86_BUILTIN_MOVDQA64LOAD512,
29064 IX86_BUILTIN_MOVDQA64STORE512,
29065 IX86_BUILTIN_MOVDQA64_512,
29066 IX86_BUILTIN_MOVNTDQ512,
29067 IX86_BUILTIN_MOVNTDQA512,
29068 IX86_BUILTIN_MOVNTPD512,
29069 IX86_BUILTIN_MOVNTPS512,
29070 IX86_BUILTIN_MOVSHDUP512,
29071 IX86_BUILTIN_MOVSLDUP512,
29072 IX86_BUILTIN_MULPD512,
29073 IX86_BUILTIN_MULPS512,
29074 IX86_BUILTIN_MULSD_ROUND,
29075 IX86_BUILTIN_MULSS_ROUND,
29076 IX86_BUILTIN_PABSD512,
29077 IX86_BUILTIN_PABSQ512,
29078 IX86_BUILTIN_PADDD512,
29079 IX86_BUILTIN_PADDQ512,
29080 IX86_BUILTIN_PANDD512,
29081 IX86_BUILTIN_PANDND512,
29082 IX86_BUILTIN_PANDNQ512,
29083 IX86_BUILTIN_PANDQ512,
29084 IX86_BUILTIN_PBROADCASTD512,
29085 IX86_BUILTIN_PBROADCASTD512_GPR,
29086 IX86_BUILTIN_PBROADCASTMB512,
29087 IX86_BUILTIN_PBROADCASTMW512,
29088 IX86_BUILTIN_PBROADCASTQ512,
29089 IX86_BUILTIN_PBROADCASTQ512_GPR,
29090 IX86_BUILTIN_PCMPEQD512_MASK,
29091 IX86_BUILTIN_PCMPEQQ512_MASK,
29092 IX86_BUILTIN_PCMPGTD512_MASK,
29093 IX86_BUILTIN_PCMPGTQ512_MASK,
29094 IX86_BUILTIN_PCOMPRESSD512,
29095 IX86_BUILTIN_PCOMPRESSDSTORE512,
29096 IX86_BUILTIN_PCOMPRESSQ512,
29097 IX86_BUILTIN_PCOMPRESSQSTORE512,
29098 IX86_BUILTIN_PEXPANDD512,
29099 IX86_BUILTIN_PEXPANDD512Z,
29100 IX86_BUILTIN_PEXPANDDLOAD512,
29101 IX86_BUILTIN_PEXPANDDLOAD512Z,
29102 IX86_BUILTIN_PEXPANDQ512,
29103 IX86_BUILTIN_PEXPANDQ512Z,
29104 IX86_BUILTIN_PEXPANDQLOAD512,
29105 IX86_BUILTIN_PEXPANDQLOAD512Z,
29106 IX86_BUILTIN_PMAXSD512,
29107 IX86_BUILTIN_PMAXSQ512,
29108 IX86_BUILTIN_PMAXUD512,
29109 IX86_BUILTIN_PMAXUQ512,
29110 IX86_BUILTIN_PMINSD512,
29111 IX86_BUILTIN_PMINSQ512,
29112 IX86_BUILTIN_PMINUD512,
29113 IX86_BUILTIN_PMINUQ512,
29114 IX86_BUILTIN_PMOVDB512,
29115 IX86_BUILTIN_PMOVDB512_MEM,
29116 IX86_BUILTIN_PMOVDW512,
29117 IX86_BUILTIN_PMOVDW512_MEM,
29118 IX86_BUILTIN_PMOVQB512,
29119 IX86_BUILTIN_PMOVQB512_MEM,
29120 IX86_BUILTIN_PMOVQD512,
29121 IX86_BUILTIN_PMOVQD512_MEM,
29122 IX86_BUILTIN_PMOVQW512,
29123 IX86_BUILTIN_PMOVQW512_MEM,
29124 IX86_BUILTIN_PMOVSDB512,
29125 IX86_BUILTIN_PMOVSDB512_MEM,
29126 IX86_BUILTIN_PMOVSDW512,
29127 IX86_BUILTIN_PMOVSDW512_MEM,
29128 IX86_BUILTIN_PMOVSQB512,
29129 IX86_BUILTIN_PMOVSQB512_MEM,
29130 IX86_BUILTIN_PMOVSQD512,
29131 IX86_BUILTIN_PMOVSQD512_MEM,
29132 IX86_BUILTIN_PMOVSQW512,
29133 IX86_BUILTIN_PMOVSQW512_MEM,
29134 IX86_BUILTIN_PMOVSXBD512,
29135 IX86_BUILTIN_PMOVSXBQ512,
29136 IX86_BUILTIN_PMOVSXDQ512,
29137 IX86_BUILTIN_PMOVSXWD512,
29138 IX86_BUILTIN_PMOVSXWQ512,
29139 IX86_BUILTIN_PMOVUSDB512,
29140 IX86_BUILTIN_PMOVUSDB512_MEM,
29141 IX86_BUILTIN_PMOVUSDW512,
29142 IX86_BUILTIN_PMOVUSDW512_MEM,
29143 IX86_BUILTIN_PMOVUSQB512,
29144 IX86_BUILTIN_PMOVUSQB512_MEM,
29145 IX86_BUILTIN_PMOVUSQD512,
29146 IX86_BUILTIN_PMOVUSQD512_MEM,
29147 IX86_BUILTIN_PMOVUSQW512,
29148 IX86_BUILTIN_PMOVUSQW512_MEM,
29149 IX86_BUILTIN_PMOVZXBD512,
29150 IX86_BUILTIN_PMOVZXBQ512,
29151 IX86_BUILTIN_PMOVZXDQ512,
29152 IX86_BUILTIN_PMOVZXWD512,
29153 IX86_BUILTIN_PMOVZXWQ512,
29154 IX86_BUILTIN_PMULDQ512,
29155 IX86_BUILTIN_PMULLD512,
29156 IX86_BUILTIN_PMULUDQ512,
29157 IX86_BUILTIN_PORD512,
29158 IX86_BUILTIN_PORQ512,
29159 IX86_BUILTIN_PROLD512,
29160 IX86_BUILTIN_PROLQ512,
29161 IX86_BUILTIN_PROLVD512,
29162 IX86_BUILTIN_PROLVQ512,
29163 IX86_BUILTIN_PRORD512,
29164 IX86_BUILTIN_PRORQ512,
29165 IX86_BUILTIN_PRORVD512,
29166 IX86_BUILTIN_PRORVQ512,
29167 IX86_BUILTIN_PSHUFD512,
29168 IX86_BUILTIN_PSLLD512,
29169 IX86_BUILTIN_PSLLDI512,
29170 IX86_BUILTIN_PSLLQ512,
29171 IX86_BUILTIN_PSLLQI512,
29172 IX86_BUILTIN_PSLLVV16SI,
29173 IX86_BUILTIN_PSLLVV8DI,
29174 IX86_BUILTIN_PSRAD512,
29175 IX86_BUILTIN_PSRADI512,
29176 IX86_BUILTIN_PSRAQ512,
29177 IX86_BUILTIN_PSRAQI512,
29178 IX86_BUILTIN_PSRAVV16SI,
29179 IX86_BUILTIN_PSRAVV8DI,
29180 IX86_BUILTIN_PSRLD512,
29181 IX86_BUILTIN_PSRLDI512,
29182 IX86_BUILTIN_PSRLQ512,
29183 IX86_BUILTIN_PSRLQI512,
29184 IX86_BUILTIN_PSRLVV16SI,
29185 IX86_BUILTIN_PSRLVV8DI,
29186 IX86_BUILTIN_PSUBD512,
29187 IX86_BUILTIN_PSUBQ512,
29188 IX86_BUILTIN_PTESTMD512,
29189 IX86_BUILTIN_PTESTMQ512,
29190 IX86_BUILTIN_PTESTNMD512,
29191 IX86_BUILTIN_PTESTNMQ512,
29192 IX86_BUILTIN_PUNPCKHDQ512,
29193 IX86_BUILTIN_PUNPCKHQDQ512,
29194 IX86_BUILTIN_PUNPCKLDQ512,
29195 IX86_BUILTIN_PUNPCKLQDQ512,
29196 IX86_BUILTIN_PXORD512,
29197 IX86_BUILTIN_PXORQ512,
29198 IX86_BUILTIN_RCP14PD512,
29199 IX86_BUILTIN_RCP14PS512,
29200 IX86_BUILTIN_RCP14SD,
29201 IX86_BUILTIN_RCP14SS,
29202 IX86_BUILTIN_RNDSCALEPD,
29203 IX86_BUILTIN_RNDSCALEPS,
29204 IX86_BUILTIN_RNDSCALESD,
29205 IX86_BUILTIN_RNDSCALESS,
29206 IX86_BUILTIN_RSQRT14PD512,
29207 IX86_BUILTIN_RSQRT14PS512,
29208 IX86_BUILTIN_RSQRT14SD,
29209 IX86_BUILTIN_RSQRT14SS,
29210 IX86_BUILTIN_SCALEFPD512,
29211 IX86_BUILTIN_SCALEFPS512,
29212 IX86_BUILTIN_SCALEFSD,
29213 IX86_BUILTIN_SCALEFSS,
29214 IX86_BUILTIN_SHUFPD512,
29215 IX86_BUILTIN_SHUFPS512,
29216 IX86_BUILTIN_SHUF_F32x4,
29217 IX86_BUILTIN_SHUF_F64x2,
29218 IX86_BUILTIN_SHUF_I32x4,
29219 IX86_BUILTIN_SHUF_I64x2,
29220 IX86_BUILTIN_SQRTPD512,
29221 IX86_BUILTIN_SQRTPD512_MASK,
29222 IX86_BUILTIN_SQRTPS512_MASK,
29223 IX86_BUILTIN_SQRTPS_NR512,
29224 IX86_BUILTIN_SQRTSD_ROUND,
29225 IX86_BUILTIN_SQRTSS_ROUND,
29226 IX86_BUILTIN_STOREAPD512,
29227 IX86_BUILTIN_STOREAPS512,
29228 IX86_BUILTIN_STOREDQUDI512,
29229 IX86_BUILTIN_STOREDQUSI512,
29230 IX86_BUILTIN_STOREUPD512,
29231 IX86_BUILTIN_STOREUPS512,
29232 IX86_BUILTIN_SUBPD512,
29233 IX86_BUILTIN_SUBPS512,
29234 IX86_BUILTIN_SUBSD_ROUND,
29235 IX86_BUILTIN_SUBSS_ROUND,
29236 IX86_BUILTIN_UCMPD512,
29237 IX86_BUILTIN_UCMPQ512,
29238 IX86_BUILTIN_UNPCKHPD512,
29239 IX86_BUILTIN_UNPCKHPS512,
29240 IX86_BUILTIN_UNPCKLPD512,
29241 IX86_BUILTIN_UNPCKLPS512,
29242 IX86_BUILTIN_VCVTSD2SI32,
29243 IX86_BUILTIN_VCVTSD2SI64,
29244 IX86_BUILTIN_VCVTSD2USI32,
29245 IX86_BUILTIN_VCVTSD2USI64,
29246 IX86_BUILTIN_VCVTSS2SI32,
29247 IX86_BUILTIN_VCVTSS2SI64,
29248 IX86_BUILTIN_VCVTSS2USI32,
29249 IX86_BUILTIN_VCVTSS2USI64,
29250 IX86_BUILTIN_VCVTTSD2SI32,
29251 IX86_BUILTIN_VCVTTSD2SI64,
29252 IX86_BUILTIN_VCVTTSD2USI32,
29253 IX86_BUILTIN_VCVTTSD2USI64,
29254 IX86_BUILTIN_VCVTTSS2SI32,
29255 IX86_BUILTIN_VCVTTSS2SI64,
29256 IX86_BUILTIN_VCVTTSS2USI32,
29257 IX86_BUILTIN_VCVTTSS2USI64,
29258 IX86_BUILTIN_VFMADDPD512_MASK,
29259 IX86_BUILTIN_VFMADDPD512_MASK3,
29260 IX86_BUILTIN_VFMADDPD512_MASKZ,
29261 IX86_BUILTIN_VFMADDPS512_MASK,
29262 IX86_BUILTIN_VFMADDPS512_MASK3,
29263 IX86_BUILTIN_VFMADDPS512_MASKZ,
29264 IX86_BUILTIN_VFMADDSD3_ROUND,
29265 IX86_BUILTIN_VFMADDSS3_ROUND,
29266 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29267 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29268 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29269 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29270 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29271 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29272 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29273 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29274 IX86_BUILTIN_VFMSUBPD512_MASK3,
29275 IX86_BUILTIN_VFMSUBPS512_MASK3,
29276 IX86_BUILTIN_VFMSUBSD3_MASK3,
29277 IX86_BUILTIN_VFMSUBSS3_MASK3,
29278 IX86_BUILTIN_VFNMADDPD512_MASK,
29279 IX86_BUILTIN_VFNMADDPS512_MASK,
29280 IX86_BUILTIN_VFNMSUBPD512_MASK,
29281 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29282 IX86_BUILTIN_VFNMSUBPS512_MASK,
29283 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29284 IX86_BUILTIN_VPCLZCNTD512,
29285 IX86_BUILTIN_VPCLZCNTQ512,
29286 IX86_BUILTIN_VPCONFLICTD512,
29287 IX86_BUILTIN_VPCONFLICTQ512,
29288 IX86_BUILTIN_VPERMDF512,
29289 IX86_BUILTIN_VPERMDI512,
29290 IX86_BUILTIN_VPERMI2VARD512,
29291 IX86_BUILTIN_VPERMI2VARPD512,
29292 IX86_BUILTIN_VPERMI2VARPS512,
29293 IX86_BUILTIN_VPERMI2VARQ512,
29294 IX86_BUILTIN_VPERMILPD512,
29295 IX86_BUILTIN_VPERMILPS512,
29296 IX86_BUILTIN_VPERMILVARPD512,
29297 IX86_BUILTIN_VPERMILVARPS512,
29298 IX86_BUILTIN_VPERMT2VARD512,
29299 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29300 IX86_BUILTIN_VPERMT2VARPD512,
29301 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29302 IX86_BUILTIN_VPERMT2VARPS512,
29303 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29304 IX86_BUILTIN_VPERMT2VARQ512,
29305 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29306 IX86_BUILTIN_VPERMVARDF512,
29307 IX86_BUILTIN_VPERMVARDI512,
29308 IX86_BUILTIN_VPERMVARSF512,
29309 IX86_BUILTIN_VPERMVARSI512,
29310 IX86_BUILTIN_VTERNLOGD512_MASK,
29311 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29312 IX86_BUILTIN_VTERNLOGQ512_MASK,
29313 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29315 /* Mask arithmetic operations */
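  /* These operate on the 16-bit opmask (k) registers, i.e. the
     __mmask16 type introduced with AVX-512F.  */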
29316 IX86_BUILTIN_KAND16,
29317 IX86_BUILTIN_KANDN16,
29318 IX86_BUILTIN_KNOT16,
29319 IX86_BUILTIN_KOR16,
29320 IX86_BUILTIN_KORTESTC16,
29321 IX86_BUILTIN_KORTESTZ16,
29322 IX86_BUILTIN_KUNPCKBW,
29323 IX86_BUILTIN_KXNOR16,
29324 IX86_BUILTIN_KXOR16,
29325 IX86_BUILTIN_KMOV16,
29327 /* AVX512VL. */
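  /* 128-bit and 256-bit vector-length forms of the AVX-512 operations.
     A _MEM suffix marks a truncating PMOV* variant that stores its
     result directly to memory.  */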
29328 IX86_BUILTIN_PMOVUSQD256_MEM,
29329 IX86_BUILTIN_PMOVUSQD128_MEM,
29330 IX86_BUILTIN_PMOVSQD256_MEM,
29331 IX86_BUILTIN_PMOVSQD128_MEM,
29332 IX86_BUILTIN_PMOVQD256_MEM,
29333 IX86_BUILTIN_PMOVQD128_MEM,
29334 IX86_BUILTIN_PMOVUSQW256_MEM,
29335 IX86_BUILTIN_PMOVUSQW128_MEM,
29336 IX86_BUILTIN_PMOVSQW256_MEM,
29337 IX86_BUILTIN_PMOVSQW128_MEM,
29338 IX86_BUILTIN_PMOVQW256_MEM,
29339 IX86_BUILTIN_PMOVQW128_MEM,
29340 IX86_BUILTIN_PMOVUSQB256_MEM,
29341 IX86_BUILTIN_PMOVUSQB128_MEM,
29342 IX86_BUILTIN_PMOVSQB256_MEM,
29343 IX86_BUILTIN_PMOVSQB128_MEM,
29344 IX86_BUILTIN_PMOVQB256_MEM,
29345 IX86_BUILTIN_PMOVQB128_MEM,
29346 IX86_BUILTIN_PMOVUSDW256_MEM,
29347 IX86_BUILTIN_PMOVUSDW128_MEM,
29348 IX86_BUILTIN_PMOVSDW256_MEM,
29349 IX86_BUILTIN_PMOVSDW128_MEM,
29350 IX86_BUILTIN_PMOVDW256_MEM,
29351 IX86_BUILTIN_PMOVDW128_MEM,
29352 IX86_BUILTIN_PMOVUSDB256_MEM,
29353 IX86_BUILTIN_PMOVUSDB128_MEM,
29354 IX86_BUILTIN_PMOVSDB256_MEM,
29355 IX86_BUILTIN_PMOVSDB128_MEM,
29356 IX86_BUILTIN_PMOVDB256_MEM,
29357 IX86_BUILTIN_PMOVDB128_MEM,
29358 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29359 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29360 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29361 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29362 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29363 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29364 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29365 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29366 IX86_BUILTIN_LOADAPD256_MASK,
29367 IX86_BUILTIN_LOADAPD128_MASK,
29368 IX86_BUILTIN_LOADAPS256_MASK,
29369 IX86_BUILTIN_LOADAPS128_MASK,
29370 IX86_BUILTIN_STOREAPD256_MASK,
29371 IX86_BUILTIN_STOREAPD128_MASK,
29372 IX86_BUILTIN_STOREAPS256_MASK,
29373 IX86_BUILTIN_STOREAPS128_MASK,
29374 IX86_BUILTIN_LOADUPD256_MASK,
29375 IX86_BUILTIN_LOADUPD128_MASK,
29376 IX86_BUILTIN_LOADUPS256_MASK,
29377 IX86_BUILTIN_LOADUPS128_MASK,
29378 IX86_BUILTIN_STOREUPD256_MASK,
29379 IX86_BUILTIN_STOREUPD128_MASK,
29380 IX86_BUILTIN_STOREUPS256_MASK,
29381 IX86_BUILTIN_STOREUPS128_MASK,
29382 IX86_BUILTIN_LOADDQUDI256_MASK,
29383 IX86_BUILTIN_LOADDQUDI128_MASK,
29384 IX86_BUILTIN_LOADDQUSI256_MASK,
29385 IX86_BUILTIN_LOADDQUSI128_MASK,
29386 IX86_BUILTIN_LOADDQUHI256_MASK,
29387 IX86_BUILTIN_LOADDQUHI128_MASK,
29388 IX86_BUILTIN_LOADDQUQI256_MASK,
29389 IX86_BUILTIN_LOADDQUQI128_MASK,
29390 IX86_BUILTIN_STOREDQUDI256_MASK,
29391 IX86_BUILTIN_STOREDQUDI128_MASK,
29392 IX86_BUILTIN_STOREDQUSI256_MASK,
29393 IX86_BUILTIN_STOREDQUSI128_MASK,
29394 IX86_BUILTIN_STOREDQUHI256_MASK,
29395 IX86_BUILTIN_STOREDQUHI128_MASK,
29396 IX86_BUILTIN_STOREDQUQI256_MASK,
29397 IX86_BUILTIN_STOREDQUQI128_MASK,
29398 IX86_BUILTIN_COMPRESSPDSTORE256,
29399 IX86_BUILTIN_COMPRESSPDSTORE128,
29400 IX86_BUILTIN_COMPRESSPSSTORE256,
29401 IX86_BUILTIN_COMPRESSPSSTORE128,
29402 IX86_BUILTIN_PCOMPRESSQSTORE256,
29403 IX86_BUILTIN_PCOMPRESSQSTORE128,
29404 IX86_BUILTIN_PCOMPRESSDSTORE256,
29405 IX86_BUILTIN_PCOMPRESSDSTORE128,
29406 IX86_BUILTIN_EXPANDPDLOAD256,
29407 IX86_BUILTIN_EXPANDPDLOAD128,
29408 IX86_BUILTIN_EXPANDPSLOAD256,
29409 IX86_BUILTIN_EXPANDPSLOAD128,
29410 IX86_BUILTIN_PEXPANDQLOAD256,
29411 IX86_BUILTIN_PEXPANDQLOAD128,
29412 IX86_BUILTIN_PEXPANDDLOAD256,
29413 IX86_BUILTIN_PEXPANDDLOAD128,
29414 IX86_BUILTIN_EXPANDPDLOAD256Z,
29415 IX86_BUILTIN_EXPANDPDLOAD128Z,
29416 IX86_BUILTIN_EXPANDPSLOAD256Z,
29417 IX86_BUILTIN_EXPANDPSLOAD128Z,
29418 IX86_BUILTIN_PEXPANDQLOAD256Z,
29419 IX86_BUILTIN_PEXPANDQLOAD128Z,
29420 IX86_BUILTIN_PEXPANDDLOAD256Z,
29421 IX86_BUILTIN_PEXPANDDLOAD128Z,
29422 IX86_BUILTIN_PALIGNR256_MASK,
29423 IX86_BUILTIN_PALIGNR128_MASK,
29424 IX86_BUILTIN_MOVDQA64_256_MASK,
29425 IX86_BUILTIN_MOVDQA64_128_MASK,
29426 IX86_BUILTIN_MOVDQA32_256_MASK,
29427 IX86_BUILTIN_MOVDQA32_128_MASK,
29428 IX86_BUILTIN_MOVAPD256_MASK,
29429 IX86_BUILTIN_MOVAPD128_MASK,
29430 IX86_BUILTIN_MOVAPS256_MASK,
29431 IX86_BUILTIN_MOVAPS128_MASK,
29432 IX86_BUILTIN_MOVDQUHI256_MASK,
29433 IX86_BUILTIN_MOVDQUHI128_MASK,
29434 IX86_BUILTIN_MOVDQUQI256_MASK,
29435 IX86_BUILTIN_MOVDQUQI128_MASK,
29436 IX86_BUILTIN_MINPS128_MASK,
29437 IX86_BUILTIN_MAXPS128_MASK,
29438 IX86_BUILTIN_MINPD128_MASK,
29439 IX86_BUILTIN_MAXPD128_MASK,
29440 IX86_BUILTIN_MAXPD256_MASK,
29441 IX86_BUILTIN_MAXPS256_MASK,
29442 IX86_BUILTIN_MINPD256_MASK,
29443 IX86_BUILTIN_MINPS256_MASK,
29444 IX86_BUILTIN_MULPS128_MASK,
29445 IX86_BUILTIN_DIVPS128_MASK,
29446 IX86_BUILTIN_MULPD128_MASK,
29447 IX86_BUILTIN_DIVPD128_MASK,
29448 IX86_BUILTIN_DIVPD256_MASK,
29449 IX86_BUILTIN_DIVPS256_MASK,
29450 IX86_BUILTIN_MULPD256_MASK,
29451 IX86_BUILTIN_MULPS256_MASK,
29452 IX86_BUILTIN_ADDPD128_MASK,
29453 IX86_BUILTIN_ADDPD256_MASK,
29454 IX86_BUILTIN_ADDPS128_MASK,
29455 IX86_BUILTIN_ADDPS256_MASK,
29456 IX86_BUILTIN_SUBPD128_MASK,
29457 IX86_BUILTIN_SUBPD256_MASK,
29458 IX86_BUILTIN_SUBPS128_MASK,
29459 IX86_BUILTIN_SUBPS256_MASK,
29460 IX86_BUILTIN_XORPD256_MASK,
29461 IX86_BUILTIN_XORPD128_MASK,
29462 IX86_BUILTIN_XORPS256_MASK,
29463 IX86_BUILTIN_XORPS128_MASK,
29464 IX86_BUILTIN_ORPD256_MASK,
29465 IX86_BUILTIN_ORPD128_MASK,
29466 IX86_BUILTIN_ORPS256_MASK,
29467 IX86_BUILTIN_ORPS128_MASK,
29468 IX86_BUILTIN_BROADCASTF32x2_256,
29469 IX86_BUILTIN_BROADCASTI32x2_256,
29470 IX86_BUILTIN_BROADCASTI32x2_128,
29471 IX86_BUILTIN_BROADCASTF64X2_256,
29472 IX86_BUILTIN_BROADCASTI64X2_256,
29473 IX86_BUILTIN_BROADCASTF32X4_256,
29474 IX86_BUILTIN_BROADCASTI32X4_256,
29475 IX86_BUILTIN_EXTRACTF32X4_256,
29476 IX86_BUILTIN_EXTRACTI32X4_256,
29477 IX86_BUILTIN_DBPSADBW256,
29478 IX86_BUILTIN_DBPSADBW128,
29479 IX86_BUILTIN_CVTTPD2QQ256,
29480 IX86_BUILTIN_CVTTPD2QQ128,
29481 IX86_BUILTIN_CVTTPD2UQQ256,
29482 IX86_BUILTIN_CVTTPD2UQQ128,
29483 IX86_BUILTIN_CVTPD2QQ256,
29484 IX86_BUILTIN_CVTPD2QQ128,
29485 IX86_BUILTIN_CVTPD2UQQ256,
29486 IX86_BUILTIN_CVTPD2UQQ128,
29487 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29488 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29489 IX86_BUILTIN_CVTTPS2QQ256,
29490 IX86_BUILTIN_CVTTPS2QQ128,
29491 IX86_BUILTIN_CVTTPS2UQQ256,
29492 IX86_BUILTIN_CVTTPS2UQQ128,
29493 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29494 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29495 IX86_BUILTIN_CVTTPS2UDQ256,
29496 IX86_BUILTIN_CVTTPS2UDQ128,
29497 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29498 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29499 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29500 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29501 IX86_BUILTIN_CVTPD2DQ256_MASK,
29502 IX86_BUILTIN_CVTPD2DQ128_MASK,
29503 IX86_BUILTIN_CVTDQ2PD256_MASK,
29504 IX86_BUILTIN_CVTDQ2PD128_MASK,
29505 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29506 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29507 IX86_BUILTIN_CVTDQ2PS256_MASK,
29508 IX86_BUILTIN_CVTDQ2PS128_MASK,
29509 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29510 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29511 IX86_BUILTIN_CVTPS2PD256_MASK,
29512 IX86_BUILTIN_CVTPS2PD128_MASK,
29513 IX86_BUILTIN_PBROADCASTB256_MASK,
29514 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29515 IX86_BUILTIN_PBROADCASTB128_MASK,
29516 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29517 IX86_BUILTIN_PBROADCASTW256_MASK,
29518 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29519 IX86_BUILTIN_PBROADCASTW128_MASK,
29520 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29521 IX86_BUILTIN_PBROADCASTD256_MASK,
29522 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29523 IX86_BUILTIN_PBROADCASTD128_MASK,
29524 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29525 IX86_BUILTIN_PBROADCASTQ256_MASK,
29526 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29527 IX86_BUILTIN_PBROADCASTQ128_MASK,
29528 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29529 IX86_BUILTIN_BROADCASTSS256,
29530 IX86_BUILTIN_BROADCASTSS128,
29531 IX86_BUILTIN_BROADCASTSD256,
29532 IX86_BUILTIN_EXTRACTF64X2_256,
29533 IX86_BUILTIN_EXTRACTI64X2_256,
29534 IX86_BUILTIN_INSERTF32X4_256,
29535 IX86_BUILTIN_INSERTI32X4_256,
29536 IX86_BUILTIN_PMOVSXBW256_MASK,
29537 IX86_BUILTIN_PMOVSXBW128_MASK,
29538 IX86_BUILTIN_PMOVSXBD256_MASK,
29539 IX86_BUILTIN_PMOVSXBD128_MASK,
29540 IX86_BUILTIN_PMOVSXBQ256_MASK,
29541 IX86_BUILTIN_PMOVSXBQ128_MASK,
29542 IX86_BUILTIN_PMOVSXWD256_MASK,
29543 IX86_BUILTIN_PMOVSXWD128_MASK,
29544 IX86_BUILTIN_PMOVSXWQ256_MASK,
29545 IX86_BUILTIN_PMOVSXWQ128_MASK,
29546 IX86_BUILTIN_PMOVSXDQ256_MASK,
29547 IX86_BUILTIN_PMOVSXDQ128_MASK,
29548 IX86_BUILTIN_PMOVZXBW256_MASK,
29549 IX86_BUILTIN_PMOVZXBW128_MASK,
29550 IX86_BUILTIN_PMOVZXBD256_MASK,
29551 IX86_BUILTIN_PMOVZXBD128_MASK,
29552 IX86_BUILTIN_PMOVZXBQ256_MASK,
29553 IX86_BUILTIN_PMOVZXBQ128_MASK,
29554 IX86_BUILTIN_PMOVZXWD256_MASK,
29555 IX86_BUILTIN_PMOVZXWD128_MASK,
29556 IX86_BUILTIN_PMOVZXWQ256_MASK,
29557 IX86_BUILTIN_PMOVZXWQ128_MASK,
29558 IX86_BUILTIN_PMOVZXDQ256_MASK,
29559 IX86_BUILTIN_PMOVZXDQ128_MASK,
29560 IX86_BUILTIN_REDUCEPD256_MASK,
29561 IX86_BUILTIN_REDUCEPD128_MASK,
29562 IX86_BUILTIN_REDUCEPS256_MASK,
29563 IX86_BUILTIN_REDUCEPS128_MASK,
29564 IX86_BUILTIN_REDUCESD_MASK,
29565 IX86_BUILTIN_REDUCESS_MASK,
29566 IX86_BUILTIN_VPERMVARHI256_MASK,
29567 IX86_BUILTIN_VPERMVARHI128_MASK,
29568 IX86_BUILTIN_VPERMT2VARHI256,
29569 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29570 IX86_BUILTIN_VPERMT2VARHI128,
29571 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29572 IX86_BUILTIN_VPERMI2VARHI256,
29573 IX86_BUILTIN_VPERMI2VARHI128,
29574 IX86_BUILTIN_RCP14PD256,
29575 IX86_BUILTIN_RCP14PD128,
29576 IX86_BUILTIN_RCP14PS256,
29577 IX86_BUILTIN_RCP14PS128,
29578 IX86_BUILTIN_RSQRT14PD256_MASK,
29579 IX86_BUILTIN_RSQRT14PD128_MASK,
29580 IX86_BUILTIN_RSQRT14PS256_MASK,
29581 IX86_BUILTIN_RSQRT14PS128_MASK,
29582 IX86_BUILTIN_SQRTPD256_MASK,
29583 IX86_BUILTIN_SQRTPD128_MASK,
29584 IX86_BUILTIN_SQRTPS256_MASK,
29585 IX86_BUILTIN_SQRTPS128_MASK,
29586 IX86_BUILTIN_PADDB128_MASK,
29587 IX86_BUILTIN_PADDW128_MASK,
29588 IX86_BUILTIN_PADDD128_MASK,
29589 IX86_BUILTIN_PADDQ128_MASK,
29590 IX86_BUILTIN_PSUBB128_MASK,
29591 IX86_BUILTIN_PSUBW128_MASK,
29592 IX86_BUILTIN_PSUBD128_MASK,
29593 IX86_BUILTIN_PSUBQ128_MASK,
29594 IX86_BUILTIN_PADDSB128_MASK,
29595 IX86_BUILTIN_PADDSW128_MASK,
29596 IX86_BUILTIN_PSUBSB128_MASK,
29597 IX86_BUILTIN_PSUBSW128_MASK,
29598 IX86_BUILTIN_PADDUSB128_MASK,
29599 IX86_BUILTIN_PADDUSW128_MASK,
29600 IX86_BUILTIN_PSUBUSB128_MASK,
29601 IX86_BUILTIN_PSUBUSW128_MASK,
29602 IX86_BUILTIN_PADDB256_MASK,
29603 IX86_BUILTIN_PADDW256_MASK,
29604 IX86_BUILTIN_PADDD256_MASK,
29605 IX86_BUILTIN_PADDQ256_MASK,
29606 IX86_BUILTIN_PADDSB256_MASK,
29607 IX86_BUILTIN_PADDSW256_MASK,
29608 IX86_BUILTIN_PADDUSB256_MASK,
29609 IX86_BUILTIN_PADDUSW256_MASK,
29610 IX86_BUILTIN_PSUBB256_MASK,
29611 IX86_BUILTIN_PSUBW256_MASK,
29612 IX86_BUILTIN_PSUBD256_MASK,
29613 IX86_BUILTIN_PSUBQ256_MASK,
29614 IX86_BUILTIN_PSUBSB256_MASK,
29615 IX86_BUILTIN_PSUBSW256_MASK,
29616 IX86_BUILTIN_PSUBUSB256_MASK,
29617 IX86_BUILTIN_PSUBUSW256_MASK,
29618 IX86_BUILTIN_SHUF_F64x2_256,
29619 IX86_BUILTIN_SHUF_I64x2_256,
29620 IX86_BUILTIN_SHUF_I32x4_256,
29621 IX86_BUILTIN_SHUF_F32x4_256,
29622 IX86_BUILTIN_PMOVWB128,
29623 IX86_BUILTIN_PMOVWB256,
29624 IX86_BUILTIN_PMOVSWB128,
29625 IX86_BUILTIN_PMOVSWB256,
29626 IX86_BUILTIN_PMOVUSWB128,
29627 IX86_BUILTIN_PMOVUSWB256,
29628 IX86_BUILTIN_PMOVDB128,
29629 IX86_BUILTIN_PMOVDB256,
29630 IX86_BUILTIN_PMOVSDB128,
29631 IX86_BUILTIN_PMOVSDB256,
29632 IX86_BUILTIN_PMOVUSDB128,
29633 IX86_BUILTIN_PMOVUSDB256,
29634 IX86_BUILTIN_PMOVDW128,
29635 IX86_BUILTIN_PMOVDW256,
29636 IX86_BUILTIN_PMOVSDW128,
29637 IX86_BUILTIN_PMOVSDW256,
29638 IX86_BUILTIN_PMOVUSDW128,
29639 IX86_BUILTIN_PMOVUSDW256,
29640 IX86_BUILTIN_PMOVQB128,
29641 IX86_BUILTIN_PMOVQB256,
29642 IX86_BUILTIN_PMOVSQB128,
29643 IX86_BUILTIN_PMOVSQB256,
29644 IX86_BUILTIN_PMOVUSQB128,
29645 IX86_BUILTIN_PMOVUSQB256,
29646 IX86_BUILTIN_PMOVQW128,
29647 IX86_BUILTIN_PMOVQW256,
29648 IX86_BUILTIN_PMOVSQW128,
29649 IX86_BUILTIN_PMOVSQW256,
29650 IX86_BUILTIN_PMOVUSQW128,
29651 IX86_BUILTIN_PMOVUSQW256,
29652 IX86_BUILTIN_PMOVQD128,
29653 IX86_BUILTIN_PMOVQD256,
29654 IX86_BUILTIN_PMOVSQD128,
29655 IX86_BUILTIN_PMOVSQD256,
29656 IX86_BUILTIN_PMOVUSQD128,
29657 IX86_BUILTIN_PMOVUSQD256,
29658 IX86_BUILTIN_RANGEPD256,
29659 IX86_BUILTIN_RANGEPD128,
29660 IX86_BUILTIN_RANGEPS256,
29661 IX86_BUILTIN_RANGEPS128,
29662 IX86_BUILTIN_GETEXPPS256,
29663 IX86_BUILTIN_GETEXPPD256,
29664 IX86_BUILTIN_GETEXPPS128,
29665 IX86_BUILTIN_GETEXPPD128,
29666 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29667 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29668 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29669 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29670 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29671 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29672 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29673 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29674 IX86_BUILTIN_PABSQ256,
29675 IX86_BUILTIN_PABSQ128,
29676 IX86_BUILTIN_PABSD256_MASK,
29677 IX86_BUILTIN_PABSD128_MASK,
29678 IX86_BUILTIN_PMULHRSW256_MASK,
29679 IX86_BUILTIN_PMULHRSW128_MASK,
29680 IX86_BUILTIN_PMULHUW128_MASK,
29681 IX86_BUILTIN_PMULHUW256_MASK,
29682 IX86_BUILTIN_PMULHW256_MASK,
29683 IX86_BUILTIN_PMULHW128_MASK,
29684 IX86_BUILTIN_PMULLW256_MASK,
29685 IX86_BUILTIN_PMULLW128_MASK,
29686 IX86_BUILTIN_PMULLQ256,
29687 IX86_BUILTIN_PMULLQ128,
29688 IX86_BUILTIN_ANDPD256_MASK,
29689 IX86_BUILTIN_ANDPD128_MASK,
29690 IX86_BUILTIN_ANDPS256_MASK,
29691 IX86_BUILTIN_ANDPS128_MASK,
29692 IX86_BUILTIN_ANDNPD256_MASK,
29693 IX86_BUILTIN_ANDNPD128_MASK,
29694 IX86_BUILTIN_ANDNPS256_MASK,
29695 IX86_BUILTIN_ANDNPS128_MASK,
29696 IX86_BUILTIN_PSLLWI128_MASK,
29697 IX86_BUILTIN_PSLLDI128_MASK,
29698 IX86_BUILTIN_PSLLQI128_MASK,
29699 IX86_BUILTIN_PSLLW128_MASK,
29700 IX86_BUILTIN_PSLLD128_MASK,
29701 IX86_BUILTIN_PSLLQ128_MASK,
29702 IX86_BUILTIN_PSLLWI256_MASK,
29703 IX86_BUILTIN_PSLLW256_MASK,
29704 IX86_BUILTIN_PSLLDI256_MASK,
29705 IX86_BUILTIN_PSLLD256_MASK,
29706 IX86_BUILTIN_PSLLQI256_MASK,
29707 IX86_BUILTIN_PSLLQ256_MASK,
29708 IX86_BUILTIN_PSRADI128_MASK,
29709 IX86_BUILTIN_PSRAD128_MASK,
29710 IX86_BUILTIN_PSRADI256_MASK,
29711 IX86_BUILTIN_PSRAD256_MASK,
29712 IX86_BUILTIN_PSRAQI128_MASK,
29713 IX86_BUILTIN_PSRAQ128_MASK,
29714 IX86_BUILTIN_PSRAQI256_MASK,
29715 IX86_BUILTIN_PSRAQ256_MASK,
29716 IX86_BUILTIN_PANDD256,
29717 IX86_BUILTIN_PANDD128,
29718 IX86_BUILTIN_PSRLDI128_MASK,
29719 IX86_BUILTIN_PSRLD128_MASK,
29720 IX86_BUILTIN_PSRLDI256_MASK,
29721 IX86_BUILTIN_PSRLD256_MASK,
29722 IX86_BUILTIN_PSRLQI128_MASK,
29723 IX86_BUILTIN_PSRLQ128_MASK,
29724 IX86_BUILTIN_PSRLQI256_MASK,
29725 IX86_BUILTIN_PSRLQ256_MASK,
29726 IX86_BUILTIN_PANDQ256,
29727 IX86_BUILTIN_PANDQ128,
29728 IX86_BUILTIN_PANDND256,
29729 IX86_BUILTIN_PANDND128,
29730 IX86_BUILTIN_PANDNQ256,
29731 IX86_BUILTIN_PANDNQ128,
29732 IX86_BUILTIN_PORD256,
29733 IX86_BUILTIN_PORD128,
29734 IX86_BUILTIN_PORQ256,
29735 IX86_BUILTIN_PORQ128,
29736 IX86_BUILTIN_PXORD256,
29737 IX86_BUILTIN_PXORD128,
29738 IX86_BUILTIN_PXORQ256,
29739 IX86_BUILTIN_PXORQ128,
29740 IX86_BUILTIN_PACKSSWB256_MASK,
29741 IX86_BUILTIN_PACKSSWB128_MASK,
29742 IX86_BUILTIN_PACKUSWB256_MASK,
29743 IX86_BUILTIN_PACKUSWB128_MASK,
29744 IX86_BUILTIN_RNDSCALEPS256,
29745 IX86_BUILTIN_RNDSCALEPD256,
29746 IX86_BUILTIN_RNDSCALEPS128,
29747 IX86_BUILTIN_RNDSCALEPD128,
29748 IX86_BUILTIN_VTERNLOGQ256_MASK,
29749 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29750 IX86_BUILTIN_VTERNLOGD256_MASK,
29751 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29752 IX86_BUILTIN_VTERNLOGQ128_MASK,
29753 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29754 IX86_BUILTIN_VTERNLOGD128_MASK,
29755 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29756 IX86_BUILTIN_SCALEFPD256,
29757 IX86_BUILTIN_SCALEFPS256,
29758 IX86_BUILTIN_SCALEFPD128,
29759 IX86_BUILTIN_SCALEFPS128,
29760 IX86_BUILTIN_VFMADDPD256_MASK,
29761 IX86_BUILTIN_VFMADDPD256_MASK3,
29762 IX86_BUILTIN_VFMADDPD256_MASKZ,
29763 IX86_BUILTIN_VFMADDPD128_MASK,
29764 IX86_BUILTIN_VFMADDPD128_MASK3,
29765 IX86_BUILTIN_VFMADDPD128_MASKZ,
29766 IX86_BUILTIN_VFMADDPS256_MASK,
29767 IX86_BUILTIN_VFMADDPS256_MASK3,
29768 IX86_BUILTIN_VFMADDPS256_MASKZ,
29769 IX86_BUILTIN_VFMADDPS128_MASK,
29770 IX86_BUILTIN_VFMADDPS128_MASK3,
29771 IX86_BUILTIN_VFMADDPS128_MASKZ,
29772 IX86_BUILTIN_VFMSUBPD256_MASK3,
29773 IX86_BUILTIN_VFMSUBPD128_MASK3,
29774 IX86_BUILTIN_VFMSUBPS256_MASK3,
29775 IX86_BUILTIN_VFMSUBPS128_MASK3,
29776 IX86_BUILTIN_VFNMADDPD256_MASK,
29777 IX86_BUILTIN_VFNMADDPD128_MASK,
29778 IX86_BUILTIN_VFNMADDPS256_MASK,
29779 IX86_BUILTIN_VFNMADDPS128_MASK,
29780 IX86_BUILTIN_VFNMSUBPD256_MASK,
29781 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29782 IX86_BUILTIN_VFNMSUBPD128_MASK,
29783 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29784 IX86_BUILTIN_VFNMSUBPS256_MASK,
29785 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29786 IX86_BUILTIN_VFNMSUBPS128_MASK,
29787 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29788 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29789 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29790 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29791 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29792 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29793 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29794 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29795 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29796 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29797 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29798 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29799 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29800 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29801 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29802 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29803 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29804 IX86_BUILTIN_INSERTF64X2_256,
29805 IX86_BUILTIN_INSERTI64X2_256,
29806 IX86_BUILTIN_PSRAVV16HI,
29807 IX86_BUILTIN_PSRAVV8HI,
29808 IX86_BUILTIN_PMADDUBSW256_MASK,
29809 IX86_BUILTIN_PMADDUBSW128_MASK,
29810 IX86_BUILTIN_PMADDWD256_MASK,
29811 IX86_BUILTIN_PMADDWD128_MASK,
29812 IX86_BUILTIN_PSRLVV16HI,
29813 IX86_BUILTIN_PSRLVV8HI,
29814 IX86_BUILTIN_CVTPS2DQ256_MASK,
29815 IX86_BUILTIN_CVTPS2DQ128_MASK,
29816 IX86_BUILTIN_CVTPS2UDQ256,
29817 IX86_BUILTIN_CVTPS2UDQ128,
29818 IX86_BUILTIN_CVTPS2QQ256,
29819 IX86_BUILTIN_CVTPS2QQ128,
29820 IX86_BUILTIN_CVTPS2UQQ256,
29821 IX86_BUILTIN_CVTPS2UQQ128,
29822 IX86_BUILTIN_GETMANTPS256,
29823 IX86_BUILTIN_GETMANTPS128,
29824 IX86_BUILTIN_GETMANTPD256,
29825 IX86_BUILTIN_GETMANTPD128,
29826 IX86_BUILTIN_MOVDDUP256_MASK,
29827 IX86_BUILTIN_MOVDDUP128_MASK,
29828 IX86_BUILTIN_MOVSHDUP256_MASK,
29829 IX86_BUILTIN_MOVSHDUP128_MASK,
29830 IX86_BUILTIN_MOVSLDUP256_MASK,
29831 IX86_BUILTIN_MOVSLDUP128_MASK,
29832 IX86_BUILTIN_CVTQQ2PS256,
29833 IX86_BUILTIN_CVTQQ2PS128,
29834 IX86_BUILTIN_CVTUQQ2PS256,
29835 IX86_BUILTIN_CVTUQQ2PS128,
29836 IX86_BUILTIN_CVTQQ2PD256,
29837 IX86_BUILTIN_CVTQQ2PD128,
29838 IX86_BUILTIN_CVTUQQ2PD256,
29839 IX86_BUILTIN_CVTUQQ2PD128,
29840 IX86_BUILTIN_VPERMT2VARQ256,
29841 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29842 IX86_BUILTIN_VPERMT2VARD256,
29843 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29844 IX86_BUILTIN_VPERMI2VARQ256,
29845 IX86_BUILTIN_VPERMI2VARD256,
29846 IX86_BUILTIN_VPERMT2VARPD256,
29847 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29848 IX86_BUILTIN_VPERMT2VARPS256,
29849 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29850 IX86_BUILTIN_VPERMI2VARPD256,
29851 IX86_BUILTIN_VPERMI2VARPS256,
29852 IX86_BUILTIN_VPERMT2VARQ128,
29853 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29854 IX86_BUILTIN_VPERMT2VARD128,
29855 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29856 IX86_BUILTIN_VPERMI2VARQ128,
29857 IX86_BUILTIN_VPERMI2VARD128,
29858 IX86_BUILTIN_VPERMT2VARPD128,
29859 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29860 IX86_BUILTIN_VPERMT2VARPS128,
29861 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29862 IX86_BUILTIN_VPERMI2VARPD128,
29863 IX86_BUILTIN_VPERMI2VARPS128,
29864 IX86_BUILTIN_PSHUFB256_MASK,
29865 IX86_BUILTIN_PSHUFB128_MASK,
29866 IX86_BUILTIN_PSHUFHW256_MASK,
29867 IX86_BUILTIN_PSHUFHW128_MASK,
29868 IX86_BUILTIN_PSHUFLW256_MASK,
29869 IX86_BUILTIN_PSHUFLW128_MASK,
29870 IX86_BUILTIN_PSHUFD256_MASK,
29871 IX86_BUILTIN_PSHUFD128_MASK,
29872 IX86_BUILTIN_SHUFPD256_MASK,
29873 IX86_BUILTIN_SHUFPD128_MASK,
29874 IX86_BUILTIN_SHUFPS256_MASK,
29875 IX86_BUILTIN_SHUFPS128_MASK,
29876 IX86_BUILTIN_PROLVQ256,
29877 IX86_BUILTIN_PROLVQ128,
29878 IX86_BUILTIN_PROLQ256,
29879 IX86_BUILTIN_PROLQ128,
29880 IX86_BUILTIN_PRORVQ256,
29881 IX86_BUILTIN_PRORVQ128,
29882 IX86_BUILTIN_PRORQ256,
29883 IX86_BUILTIN_PRORQ128,
29884 IX86_BUILTIN_PSRAVQ128,
29885 IX86_BUILTIN_PSRAVQ256,
29886 IX86_BUILTIN_PSLLVV4DI_MASK,
29887 IX86_BUILTIN_PSLLVV2DI_MASK,
29888 IX86_BUILTIN_PSLLVV8SI_MASK,
29889 IX86_BUILTIN_PSLLVV4SI_MASK,
29890 IX86_BUILTIN_PSRAVV8SI_MASK,
29891 IX86_BUILTIN_PSRAVV4SI_MASK,
29892 IX86_BUILTIN_PSRLVV4DI_MASK,
29893 IX86_BUILTIN_PSRLVV2DI_MASK,
29894 IX86_BUILTIN_PSRLVV8SI_MASK,
29895 IX86_BUILTIN_PSRLVV4SI_MASK,
29896 IX86_BUILTIN_PSRAWI256_MASK,
29897 IX86_BUILTIN_PSRAW256_MASK,
29898 IX86_BUILTIN_PSRAWI128_MASK,
29899 IX86_BUILTIN_PSRAW128_MASK,
29900 IX86_BUILTIN_PSRLWI256_MASK,
29901 IX86_BUILTIN_PSRLW256_MASK,
29902 IX86_BUILTIN_PSRLWI128_MASK,
29903 IX86_BUILTIN_PSRLW128_MASK,
29904 IX86_BUILTIN_PRORVD256,
29905 IX86_BUILTIN_PROLVD256,
29906 IX86_BUILTIN_PRORD256,
29907 IX86_BUILTIN_PROLD256,
29908 IX86_BUILTIN_PRORVD128,
29909 IX86_BUILTIN_PROLVD128,
29910 IX86_BUILTIN_PRORD128,
29911 IX86_BUILTIN_PROLD128,
29912 IX86_BUILTIN_FPCLASSPD256,
29913 IX86_BUILTIN_FPCLASSPD128,
29914 IX86_BUILTIN_FPCLASSSD,
29915 IX86_BUILTIN_FPCLASSPS256,
29916 IX86_BUILTIN_FPCLASSPS128,
29917 IX86_BUILTIN_FPCLASSSS,
29918 IX86_BUILTIN_CVTB2MASK128,
29919 IX86_BUILTIN_CVTB2MASK256,
29920 IX86_BUILTIN_CVTW2MASK128,
29921 IX86_BUILTIN_CVTW2MASK256,
29922 IX86_BUILTIN_CVTD2MASK128,
29923 IX86_BUILTIN_CVTD2MASK256,
29924 IX86_BUILTIN_CVTQ2MASK128,
29925 IX86_BUILTIN_CVTQ2MASK256,
29926 IX86_BUILTIN_CVTMASK2B128,
29927 IX86_BUILTIN_CVTMASK2B256,
29928 IX86_BUILTIN_CVTMASK2W128,
29929 IX86_BUILTIN_CVTMASK2W256,
29930 IX86_BUILTIN_CVTMASK2D128,
29931 IX86_BUILTIN_CVTMASK2D256,
29932 IX86_BUILTIN_CVTMASK2Q128,
29933 IX86_BUILTIN_CVTMASK2Q256,
29934 IX86_BUILTIN_PCMPEQB128_MASK,
29935 IX86_BUILTIN_PCMPEQB256_MASK,
29936 IX86_BUILTIN_PCMPEQW128_MASK,
29937 IX86_BUILTIN_PCMPEQW256_MASK,
29938 IX86_BUILTIN_PCMPEQD128_MASK,
29939 IX86_BUILTIN_PCMPEQD256_MASK,
29940 IX86_BUILTIN_PCMPEQQ128_MASK,
29941 IX86_BUILTIN_PCMPEQQ256_MASK,
29942 IX86_BUILTIN_PCMPGTB128_MASK,
29943 IX86_BUILTIN_PCMPGTB256_MASK,
29944 IX86_BUILTIN_PCMPGTW128_MASK,
29945 IX86_BUILTIN_PCMPGTW256_MASK,
29946 IX86_BUILTIN_PCMPGTD128_MASK,
29947 IX86_BUILTIN_PCMPGTD256_MASK,
29948 IX86_BUILTIN_PCMPGTQ128_MASK,
29949 IX86_BUILTIN_PCMPGTQ256_MASK,
29950 IX86_BUILTIN_PTESTMB128,
29951 IX86_BUILTIN_PTESTMB256,
29952 IX86_BUILTIN_PTESTMW128,
29953 IX86_BUILTIN_PTESTMW256,
29954 IX86_BUILTIN_PTESTMD128,
29955 IX86_BUILTIN_PTESTMD256,
29956 IX86_BUILTIN_PTESTMQ128,
29957 IX86_BUILTIN_PTESTMQ256,
29958 IX86_BUILTIN_PTESTNMB128,
29959 IX86_BUILTIN_PTESTNMB256,
29960 IX86_BUILTIN_PTESTNMW128,
29961 IX86_BUILTIN_PTESTNMW256,
29962 IX86_BUILTIN_PTESTNMD128,
29963 IX86_BUILTIN_PTESTNMD256,
29964 IX86_BUILTIN_PTESTNMQ128,
29965 IX86_BUILTIN_PTESTNMQ256,
29966 IX86_BUILTIN_PBROADCASTMB128,
29967 IX86_BUILTIN_PBROADCASTMB256,
29968 IX86_BUILTIN_PBROADCASTMW128,
29969 IX86_BUILTIN_PBROADCASTMW256,
29970 IX86_BUILTIN_COMPRESSPD256,
29971 IX86_BUILTIN_COMPRESSPD128,
29972 IX86_BUILTIN_COMPRESSPS256,
29973 IX86_BUILTIN_COMPRESSPS128,
29974 IX86_BUILTIN_PCOMPRESSQ256,
29975 IX86_BUILTIN_PCOMPRESSQ128,
29976 IX86_BUILTIN_PCOMPRESSD256,
29977 IX86_BUILTIN_PCOMPRESSD128,
29978 IX86_BUILTIN_EXPANDPD256,
29979 IX86_BUILTIN_EXPANDPD128,
29980 IX86_BUILTIN_EXPANDPS256,
29981 IX86_BUILTIN_EXPANDPS128,
29982 IX86_BUILTIN_PEXPANDQ256,
29983 IX86_BUILTIN_PEXPANDQ128,
29984 IX86_BUILTIN_PEXPANDD256,
29985 IX86_BUILTIN_PEXPANDD128,
29986 IX86_BUILTIN_EXPANDPD256Z,
29987 IX86_BUILTIN_EXPANDPD128Z,
29988 IX86_BUILTIN_EXPANDPS256Z,
29989 IX86_BUILTIN_EXPANDPS128Z,
29990 IX86_BUILTIN_PEXPANDQ256Z,
29991 IX86_BUILTIN_PEXPANDQ128Z,
29992 IX86_BUILTIN_PEXPANDD256Z,
29993 IX86_BUILTIN_PEXPANDD128Z,
29994 IX86_BUILTIN_PMAXSD256_MASK,
29995 IX86_BUILTIN_PMINSD256_MASK,
29996 IX86_BUILTIN_PMAXUD256_MASK,
29997 IX86_BUILTIN_PMINUD256_MASK,
29998 IX86_BUILTIN_PMAXSD128_MASK,
29999 IX86_BUILTIN_PMINSD128_MASK,
30000 IX86_BUILTIN_PMAXUD128_MASK,
30001 IX86_BUILTIN_PMINUD128_MASK,
30002 IX86_BUILTIN_PMAXSQ256_MASK,
30003 IX86_BUILTIN_PMINSQ256_MASK,
30004 IX86_BUILTIN_PMAXUQ256_MASK,
30005 IX86_BUILTIN_PMINUQ256_MASK,
30006 IX86_BUILTIN_PMAXSQ128_MASK,
30007 IX86_BUILTIN_PMINSQ128_MASK,
30008 IX86_BUILTIN_PMAXUQ128_MASK,
30009 IX86_BUILTIN_PMINUQ128_MASK,
30010 IX86_BUILTIN_PMINSB256_MASK,
30011 IX86_BUILTIN_PMINUB256_MASK,
30012 IX86_BUILTIN_PMAXSB256_MASK,
30013 IX86_BUILTIN_PMAXUB256_MASK,
30014 IX86_BUILTIN_PMINSB128_MASK,
30015 IX86_BUILTIN_PMINUB128_MASK,
30016 IX86_BUILTIN_PMAXSB128_MASK,
30017 IX86_BUILTIN_PMAXUB128_MASK,
30018 IX86_BUILTIN_PMINSW256_MASK,
30019 IX86_BUILTIN_PMINUW256_MASK,
30020 IX86_BUILTIN_PMAXSW256_MASK,
30021 IX86_BUILTIN_PMAXUW256_MASK,
30022 IX86_BUILTIN_PMINSW128_MASK,
30023 IX86_BUILTIN_PMINUW128_MASK,
30024 IX86_BUILTIN_PMAXSW128_MASK,
30025 IX86_BUILTIN_PMAXUW128_MASK,
30026 IX86_BUILTIN_VPCONFLICTQ256,
30027 IX86_BUILTIN_VPCONFLICTD256,
30028 IX86_BUILTIN_VPCLZCNTQ256,
30029 IX86_BUILTIN_VPCLZCNTD256,
30030 IX86_BUILTIN_UNPCKHPD256_MASK,
30031 IX86_BUILTIN_UNPCKHPD128_MASK,
30032 IX86_BUILTIN_UNPCKHPS256_MASK,
30033 IX86_BUILTIN_UNPCKHPS128_MASK,
30034 IX86_BUILTIN_UNPCKLPD256_MASK,
30035 IX86_BUILTIN_UNPCKLPD128_MASK,
30036 IX86_BUILTIN_UNPCKLPS256_MASK,
30037 IX86_BUILTIN_VPCONFLICTQ128,
30038 IX86_BUILTIN_VPCONFLICTD128,
30039 IX86_BUILTIN_VPCLZCNTQ128,
30040 IX86_BUILTIN_VPCLZCNTD128,
30041 IX86_BUILTIN_UNPCKLPS128_MASK,
30042 IX86_BUILTIN_ALIGND256,
30043 IX86_BUILTIN_ALIGNQ256,
30044 IX86_BUILTIN_ALIGND128,
30045 IX86_BUILTIN_ALIGNQ128,
30046 IX86_BUILTIN_CVTPS2PH256_MASK,
30047 IX86_BUILTIN_CVTPS2PH_MASK,
30048 IX86_BUILTIN_CVTPH2PS_MASK,
30049 IX86_BUILTIN_CVTPH2PS256_MASK,
30050 IX86_BUILTIN_PUNPCKHDQ128_MASK,
30051 IX86_BUILTIN_PUNPCKHDQ256_MASK,
30052 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30053 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30054 IX86_BUILTIN_PUNPCKLDQ128_MASK,
30055 IX86_BUILTIN_PUNPCKLDQ256_MASK,
30056 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30057 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30058 IX86_BUILTIN_PUNPCKHBW128_MASK,
30059 IX86_BUILTIN_PUNPCKHBW256_MASK,
30060 IX86_BUILTIN_PUNPCKHWD128_MASK,
30061 IX86_BUILTIN_PUNPCKHWD256_MASK,
30062 IX86_BUILTIN_PUNPCKLBW128_MASK,
30063 IX86_BUILTIN_PUNPCKLBW256_MASK,
30064 IX86_BUILTIN_PUNPCKLWD128_MASK,
30065 IX86_BUILTIN_PUNPCKLWD256_MASK,
30066 IX86_BUILTIN_PSLLVV16HI,
30067 IX86_BUILTIN_PSLLVV8HI,
30068 IX86_BUILTIN_PACKSSDW256_MASK,
30069 IX86_BUILTIN_PACKSSDW128_MASK,
30070 IX86_BUILTIN_PACKUSDW256_MASK,
30071 IX86_BUILTIN_PACKUSDW128_MASK,
30072 IX86_BUILTIN_PAVGB256_MASK,
30073 IX86_BUILTIN_PAVGW256_MASK,
30074 IX86_BUILTIN_PAVGB128_MASK,
30075 IX86_BUILTIN_PAVGW128_MASK,
30076 IX86_BUILTIN_VPERMVARSF256_MASK,
30077 IX86_BUILTIN_VPERMVARDF256_MASK,
30078 IX86_BUILTIN_VPERMDF256_MASK,
30079 IX86_BUILTIN_PABSB256_MASK,
30080 IX86_BUILTIN_PABSB128_MASK,
30081 IX86_BUILTIN_PABSW256_MASK,
30082 IX86_BUILTIN_PABSW128_MASK,
30083 IX86_BUILTIN_VPERMILVARPD_MASK,
30084 IX86_BUILTIN_VPERMILVARPS_MASK,
30085 IX86_BUILTIN_VPERMILVARPD256_MASK,
30086 IX86_BUILTIN_VPERMILVARPS256_MASK,
30087 IX86_BUILTIN_VPERMILPD_MASK,
30088 IX86_BUILTIN_VPERMILPS_MASK,
30089 IX86_BUILTIN_VPERMILPD256_MASK,
30090 IX86_BUILTIN_VPERMILPS256_MASK,
30091 IX86_BUILTIN_BLENDMQ256,
30092 IX86_BUILTIN_BLENDMD256,
30093 IX86_BUILTIN_BLENDMPD256,
30094 IX86_BUILTIN_BLENDMPS256,
30095 IX86_BUILTIN_BLENDMQ128,
30096 IX86_BUILTIN_BLENDMD128,
30097 IX86_BUILTIN_BLENDMPD128,
30098 IX86_BUILTIN_BLENDMPS128,
30099 IX86_BUILTIN_BLENDMW256,
30100 IX86_BUILTIN_BLENDMB256,
30101 IX86_BUILTIN_BLENDMW128,
30102 IX86_BUILTIN_BLENDMB128,
30103 IX86_BUILTIN_PMULLD256_MASK,
30104 IX86_BUILTIN_PMULLD128_MASK,
30105 IX86_BUILTIN_PMULUDQ256_MASK,
30106 IX86_BUILTIN_PMULDQ256_MASK,
30107 IX86_BUILTIN_PMULDQ128_MASK,
30108 IX86_BUILTIN_PMULUDQ128_MASK,
30109 IX86_BUILTIN_CVTPD2PS256_MASK,
30110 IX86_BUILTIN_CVTPD2PS_MASK,
30111 IX86_BUILTIN_VPERMVARSI256_MASK,
30112 IX86_BUILTIN_VPERMVARDI256_MASK,
30113 IX86_BUILTIN_VPERMDI256_MASK,
30114 IX86_BUILTIN_CMPQ256,
30115 IX86_BUILTIN_CMPD256,
30116 IX86_BUILTIN_UCMPQ256,
30117 IX86_BUILTIN_UCMPD256,
30118 IX86_BUILTIN_CMPB256,
30119 IX86_BUILTIN_CMPW256,
30120 IX86_BUILTIN_UCMPB256,
30121 IX86_BUILTIN_UCMPW256,
30122 IX86_BUILTIN_CMPPD256_MASK,
30123 IX86_BUILTIN_CMPPS256_MASK,
30124 IX86_BUILTIN_CMPQ128,
30125 IX86_BUILTIN_CMPD128,
30126 IX86_BUILTIN_UCMPQ128,
30127 IX86_BUILTIN_UCMPD128,
30128 IX86_BUILTIN_CMPB128,
30129 IX86_BUILTIN_CMPW128,
30130 IX86_BUILTIN_UCMPB128,
30131 IX86_BUILTIN_UCMPW128,
30132 IX86_BUILTIN_CMPPD128_MASK,
30133 IX86_BUILTIN_CMPPS128_MASK,
30135 IX86_BUILTIN_GATHER3SIV8SF,
30136 IX86_BUILTIN_GATHER3SIV4SF,
30137 IX86_BUILTIN_GATHER3SIV4DF,
30138 IX86_BUILTIN_GATHER3SIV2DF,
30139 IX86_BUILTIN_GATHER3DIV8SF,
30140 IX86_BUILTIN_GATHER3DIV4SF,
30141 IX86_BUILTIN_GATHER3DIV4DF,
30142 IX86_BUILTIN_GATHER3DIV2DF,
30143 IX86_BUILTIN_GATHER3SIV8SI,
30144 IX86_BUILTIN_GATHER3SIV4SI,
30145 IX86_BUILTIN_GATHER3SIV4DI,
30146 IX86_BUILTIN_GATHER3SIV2DI,
30147 IX86_BUILTIN_GATHER3DIV8SI,
30148 IX86_BUILTIN_GATHER3DIV4SI,
30149 IX86_BUILTIN_GATHER3DIV4DI,
30150 IX86_BUILTIN_GATHER3DIV2DI,
30151 IX86_BUILTIN_SCATTERSIV8SF,
30152 IX86_BUILTIN_SCATTERSIV4SF,
30153 IX86_BUILTIN_SCATTERSIV4DF,
30154 IX86_BUILTIN_SCATTERSIV2DF,
30155 IX86_BUILTIN_SCATTERDIV8SF,
30156 IX86_BUILTIN_SCATTERDIV4SF,
30157 IX86_BUILTIN_SCATTERDIV4DF,
30158 IX86_BUILTIN_SCATTERDIV2DF,
30159 IX86_BUILTIN_SCATTERSIV8SI,
30160 IX86_BUILTIN_SCATTERSIV4SI,
30161 IX86_BUILTIN_SCATTERSIV4DI,
30162 IX86_BUILTIN_SCATTERSIV2DI,
30163 IX86_BUILTIN_SCATTERDIV8SI,
30164 IX86_BUILTIN_SCATTERDIV4SI,
30165 IX86_BUILTIN_SCATTERDIV4DI,
30166 IX86_BUILTIN_SCATTERDIV2DI,
30168 /* AVX512DQ. */
30169 IX86_BUILTIN_RANGESD128,
30170 IX86_BUILTIN_RANGESS128,
30171 IX86_BUILTIN_KUNPCKWD,
30172 IX86_BUILTIN_KUNPCKDQ,
30173 IX86_BUILTIN_BROADCASTF32x2_512,
30174 IX86_BUILTIN_BROADCASTI32x2_512,
30175 IX86_BUILTIN_BROADCASTF64X2_512,
30176 IX86_BUILTIN_BROADCASTI64X2_512,
30177 IX86_BUILTIN_BROADCASTF32X8_512,
30178 IX86_BUILTIN_BROADCASTI32X8_512,
30179 IX86_BUILTIN_EXTRACTF64X2_512,
30180 IX86_BUILTIN_EXTRACTF32X8,
30181 IX86_BUILTIN_EXTRACTI64X2_512,
30182 IX86_BUILTIN_EXTRACTI32X8,
30183 IX86_BUILTIN_REDUCEPD512_MASK,
30184 IX86_BUILTIN_REDUCEPS512_MASK,
30185 IX86_BUILTIN_PMULLQ512,
30186 IX86_BUILTIN_XORPD512,
30187 IX86_BUILTIN_XORPS512,
30188 IX86_BUILTIN_ORPD512,
30189 IX86_BUILTIN_ORPS512,
30190 IX86_BUILTIN_ANDPD512,
30191 IX86_BUILTIN_ANDPS512,
30192 IX86_BUILTIN_ANDNPD512,
30193 IX86_BUILTIN_ANDNPS512,
30194 IX86_BUILTIN_INSERTF32X8,
30195 IX86_BUILTIN_INSERTI32X8,
30196 IX86_BUILTIN_INSERTF64X2_512,
30197 IX86_BUILTIN_INSERTI64X2_512,
30198 IX86_BUILTIN_FPCLASSPD512,
30199 IX86_BUILTIN_FPCLASSPS512,
30200 IX86_BUILTIN_CVTD2MASK512,
30201 IX86_BUILTIN_CVTQ2MASK512,
30202 IX86_BUILTIN_CVTMASK2D512,
30203 IX86_BUILTIN_CVTMASK2Q512,
30204 IX86_BUILTIN_CVTPD2QQ512,
30205 IX86_BUILTIN_CVTPS2QQ512,
30206 IX86_BUILTIN_CVTPD2UQQ512,
30207 IX86_BUILTIN_CVTPS2UQQ512,
30208 IX86_BUILTIN_CVTQQ2PS512,
30209 IX86_BUILTIN_CVTUQQ2PS512,
30210 IX86_BUILTIN_CVTQQ2PD512,
30211 IX86_BUILTIN_CVTUQQ2PD512,
30212 IX86_BUILTIN_CVTTPS2QQ512,
30213 IX86_BUILTIN_CVTTPS2UQQ512,
30214 IX86_BUILTIN_CVTTPD2QQ512,
30215 IX86_BUILTIN_CVTTPD2UQQ512,
30216 IX86_BUILTIN_RANGEPS512,
30217 IX86_BUILTIN_RANGEPD512,
30219 /* AVX512BW. */
30220 IX86_BUILTIN_PACKUSDW512,
30221 IX86_BUILTIN_PACKSSDW512,
30222 IX86_BUILTIN_LOADDQUHI512_MASK,
30223 IX86_BUILTIN_LOADDQUQI512_MASK,
30224 IX86_BUILTIN_PSLLDQ512,
30225 IX86_BUILTIN_PSRLDQ512,
30226 IX86_BUILTIN_STOREDQUHI512_MASK,
30227 IX86_BUILTIN_STOREDQUQI512_MASK,
30228 IX86_BUILTIN_PALIGNR512,
30229 IX86_BUILTIN_PALIGNR512_MASK,
30230 IX86_BUILTIN_MOVDQUHI512_MASK,
30231 IX86_BUILTIN_MOVDQUQI512_MASK,
30232 IX86_BUILTIN_PSADBW512,
30233 IX86_BUILTIN_DBPSADBW512,
30234 IX86_BUILTIN_PBROADCASTB512,
30235 IX86_BUILTIN_PBROADCASTB512_GPR,
30236 IX86_BUILTIN_PBROADCASTW512,
30237 IX86_BUILTIN_PBROADCASTW512_GPR,
30238 IX86_BUILTIN_PMOVSXBW512_MASK,
30239 IX86_BUILTIN_PMOVZXBW512_MASK,
30240 IX86_BUILTIN_VPERMVARHI512_MASK,
30241 IX86_BUILTIN_VPERMT2VARHI512,
30242 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30243 IX86_BUILTIN_VPERMI2VARHI512,
30244 IX86_BUILTIN_PAVGB512,
30245 IX86_BUILTIN_PAVGW512,
30246 IX86_BUILTIN_PADDB512,
30247 IX86_BUILTIN_PSUBB512,
30248 IX86_BUILTIN_PSUBSB512,
30249 IX86_BUILTIN_PADDSB512,
30250 IX86_BUILTIN_PSUBUSB512,
30251 IX86_BUILTIN_PADDUSB512,
30252 IX86_BUILTIN_PSUBW512,
30253 IX86_BUILTIN_PADDW512,
30254 IX86_BUILTIN_PSUBSW512,
30255 IX86_BUILTIN_PADDSW512,
30256 IX86_BUILTIN_PSUBUSW512,
30257 IX86_BUILTIN_PADDUSW512,
30258 IX86_BUILTIN_PMAXUW512,
30259 IX86_BUILTIN_PMAXSW512,
30260 IX86_BUILTIN_PMINUW512,
30261 IX86_BUILTIN_PMINSW512,
30262 IX86_BUILTIN_PMAXUB512,
30263 IX86_BUILTIN_PMAXSB512,
30264 IX86_BUILTIN_PMINUB512,
30265 IX86_BUILTIN_PMINSB512,
30266 IX86_BUILTIN_PMOVWB512,
30267 IX86_BUILTIN_PMOVSWB512,
30268 IX86_BUILTIN_PMOVUSWB512,
30269 IX86_BUILTIN_PMULHRSW512_MASK,
30270 IX86_BUILTIN_PMULHUW512_MASK,
30271 IX86_BUILTIN_PMULHW512_MASK,
30272 IX86_BUILTIN_PMULLW512_MASK,
30273 IX86_BUILTIN_PSLLWI512_MASK,
30274 IX86_BUILTIN_PSLLW512_MASK,
30275 IX86_BUILTIN_PACKSSWB512,
30276 IX86_BUILTIN_PACKUSWB512,
30277 IX86_BUILTIN_PSRAVV32HI,
30278 IX86_BUILTIN_PMADDUBSW512_MASK,
30279 IX86_BUILTIN_PMADDWD512_MASK,
30280 IX86_BUILTIN_PSRLVV32HI,
30281 IX86_BUILTIN_PUNPCKHBW512,
30282 IX86_BUILTIN_PUNPCKHWD512,
30283 IX86_BUILTIN_PUNPCKLBW512,
30284 IX86_BUILTIN_PUNPCKLWD512,
30285 IX86_BUILTIN_PSHUFB512,
30286 IX86_BUILTIN_PSHUFHW512,
30287 IX86_BUILTIN_PSHUFLW512,
30288 IX86_BUILTIN_PSRAWI512,
30289 IX86_BUILTIN_PSRAW512,
30290 IX86_BUILTIN_PSRLWI512,
30291 IX86_BUILTIN_PSRLW512,
30292 IX86_BUILTIN_CVTB2MASK512,
30293 IX86_BUILTIN_CVTW2MASK512,
30294 IX86_BUILTIN_CVTMASK2B512,
30295 IX86_BUILTIN_CVTMASK2W512,
30296 IX86_BUILTIN_PCMPEQB512_MASK,
30297 IX86_BUILTIN_PCMPEQW512_MASK,
30298 IX86_BUILTIN_PCMPGTB512_MASK,
30299 IX86_BUILTIN_PCMPGTW512_MASK,
30300 IX86_BUILTIN_PTESTMB512,
30301 IX86_BUILTIN_PTESTMW512,
30302 IX86_BUILTIN_PTESTNMB512,
30303 IX86_BUILTIN_PTESTNMW512,
30304 IX86_BUILTIN_PSLLVV32HI,
30305 IX86_BUILTIN_PABSB512,
30306 IX86_BUILTIN_PABSW512,
30307 IX86_BUILTIN_BLENDMW512,
30308 IX86_BUILTIN_BLENDMB512,
30309 IX86_BUILTIN_CMPB512,
30310 IX86_BUILTIN_CMPW512,
30311 IX86_BUILTIN_UCMPB512,
30312 IX86_BUILTIN_UCMPW512,
30314 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30315 where all operands are 32-byte or 64-byte wide respectively. */
30316 IX86_BUILTIN_GATHERALTSIV4DF,
30317 IX86_BUILTIN_GATHERALTDIV8SF,
30318 IX86_BUILTIN_GATHERALTSIV4DI,
30319 IX86_BUILTIN_GATHERALTDIV8SI,
30320 IX86_BUILTIN_GATHER3ALTDIV16SF,
30321 IX86_BUILTIN_GATHER3ALTDIV16SI,
30322 IX86_BUILTIN_GATHER3ALTSIV4DF,
30323 IX86_BUILTIN_GATHER3ALTDIV8SF,
30324 IX86_BUILTIN_GATHER3ALTSIV4DI,
30325 IX86_BUILTIN_GATHER3ALTDIV8SI,
30326 IX86_BUILTIN_GATHER3ALTSIV8DF,
30327 IX86_BUILTIN_GATHER3ALTSIV8DI,
30328 IX86_BUILTIN_GATHER3DIV16SF,
30329 IX86_BUILTIN_GATHER3DIV16SI,
30330 IX86_BUILTIN_GATHER3DIV8DF,
30331 IX86_BUILTIN_GATHER3DIV8DI,
30332 IX86_BUILTIN_GATHER3SIV16SF,
30333 IX86_BUILTIN_GATHER3SIV16SI,
30334 IX86_BUILTIN_GATHER3SIV8DF,
30335 IX86_BUILTIN_GATHER3SIV8DI,
30336 IX86_BUILTIN_SCATTERDIV16SF,
30337 IX86_BUILTIN_SCATTERDIV16SI,
30338 IX86_BUILTIN_SCATTERDIV8DF,
30339 IX86_BUILTIN_SCATTERDIV8DI,
30340 IX86_BUILTIN_SCATTERSIV16SF,
30341 IX86_BUILTIN_SCATTERSIV16SI,
30342 IX86_BUILTIN_SCATTERSIV8DF,
30343 IX86_BUILTIN_SCATTERSIV8DI,
30345 /* AVX512PF */
30346 IX86_BUILTIN_GATHERPFQPD,
30347 IX86_BUILTIN_GATHERPFDPS,
30348 IX86_BUILTIN_GATHERPFDPD,
30349 IX86_BUILTIN_GATHERPFQPS,
30350 IX86_BUILTIN_SCATTERPFDPD,
30351 IX86_BUILTIN_SCATTERPFDPS,
30352 IX86_BUILTIN_SCATTERPFQPD,
30353 IX86_BUILTIN_SCATTERPFQPS,
30355 /* AVX-512ER */
30356 IX86_BUILTIN_EXP2PD_MASK,
30357 IX86_BUILTIN_EXP2PS_MASK,
30358 IX86_BUILTIN_EXP2PS,
30359 IX86_BUILTIN_RCP28PD,
30360 IX86_BUILTIN_RCP28PS,
30361 IX86_BUILTIN_RCP28SD,
30362 IX86_BUILTIN_RCP28SS,
30363 IX86_BUILTIN_RSQRT28PD,
30364 IX86_BUILTIN_RSQRT28PS,
30365 IX86_BUILTIN_RSQRT28SD,
30366 IX86_BUILTIN_RSQRT28SS,
30368 /* AVX-512IFMA */
30369 IX86_BUILTIN_VPMADD52LUQ512,
30370 IX86_BUILTIN_VPMADD52HUQ512,
30371 IX86_BUILTIN_VPMADD52LUQ256,
30372 IX86_BUILTIN_VPMADD52HUQ256,
30373 IX86_BUILTIN_VPMADD52LUQ128,
30374 IX86_BUILTIN_VPMADD52HUQ128,
30375 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30376 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30377 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30378 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30379 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30380 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30382 /* AVX-512VBMI */
30383 IX86_BUILTIN_VPMULTISHIFTQB512,
30384 IX86_BUILTIN_VPMULTISHIFTQB256,
30385 IX86_BUILTIN_VPMULTISHIFTQB128,
30386 IX86_BUILTIN_VPERMVARQI512_MASK,
30387 IX86_BUILTIN_VPERMT2VARQI512,
30388 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30389 IX86_BUILTIN_VPERMI2VARQI512,
30390 IX86_BUILTIN_VPERMVARQI256_MASK,
30391 IX86_BUILTIN_VPERMVARQI128_MASK,
30392 IX86_BUILTIN_VPERMT2VARQI256,
30393 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30394 IX86_BUILTIN_VPERMT2VARQI128,
30395 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30396 IX86_BUILTIN_VPERMI2VARQI256,
30397 IX86_BUILTIN_VPERMI2VARQI128,
30399 /* SHA builtins. */
30400 IX86_BUILTIN_SHA1MSG1,
30401 IX86_BUILTIN_SHA1MSG2,
30402 IX86_BUILTIN_SHA1NEXTE,
30403 IX86_BUILTIN_SHA1RNDS4,
30404 IX86_BUILTIN_SHA256MSG1,
30405 IX86_BUILTIN_SHA256MSG2,
30406 IX86_BUILTIN_SHA256RNDS2,
30408 /* CLWB instructions. */
30409 IX86_BUILTIN_CLWB,
30411 /* PCOMMIT instructions. */
30412 IX86_BUILTIN_PCOMMIT,
30414 /* CLFLUSHOPT instructions. */
30415 IX86_BUILTIN_CLFLUSHOPT,
30417 /* TFmode support builtins. */
30418 IX86_BUILTIN_INFQ,
30419 IX86_BUILTIN_HUGE_VALQ,
30420 IX86_BUILTIN_FABSQ,
30421 IX86_BUILTIN_COPYSIGNQ,
30423 /* Vectorizer support builtins. */
30424 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30425 IX86_BUILTIN_CPYSGNPS,
30426 IX86_BUILTIN_CPYSGNPD,
30427 IX86_BUILTIN_CPYSGNPS256,
30428 IX86_BUILTIN_CPYSGNPS512,
30429 IX86_BUILTIN_CPYSGNPD256,
30430 IX86_BUILTIN_CPYSGNPD512,
30431 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30432 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30435 /* FMA4 instructions. */
30436 IX86_BUILTIN_VFMADDSS,
30437 IX86_BUILTIN_VFMADDSD,
30438 IX86_BUILTIN_VFMADDPS,
30439 IX86_BUILTIN_VFMADDPD,
30440 IX86_BUILTIN_VFMADDPS256,
30441 IX86_BUILTIN_VFMADDPD256,
30442 IX86_BUILTIN_VFMADDSUBPS,
30443 IX86_BUILTIN_VFMADDSUBPD,
30444 IX86_BUILTIN_VFMADDSUBPS256,
30445 IX86_BUILTIN_VFMADDSUBPD256,
30447 /* FMA3 instructions. */
30448 IX86_BUILTIN_VFMADDSS3,
30449 IX86_BUILTIN_VFMADDSD3,
30451 /* XOP instructions. */
30452 IX86_BUILTIN_VPCMOV,
30453 IX86_BUILTIN_VPCMOV_V2DI,
30454 IX86_BUILTIN_VPCMOV_V4SI,
30455 IX86_BUILTIN_VPCMOV_V8HI,
30456 IX86_BUILTIN_VPCMOV_V16QI,
30457 IX86_BUILTIN_VPCMOV_V4SF,
30458 IX86_BUILTIN_VPCMOV_V2DF,
30459 IX86_BUILTIN_VPCMOV256,
30460 IX86_BUILTIN_VPCMOV_V4DI256,
30461 IX86_BUILTIN_VPCMOV_V8SI256,
30462 IX86_BUILTIN_VPCMOV_V16HI256,
30463 IX86_BUILTIN_VPCMOV_V32QI256,
30464 IX86_BUILTIN_VPCMOV_V8SF256,
30465 IX86_BUILTIN_VPCMOV_V4DF256,
30467 IX86_BUILTIN_VPPERM,
30469 IX86_BUILTIN_VPMACSSWW,
30470 IX86_BUILTIN_VPMACSWW,
30471 IX86_BUILTIN_VPMACSSWD,
30472 IX86_BUILTIN_VPMACSWD,
30473 IX86_BUILTIN_VPMACSSDD,
30474 IX86_BUILTIN_VPMACSDD,
30475 IX86_BUILTIN_VPMACSSDQL,
30476 IX86_BUILTIN_VPMACSSDQH,
30477 IX86_BUILTIN_VPMACSDQL,
30478 IX86_BUILTIN_VPMACSDQH,
30479 IX86_BUILTIN_VPMADCSSWD,
30480 IX86_BUILTIN_VPMADCSWD,
30482 IX86_BUILTIN_VPHADDBW,
30483 IX86_BUILTIN_VPHADDBD,
30484 IX86_BUILTIN_VPHADDBQ,
30485 IX86_BUILTIN_VPHADDWD,
30486 IX86_BUILTIN_VPHADDWQ,
30487 IX86_BUILTIN_VPHADDDQ,
30488 IX86_BUILTIN_VPHADDUBW,
30489 IX86_BUILTIN_VPHADDUBD,
30490 IX86_BUILTIN_VPHADDUBQ,
30491 IX86_BUILTIN_VPHADDUWD,
30492 IX86_BUILTIN_VPHADDUWQ,
30493 IX86_BUILTIN_VPHADDUDQ,
30494 IX86_BUILTIN_VPHSUBBW,
30495 IX86_BUILTIN_VPHSUBWD,
30496 IX86_BUILTIN_VPHSUBDQ,
30498 IX86_BUILTIN_VPROTB,
30499 IX86_BUILTIN_VPROTW,
30500 IX86_BUILTIN_VPROTD,
30501 IX86_BUILTIN_VPROTQ,
30502 IX86_BUILTIN_VPROTB_IMM,
30503 IX86_BUILTIN_VPROTW_IMM,
30504 IX86_BUILTIN_VPROTD_IMM,
30505 IX86_BUILTIN_VPROTQ_IMM,
30507 IX86_BUILTIN_VPSHLB,
30508 IX86_BUILTIN_VPSHLW,
30509 IX86_BUILTIN_VPSHLD,
30510 IX86_BUILTIN_VPSHLQ,
30511 IX86_BUILTIN_VPSHAB,
30512 IX86_BUILTIN_VPSHAW,
30513 IX86_BUILTIN_VPSHAD,
30514 IX86_BUILTIN_VPSHAQ,
30516 IX86_BUILTIN_VFRCZSS,
30517 IX86_BUILTIN_VFRCZSD,
30518 IX86_BUILTIN_VFRCZPS,
30519 IX86_BUILTIN_VFRCZPD,
30520 IX86_BUILTIN_VFRCZPS256,
30521 IX86_BUILTIN_VFRCZPD256,
30523 IX86_BUILTIN_VPCOMEQUB,
30524 IX86_BUILTIN_VPCOMNEUB,
30525 IX86_BUILTIN_VPCOMLTUB,
30526 IX86_BUILTIN_VPCOMLEUB,
30527 IX86_BUILTIN_VPCOMGTUB,
30528 IX86_BUILTIN_VPCOMGEUB,
30529 IX86_BUILTIN_VPCOMFALSEUB,
30530 IX86_BUILTIN_VPCOMTRUEUB,
30532 IX86_BUILTIN_VPCOMEQUW,
30533 IX86_BUILTIN_VPCOMNEUW,
30534 IX86_BUILTIN_VPCOMLTUW,
30535 IX86_BUILTIN_VPCOMLEUW,
30536 IX86_BUILTIN_VPCOMGTUW,
30537 IX86_BUILTIN_VPCOMGEUW,
30538 IX86_BUILTIN_VPCOMFALSEUW,
30539 IX86_BUILTIN_VPCOMTRUEUW,
30541 IX86_BUILTIN_VPCOMEQUD,
30542 IX86_BUILTIN_VPCOMNEUD,
30543 IX86_BUILTIN_VPCOMLTUD,
30544 IX86_BUILTIN_VPCOMLEUD,
30545 IX86_BUILTIN_VPCOMGTUD,
30546 IX86_BUILTIN_VPCOMGEUD,
30547 IX86_BUILTIN_VPCOMFALSEUD,
30548 IX86_BUILTIN_VPCOMTRUEUD,
30550 IX86_BUILTIN_VPCOMEQUQ,
30551 IX86_BUILTIN_VPCOMNEUQ,
30552 IX86_BUILTIN_VPCOMLTUQ,
30553 IX86_BUILTIN_VPCOMLEUQ,
30554 IX86_BUILTIN_VPCOMGTUQ,
30555 IX86_BUILTIN_VPCOMGEUQ,
30556 IX86_BUILTIN_VPCOMFALSEUQ,
30557 IX86_BUILTIN_VPCOMTRUEUQ,
30559 IX86_BUILTIN_VPCOMEQB,
30560 IX86_BUILTIN_VPCOMNEB,
30561 IX86_BUILTIN_VPCOMLTB,
30562 IX86_BUILTIN_VPCOMLEB,
30563 IX86_BUILTIN_VPCOMGTB,
30564 IX86_BUILTIN_VPCOMGEB,
30565 IX86_BUILTIN_VPCOMFALSEB,
30566 IX86_BUILTIN_VPCOMTRUEB,
30568 IX86_BUILTIN_VPCOMEQW,
30569 IX86_BUILTIN_VPCOMNEW,
30570 IX86_BUILTIN_VPCOMLTW,
30571 IX86_BUILTIN_VPCOMLEW,
30572 IX86_BUILTIN_VPCOMGTW,
30573 IX86_BUILTIN_VPCOMGEW,
30574 IX86_BUILTIN_VPCOMFALSEW,
30575 IX86_BUILTIN_VPCOMTRUEW,
30577 IX86_BUILTIN_VPCOMEQD,
30578 IX86_BUILTIN_VPCOMNED,
30579 IX86_BUILTIN_VPCOMLTD,
30580 IX86_BUILTIN_VPCOMLED,
30581 IX86_BUILTIN_VPCOMGTD,
30582 IX86_BUILTIN_VPCOMGED,
30583 IX86_BUILTIN_VPCOMFALSED,
30584 IX86_BUILTIN_VPCOMTRUED,
30586 IX86_BUILTIN_VPCOMEQQ,
30587 IX86_BUILTIN_VPCOMNEQ,
30588 IX86_BUILTIN_VPCOMLTQ,
30589 IX86_BUILTIN_VPCOMLEQ,
30590 IX86_BUILTIN_VPCOMGTQ,
30591 IX86_BUILTIN_VPCOMGEQ,
30592 IX86_BUILTIN_VPCOMFALSEQ,
30593 IX86_BUILTIN_VPCOMTRUEQ,
30595 /* LWP instructions. */
30596 IX86_BUILTIN_LLWPCB,
30597 IX86_BUILTIN_SLWPCB,
30598 IX86_BUILTIN_LWPVAL32,
30599 IX86_BUILTIN_LWPVAL64,
30600 IX86_BUILTIN_LWPINS32,
30601 IX86_BUILTIN_LWPINS64,
30603 IX86_BUILTIN_CLZS,
30605 /* RTM */
30606 IX86_BUILTIN_XBEGIN,
30607 IX86_BUILTIN_XEND,
30608 IX86_BUILTIN_XABORT,
30609 IX86_BUILTIN_XTEST,
30611 /* MPX */
30612 IX86_BUILTIN_BNDMK,
30613 IX86_BUILTIN_BNDSTX,
30614 IX86_BUILTIN_BNDLDX,
30615 IX86_BUILTIN_BNDCL,
30616 IX86_BUILTIN_BNDCU,
30617 IX86_BUILTIN_BNDRET,
30618 IX86_BUILTIN_BNDNARROW,
30619 IX86_BUILTIN_BNDINT,
30620 IX86_BUILTIN_SIZEOF,
30621 IX86_BUILTIN_BNDLOWER,
30622 IX86_BUILTIN_BNDUPPER,
30624 /* BMI instructions. */
30625 IX86_BUILTIN_BEXTR32,
30626 IX86_BUILTIN_BEXTR64,
30627 IX86_BUILTIN_CTZS,
30629 /* TBM instructions. */
30630 IX86_BUILTIN_BEXTRI32,
30631 IX86_BUILTIN_BEXTRI64,
30633 /* BMI2 instructions. */
30634 IX86_BUILTIN_BZHI32,
30635 IX86_BUILTIN_BZHI64,
30636 IX86_BUILTIN_PDEP32,
30637 IX86_BUILTIN_PDEP64,
30638 IX86_BUILTIN_PEXT32,
30639 IX86_BUILTIN_PEXT64,
30641 /* ADX instructions. */
30642 IX86_BUILTIN_ADDCARRYX32,
30643 IX86_BUILTIN_ADDCARRYX64,
30645 /* SBB instructions. */
30646 IX86_BUILTIN_SBB32,
30647 IX86_BUILTIN_SBB64,
30649 /* FSGSBASE instructions. */
30650 IX86_BUILTIN_RDFSBASE32,
30651 IX86_BUILTIN_RDFSBASE64,
30652 IX86_BUILTIN_RDGSBASE32,
30653 IX86_BUILTIN_RDGSBASE64,
30654 IX86_BUILTIN_WRFSBASE32,
30655 IX86_BUILTIN_WRFSBASE64,
30656 IX86_BUILTIN_WRGSBASE32,
30657 IX86_BUILTIN_WRGSBASE64,
30659 /* RDRND instructions. */
30660 IX86_BUILTIN_RDRAND16_STEP,
30661 IX86_BUILTIN_RDRAND32_STEP,
30662 IX86_BUILTIN_RDRAND64_STEP,
30664 /* RDSEED instructions. */
30665 IX86_BUILTIN_RDSEED16_STEP,
30666 IX86_BUILTIN_RDSEED32_STEP,
30667 IX86_BUILTIN_RDSEED64_STEP,
30669 /* F16C instructions. */
30670 IX86_BUILTIN_CVTPH2PS,
30671 IX86_BUILTIN_CVTPH2PS256,
30672 IX86_BUILTIN_CVTPS2PH,
30673 IX86_BUILTIN_CVTPS2PH256,
30675 /* MONITORX and MWAITX instructions. */
30676 IX86_BUILTIN_MONITORX,
30677 IX86_BUILTIN_MWAITX,
30679 /* CFString built-in for darwin */
30680 IX86_BUILTIN_CFSTRING,
30682 /* Builtins to get CPU type and supported features. */
30683 IX86_BUILTIN_CPU_INIT,
30684 IX86_BUILTIN_CPU_IS,
30685 IX86_BUILTIN_CPU_SUPPORTS,
30687 /* Read/write FLAGS register built-ins. */
30688 IX86_BUILTIN_READ_FLAGS,
30689 IX86_BUILTIN_WRITE_FLAGS,
30691 IX86_BUILTIN_MAX
30694 /* Table for the ix86 builtin decls. */
30695 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30697 /* Table of all of the builtin functions that are possible with different ISAs
30698 but are waiting to be built until a function is declared to use that
30699 ISA. */
30700 struct builtin_isa {
30701 const char *name; /* function name */
30702 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30703 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30704 bool const_p; /* true if the declaration is constant */
30705 bool leaf_p; /* true if the declaration has leaf attribute */
30706 bool nothrow_p; /* true if the declaration has nothrow attribute */
30707 bool set_and_not_built_p; /* true if the builtin is recorded but its decl is not yet built */
30710 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30712 /* Bits that can still trigger the inclusion of a deferred builtin. */
30713 static HOST_WIDE_INT deferred_isa_values = 0;
30715 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save MASK,
30716 the isa_flags this builtin requires, in the ix86_builtins_isa array.  Store the
30717 function decl in the ix86_builtins array.  Return the function decl, or
30718 NULL_TREE if the builtin was not added.
30720 If the front end has a special hook for builtin functions, delay adding
30721 builtin functions that aren't in the current ISA until the ISA is changed
30722 with function specific optimization.  Doing so can save about 300K for the
30723 default compiler. When the builtin is expanded, check at that time whether
30724 it is valid.
30726 If the front end doesn't have a special hook, record all builtins, even if
30727 they aren't part of the current ISA, in case the user uses
30728 function specific options for a different ISA, so that we don't get scope
30729 errors if a builtin is added in the middle of a function scope. */
30731 static inline tree
30732 def_builtin (HOST_WIDE_INT mask, const char *name,
30733 enum ix86_builtin_func_type tcode,
30734 enum ix86_builtins code)
30736 tree decl = NULL_TREE;
30738 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30740 ix86_builtins_isa[(int) code].isa = mask;
30742 mask &= ~OPTION_MASK_ISA_64BIT;
30743 if (mask == 0
30744 || (mask & ix86_isa_flags) != 0
30745 || (lang_hooks.builtin_function
30746 == lang_hooks.builtin_function_ext_scope))
30749 tree type = ix86_get_builtin_func_type (tcode);
30750 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30751 NULL, NULL_TREE);
30752 ix86_builtins[(int) code] = decl;
30753 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30755 else
30757 /* Only a MASK whose set_and_not_built_p is true can potentially
30758 still cause a builtin to be included later. */
30759 deferred_isa_values |= mask;
30760 ix86_builtins[(int) code] = NULL_TREE;
30761 ix86_builtins_isa[(int) code].tcode = tcode;
30762 ix86_builtins_isa[(int) code].name = name;
30763 ix86_builtins_isa[(int) code].leaf_p = false;
30764 ix86_builtins_isa[(int) code].nothrow_p = false;
30765 ix86_builtins_isa[(int) code].const_p = false;
30766 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30770 return decl;
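/* A minimal usage sketch; the wrapper function below is hypothetical
   and only illustrates how a def_builtin call ties an ISA mask, a
   builtin name, a function-type code and an IX86_BUILTIN_* code
   together.  The arguments reuse the RTM xtest data that appears in
   the bdesc_special_args table later in this file.  */

static void
example_def_xtest_builtin (void)	/* hypothetical, illustration only */
{
  /* If RTM is not enabled in ix86_isa_flags (and the front end's
     builtin hook allows deferral), the decl is not built here; it is
     only recorded in ix86_builtins_isa and the RTM bit is added to
     deferred_isa_values.  */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xtest",
	       INT_FTYPE_VOID, IX86_BUILTIN_XTEST);
}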
30773 /* Like def_builtin, but also marks the function decl "const". */
30775 static inline tree
30776 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30777 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30779 tree decl = def_builtin (mask, name, tcode, code);
30780 if (decl)
30781 TREE_READONLY (decl) = 1;
30782 else
30783 ix86_builtins_isa[(int) code].const_p = true;
30785 return decl;
30788 /* Add any new builtin functions for a given ISA that may not have been
30789 declared. This saves a bit of space compared to adding all of the
30790 declarations to the tree, even if we didn't use them. */
30792 static void
30793 ix86_add_new_builtins (HOST_WIDE_INT isa)
30795 if ((isa & deferred_isa_values) == 0)
30796 return;
30798 /* Bits in ISA value can be removed from potential isa values. */
30799 deferred_isa_values &= ~isa;
30801 int i;
30802 tree saved_current_target_pragma = current_target_pragma;
30803 current_target_pragma = NULL_TREE;
30805 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30807 if ((ix86_builtins_isa[i].isa & isa) != 0
30808 && ix86_builtins_isa[i].set_and_not_built_p)
30810 tree decl, type;
30812 /* Don't define the builtin again. */
30813 ix86_builtins_isa[i].set_and_not_built_p = false;
30815 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30816 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30817 type, i, BUILT_IN_MD, NULL,
30818 NULL_TREE);
30820 ix86_builtins[i] = decl;
30821 if (ix86_builtins_isa[i].const_p)
30822 TREE_READONLY (decl) = 1;
30823 if (ix86_builtins_isa[i].leaf_p)
30824 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30825 NULL_TREE);
30826 if (ix86_builtins_isa[i].nothrow_p)
30827 TREE_NOTHROW (decl) = 1;
30831 current_target_pragma = saved_current_target_pragma;
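/* A minimal sketch of the other half of the deferral scheme, assuming
   a context where the effective ISA has just been extended (for
   example by a target attribute or pragma); the helper below is
   hypothetical.  Builtins recorded by def_builtin are materialized
   here once one of their ISA bits becomes available.  */

static void
example_enable_avx512f_builtins (void)	/* hypothetical, illustration only */
{
  HOST_WIDE_INT new_isa = OPTION_MASK_ISA_AVX512F;

  /* Returns immediately if no deferred builtin depends on these bits;
     otherwise builds the recorded decls with
     add_builtin_function_ext_scope and clears their deferred state.  */
  ix86_add_new_builtins (new_isa);
}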
30834 /* Bits for builtin_description.flag. */
30836 /* Set when we don't support the comparison natively, and should
30837 swap the comparison operands in order to support it. */
30838 #define BUILTIN_DESC_SWAP_OPERANDS 1
30840 struct builtin_description
30842 const HOST_WIDE_INT mask;
30843 const enum insn_code icode;
30844 const char *const name;
30845 const enum ix86_builtins code;
30846 const enum rtx_code comparison;
30847 const int flag;
30850 static const struct builtin_description bdesc_comi[] =
30852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30878 static const struct builtin_description bdesc_pcmpestr[] =
30880 /* SSE4.2 */
30881 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30882 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30883 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30884 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30885 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30886 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30887 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30890 static const struct builtin_description bdesc_pcmpistr[] =
30892 /* SSE4.2 */
30893 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30894 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30895 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30896 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30897 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30898 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30899 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30902 /* Special builtins with variable number of arguments. */
30903 static const struct builtin_description bdesc_special_args[] =
30905 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30906 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30907 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30909 /* 80387 (for use internally for atomic compound assignment). */
30910 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30911 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30912 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30913 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30915 /* MMX */
30916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30918 /* 3DNow! */
30919 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30921 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30922 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30923 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30924 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30925 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30926 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30927 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30928 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30929 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30931 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30932 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30933 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30934 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30935 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30936 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30937 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30938 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30940 /* SSE */
30941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30950 /* SSE or 3DNow!A */
30951 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30952 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30954 /* SSE2 */
30955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30962 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30969 /* SSE3 */
30970 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30972 /* SSE4.1 */
30973 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30975 /* SSE4A */
30976 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30977 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30979 /* AVX */
30980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30983 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30984 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30985 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31010 /* AVX2 */
31011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31015 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31021 /* AVX512F */
31022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31070 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31071 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31072 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31073 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31074 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31075 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31077 /* FSGSBASE */
31078 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31079 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31080 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31081 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31082 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31083 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31084 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31085 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31087 /* RTM */
31088 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31089 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31090 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31092 /* AVX512BW */
31093 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31094 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31095 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31096 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31098 /* AVX512VL */
31099 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31102 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31136 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31137 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
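/* Editorial illustration, not part of the original table: the FTYPE field
   encodes each builtin's prototype.  For instance VOID_FTYPE_PV4SI_V4DI_QI
   (used by __builtin_ia32_pmovqd256mem_mask above) describes a masked
   truncating store taking a pointer to the V4SI destination, the V4DI
   source and an 8-bit writemask, roughly:

     void __builtin_ia32_pmovqd256mem_mask (v4si *, v4di, unsigned char);

   The authoritative prototypes come from i386-builtin-types.def; the
   line above is only a sketch.  */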
31194 /* PCOMMIT. */
31195 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31196 };
31198 /* Builtins with variable number of arguments. */
31199 static const struct builtin_description bdesc_args[] =
31200 {
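/* Editorial illustration, not part of the original table: each entry here
   pairs an ISA option mask, an insn code, the builtin's user-visible name,
   its IX86_BUILTIN_* enumerator, an optional comparison code (UNKNOWN when
   unused) and the index of its function type.  For example the
   "__builtin_ia32_addps" entry below binds that builtin to the addv4sf3
   pattern with the prototype v4sf (v4sf, v4sf).  */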
31201 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31202 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31203 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31204 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31205 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31206 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31207 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31209 /* MMX */
31210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
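/* Editorial illustration, not part of the original table: the ..._SI_COUNT
   signatures above are the immediate-count shift forms (the builtin takes
   an int count, e.g. __builtin_ia32_psllwi), while the ..._V4HI_COUNT,
   ..._V2SI_COUNT and ..._V1DI_COUNT forms take the count from the second
   MMX vector operand, matching the register-count forms of
   PSLLW/PSLLD/PSLLQ and friends.  */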
31273 /* 3DNow! */
31274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31280 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31281 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31282 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31283 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31284 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31285 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31286 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31287 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31288 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31289 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31290 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31291 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31292 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31293 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31295 /* 3DNow!A */
31296 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31297 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31298 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31299 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31300 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31301 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31303 /* SSE */
31304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31306 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31308 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31312 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31319 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31320 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31321 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
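/* Editorial illustration, not part of the original table: these packed and
   scalar arithmetic builtins are what the <xmmintrin.h> intrinsics expand
   to.  A sketch of the header wrapper (see xmmintrin.h for the exact
   attributes):

     extern __inline __m128
     _mm_add_ps (__m128 __A, __m128 __B)
     {
       return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
     }
*/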
31328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
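/* Editorial illustration, not part of the original table: SSE provides no
   greater-than or greater-equal compare encodings, so the cmpgt / cmpge
   entries above (and their negated variants) use LT/LE and UNGE/UNGT
   together with the ..._SWAP function types; the expander swaps the two
   operands, i.e. __builtin_ia32_cmpgtps (a, b) is emitted as CMPLTPS with
   the arguments reversed.  */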
31349 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31350 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31354 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31356 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31357 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31359 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31364 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31365 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31371 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31377 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31378 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31380 /* SSE MMX or 3DNow!A */
31381 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31382 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31383 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31385 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31386 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31387 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31388 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31390 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31391 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31393 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31395 /* SSE2 */
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31402 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31414 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31415 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31456 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31461 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31465 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31467 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31469 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31470 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31473 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31474 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31475 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31487 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31488 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31490 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31492 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31507 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31510 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31511 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31512 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31513 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31514 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31515 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31516 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31517 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31523 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31527 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31532 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31537 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31538 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31539 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31540 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31541 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31542 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31545 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31546 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31547 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31548 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31549 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31550 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
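/* Editorial illustration, not part of the original table: the
   V2DI_FTYPE_V2DI_INT_CONVERT entries (pslldqi128 / psrldqi128 above) are
   the whole-register byte shifts expanded through a TImode shift, so the
   builtin's count operand is in bits.  The <emmintrin.h> wrappers scale
   the byte count accordingly, roughly:

     _mm_slli_si128 (x, N)  ->  __builtin_ia32_pslldqi128 (x, (N) * 8)
*/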
31552 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31553 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31554 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31555 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31565 /* SSE2 MMX */
31566 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31567 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31569 /* SSE3 */
31570 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31571 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31573 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31574 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31575 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31576 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31577 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31578 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31580 /* SSSE3 */
31581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31602 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31605 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
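/* Note that each SSSE3 operation above appears twice: once as the 128-bit
   XMM form (names ending in "128", V16QI/V8HI/V4SI prototypes) and once as
   the 64-bit MMX form.  As an illustrative header mapping, tmmintrin.h
   builds _mm_shuffle_epi8 on __builtin_ia32_pshufb128 and _mm_abs_epi16 on
   __builtin_ia32_pabsw128.  */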
31613 /* SSSE3 (palignr) */
31614 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31615 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
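/* The _INT_CONVERT suffix on the two palignr prototypes marks rows whose
   operands are converted to the mode of the underlying pattern during
   expansion (ssse3_palignrti works on a TImode value while the builtin is
   typed as V2DI).  The byte count itself is supplied in bits by the header
   wrapper: tmmintrin.h's _mm_alignr_epi8 (X, Y, N) appears to pass N * 8 to
   __builtin_ia32_palignr128.  */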
31617 /* SSE4.1 */
31618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31647 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31648 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31649 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31650 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31651 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31652 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31653 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
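/* The SSE4.1 rows above cover the blends, dot products, insertps/mpsadbw,
   the pmovsx/pmovzx widening conversions, packusdw, pcmpeqq, the signed and
   unsigned min/max variants and pmuldq/pmulld.  Illustrative header
   mappings: smmintrin.h defines _mm_cvtepi8_epi16 on
   __builtin_ia32_pmovsxbw128 and _mm_packus_epi32 on
   __builtin_ia32_packusdw128.  */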
31656 /* SSE4.1 (OPTION_MASK_ISA_ROUND forms: round/floor/ceil/trunc/rint and ptest) */
31657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31664 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31665 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31667 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31668 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31670 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31671 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31673 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31674 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31675 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31676 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31678 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31679 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31681 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31682 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31684 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31685 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31686 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
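/* In the block above the fifth field is not always UNKNOWN: the
   floor/ceil/trunc/rint rows reuse the generic roundpd/roundps patterns and
   store a ROUND_* constant there, so several builtins can share one insn
   pattern distinguished by the stored rounding selector, while the ptest
   rows store EQ, LTU or GTU to pick which PTEST condition (ZF set, CF set,
   or both clear) becomes the integer result.  */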
31688 /* SSE4.2 */
31689 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31690 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31691 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31692 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31693 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
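/* The crc32 rows require both the SSE4.2 and CRC32 option masks, and the
   64-bit form additionally requires 64-bit mode.  Illustratively,
   smmintrin.h's _mm_crc32_u8 is built on __builtin_ia32_crc32qi.  */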
31695 /* SSE4A */
31696 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31697 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31698 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31699 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
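/* SSE4A extrq/insertq do bit-field extract/insert within an XMM register:
   the *qi forms take the field length and index as separate unsigned
   immediates, while the register forms take them packed in the second
   vector operand.  AMD's ammintrin.h intrinsics (e.g. _mm_extracti_si64)
   are the usual front end for these.  */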
31701 /* AES */
31702 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31703 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31705 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31706 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31707 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31708 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31710 /* PCLMUL */
31711 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
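/* The AES and PCLMUL rows carry a null name field; the builtin names are
   evidently registered elsewhere (keyed on -maes / -mpclmul), so these rows
   contribute only the insn pattern and prototype used when the builtin is
   expanded.  wmmintrin.h exposes them as, e.g., _mm_aesenc_si128 and
   _mm_clmulepi64_si128.  */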
31713 /* AVX */
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31722 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31728 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31731 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31732 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31733 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31734 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31735 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31736 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31737 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31738 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31739 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31762 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31763 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31767 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31769 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31785 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31787 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31789 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31801 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31802 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31815 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31816 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31826 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31827 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31828 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31849 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31850 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31852 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
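/* The AVX rows above provide the 256-bit arithmetic/logical operations, the
   vpermil/vperm2f128 permutes, conversions, the rounding forms, vtest/ptest
   and movmsk; the vtest/ptest entries reuse the comparison-code scheme
   described for SSE4.1 above.  Note the trailing space inside the
   "__builtin_ia32_vec_pack_sfix256 " string: it is present in the source,
   so the registered builtin name presumably includes it.  Illustratively,
   avxintrin.h defines _mm256_hadd_pd on top of __builtin_ia32_haddpd256.  */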
31854 /* AVX2 */
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31894 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31895 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31896 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31897 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31898 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31899 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31900 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31901 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31902 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31903 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31904 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31905 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31919 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31921 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31922 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31923 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31924 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31925 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31926 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31936 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31937 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31938 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31939 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31940 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31941 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31942 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31943 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31944 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31945 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31947 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31948 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31949 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31950 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31951 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31952 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31953 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31954 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31955 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31956 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31969 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31994 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31996 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31998 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31999 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
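/* In the AVX2 shift rows above, the same pattern is listed twice with
   different prototypes: the *_SI_COUNT rows take the shift count as a
   scalar/immediate (e.g. __builtin_ia32_psllwi256, used by avx2intrin.h's
   _mm256_slli_epi16), while the *_V8HI_COUNT / *_V4SI_COUNT / *_V2DI_COUNT
   rows take the count in the low part of a vector register (e.g.
   __builtin_ia32_psllw256 behind _mm256_sll_epi16).  The pslldqi/psrldqi
   and palignr256 rows use *_INT_CONVERT, as for SSSE3 palignr above,
   because their operands are expanded in the underlying V2TI mode.  */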
32002 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32004 /* BMI */
32005 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32006 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32007 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
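/* __builtin_clzs and __builtin_ctzs above operate on 16-bit values
   (UINT16_FTYPE_UINT16) and are what the 16-bit lzcnt/tzcnt wrappers in
   lzcntintrin.h / bmiintrin.h appear to use.  bextr extracts a bit field
   whose start and length are packed into the second operand; bmiintrin.h's
   __bextr_u32 is built on __builtin_ia32_bextr_u32.  */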
32009 /* TBM */
32010 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32011 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
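/* TBM's bextri is the immediate-operand counterpart of BMI bextr: the
   start/length control arrives as a constant rather than in a register,
   which is why it has its own builtin and insn pattern.  */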
32013 /* F16C */
32014 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
32015 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
32016 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
32017 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
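/* F16C half-precision conversions.  The ps->ph direction takes a rounding
   immediate (hence the *_INT prototypes); f16cintrin.h, for example, builds
   _mm_cvtph_ps on __builtin_ia32_vcvtph2ps and _mm_cvtps_ph on
   __builtin_ia32_vcvtps2ph.  */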
32019 /* BMI2 */
32020 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32021 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32022 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32023 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32024 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32025 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
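/* BMI2 scalar bit manipulation: bzhi zeroes the bits above a given index,
   pdep scatters the low bits of the source to the positions selected by the
   mask, and pext gathers the selected bits down to the low end.
   bmi2intrin.h exposes these as _bzhi_u32 / _pdep_u32 / _pext_u32 and the
   corresponding _u64 variants.  */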
32027 /* AVX512F */
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
32083 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
32084 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32194 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32195 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32196 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32197 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
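  /* The vplzcnt/vpconflict and broadcastm entries in this block are gated
     on OPTION_MASK_ISA_AVX512CD rather than AVX512F, since the conflict
     detection instructions are a separately enumerated AVX-512 subset.  */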
32198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32229 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
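  /* The floorpd/ceilpd vec_pack_sfix entries above reuse the comparison
     slot to carry a ROUND_FLOOR / ROUND_CEIL sub-code, and the _ROUND
     suffix on the prototype marks builtins that take that implicit
     rounding-mode immediate when expanded.  */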
32234 /* Mask arithmetic operations */
32235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
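  /* A zero name field, as in the SHA entries below, means the builtin is
     not declared by name from this table; the __builtin_ia32_sha*
     declarations are made separately under the SHA option mask elsewhere
     in this file.  */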
32246 /* SHA */
32247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
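  /* In the _mask entries that follow, the prototype lists the normal
     operands, then a merge source of the result type, then a QImode,
     HImode or SImode write mask with one bit per element (e.g.
     V4SF_FTYPE_V4SF_V4SF_V4SF_QI); _maskz variants zero the masked-off
     elements instead of merging them.  */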
32255 /* AVX512VL. */
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32266 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32267 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32268 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32269 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32294 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32295 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32296 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32297 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32298 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32299 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32300 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32301 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32302 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32303 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32304 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32305 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32306 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32313 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32314 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32315 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32316 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32317 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32318 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32319 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32320 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32323 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32324 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32325 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32326 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32366 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32367 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
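/* Masked sign- and zero-extension conversions (vpmovsx* / vpmovzx*).  */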
32370 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32394 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32395 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32396 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32397 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32398 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32399 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32400 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32403 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32405 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32432 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32434 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32435 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32436 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32437 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32445 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32450 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
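/* Masked down-converting truncations (vpmov{,s,us}{wb,db,dw,qb,qw,qd}).  */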
32456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32459 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32460 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32461 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32492 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32493 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32494 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32495 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32520 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32521 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32522 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32523 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32524 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32525 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32526 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32527 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32528 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32529 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
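/* Masked shifts (immediate-count "*i" and vector-count forms), interleaved with masked bitwise logic (pand/pandn/por/pxor).  */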
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32536 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32537 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
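/* vpternlog builtins; the 8-bit immediate is the truth table of the three-input boolean function.  */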
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
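/* Masked FMA variants: _mask merges into the first source operand, _mask3 into the third, _maskz uses zero-masking.  */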
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32638 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32639 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
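/* Masked float-to-integer conversions.  */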
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32652 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32653 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32654 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32655 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32666 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32667 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32668 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32669 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32670 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32671 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32672 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32673 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
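/* Note (added for reference): the paired "_mask" / "_maskz" entries above
   differ only in masking semantics.  The "_mask" patterns perform
   merging-masking, so destination elements whose write-mask bit is clear
   keep their previous value, while the "_maskz" patterns perform
   zeroing-masking and clear those elements instead.  */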
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32730 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32732 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32733 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32734 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32735 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32736 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32737 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32746 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32747 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32748 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32749 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32750 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32751 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32754 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32756 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32757 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32758 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32759 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32764 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32765 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32766 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32767 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32768 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32769 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32770 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32771 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32776 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32777 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32778 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32779 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32784 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32785 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32786 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32787 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32792 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32793 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32794 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32795 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32800 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32801 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32802 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32803 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32844 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32850 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32853 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32855 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32856 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32857 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32858 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32859 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
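/* Illustrative sketch (added; the exact header definition may differ):
   the user-visible intrinsics are thin wrappers around these builtins,
   passing the pass-through vector and the write-mask as the trailing
   arguments.  For __builtin_ia32_pmaxsd256_mask above, roughly (inline
   attributes omitted):

     __m256i
     _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M,
			    __m256i __A, __m256i __B)
     {
       return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
						       (__v8si) __B,
						       (__v8si) __W,
						       __M);
     }
*/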
32860 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32861 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32862 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32863 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32871 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32872 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32873 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32874 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32892 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32893 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32894 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32895 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32896 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32897 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32900 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32901 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32902 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32903 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32904 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32905 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32906 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32907 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32908 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32909 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32913 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32916 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32933 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32934 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32935 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32936 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32952 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32953 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32954 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32955 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
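/* Note (added for reference): the cmp/ucmp entries above use prototypes
   such as QI_FTYPE_V4DI_V4DI_INT_QI, where the INT operand is the
   immediate comparison predicate and the trailing operand is a write-mask
   that is ANDed into the resulting compare mask; the cmp and ucmp forms
   differ only in whether elements are compared as signed or unsigned
   integers.  */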
32969 /* AVX512DQ. */
32970 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32971 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32972 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32973 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32974 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32975 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32976 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32977 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32978 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32979 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32980 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32981 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32982 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32983 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32984 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32985 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32986 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32987 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32988 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32989 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32990 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32991 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32992 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32993 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32994 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32995 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32996 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32997 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32998 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32999 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
33000 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
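/* Sketch (assuming the avx512dqintrin.h wrapper shape): the fpclass
   entries above classify each lane against an immediate category
   selector under a write-mask, matching QI_FTYPE_V8DF_INT_QI:

     __mmask8 k = (__mmask8) __builtin_ia32_fpclasspd512_mask
       ((__v8df) x, imm, (__mmask8) -1);

   where imm selects the FP categories (NaN, infinity, zero, ...) to
   test.  */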
33002 /* AVX512BW. */
33003 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
33004 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
33005 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33006 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33007 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33008 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33009 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33010 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
33011 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33012 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33013 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33014 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
33015 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
33016 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
33017 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
33018 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
33019 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33020 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33021 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33022 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33023 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33024 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33025 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33026 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33027 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33028 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33029 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33030 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33031 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33032 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33033 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33034 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33035 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33036 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33037 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33038 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33039 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33040 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33041 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33042 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33043 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33044 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33045 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33046 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33047 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33048 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33049 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33050 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33051 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33052 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33053 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33054 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33055 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33056 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33057 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33058 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33059 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
33060 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
33061 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33062 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33063 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33064 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33065 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33066 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33067 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33068 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33069 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33070 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33071 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33072 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33073 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
33074 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
33075 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
33076 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
33077 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33078 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33079 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33080 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33081 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33082 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33083 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33084 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33085 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33086 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33087 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33088 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33089 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33090 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33091 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33092 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33093 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
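/* Sketch (assuming the avx512bwintrin.h wrapper shape): most masked BW
   entries above list the source operand(s), then the pass-through
   vector, then the write-mask (SI for 32 lanes, DI for 64), e.g.
   V64QI_FTYPE_V64QI_V64QI_DI for pabsb512:

     r = (__m512i) __builtin_ia32_pabsb512_mask
       ((__v64qi) a, (__v64qi) _mm512_setzero_si512 (), (__mmask64) -1);  */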
33095 /* AVX512IFMA */
33096 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33097 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33098 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33099 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33100 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33101 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33102 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33103 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33104 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33105 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33106 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33107 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
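/* Sketch (assuming the avx512ifmaintrin.h wrapper shape): the IFMA
   entries take accumulator, two multiplicands and a write-mask,
   matching V8DI_FTYPE_V8DI_V8DI_V8DI_QI:

     r = (__m512i) __builtin_ia32_vpmadd52luq512_mask
       ((__v8di) acc, (__v8di) y, (__v8di) z, (__mmask8) -1);  */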
33109 /* AVX512VBMI */
33110 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33111 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33112 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33113 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33114 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33115 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33116 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33117 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33118 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33119 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33120 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33121 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33122 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33123 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33124 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
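/* Sketch (assuming the avx512vbmiintrin.h convention of passing the
   index vector first): the vpermt2var entries take index, two table
   vectors and a write-mask, matching V64QI_FTYPE_V64QI_V64QI_V64QI_DI:

     r = (__m512i) __builtin_ia32_vpermt2varqi512_mask
       ((__v64qi) idx, (__v64qi) a, (__v64qi) b, (__mmask64) -1);  */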
33125 };
33127 /* Builtins with rounding support. */
33128 static const struct builtin_description bdesc_round_args[] =
33129 {
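/* Every FTYPE in this table carries a trailing INT operand for the
   embedded rounding / suppress-all-exceptions control.  Sketch
   (assuming the avx512fintrin.h wrapper shape for _mm512_add_round_pd):

     r = (__m512d) __builtin_ia32_addpd512_mask
       ((__v8df) a, (__v8df) b, (__v8df) _mm512_undefined_pd (),
        (__mmask8) -1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  */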
33130 /* AVX512F */
33131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33150 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33152 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33159 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33161 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33211 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33213 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33215 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33217 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33219 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33221 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33223 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33225 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
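/* The three FMA flavours above differ only in how masked-off lanes are
   merged: _mask keeps the first source operand, _mask3 keeps the third
   (addend) operand, and _maskz zeroes them.  Sketch (assuming the
   avx512fintrin.h wrapper shape):

     r = (__m512d) __builtin_ia32_vfmaddpd512_maskz
       ((__v8df) a, (__v8df) b, (__v8df) c, (__mmask8) k,
        _MM_FROUND_CUR_DIRECTION);  */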
33251 /* AVX512ER */
33252 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33253 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33254 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33255 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33256 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33257 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33258 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33259 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33260 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33261 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
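/* Sketch (assuming the avx512erintrin.h wrapper shape): the ER entries
   also end in the rounding operand; the non-rounding intrinsics simply
   pass _MM_FROUND_CUR_DIRECTION:

     r = (__m512) __builtin_ia32_rsqrt28ps_mask
       ((__v16sf) a, (__v16sf) _mm512_undefined_ps (), (__mmask16) -1,
        _MM_FROUND_CUR_DIRECTION);  */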
33263 /* AVX512DQ. */
33264 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33265 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33266 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33267 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33268 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33269 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33270 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33271 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33272 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33273 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33274 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33275 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33276 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33277 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33278 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33279 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33280 };
33282 /* Builtins for MPX. */
33283 static const struct builtin_description bdesc_mpx[] =
33284 {
33285 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33286 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33287 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33288 };
33290 /* Const builtins for MPX. */
33291 static const struct builtin_description bdesc_mpx_const[] =
33292 {
33293 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33294 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33295 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33296 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33297 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33298 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33299 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33300 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
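/* The MPX descriptors use (enum insn_code) 0 because they are not tied
   to a single named pattern; ix86_expand_builtin expands each code
   explicitly, and bdesc_mpx_const holds the side-effect-free subset.
   Sketch of a bounds check (assuming the pointer-then-bounds argument
   order implied by VOID_FTYPE_PCVOID_BND, with b obtained from
   __builtin_ia32_bndmk):

     __builtin_ia32_bndcl (p, b);
     __builtin_ia32_bndcu (p, b);  */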
33301 };
33303 /* FMA4 and XOP. */
33304 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33305 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33306 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33307 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33308 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33309 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33310 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33311 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33312 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33313 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33314 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33315 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33316 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33317 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33318 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33319 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33320 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33321 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33322 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33323 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33324 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33325 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33326 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33327 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33328 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33329 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33330 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33331 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33332 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33333 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33334 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33335 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33336 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33337 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33338 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33339 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33340 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33341 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33342 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33343 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33344 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33345 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33346 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33347 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33348 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33349 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33350 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33351 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33352 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33353 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33354 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33355 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
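/* The MULTI_ARG_* names above are simple aliases for
   ix86_builtin_func_type values so that the bdesc_multi_arg entries
   below stay short.  For example,

     { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
       "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
       UNKNOWN, (int)MULTI_ARG_3_SF }

   reads: enabled by -mfma4, expanded through the fma4i_vmfmadd_v4sf
   pattern, no comparison code, three V4SF operands
   (V4SF_FTYPE_V4SF_V4SF_V4SF).  */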
33357 static const struct builtin_description bdesc_multi_arg[] =
33358 {
33359 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33360 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33361 UNKNOWN, (int)MULTI_ARG_3_SF },
33362 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33363 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33364 UNKNOWN, (int)MULTI_ARG_3_DF },
33366 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33367 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33368 UNKNOWN, (int)MULTI_ARG_3_SF },
33369 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33370 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33371 UNKNOWN, (int)MULTI_ARG_3_DF },
33373 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33374 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33375 UNKNOWN, (int)MULTI_ARG_3_SF },
33376 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33377 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33378 UNKNOWN, (int)MULTI_ARG_3_DF },
33379 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33380 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33381 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33382 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33383 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33384 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33386 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33387 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33388 UNKNOWN, (int)MULTI_ARG_3_SF },
33389 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33390 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33391 UNKNOWN, (int)MULTI_ARG_3_DF },
33392 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33393 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33394 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33395 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33396 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33397 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
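/* Illustrative usage sketch (not part of GCC): an entry of type
   MULTI_ARG_3_SF, such as __builtin_ia32_vpcmov_v4sf above, takes three
   V4SF operands and returns a V4SF, matching V4SF_FTYPE_V4SF_V4SF_V4SF.
   Built with -mxop; the bit-select behaviour stated here is my reading of
   XOP VPCMOV, so treat it as an assumption.

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     blend_bits (v4sf a, v4sf b, v4sf sel)
     {
       return __builtin_ia32_vpcmov_v4sf (a, b, sel);
     }

   Result bits are taken from A where SEL has a 1 bit and from B where it
   has a 0 bit (assumed semantics).  */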
33559 /* TM vector builtins. */
33561 /* Reuse the existing x86-specific `struct builtin_description' because
33562 we're lazy.  Add casts to make them fit. */
33563 static const struct builtin_description bdesc_tm[] =
33565 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33566 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33567 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33568 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33569 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33570 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33571 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33573 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33574 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33575 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33576 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33577 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33578 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33579 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33581 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33582 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33583 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33584 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33585 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33586 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33587 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33589 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33590 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33591 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
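/* Illustrative sketch (not part of GCC): with -fgnu-tm and SSE enabled, a
   128-bit vector store inside a transaction can be instrumented through the
   BUILT_IN_TM_STORE_M128 entry above (VOID_FTYPE_PV4SF_V4SF) instead of a
   sequence of narrower _ITM_ stores.  Roughly:

     typedef float v4sf __attribute__ ((vector_size (16)));

     void
     store_in_txn (v4sf *p, v4sf v)
     {
       __transaction_atomic { *p = v; }
     }

   Whether the TM pass actually picks the M128 builtin here is an
   assumption offered for illustration only.  */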
33594 /* TM callbacks. */
33596 /* Return the builtin decl needed to load a vector of TYPE. */
33598 static tree
33599 ix86_builtin_tm_load (tree type)
33601 if (TREE_CODE (type) == VECTOR_TYPE)
33603 switch (tree_to_uhwi (TYPE_SIZE (type)))
33605 case 64:
33606 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33607 case 128:
33608 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33609 case 256:
33610 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33613 return NULL_TREE;
33616 /* Return the builtin decl needed to store a vector of TYPE. */
33618 static tree
33619 ix86_builtin_tm_store (tree type)
33621 if (TREE_CODE (type) == VECTOR_TYPE)
33623 switch (tree_to_uhwi (TYPE_SIZE (type)))
33625 case 64:
33626 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33627 case 128:
33628 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33629 case 256:
33630 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33633 return NULL_TREE;
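/* Worked example for the two hooks above: a "float" vector of
   vector_size (16) has TYPE_SIZE 128, so both hooks return the M128 TM
   builtins; a 256-bit AVX vector maps to the M256 variants; any other
   size, or a non-vector type, falls through to NULL_TREE.  */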
33636 /* Initialize the transactional memory vector load/store builtins. */
33638 static void
33639 ix86_init_tm_builtins (void)
33641 enum ix86_builtin_func_type ftype;
33642 const struct builtin_description *d;
33643 size_t i;
33644 tree decl;
33645 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33646 tree attrs_log, attrs_type_log;
33648 if (!flag_tm)
33649 return;
33651 /* If there are no builtins defined, we must be compiling in a
33652 language without trans-mem support. */
33653 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33654 return;
33656 /* Use whatever attributes a normal TM load has. */
33657 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33658 attrs_load = DECL_ATTRIBUTES (decl);
33659 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33660 /* Use whatever attributes a normal TM store has. */
33661 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33662 attrs_store = DECL_ATTRIBUTES (decl);
33663 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33664 /* Use whatever attributes a normal TM log has. */
33665 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33666 attrs_log = DECL_ATTRIBUTES (decl);
33667 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33669 for (i = 0, d = bdesc_tm;
33670 i < ARRAY_SIZE (bdesc_tm);
33671 i++, d++)
33673 if ((d->mask & ix86_isa_flags) != 0
33674 || (lang_hooks.builtin_function
33675 == lang_hooks.builtin_function_ext_scope))
33677 tree type, attrs, attrs_type;
33678 enum built_in_function code = (enum built_in_function) d->code;
33680 ftype = (enum ix86_builtin_func_type) d->flag;
33681 type = ix86_get_builtin_func_type (ftype);
33683 if (BUILTIN_TM_LOAD_P (code))
33685 attrs = attrs_load;
33686 attrs_type = attrs_type_load;
33688 else if (BUILTIN_TM_STORE_P (code))
33690 attrs = attrs_store;
33691 attrs_type = attrs_type_store;
33693 else
33695 attrs = attrs_log;
33696 attrs_type = attrs_type_log;
33698 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33699 /* The builtin without the prefix for
33700 calling it directly. */
33701 d->name + strlen ("__builtin_"),
33702 attrs);
33703 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33704 set the TYPE_ATTRIBUTES. */
33705 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33707 set_builtin_decl (code, decl, false);
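/* Example of the registration above: the description named
   "__builtin__ITM_RM128" is created with add_builtin_function using the
   same string minus the "__builtin_" prefix, i.e. "_ITM_RM128", as the
   direct-call name, and is then recorded with set_builtin_decl under
   BUILT_IN_TM_LOAD_M128 so later passes can look it up.  */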
33712 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33713 not in the current target ISA, so that the user can compile particular
33714 modules with target-specific options that differ from the command-line
33715 options. */
33716 static void
33717 ix86_init_mmx_sse_builtins (void)
33719 const struct builtin_description * d;
33720 enum ix86_builtin_func_type ftype;
33721 size_t i;
33723 /* Add all special builtins with variable number of operands. */
33724 for (i = 0, d = bdesc_special_args;
33725 i < ARRAY_SIZE (bdesc_special_args);
33726 i++, d++)
33728 if (d->name == 0)
33729 continue;
33731 ftype = (enum ix86_builtin_func_type) d->flag;
33732 def_builtin (d->mask, d->name, ftype, d->code);
33735 /* Add all builtins with variable number of operands. */
33736 for (i = 0, d = bdesc_args;
33737 i < ARRAY_SIZE (bdesc_args);
33738 i++, d++)
33740 if (d->name == 0)
33741 continue;
33743 ftype = (enum ix86_builtin_func_type) d->flag;
33744 def_builtin_const (d->mask, d->name, ftype, d->code);
33747 /* Add all builtins with rounding. */
33748 for (i = 0, d = bdesc_round_args;
33749 i < ARRAY_SIZE (bdesc_round_args);
33750 i++, d++)
33752 if (d->name == 0)
33753 continue;
33755 ftype = (enum ix86_builtin_func_type) d->flag;
33756 def_builtin_const (d->mask, d->name, ftype, d->code);
33759 /* pcmpestr[im] insns. */
33760 for (i = 0, d = bdesc_pcmpestr;
33761 i < ARRAY_SIZE (bdesc_pcmpestr);
33762 i++, d++)
33764 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33765 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33766 else
33767 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33768 def_builtin_const (d->mask, d->name, ftype, d->code);
33771 /* pcmpistr[im] insns. */
33772 for (i = 0, d = bdesc_pcmpistr;
33773 i < ARRAY_SIZE (bdesc_pcmpistr);
33774 i++, d++)
33776 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33777 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33778 else
33779 ftype = INT_FTYPE_V16QI_V16QI_INT;
33780 def_builtin_const (d->mask, d->name, ftype, d->code);
33783 /* comi/ucomi insns. */
33784 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33786 if (d->mask == OPTION_MASK_ISA_SSE2)
33787 ftype = INT_FTYPE_V2DF_V2DF;
33788 else
33789 ftype = INT_FTYPE_V4SF_V4SF;
33790 def_builtin_const (d->mask, d->name, ftype, d->code);
33793 /* SSE */
33794 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33795 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33796 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33797 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33799 /* SSE or 3DNow!A */
33800 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33801 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33802 IX86_BUILTIN_MASKMOVQ);
33804 /* SSE2 */
33805 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33806 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33808 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33809 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33810 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33811 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33813 /* SSE3. */
33814 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33815 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33816 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33817 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33819 /* AES */
33820 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33821 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33822 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33823 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33824 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33825 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33826 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33827 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33828 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33829 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33830 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33831 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33833 /* PCLMUL */
33834 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33835 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33837 /* RDRND */
33838 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33839 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33840 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33841 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33842 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33843 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33844 IX86_BUILTIN_RDRAND64_STEP);
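/* Illustrative usage sketch (not part of GCC): per INT_FTYPE_PUNSIGNED
   above, the *_step builtins store a hardware random value through the
   pointer and return nonzero on success.  With -mrdrnd:

     int
     get_random (unsigned int *out)
     {
       while (!__builtin_ia32_rdrand32_step (out))
         ;
       return 1;
     }

   The retry loop covers the case where the DRNG temporarily fails to
   deliver a value (my reading of the "step" convention).  */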
33846 /* AVX2 */
33847 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33848 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33849 IX86_BUILTIN_GATHERSIV2DF);
33851 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33852 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33853 IX86_BUILTIN_GATHERSIV4DF);
33855 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33856 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33857 IX86_BUILTIN_GATHERDIV2DF);
33859 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33860 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33861 IX86_BUILTIN_GATHERDIV4DF);
33863 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33864 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33865 IX86_BUILTIN_GATHERSIV4SF);
33867 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33868 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33869 IX86_BUILTIN_GATHERSIV8SF);
33871 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33872 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33873 IX86_BUILTIN_GATHERDIV4SF);
33875 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33876 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33877 IX86_BUILTIN_GATHERDIV8SF);
33879 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33880 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33881 IX86_BUILTIN_GATHERSIV2DI);
33883 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33884 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33885 IX86_BUILTIN_GATHERSIV4DI);
33887 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33888 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33889 IX86_BUILTIN_GATHERDIV2DI);
33891 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33892 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33893 IX86_BUILTIN_GATHERDIV4DI);
33895 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33896 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33897 IX86_BUILTIN_GATHERSIV4SI);
33899 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33900 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33901 IX86_BUILTIN_GATHERSIV8SI);
33903 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33904 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33905 IX86_BUILTIN_GATHERDIV4SI);
33907 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33908 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33909 IX86_BUILTIN_GATHERDIV8SI);
33911 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33912 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33913 IX86_BUILTIN_GATHERALTSIV4DF);
33915 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33916 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33917 IX86_BUILTIN_GATHERALTDIV8SF);
33919 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33920 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33921 IX86_BUILTIN_GATHERALTSIV4DI);
33923 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33924 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33925 IX86_BUILTIN_GATHERALTDIV8SI);
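/* Illustrative usage sketch (not part of GCC): the gather builtins follow
   shapes like V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT above.  My reading of
   the operand order is (src, base, index, mask, scale) with scale a
   literal 1, 2, 4 or 8 -- treat that ordering as an assumption.  With
   -mavx2:

     typedef float v4sf __attribute__ ((vector_size (16)));
     typedef int   v4si __attribute__ ((vector_size (16)));

     v4sf
     gather4 (const float *base, v4si idx)
     {
       v4sf src  = { 0, 0, 0, 0 };
       v4sf mask = { -1, -1, -1, -1 };
       return __builtin_ia32_gathersiv4sf (src, base, idx, mask, 4);
     }

   Lanes whose mask element has the sign bit set are gathered; the rest
   keep SRC (assumed semantics).  */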
33927 /* AVX512F */
33928 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33929 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33930 IX86_BUILTIN_GATHER3SIV16SF);
33932 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33933 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33934 IX86_BUILTIN_GATHER3SIV8DF);
33936 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33937 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33938 IX86_BUILTIN_GATHER3DIV16SF);
33940 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33941 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33942 IX86_BUILTIN_GATHER3DIV8DF);
33944 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33945 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33946 IX86_BUILTIN_GATHER3SIV16SI);
33948 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33949 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33950 IX86_BUILTIN_GATHER3SIV8DI);
33952 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33953 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33954 IX86_BUILTIN_GATHER3DIV16SI);
33956 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33957 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33958 IX86_BUILTIN_GATHER3DIV8DI);
33960 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33961 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33962 IX86_BUILTIN_GATHER3ALTSIV8DF);
33964 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33965 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33966 IX86_BUILTIN_GATHER3ALTDIV16SF);
33968 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33969 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33970 IX86_BUILTIN_GATHER3ALTSIV8DI);
33972 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33973 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33974 IX86_BUILTIN_GATHER3ALTDIV16SI);
33976 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33977 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33978 IX86_BUILTIN_SCATTERSIV16SF);
33980 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33981 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33982 IX86_BUILTIN_SCATTERSIV8DF);
33984 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33985 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33986 IX86_BUILTIN_SCATTERDIV16SF);
33988 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33989 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33990 IX86_BUILTIN_SCATTERDIV8DF);
33992 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33993 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33994 IX86_BUILTIN_SCATTERSIV16SI);
33996 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33997 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33998 IX86_BUILTIN_SCATTERSIV8DI);
34000 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34001 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34002 IX86_BUILTIN_SCATTERDIV16SI);
34004 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34005 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34006 IX86_BUILTIN_SCATTERDIV8DI);
34008 /* AVX512VL */
34009 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34010 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34011 IX86_BUILTIN_GATHER3SIV2DF);
34013 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34014 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34015 IX86_BUILTIN_GATHER3SIV4DF);
34017 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34018 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34019 IX86_BUILTIN_GATHER3DIV2DF);
34021 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34022 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34023 IX86_BUILTIN_GATHER3DIV4DF);
34025 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34026 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34027 IX86_BUILTIN_GATHER3SIV4SF);
34029 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34030 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34031 IX86_BUILTIN_GATHER3SIV8SF);
34033 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34034 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34035 IX86_BUILTIN_GATHER3DIV4SF);
34037 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34038 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34039 IX86_BUILTIN_GATHER3DIV8SF);
34041 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34042 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34043 IX86_BUILTIN_GATHER3SIV2DI);
34045 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34046 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34047 IX86_BUILTIN_GATHER3SIV4DI);
34049 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34050 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34051 IX86_BUILTIN_GATHER3DIV2DI);
34053 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34054 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34055 IX86_BUILTIN_GATHER3DIV4DI);
34057 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34058 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34059 IX86_BUILTIN_GATHER3SIV4SI);
34061 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34062 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34063 IX86_BUILTIN_GATHER3SIV8SI);
34065 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34066 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34067 IX86_BUILTIN_GATHER3DIV4SI);
34069 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34070 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34071 IX86_BUILTIN_GATHER3DIV8SI);
34073 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34074 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34075 IX86_BUILTIN_GATHER3ALTSIV4DF);
34077 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34078 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34079 IX86_BUILTIN_GATHER3ALTDIV8SF);
34081 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34082 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34083 IX86_BUILTIN_GATHER3ALTSIV4DI);
34085 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34086 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34087 IX86_BUILTIN_GATHER3ALTDIV8SI);
34089 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34090 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34091 IX86_BUILTIN_SCATTERSIV8SF);
34093 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34094 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34095 IX86_BUILTIN_SCATTERSIV4SF);
34097 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34098 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34099 IX86_BUILTIN_SCATTERSIV4DF);
34101 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34102 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34103 IX86_BUILTIN_SCATTERSIV2DF);
34105 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34106 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34107 IX86_BUILTIN_SCATTERDIV8SF);
34109 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34110 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34111 IX86_BUILTIN_SCATTERDIV4SF);
34113 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34114 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34115 IX86_BUILTIN_SCATTERDIV4DF);
34117 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34118 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34119 IX86_BUILTIN_SCATTERDIV2DF);
34121 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34122 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34123 IX86_BUILTIN_SCATTERSIV8SI);
34125 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34126 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34127 IX86_BUILTIN_SCATTERSIV4SI);
34129 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34130 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34131 IX86_BUILTIN_SCATTERSIV4DI);
34133 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34134 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34135 IX86_BUILTIN_SCATTERSIV2DI);
34137 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34138 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34139 IX86_BUILTIN_SCATTERDIV8SI);
34141 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34142 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34143 IX86_BUILTIN_SCATTERDIV4SI);
34145 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34146 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34147 IX86_BUILTIN_SCATTERDIV4DI);
34149 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34150 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34151 IX86_BUILTIN_SCATTERDIV2DI);
34153 /* AVX512PF */
34154 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34155 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34156 IX86_BUILTIN_GATHERPFDPD);
34157 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34158 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34159 IX86_BUILTIN_GATHERPFDPS);
34160 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34161 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34162 IX86_BUILTIN_GATHERPFQPD);
34163 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34164 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34165 IX86_BUILTIN_GATHERPFQPS);
34166 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34167 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34168 IX86_BUILTIN_SCATTERPFDPD);
34169 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34170 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34171 IX86_BUILTIN_SCATTERPFDPS);
34172 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34173 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34174 IX86_BUILTIN_SCATTERPFQPD);
34175 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34176 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34177 IX86_BUILTIN_SCATTERPFQPS);
34179 /* SHA */
34180 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34181 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34182 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34183 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34184 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34185 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34186 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34187 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34188 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34189 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34190 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34191 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34192 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34193 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
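/* Illustrative usage sketch (not part of GCC): the SHA builtins operate on
   V4SI values per the signatures above.  With -msha:

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     sha1_msg1 (v4si a, v4si b)
     {
       return __builtin_ia32_sha1msg1 (a, b);
     }
*/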
34195 /* RTM. */
34196 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34197 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34199 /* MMX access to the vec_init patterns. */
34200 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34201 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34203 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34204 V4HI_FTYPE_HI_HI_HI_HI,
34205 IX86_BUILTIN_VEC_INIT_V4HI);
34207 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34208 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34209 IX86_BUILTIN_VEC_INIT_V8QI);
34211 /* Access to the vec_extract patterns. */
34212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34213 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34215 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34216 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34217 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34219 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34221 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34223 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34224 "__builtin_ia32_vec_ext_v4hi",
34225 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34227 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34228 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34231 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
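/* Illustrative usage sketch (not part of GCC): the vec_ext builtins extract
   one element by constant index, e.g. FLOAT_FTYPE_V4SF_INT above:

     typedef float v4sf __attribute__ ((vector_size (16)));

     float
     first_lane (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);
     }
*/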
34233 /* Access to the vec_set patterns. */
34234 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34235 "__builtin_ia32_vec_set_v2di",
34236 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34238 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34239 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34241 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34242 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34244 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34245 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34247 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34248 "__builtin_ia32_vec_set_v4hi",
34249 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34251 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34252 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34254 /* RDSEED */
34255 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34256 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34257 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34258 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34259 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34260 "__builtin_ia32_rdseed_di_step",
34261 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34263 /* ADCX */
34264 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34265 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34266 def_builtin (OPTION_MASK_ISA_64BIT,
34267 "__builtin_ia32_addcarryx_u64",
34268 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34269 IX86_BUILTIN_ADDCARRYX64);
34271 /* SBB */
34272 def_builtin (0, "__builtin_ia32_sbb_u32",
34273 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34274 def_builtin (OPTION_MASK_ISA_64BIT,
34275 "__builtin_ia32_sbb_u64",
34276 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34277 IX86_BUILTIN_SBB64);
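/* Illustrative usage sketch (not part of GCC): per
   UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED above, the add-with-carry builtin
   takes a carry-in, two 32-bit operands and a pointer for the sum, and
   returns the carry-out, which can be chained for multi-word arithmetic:

     void
     add64_in_halves (const unsigned int a[2], const unsigned int b[2],
                      unsigned int out[2])
     {
       unsigned char c;
       c = __builtin_ia32_addcarryx_u32 (0, a[0], b[0], &out[0]);
       (void) __builtin_ia32_addcarryx_u32 (c, a[1], b[1], &out[1]);
     }
*/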
34279 /* Read/write FLAGS. */
34280 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34281 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34282 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34283 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34284 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34285 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34286 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34287 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34289 /* CLFLUSHOPT. */
34290 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34291 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34293 /* CLWB. */
34294 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34295 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34297 /* MONITORX and MWAITX. */
34298 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34299 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34300 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34301 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34303 /* Add FMA4 and XOP multi-arg instructions.  */
34304 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34306 if (d->name == 0)
34307 continue;
34309 ftype = (enum ix86_builtin_func_type) d->flag;
34310 def_builtin_const (d->mask, d->name, ftype, d->code);
34314 static void
34315 ix86_init_mpx_builtins ()
34317 const struct builtin_description * d;
34318 enum ix86_builtin_func_type ftype;
34319 tree decl;
34320 size_t i;
34322 for (i = 0, d = bdesc_mpx;
34323 i < ARRAY_SIZE (bdesc_mpx);
34324 i++, d++)
34326 if (d->name == 0)
34327 continue;
34329 ftype = (enum ix86_builtin_func_type) d->flag;
34330 decl = def_builtin (d->mask, d->name, ftype, d->code);
34332 /* Without the leaf and nothrow flags, abnormal edges may
34333 follow calls to MPX builtins when setjmp is present
34334 in the function.  Since we may have a lot of MPX
34335 builtin calls, this causes lots of useless edges and
34336 enormous PHI nodes.  To avoid this we mark MPX
34337 builtins as leaf and nothrow. */
34338 if (decl)
34340 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34341 NULL_TREE);
34342 TREE_NOTHROW (decl) = 1;
34344 else
34346 ix86_builtins_isa[(int)d->code].leaf_p = true;
34347 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34351 for (i = 0, d = bdesc_mpx_const;
34352 i < ARRAY_SIZE (bdesc_mpx_const);
34353 i++, d++)
34355 if (d->name == 0)
34356 continue;
34358 ftype = (enum ix86_builtin_func_type) d->flag;
34359 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34361 if (decl)
34363 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34364 NULL_TREE);
34365 TREE_NOTHROW (decl) = 1;
34367 else
34369 ix86_builtins_isa[(int)d->code].leaf_p = true;
34370 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34375 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34376 to return a pointer to VERSION_DECL if the outcome of the expression
34377 formed by PREDICATE_CHAIN is true. This function will be called during
34378 version dispatch to decide which function version to execute. It returns
34379 the basic block at the end, to which more conditions can be added. */
34381 static basic_block
34382 add_condition_to_bb (tree function_decl, tree version_decl,
34383 tree predicate_chain, basic_block new_bb)
34385 gimple return_stmt;
34386 tree convert_expr, result_var;
34387 gimple convert_stmt;
34388 gimple call_cond_stmt;
34389 gimple if_else_stmt;
34391 basic_block bb1, bb2, bb3;
34392 edge e12, e23;
34394 tree cond_var, and_expr_var = NULL_TREE;
34395 gimple_seq gseq;
34397 tree predicate_decl, predicate_arg;
34399 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34401 gcc_assert (new_bb != NULL);
34402 gseq = bb_seq (new_bb);
34405 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34406 build_fold_addr_expr (version_decl));
34407 result_var = create_tmp_var (ptr_type_node);
34408 convert_stmt = gimple_build_assign (result_var, convert_expr);
34409 return_stmt = gimple_build_return (result_var);
34411 if (predicate_chain == NULL_TREE)
34413 gimple_seq_add_stmt (&gseq, convert_stmt);
34414 gimple_seq_add_stmt (&gseq, return_stmt);
34415 set_bb_seq (new_bb, gseq);
34416 gimple_set_bb (convert_stmt, new_bb);
34417 gimple_set_bb (return_stmt, new_bb);
34418 pop_cfun ();
34419 return new_bb;
34422 while (predicate_chain != NULL)
34424 cond_var = create_tmp_var (integer_type_node);
34425 predicate_decl = TREE_PURPOSE (predicate_chain);
34426 predicate_arg = TREE_VALUE (predicate_chain);
34427 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34428 gimple_call_set_lhs (call_cond_stmt, cond_var);
34430 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34431 gimple_set_bb (call_cond_stmt, new_bb);
34432 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34434 predicate_chain = TREE_CHAIN (predicate_chain);
34436 if (and_expr_var == NULL)
34437 and_expr_var = cond_var;
34438 else
34440 gimple assign_stmt;
34441 /* Use MIN_EXPR to check whether any integer is zero:
34442 and_expr_var = min_expr <cond_var, and_expr_var>.  */
34443 assign_stmt = gimple_build_assign (and_expr_var,
34444 build2 (MIN_EXPR, integer_type_node,
34445 cond_var, and_expr_var));
34447 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34448 gimple_set_bb (assign_stmt, new_bb);
34449 gimple_seq_add_stmt (&gseq, assign_stmt);
34453 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34454 integer_zero_node,
34455 NULL_TREE, NULL_TREE);
34456 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34457 gimple_set_bb (if_else_stmt, new_bb);
34458 gimple_seq_add_stmt (&gseq, if_else_stmt);
34460 gimple_seq_add_stmt (&gseq, convert_stmt);
34461 gimple_seq_add_stmt (&gseq, return_stmt);
34462 set_bb_seq (new_bb, gseq);
34464 bb1 = new_bb;
34465 e12 = split_block (bb1, if_else_stmt);
34466 bb2 = e12->dest;
34467 e12->flags &= ~EDGE_FALLTHRU;
34468 e12->flags |= EDGE_TRUE_VALUE;
34470 e23 = split_block (bb2, return_stmt);
34472 gimple_set_bb (convert_stmt, bb2);
34473 gimple_set_bb (return_stmt, bb2);
34475 bb3 = e23->dest;
34476 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34478 remove_edge (e23);
34479 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34481 pop_cfun ();
34483 return bb3;
34486 /* This parses the attribute arguments to target in DECL and determines
34487 the right builtin to use to match the platform specification.
34488 It returns the priority value for this version decl. If PREDICATE_LIST
34489 is not NULL, it stores the list of cpu features that need to be checked
34490 before dispatching this function. */
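/* For example (an illustrative reading of the handling below): a version
   declared with __attribute__ ((target ("sse4.2"))) yields the predicate
   __builtin_cpu_supports ("sse4.2") and priority P_SSE4_2, while
   __attribute__ ((target ("arch=core2"))) yields __builtin_cpu_is ("core2")
   and priority P_PROC_SSSE3.  */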
34492 static unsigned int
34493 get_builtin_code_for_version (tree decl, tree *predicate_list)
34495 tree attrs;
34496 struct cl_target_option cur_target;
34497 tree target_node;
34498 struct cl_target_option *new_target;
34499 const char *arg_str = NULL;
34500 const char *attrs_str = NULL;
34501 char *tok_str = NULL;
34502 char *token;
34504 /* Priority of i386 features, greater value is higher priority. This is
34505 used to decide the order in which function dispatch must happen. For
34506 instance, a version specialized for SSE4.2 should be checked for dispatch
34507 before a version for SSE3, as SSE4.2 implies SSE3. */
34508 enum feature_priority
34510 P_ZERO = 0,
34511 P_MMX,
34512 P_SSE,
34513 P_SSE2,
34514 P_SSE3,
34515 P_SSSE3,
34516 P_PROC_SSSE3,
34517 P_SSE4_A,
34518 P_PROC_SSE4_A,
34519 P_SSE4_1,
34520 P_SSE4_2,
34521 P_PROC_SSE4_2,
34522 P_POPCNT,
34523 P_AVX,
34524 P_PROC_AVX,
34525 P_BMI,
34526 P_PROC_BMI,
34527 P_FMA4,
34528 P_XOP,
34529 P_PROC_XOP,
34530 P_FMA,
34531 P_PROC_FMA,
34532 P_BMI2,
34533 P_AVX2,
34534 P_PROC_AVX2,
34535 P_AVX512F,
34536 P_PROC_AVX512F
34539 enum feature_priority priority = P_ZERO;
34541 /* These are the target attribute strings for which a dispatcher is
34542 available, from fold_builtin_cpu. */
34544 static struct _feature_list
34546 const char *const name;
34547 const enum feature_priority priority;
34549 const feature_list[] =
34551 {"mmx", P_MMX},
34552 {"sse", P_SSE},
34553 {"sse2", P_SSE2},
34554 {"sse3", P_SSE3},
34555 {"sse4a", P_SSE4_A},
34556 {"ssse3", P_SSSE3},
34557 {"sse4.1", P_SSE4_1},
34558 {"sse4.2", P_SSE4_2},
34559 {"popcnt", P_POPCNT},
34560 {"avx", P_AVX},
34561 {"bmi", P_BMI},
34562 {"fma4", P_FMA4},
34563 {"xop", P_XOP},
34564 {"fma", P_FMA},
34565 {"bmi2", P_BMI2},
34566 {"avx2", P_AVX2},
34567 {"avx512f", P_AVX512F}
34571 static unsigned int NUM_FEATURES
34572 = sizeof (feature_list) / sizeof (struct _feature_list);
34574 unsigned int i;
34576 tree predicate_chain = NULL_TREE;
34577 tree predicate_decl, predicate_arg;
34579 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34580 gcc_assert (attrs != NULL);
34582 attrs = TREE_VALUE (TREE_VALUE (attrs));
34584 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34585 attrs_str = TREE_STRING_POINTER (attrs);
34587 /* Return priority zero for default function. */
34588 if (strcmp (attrs_str, "default") == 0)
34589 return 0;
34591 /* Handle arch= if specified. For priority, set it to be 1 more than
34592 the best instruction set the processor can handle. For instance, if
34593 there is a version for atom and a version for ssse3 (the highest ISA
34594 priority for atom), the atom version must be checked for dispatch
34595 before the ssse3 version. */
34596 if (strstr (attrs_str, "arch=") != NULL)
34598 cl_target_option_save (&cur_target, &global_options);
34599 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34600 &global_options_set);
34602 gcc_assert (target_node);
34603 new_target = TREE_TARGET_OPTION (target_node);
34604 gcc_assert (new_target);
34606 if (new_target->arch_specified && new_target->arch > 0)
34608 switch (new_target->arch)
34610 case PROCESSOR_CORE2:
34611 arg_str = "core2";
34612 priority = P_PROC_SSSE3;
34613 break;
34614 case PROCESSOR_NEHALEM:
34615 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34616 arg_str = "westmere";
34617 else
34618 /* We translate "arch=corei7" and "arch=nehalem" to
34619 "corei7" so that it will be mapped to M_INTEL_COREI7
34620 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34621 arg_str = "corei7";
34622 priority = P_PROC_SSE4_2;
34623 break;
34624 case PROCESSOR_SANDYBRIDGE:
34625 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34626 arg_str = "ivybridge";
34627 else
34628 arg_str = "sandybridge";
34629 priority = P_PROC_AVX;
34630 break;
34631 case PROCESSOR_HASWELL:
34632 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34633 arg_str = "broadwell";
34634 else
34635 arg_str = "haswell";
34636 priority = P_PROC_AVX2;
34637 break;
34638 case PROCESSOR_BONNELL:
34639 arg_str = "bonnell";
34640 priority = P_PROC_SSSE3;
34641 break;
34642 case PROCESSOR_KNL:
34643 arg_str = "knl";
34644 priority = P_PROC_AVX512F;
34645 break;
34646 case PROCESSOR_SILVERMONT:
34647 arg_str = "silvermont";
34648 priority = P_PROC_SSE4_2;
34649 break;
34650 case PROCESSOR_AMDFAM10:
34651 arg_str = "amdfam10h";
34652 priority = P_PROC_SSE4_A;
34653 break;
34654 case PROCESSOR_BTVER1:
34655 arg_str = "btver1";
34656 priority = P_PROC_SSE4_A;
34657 break;
34658 case PROCESSOR_BTVER2:
34659 arg_str = "btver2";
34660 priority = P_PROC_BMI;
34661 break;
34662 case PROCESSOR_BDVER1:
34663 arg_str = "bdver1";
34664 priority = P_PROC_XOP;
34665 break;
34666 case PROCESSOR_BDVER2:
34667 arg_str = "bdver2";
34668 priority = P_PROC_FMA;
34669 break;
34670 case PROCESSOR_BDVER3:
34671 arg_str = "bdver3";
34672 priority = P_PROC_FMA;
34673 break;
34674 case PROCESSOR_BDVER4:
34675 arg_str = "bdver4";
34676 priority = P_PROC_AVX2;
34677 break;
34681 cl_target_option_restore (&global_options, &cur_target);
34683 if (predicate_list && arg_str == NULL)
34685 error_at (DECL_SOURCE_LOCATION (decl),
34686 "No dispatcher found for the versioning attributes");
34687 return 0;
34690 if (predicate_list)
34692 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34693 /* For a C string literal the length includes the trailing NULL. */
34694 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34695 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34696 predicate_chain);
34700 /* Process feature name. */
34701 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34702 strcpy (tok_str, attrs_str);
34703 token = strtok (tok_str, ",");
34704 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34706 while (token != NULL)
34708 /* Do not process "arch=" */
34709 if (strncmp (token, "arch=", 5) == 0)
34711 token = strtok (NULL, ",");
34712 continue;
34714 for (i = 0; i < NUM_FEATURES; ++i)
34716 if (strcmp (token, feature_list[i].name) == 0)
34718 if (predicate_list)
34720 predicate_arg = build_string_literal (
34721 strlen (feature_list[i].name) + 1,
34722 feature_list[i].name);
34723 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34724 predicate_chain);
34726 /* Find the maximum priority feature. */
34727 if (feature_list[i].priority > priority)
34728 priority = feature_list[i].priority;
34730 break;
34733 if (predicate_list && i == NUM_FEATURES)
34735 error_at (DECL_SOURCE_LOCATION (decl),
34736 "No dispatcher found for %s", token);
34737 return 0;
34739 token = strtok (NULL, ",");
34741 free (tok_str);
34743 if (predicate_list && predicate_chain == NULL_TREE)
34745 error_at (DECL_SOURCE_LOCATION (decl),
34746 "No dispatcher found for the versioning attributes : %s",
34747 attrs_str);
34748 return 0;
34750 else if (predicate_list)
34752 predicate_chain = nreverse (predicate_chain);
34753 *predicate_list = predicate_chain;
34756 return priority;
34759 /* This compares the priority of target features in function DECL1
34760 and DECL2. It returns positive value if DECL1 is higher priority,
34761 negative value if DECL2 is higher priority and 0 if they are the
34762 same. */
34764 static int
34765 ix86_compare_version_priority (tree decl1, tree decl2)
34767 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34768 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34770 return (int)priority1 - (int)priority2;
34773 /* V1 and V2 point to function versions with different priorities
34774 based on the target ISA. This function compares their priorities. */
34776 static int
34777 feature_compare (const void *v1, const void *v2)
34779 typedef struct _function_version_info
34781 tree version_decl;
34782 tree predicate_chain;
34783 unsigned int dispatch_priority;
34784 } function_version_info;
34786 const function_version_info c1 = *(const function_version_info *)v1;
34787 const function_version_info c2 = *(const function_version_info *)v2;
34788 return (c2.dispatch_priority - c1.dispatch_priority);
34791 /* This function generates the dispatch function for
34792 multi-versioned functions. DISPATCH_DECL is the function which will
34793 contain the dispatch logic. FNDECLS are the function choices for
34794 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34795 in DISPATCH_DECL in which the dispatch code is generated. */
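/* An illustrative sketch (not in the original source) of the dispatching
   this emits, written as C pseudo code with hypothetical version names;
   the real body is GIMPLE:

       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return &foo_avx2_version;
       if (__builtin_cpu_supports ("sse4.2"))
         return &foo_sse42_version;
       return &foo_default_version;

   Versions are tested in decreasing dispatch priority and the default
   version is returned when no other version matches.  */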
34797 static int
34798 dispatch_function_versions (tree dispatch_decl,
34799 void *fndecls_p,
34800 basic_block *empty_bb)
34802 tree default_decl;
34803 gimple ifunc_cpu_init_stmt;
34804 gimple_seq gseq;
34805 int ix;
34806 tree ele;
34807 vec<tree> *fndecls;
34808 unsigned int num_versions = 0;
34809 unsigned int actual_versions = 0;
34810 unsigned int i;
34812 struct _function_version_info
34814 tree version_decl;
34815 tree predicate_chain;
34816 unsigned int dispatch_priority;
34817 }*function_version_info;
34819 gcc_assert (dispatch_decl != NULL
34820 && fndecls_p != NULL
34821 && empty_bb != NULL);
34823 /* fndecls_p is actually a vector.  */
34824 fndecls = static_cast<vec<tree> *> (fndecls_p);
34826 /* At least one more version other than the default. */
34827 num_versions = fndecls->length ();
34828 gcc_assert (num_versions >= 2);
34830 function_version_info = (struct _function_version_info *)
34831 XNEWVEC (struct _function_version_info, (num_versions - 1));
34833 /* The first version in the vector is the default decl. */
34834 default_decl = (*fndecls)[0];
34836 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34838 gseq = bb_seq (*empty_bb);
34839 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34840 constructors, so explicitly call __builtin_cpu_init here. */
34841 ifunc_cpu_init_stmt = gimple_build_call_vec (
34842 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34843 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34844 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34845 set_bb_seq (*empty_bb, gseq);
34847 pop_cfun ();
34850 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34852 tree version_decl = ele;
34853 tree predicate_chain = NULL_TREE;
34854 unsigned int priority;
34855 /* Get attribute string, parse it and find the right predicate decl.
34856 The predicate function could be a lengthy combination of many
34857 features, like arch-type and various isa-variants. */
34858 priority = get_builtin_code_for_version (version_decl,
34859 &predicate_chain);
34861 if (predicate_chain == NULL_TREE)
34862 continue;
34864 function_version_info [actual_versions].version_decl = version_decl;
34865 function_version_info [actual_versions].predicate_chain
34866 = predicate_chain;
34867 function_version_info [actual_versions].dispatch_priority = priority;
34868 actual_versions++;
34871 /* Sort the versions according to descending order of dispatch priority. The
34872 priority is based on the ISA. This is not a perfect solution. There
34873 could still be ambiguity. If more than one function version is suitable
34874 to execute, which one should be dispatched? In future, allow the user
34875 to specify a dispatch priority next to the version. */
34876 qsort (function_version_info, actual_versions,
34877 sizeof (struct _function_version_info), feature_compare);
34879 for (i = 0; i < actual_versions; ++i)
34880 *empty_bb = add_condition_to_bb (dispatch_decl,
34881 function_version_info[i].version_decl,
34882 function_version_info[i].predicate_chain,
34883 *empty_bb);
34885 /* Dispatch the default version at the end. */
34886 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34887 NULL, *empty_bb);
34889 free (function_version_info);
34890 return 0;
34893 /* Comparator function to be used in qsort routine to sort attribute
34894 specification strings to "target". */
34896 static int
34897 attr_strcmp (const void *v1, const void *v2)
34899 const char *c1 = *(char *const*)v1;
34900 const char *c2 = *(char *const*)v2;
34901 return strcmp (c1, c2);
34904 /* ARGLIST is the argument to target attribute. This function tokenizes
34905 the comma separated arguments, sorts them and returns a string which
34906 is a unique identifier for the comma separated arguments. It also
34907 replaces non-identifier characters "=,-" with "_". */
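/* For example (illustrative): the argument list of
   __attribute__ ((target ("sse4.2,arch=core2"))) becomes the tokens
   "sse4.2" and "arch_core2" after the "=,-" replacement, which are sorted
   and joined, so the function returns "arch_core2_sse4.2".  */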
34909 static char *
34910 sorted_attr_string (tree arglist)
34912 tree arg;
34913 size_t str_len_sum = 0;
34914 char **args = NULL;
34915 char *attr_str, *ret_str;
34916 char *attr = NULL;
34917 unsigned int argnum = 1;
34918 unsigned int i;
34920 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34922 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34923 size_t len = strlen (str);
34924 str_len_sum += len + 1;
34925 if (arg != arglist)
34926 argnum++;
34927 for (i = 0; i < strlen (str); i++)
34928 if (str[i] == ',')
34929 argnum++;
34932 attr_str = XNEWVEC (char, str_len_sum);
34933 str_len_sum = 0;
34934 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34936 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34937 size_t len = strlen (str);
34938 memcpy (attr_str + str_len_sum, str, len);
34939 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34940 str_len_sum += len + 1;
34943 /* Replace "=,-" with "_". */
34944 for (i = 0; i < strlen (attr_str); i++)
34945 if (attr_str[i] == '=' || attr_str[i]== '-')
34946 attr_str[i] = '_';
34948 if (argnum == 1)
34949 return attr_str;
34951 args = XNEWVEC (char *, argnum);
34953 i = 0;
34954 attr = strtok (attr_str, ",");
34955 while (attr != NULL)
34957 args[i] = attr;
34958 i++;
34959 attr = strtok (NULL, ",");
34962 qsort (args, argnum, sizeof (char *), attr_strcmp);
34964 ret_str = XNEWVEC (char, str_len_sum);
34965 str_len_sum = 0;
34966 for (i = 0; i < argnum; i++)
34968 size_t len = strlen (args[i]);
34969 memcpy (ret_str + str_len_sum, args[i], len);
34970 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34971 str_len_sum += len + 1;
34974 XDELETEVEC (args);
34975 XDELETEVEC (attr_str);
34976 return ret_str;
34979 /* This function changes the assembler name for functions that are
34980 versions. If DECL is a function version and has a "target"
34981 attribute, it appends the attribute string to its assembler name. */
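/* For example (illustrative): for a C function foo, the version declared
   with __attribute__ ((target ("avx"))) gets the assembler name "foo.avx",
   while the "default" version keeps its original assembler name.  */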
34983 static tree
34984 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34986 tree version_attr;
34987 const char *orig_name, *version_string;
34988 char *attr_str, *assembler_name;
34990 if (DECL_DECLARED_INLINE_P (decl)
34991 && lookup_attribute ("gnu_inline",
34992 DECL_ATTRIBUTES (decl)))
34993 error_at (DECL_SOURCE_LOCATION (decl),
34994 "Function versions cannot be marked as gnu_inline,"
34995 " bodies have to be generated");
34997 if (DECL_VIRTUAL_P (decl)
34998 || DECL_VINDEX (decl))
34999 sorry ("Virtual function multiversioning not supported");
35001 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35003 /* target attribute string cannot be NULL. */
35004 gcc_assert (version_attr != NULL_TREE);
35006 orig_name = IDENTIFIER_POINTER (id);
35007 version_string
35008 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35010 if (strcmp (version_string, "default") == 0)
35011 return id;
35013 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35014 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35016 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35018 /* Allow assembler name to be modified if already set. */
35019 if (DECL_ASSEMBLER_NAME_SET_P (decl))
35020 SET_DECL_RTL (decl, NULL);
35022 tree ret = get_identifier (assembler_name);
35023 XDELETEVEC (attr_str);
35024 XDELETEVEC (assembler_name);
35025 return ret;
35028 /* This function returns true if FN1 and FN2 are versions of the same function,
35029 that is, the target strings of the function decls are different. This assumes
35030 that FN1 and FN2 have the same signature. */
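/* For example (illustrative user code), these two declarations are treated
   as versions of the same function because their sorted target strings
   ("default" and "avx2") differ:

       __attribute__ ((target ("default"))) int foo (void);
       __attribute__ ((target ("avx2"))) int foo (void);  */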
35032 static bool
35033 ix86_function_versions (tree fn1, tree fn2)
35035 tree attr1, attr2;
35036 char *target1, *target2;
35037 bool result;
35039 if (TREE_CODE (fn1) != FUNCTION_DECL
35040 || TREE_CODE (fn2) != FUNCTION_DECL)
35041 return false;
35043 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35044 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35046 /* At least one function decl should have the target attribute specified. */
35047 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35048 return false;
35050 /* Diagnose missing target attribute if one of the decls is already
35051 multi-versioned. */
35052 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35054 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35056 if (attr2 != NULL_TREE)
35058 std::swap (fn1, fn2);
35059 attr1 = attr2;
35061 error_at (DECL_SOURCE_LOCATION (fn2),
35062 "missing %<target%> attribute for multi-versioned %D",
35063 fn2);
35064 inform (DECL_SOURCE_LOCATION (fn1),
35065 "previous declaration of %D", fn1);
35066 /* Prevent diagnosing of the same error multiple times. */
35067 DECL_ATTRIBUTES (fn2)
35068 = tree_cons (get_identifier ("target"),
35069 copy_node (TREE_VALUE (attr1)),
35070 DECL_ATTRIBUTES (fn2));
35072 return false;
35075 target1 = sorted_attr_string (TREE_VALUE (attr1));
35076 target2 = sorted_attr_string (TREE_VALUE (attr2));
35078 /* The sorted target strings must be different for fn1 and fn2
35079 to be versions. */
35080 if (strcmp (target1, target2) == 0)
35081 result = false;
35082 else
35083 result = true;
35085 XDELETEVEC (target1);
35086 XDELETEVEC (target2);
35088 return result;
35091 static tree
35092 ix86_mangle_decl_assembler_name (tree decl, tree id)
35094 /* For function version, add the target suffix to the assembler name. */
35095 if (TREE_CODE (decl) == FUNCTION_DECL
35096 && DECL_FUNCTION_VERSIONED (decl))
35097 id = ix86_mangle_function_version_assembler_name (decl, id);
35098 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35099 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35100 #endif
35102 return id;
35105 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35106 is true, append the full path name of the source file. */
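/* For example (illustrative): if DECL's assembler name is "foo", then
   make_name (decl, "resolver", false) returns "foo.resolver"; with
   MAKE_UNIQUE true a file-unique component is inserted in the middle,
   giving something of the form "foo.<unique>.resolver".  */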
35108 static char *
35109 make_name (tree decl, const char *suffix, bool make_unique)
35111 char *global_var_name;
35112 int name_len;
35113 const char *name;
35114 const char *unique_name = NULL;
35116 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35118 /* Get a unique name that can be used globally without any chances
35119 of collision at link time. */
35120 if (make_unique)
35121 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35123 name_len = strlen (name) + strlen (suffix) + 2;
35125 if (make_unique)
35126 name_len += strlen (unique_name) + 1;
35127 global_var_name = XNEWVEC (char, name_len);
35129 /* Use '.' to concatenate names as it is demangler friendly. */
35130 if (make_unique)
35131 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35132 suffix);
35133 else
35134 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35136 return global_var_name;
35139 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35141 /* Make a dispatcher declaration for the multi-versioned function DECL.
35142 Calls to DECL function will be replaced with calls to the dispatcher
35143 by the front-end. Return the decl created. */
35145 static tree
35146 make_dispatcher_decl (const tree decl)
35148 tree func_decl;
35149 char *func_name;
35150 tree fn_type, func_type;
35151 bool is_uniq = false;
35153 if (TREE_PUBLIC (decl) == 0)
35154 is_uniq = true;
35156 func_name = make_name (decl, "ifunc", is_uniq);
35158 fn_type = TREE_TYPE (decl);
35159 func_type = build_function_type (TREE_TYPE (fn_type),
35160 TYPE_ARG_TYPES (fn_type));
35162 func_decl = build_fn_decl (func_name, func_type);
35163 XDELETEVEC (func_name);
35164 TREE_USED (func_decl) = 1;
35165 DECL_CONTEXT (func_decl) = NULL_TREE;
35166 DECL_INITIAL (func_decl) = error_mark_node;
35167 DECL_ARTIFICIAL (func_decl) = 1;
35168 /* Mark this func as external, the resolver will flip it again if
35169 it gets generated. */
35170 DECL_EXTERNAL (func_decl) = 1;
35171 /* This will be an IFUNC; IFUNCs have to be externally visible. */
35172 TREE_PUBLIC (func_decl) = 1;
35174 return func_decl;
35177 #endif
35179 /* Returns true if DECL is multi-versioned and is the default version,
35180 that is, it is not tagged with a target-specific optimization. */
35182 static bool
35183 is_function_default_version (const tree decl)
35185 if (TREE_CODE (decl) != FUNCTION_DECL
35186 || !DECL_FUNCTION_VERSIONED (decl))
35187 return false;
35188 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35189 gcc_assert (attr);
35190 attr = TREE_VALUE (TREE_VALUE (attr));
35191 return (TREE_CODE (attr) == STRING_CST
35192 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35195 /* Make a dispatcher declaration for the multi-versioned function DECL.
35196 Calls to DECL function will be replaced with calls to the dispatcher
35197 by the front-end. Returns the decl of the dispatcher function. */
35199 static tree
35200 ix86_get_function_versions_dispatcher (void *decl)
35202 tree fn = (tree) decl;
35203 struct cgraph_node *node = NULL;
35204 struct cgraph_node *default_node = NULL;
35205 struct cgraph_function_version_info *node_v = NULL;
35206 struct cgraph_function_version_info *first_v = NULL;
35208 tree dispatch_decl = NULL;
35210 struct cgraph_function_version_info *default_version_info = NULL;
35212 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35214 node = cgraph_node::get (fn);
35215 gcc_assert (node != NULL);
35217 node_v = node->function_version ();
35218 gcc_assert (node_v != NULL);
35220 if (node_v->dispatcher_resolver != NULL)
35221 return node_v->dispatcher_resolver;
35223 /* Find the default version and make it the first node. */
35224 first_v = node_v;
35225 /* Go to the beginning of the chain. */
35226 while (first_v->prev != NULL)
35227 first_v = first_v->prev;
35228 default_version_info = first_v;
35229 while (default_version_info != NULL)
35231 if (is_function_default_version
35232 (default_version_info->this_node->decl))
35233 break;
35234 default_version_info = default_version_info->next;
35237 /* If there is no default node, just return NULL. */
35238 if (default_version_info == NULL)
35239 return NULL;
35241 /* Make default info the first node. */
35242 if (first_v != default_version_info)
35244 default_version_info->prev->next = default_version_info->next;
35245 if (default_version_info->next)
35246 default_version_info->next->prev = default_version_info->prev;
35247 first_v->prev = default_version_info;
35248 default_version_info->next = first_v;
35249 default_version_info->prev = NULL;
35252 default_node = default_version_info->this_node;
35254 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35255 if (targetm.has_ifunc_p ())
35257 struct cgraph_function_version_info *it_v = NULL;
35258 struct cgraph_node *dispatcher_node = NULL;
35259 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35261 /* Right now, the dispatching is done via ifunc. */
35262 dispatch_decl = make_dispatcher_decl (default_node->decl);
35264 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35265 gcc_assert (dispatcher_node != NULL);
35266 dispatcher_node->dispatcher_function = 1;
35267 dispatcher_version_info
35268 = dispatcher_node->insert_new_function_version ();
35269 dispatcher_version_info->next = default_version_info;
35270 dispatcher_node->definition = 1;
35272 /* Set the dispatcher for all the versions. */
35273 it_v = default_version_info;
35274 while (it_v != NULL)
35276 it_v->dispatcher_resolver = dispatch_decl;
35277 it_v = it_v->next;
35280 else
35281 #endif
35283 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35284 "multiversioning needs ifunc which is not supported "
35285 "on this target");
35288 return dispatch_decl;
35291 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35292 it to CHAIN. */
35294 static tree
35295 make_attribute (const char *name, const char *arg_name, tree chain)
35297 tree attr_name;
35298 tree attr_arg_name;
35299 tree attr_args;
35300 tree attr;
35302 attr_name = get_identifier (name);
35303 attr_arg_name = build_string (strlen (arg_name), arg_name);
35304 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35305 attr = tree_cons (attr_name, attr_args, chain);
35306 return attr;
35309 /* Make the resolver function decl to dispatch the versions of
35310 a multi-versioned function, DEFAULT_DECL. Create an
35311 empty basic block in the resolver and store the pointer in
35312 EMPTY_BB. Return the decl of the resolver function. */
35314 static tree
35315 make_resolver_func (const tree default_decl,
35316 const tree dispatch_decl,
35317 basic_block *empty_bb)
35319 char *resolver_name;
35320 tree decl, type, decl_name, t;
35321 bool is_uniq = false;
35323 /* IFUNC's have to be globally visible. So, if the default_decl is
35324 not, then the name of the IFUNC should be made unique. */
35325 if (TREE_PUBLIC (default_decl) == 0)
35326 is_uniq = true;
35328 /* Append the filename to the resolver function if the versions are
35329 not externally visible. This is because the resolver function has
35330 to be externally visible for the loader to find it. So, appending
35331 the filename will prevent conflicts with a resolver function from
35332 another module which is based on the same version name. */
35333 resolver_name = make_name (default_decl, "resolver", is_uniq);
35335 /* The resolver function should return a (void *). */
35336 type = build_function_type_list (ptr_type_node, NULL_TREE);
35338 decl = build_fn_decl (resolver_name, type);
35339 decl_name = get_identifier (resolver_name);
35340 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35342 DECL_NAME (decl) = decl_name;
35343 TREE_USED (decl) = 1;
35344 DECL_ARTIFICIAL (decl) = 1;
35345 DECL_IGNORED_P (decl) = 0;
35346 /* IFUNC resolvers have to be externally visible. */
35347 TREE_PUBLIC (decl) = 1;
35348 DECL_UNINLINABLE (decl) = 1;
35350 /* Resolver is not external, body is generated. */
35351 DECL_EXTERNAL (decl) = 0;
35352 DECL_EXTERNAL (dispatch_decl) = 0;
35354 DECL_CONTEXT (decl) = NULL_TREE;
35355 DECL_INITIAL (decl) = make_node (BLOCK);
35356 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35358 if (DECL_COMDAT_GROUP (default_decl)
35359 || TREE_PUBLIC (default_decl))
35361 /* In this case, each translation unit with a call to this
35362 versioned function will put out a resolver. Ensure it
35363 is comdat to keep just one copy. */
35364 DECL_COMDAT (decl) = 1;
35365 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35367 /* Build result decl and add to function_decl. */
35368 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35369 DECL_ARTIFICIAL (t) = 1;
35370 DECL_IGNORED_P (t) = 1;
35371 DECL_RESULT (decl) = t;
35373 gimplify_function_tree (decl);
35374 push_cfun (DECL_STRUCT_FUNCTION (decl));
35375 *empty_bb = init_lowered_empty_function (decl, false, 0);
35377 cgraph_node::add_new_function (decl, true);
35378 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35380 pop_cfun ();
35382 gcc_assert (dispatch_decl != NULL);
35383 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35384 DECL_ATTRIBUTES (dispatch_decl)
35385 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35387 /* Create the alias for dispatch to resolver here. */
35388 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35389 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35390 XDELETEVEC (resolver_name);
35391 return decl;
35394 /* Generate the dispatching code body to dispatch multi-versioned function
35395 DECL. The target hook is called to process the "target" attributes and
35396 provide the code to dispatch the right function at run-time. NODE points
35397 to the dispatcher decl whose body will be created. */
35399 static tree
35400 ix86_generate_version_dispatcher_body (void *node_p)
35402 tree resolver_decl;
35403 basic_block empty_bb;
35404 tree default_ver_decl;
35405 struct cgraph_node *versn;
35406 struct cgraph_node *node;
35408 struct cgraph_function_version_info *node_version_info = NULL;
35409 struct cgraph_function_version_info *versn_info = NULL;
35411 node = (cgraph_node *)node_p;
35413 node_version_info = node->function_version ();
35414 gcc_assert (node->dispatcher_function
35415 && node_version_info != NULL);
35417 if (node_version_info->dispatcher_resolver)
35418 return node_version_info->dispatcher_resolver;
35420 /* The first version in the chain corresponds to the default version. */
35421 default_ver_decl = node_version_info->next->this_node->decl;
35423 /* node is going to be an alias, so remove the finalized bit. */
35424 node->definition = false;
35426 resolver_decl = make_resolver_func (default_ver_decl,
35427 node->decl, &empty_bb);
35429 node_version_info->dispatcher_resolver = resolver_decl;
35431 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35433 auto_vec<tree, 2> fn_ver_vec;
35435 for (versn_info = node_version_info->next; versn_info;
35436 versn_info = versn_info->next)
35438 versn = versn_info->this_node;
35439 /* Check for virtual functions here again, as by this time it should
35440 have been determined if this function needs a vtable index or
35441 not. This happens for methods in derived classes that override
35442 virtual methods in base classes but are not explicitly marked as
35443 virtual. */
35444 if (DECL_VINDEX (versn->decl))
35445 sorry ("Virtual function multiversioning not supported");
35447 fn_ver_vec.safe_push (versn->decl);
35450 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35451 cgraph_edge::rebuild_edges ();
35452 pop_cfun ();
35453 return resolver_decl;
35455 /* This builds the processor_model struct type defined in
35456 libgcc/config/i386/cpuinfo.c */
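/* The layout built below corresponds roughly to this C definition (a
   sketch of the struct in libgcc/config/i386/cpuinfo.c):

       struct __processor_model
       {
         unsigned int __cpu_vendor;
         unsigned int __cpu_type;
         unsigned int __cpu_subtype;
         unsigned int __cpu_features[1];
       };
*/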
35458 static tree
35459 build_processor_model_struct (void)
35461 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35462 "__cpu_features"};
35463 tree field = NULL_TREE, field_chain = NULL_TREE;
35464 int i;
35465 tree type = make_node (RECORD_TYPE);
35467 /* The first 3 fields are unsigned int. */
35468 for (i = 0; i < 3; ++i)
35470 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35471 get_identifier (field_name[i]), unsigned_type_node);
35472 if (field_chain != NULL_TREE)
35473 DECL_CHAIN (field) = field_chain;
35474 field_chain = field;
35477 /* The last field is an array of unsigned integers of size one. */
35478 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35479 get_identifier (field_name[3]),
35480 build_array_type (unsigned_type_node,
35481 build_index_type (size_one_node)));
35482 if (field_chain != NULL_TREE)
35483 DECL_CHAIN (field) = field_chain;
35484 field_chain = field;
35486 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35487 return type;
35490 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35492 static tree
35493 make_var_decl (tree type, const char *name)
35495 tree new_decl;
35497 new_decl = build_decl (UNKNOWN_LOCATION,
35498 VAR_DECL,
35499 get_identifier(name),
35500 type);
35502 DECL_EXTERNAL (new_decl) = 1;
35503 TREE_STATIC (new_decl) = 1;
35504 TREE_PUBLIC (new_decl) = 1;
35505 DECL_INITIAL (new_decl) = 0;
35506 DECL_ARTIFICIAL (new_decl) = 0;
35507 DECL_PRESERVE_P (new_decl) = 1;
35509 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35510 assemble_variable (new_decl, 0, 0, 0);
35512 return new_decl;
35515 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35516 into an integer defined in libgcc/config/i386/cpuinfo.c */
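/* For example (illustrative): __builtin_cpu_is ("amd") folds to roughly
   (int) (__cpu_model.__cpu_vendor == M_AMD), and
   __builtin_cpu_supports ("avx") folds to roughly
   (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX)).  */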
35518 static tree
35519 fold_builtin_cpu (tree fndecl, tree *args)
35521 unsigned int i;
35522 enum ix86_builtins fn_code = (enum ix86_builtins)
35523 DECL_FUNCTION_CODE (fndecl);
35524 tree param_string_cst = NULL;
35526 /* This is the order of bit-fields in __processor_features in cpuinfo.c.  */
35527 enum processor_features
35529 F_CMOV = 0,
35530 F_MMX,
35531 F_POPCNT,
35532 F_SSE,
35533 F_SSE2,
35534 F_SSE3,
35535 F_SSSE3,
35536 F_SSE4_1,
35537 F_SSE4_2,
35538 F_AVX,
35539 F_AVX2,
35540 F_SSE4_A,
35541 F_FMA4,
35542 F_XOP,
35543 F_FMA,
35544 F_AVX512F,
35545 F_BMI,
35546 F_BMI2,
35547 F_MAX
35550 /* These are the values for vendor types and cpu types and subtypes
35551 in cpuinfo.c. Cpu types and subtypes have the corresponding start
35552 value subtracted before they are used. */
35553 enum processor_model
35555 M_INTEL = 1,
35556 M_AMD,
35557 M_CPU_TYPE_START,
35558 M_INTEL_BONNELL,
35559 M_INTEL_CORE2,
35560 M_INTEL_COREI7,
35561 M_AMDFAM10H,
35562 M_AMDFAM15H,
35563 M_INTEL_SILVERMONT,
35564 M_INTEL_KNL,
35565 M_AMD_BTVER1,
35566 M_AMD_BTVER2,
35567 M_CPU_SUBTYPE_START,
35568 M_INTEL_COREI7_NEHALEM,
35569 M_INTEL_COREI7_WESTMERE,
35570 M_INTEL_COREI7_SANDYBRIDGE,
35571 M_AMDFAM10H_BARCELONA,
35572 M_AMDFAM10H_SHANGHAI,
35573 M_AMDFAM10H_ISTANBUL,
35574 M_AMDFAM15H_BDVER1,
35575 M_AMDFAM15H_BDVER2,
35576 M_AMDFAM15H_BDVER3,
35577 M_AMDFAM15H_BDVER4,
35578 M_INTEL_COREI7_IVYBRIDGE,
35579 M_INTEL_COREI7_HASWELL,
35580 M_INTEL_COREI7_BROADWELL
35583 static struct _arch_names_table
35585 const char *const name;
35586 const enum processor_model model;
35588 const arch_names_table[] =
35590 {"amd", M_AMD},
35591 {"intel", M_INTEL},
35592 {"atom", M_INTEL_BONNELL},
35593 {"slm", M_INTEL_SILVERMONT},
35594 {"core2", M_INTEL_CORE2},
35595 {"corei7", M_INTEL_COREI7},
35596 {"nehalem", M_INTEL_COREI7_NEHALEM},
35597 {"westmere", M_INTEL_COREI7_WESTMERE},
35598 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35599 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35600 {"haswell", M_INTEL_COREI7_HASWELL},
35601 {"broadwell", M_INTEL_COREI7_BROADWELL},
35602 {"bonnell", M_INTEL_BONNELL},
35603 {"silvermont", M_INTEL_SILVERMONT},
35604 {"knl", M_INTEL_KNL},
35605 {"amdfam10h", M_AMDFAM10H},
35606 {"barcelona", M_AMDFAM10H_BARCELONA},
35607 {"shanghai", M_AMDFAM10H_SHANGHAI},
35608 {"istanbul", M_AMDFAM10H_ISTANBUL},
35609 {"btver1", M_AMD_BTVER1},
35610 {"amdfam15h", M_AMDFAM15H},
35611 {"bdver1", M_AMDFAM15H_BDVER1},
35612 {"bdver2", M_AMDFAM15H_BDVER2},
35613 {"bdver3", M_AMDFAM15H_BDVER3},
35614 {"bdver4", M_AMDFAM15H_BDVER4},
35615 {"btver2", M_AMD_BTVER2},
35618 static struct _isa_names_table
35620 const char *const name;
35621 const enum processor_features feature;
35623 const isa_names_table[] =
35625 {"cmov", F_CMOV},
35626 {"mmx", F_MMX},
35627 {"popcnt", F_POPCNT},
35628 {"sse", F_SSE},
35629 {"sse2", F_SSE2},
35630 {"sse3", F_SSE3},
35631 {"ssse3", F_SSSE3},
35632 {"sse4a", F_SSE4_A},
35633 {"sse4.1", F_SSE4_1},
35634 {"sse4.2", F_SSE4_2},
35635 {"avx", F_AVX},
35636 {"fma4", F_FMA4},
35637 {"xop", F_XOP},
35638 {"fma", F_FMA},
35639 {"avx2", F_AVX2},
35640 {"avx512f",F_AVX512F},
35641 {"bmi", F_BMI},
35642 {"bmi2", F_BMI2}
35645 tree __processor_model_type = build_processor_model_struct ();
35646 tree __cpu_model_var = make_var_decl (__processor_model_type,
35647 "__cpu_model");
35650 varpool_node::add (__cpu_model_var);
35652 gcc_assert ((args != NULL) && (*args != NULL));
35654 param_string_cst = *args;
35655 while (param_string_cst
35656 && TREE_CODE (param_string_cst) != STRING_CST)
35658 /* *args must be an expr that can contain other EXPRs leading to a
35659 STRING_CST. */
35660 if (!EXPR_P (param_string_cst))
35662 error ("Parameter to builtin must be a string constant or literal");
35663 return integer_zero_node;
35665 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35668 gcc_assert (param_string_cst);
35670 if (fn_code == IX86_BUILTIN_CPU_IS)
35672 tree ref;
35673 tree field;
35674 tree final;
35676 unsigned int field_val = 0;
35677 unsigned int NUM_ARCH_NAMES
35678 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35680 for (i = 0; i < NUM_ARCH_NAMES; i++)
35681 if (strcmp (arch_names_table[i].name,
35682 TREE_STRING_POINTER (param_string_cst)) == 0)
35683 break;
35685 if (i == NUM_ARCH_NAMES)
35687 error ("Parameter to builtin not valid: %s",
35688 TREE_STRING_POINTER (param_string_cst));
35689 return integer_zero_node;
35692 field = TYPE_FIELDS (__processor_model_type);
35693 field_val = arch_names_table[i].model;
35695 /* CPU types are stored in the next field. */
35696 if (field_val > M_CPU_TYPE_START
35697 && field_val < M_CPU_SUBTYPE_START)
35699 field = DECL_CHAIN (field);
35700 field_val -= M_CPU_TYPE_START;
35703 /* CPU subtypes are stored in the next field. */
35704 if (field_val > M_CPU_SUBTYPE_START)
35706 field = DECL_CHAIN ( DECL_CHAIN (field));
35707 field_val -= M_CPU_SUBTYPE_START;
35710 /* Get the appropriate field in __cpu_model. */
35711 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35712 field, NULL_TREE);
35714 /* Check the value. */
35715 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35716 build_int_cstu (unsigned_type_node, field_val));
35717 return build1 (CONVERT_EXPR, integer_type_node, final);
35719 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35721 tree ref;
35722 tree array_elt;
35723 tree field;
35724 tree final;
35726 unsigned int field_val = 0;
35727 unsigned int NUM_ISA_NAMES
35728 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35730 for (i = 0; i < NUM_ISA_NAMES; i++)
35731 if (strcmp (isa_names_table[i].name,
35732 TREE_STRING_POINTER (param_string_cst)) == 0)
35733 break;
35735 if (i == NUM_ISA_NAMES)
35737 error ("Parameter to builtin not valid: %s",
35738 TREE_STRING_POINTER (param_string_cst));
35739 return integer_zero_node;
35742 field = TYPE_FIELDS (__processor_model_type);
35743 /* Get the last field, which is __cpu_features. */
35744 while (DECL_CHAIN (field))
35745 field = DECL_CHAIN (field);
35747 /* Get the appropriate field: __cpu_model.__cpu_features */
35748 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35749 field, NULL_TREE);
35751 /* Access the 0th element of __cpu_features array. */
35752 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35753 integer_zero_node, NULL_TREE, NULL_TREE);
35755 field_val = (1 << isa_names_table[i].feature);
35756 /* Return __cpu_model.__cpu_features[0] & field_val */
35757 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35758 build_int_cstu (unsigned_type_node, field_val));
35759 return build1 (CONVERT_EXPR, integer_type_node, final);
35761 gcc_unreachable ();
35764 static tree
35765 ix86_fold_builtin (tree fndecl, int n_args,
35766 tree *args, bool ignore ATTRIBUTE_UNUSED)
35768 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35770 enum ix86_builtins fn_code = (enum ix86_builtins)
35771 DECL_FUNCTION_CODE (fndecl);
35772 if (fn_code == IX86_BUILTIN_CPU_IS
35773 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35775 gcc_assert (n_args == 1);
35776 return fold_builtin_cpu (fndecl, args);
35780 #ifdef SUBTARGET_FOLD_BUILTIN
35781 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35782 #endif
35784 return NULL_TREE;
35787 /* Make builtins to detect cpu type and features supported. NAME is
35788 the builtin name, CODE is the builtin code, and FTYPE is the function
35789 type of the builtin. */
35791 static void
35792 make_cpu_type_builtin (const char* name, int code,
35793 enum ix86_builtin_func_type ftype, bool is_const)
35795 tree decl;
35796 tree type;
35798 type = ix86_get_builtin_func_type (ftype);
35799 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35800 NULL, NULL_TREE);
35801 gcc_assert (decl != NULL_TREE);
35802 ix86_builtins[(int) code] = decl;
35803 TREE_READONLY (decl) = is_const;
35806 /* Make builtins to get CPU type and features supported.  The created
35807 builtins are:
35809 __builtin_cpu_init (), to detect cpu type and features,
35810 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35811 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */
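/* Illustrative use of the last two builtins in user code (not part of this
   file; the called helpers are hypothetical):

       if (__builtin_cpu_is ("corei7"))
         use_corei7_tuned_path ();
       else if (__builtin_cpu_supports ("ssse3"))
         use_ssse3_path ();
       else
         use_generic_path ();

   __builtin_cpu_init only needs to be called explicitly when such checks
   run before the normal constructors, e.g. in an IFUNC resolver.  */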
35814 static void
35815 ix86_init_platform_type_builtins (void)
35817 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35818 INT_FTYPE_VOID, false);
35819 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35820 INT_FTYPE_PCCHAR, true);
35821 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35822 INT_FTYPE_PCCHAR, true);
35825 /* Internal method for ix86_init_builtins. */
35827 static void
35828 ix86_init_builtins_va_builtins_abi (void)
35830 tree ms_va_ref, sysv_va_ref;
35831 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35832 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35833 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35834 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35836 if (!TARGET_64BIT)
35837 return;
35838 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35839 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35840 ms_va_ref = build_reference_type (ms_va_list_type_node);
35841 sysv_va_ref =
35842 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35844 fnvoid_va_end_ms =
35845 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35846 fnvoid_va_start_ms =
35847 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35848 fnvoid_va_end_sysv =
35849 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35850 fnvoid_va_start_sysv =
35851 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35852 NULL_TREE);
35853 fnvoid_va_copy_ms =
35854 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35855 NULL_TREE);
35856 fnvoid_va_copy_sysv =
35857 build_function_type_list (void_type_node, sysv_va_ref,
35858 sysv_va_ref, NULL_TREE);
35860 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35861 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35862 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35863 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35864 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35865 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35866 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35867 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35868 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35869 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35870 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35871 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35874 static void
35875 ix86_init_builtin_types (void)
35877 tree float128_type_node, float80_type_node;
35879 /* The __float80 type. */
35880 float80_type_node = long_double_type_node;
35881 if (TYPE_MODE (float80_type_node) != XFmode)
35883 /* The __float80 type. */
35884 float80_type_node = make_node (REAL_TYPE);
35886 TYPE_PRECISION (float80_type_node) = 80;
35887 layout_type (float80_type_node);
35889 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35891 /* The __float128 type. */
35892 float128_type_node = make_node (REAL_TYPE);
35893 TYPE_PRECISION (float128_type_node) = 128;
35894 layout_type (float128_type_node);
35895 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35897 /* This macro is built by i386-builtin-types.awk. */
35898 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35901 static void
35902 ix86_init_builtins (void)
35904 tree t;
35906 ix86_init_builtin_types ();
35908 /* Builtins to get CPU type and features. */
35909 ix86_init_platform_type_builtins ();
35911 /* TFmode support builtins. */
35912 def_builtin_const (0, "__builtin_infq",
35913 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35914 def_builtin_const (0, "__builtin_huge_valq",
35915 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35917 /* We will expand them to normal calls if SSE isn't available, since
35918 they are used by libgcc. */
35919 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35920 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35921 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35922 TREE_READONLY (t) = 1;
35923 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35925 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35926 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35927 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35928 TREE_READONLY (t) = 1;
35929 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35931 ix86_init_tm_builtins ();
35932 ix86_init_mmx_sse_builtins ();
35933 ix86_init_mpx_builtins ();
35935 if (TARGET_LP64)
35936 ix86_init_builtins_va_builtins_abi ();
35938 #ifdef SUBTARGET_INIT_BUILTINS
35939 SUBTARGET_INIT_BUILTINS;
35940 #endif
35943 /* Return the ix86 builtin for CODE. */
35945 static tree
35946 ix86_builtin_decl (unsigned code, bool)
35948 if (code >= IX86_BUILTIN_MAX)
35949 return error_mark_node;
35951 return ix86_builtins[code];
35954 /* Errors in the source file can cause expand_expr to return const0_rtx
35955 where we expect a vector. To avoid crashing, use one of the vector
35956 clear instructions. */
35957 static rtx
35958 safe_vector_operand (rtx x, machine_mode mode)
35960 if (x == const0_rtx)
35961 x = CONST0_RTX (mode);
35962 return x;
35965 /* Fix up modeless constants to fit the required mode. */
35966 static rtx
35967 fixup_modeless_constant (rtx x, machine_mode mode)
35969 if (GET_MODE (x) == VOIDmode)
35970 x = convert_to_mode (mode, x, 1);
35971 return x;
35974 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35976 static rtx
35977 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35979 rtx pat;
35980 tree arg0 = CALL_EXPR_ARG (exp, 0);
35981 tree arg1 = CALL_EXPR_ARG (exp, 1);
35982 rtx op0 = expand_normal (arg0);
35983 rtx op1 = expand_normal (arg1);
35984 machine_mode tmode = insn_data[icode].operand[0].mode;
35985 machine_mode mode0 = insn_data[icode].operand[1].mode;
35986 machine_mode mode1 = insn_data[icode].operand[2].mode;
35988 if (VECTOR_MODE_P (mode0))
35989 op0 = safe_vector_operand (op0, mode0);
35990 if (VECTOR_MODE_P (mode1))
35991 op1 = safe_vector_operand (op1, mode1);
35993 if (optimize || !target
35994 || GET_MODE (target) != tmode
35995 || !insn_data[icode].operand[0].predicate (target, tmode))
35996 target = gen_reg_rtx (tmode);
35998 if (GET_MODE (op1) == SImode && mode1 == TImode)
36000 rtx x = gen_reg_rtx (V4SImode);
36001 emit_insn (gen_sse2_loadd (x, op1));
36002 op1 = gen_lowpart (TImode, x);
36005 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36006 op0 = copy_to_mode_reg (mode0, op0);
36007 if (!insn_data[icode].operand[2].predicate (op1, mode1))
36008 op1 = copy_to_mode_reg (mode1, op1);
36010 pat = GEN_FCN (icode) (target, op0, op1);
36011 if (! pat)
36012 return 0;
36014 emit_insn (pat);
36016 return target;
36019 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
36021 static rtx
36022 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36023 enum ix86_builtin_func_type m_type,
36024 enum rtx_code sub_code)
36026 rtx pat;
36027 int i;
36028 int nargs;
36029 bool comparison_p = false;
36030 bool tf_p = false;
36031 bool last_arg_constant = false;
36032 int num_memory = 0;
36033 struct {
36034 rtx op;
36035 machine_mode mode;
36036 } args[4];
36038 machine_mode tmode = insn_data[icode].operand[0].mode;
36040 switch (m_type)
36042 case MULTI_ARG_4_DF2_DI_I:
36043 case MULTI_ARG_4_DF2_DI_I1:
36044 case MULTI_ARG_4_SF2_SI_I:
36045 case MULTI_ARG_4_SF2_SI_I1:
36046 nargs = 4;
36047 last_arg_constant = true;
36048 break;
36050 case MULTI_ARG_3_SF:
36051 case MULTI_ARG_3_DF:
36052 case MULTI_ARG_3_SF2:
36053 case MULTI_ARG_3_DF2:
36054 case MULTI_ARG_3_DI:
36055 case MULTI_ARG_3_SI:
36056 case MULTI_ARG_3_SI_DI:
36057 case MULTI_ARG_3_HI:
36058 case MULTI_ARG_3_HI_SI:
36059 case MULTI_ARG_3_QI:
36060 case MULTI_ARG_3_DI2:
36061 case MULTI_ARG_3_SI2:
36062 case MULTI_ARG_3_HI2:
36063 case MULTI_ARG_3_QI2:
36064 nargs = 3;
36065 break;
36067 case MULTI_ARG_2_SF:
36068 case MULTI_ARG_2_DF:
36069 case MULTI_ARG_2_DI:
36070 case MULTI_ARG_2_SI:
36071 case MULTI_ARG_2_HI:
36072 case MULTI_ARG_2_QI:
36073 nargs = 2;
36074 break;
36076 case MULTI_ARG_2_DI_IMM:
36077 case MULTI_ARG_2_SI_IMM:
36078 case MULTI_ARG_2_HI_IMM:
36079 case MULTI_ARG_2_QI_IMM:
36080 nargs = 2;
36081 last_arg_constant = true;
36082 break;
36084 case MULTI_ARG_1_SF:
36085 case MULTI_ARG_1_DF:
36086 case MULTI_ARG_1_SF2:
36087 case MULTI_ARG_1_DF2:
36088 case MULTI_ARG_1_DI:
36089 case MULTI_ARG_1_SI:
36090 case MULTI_ARG_1_HI:
36091 case MULTI_ARG_1_QI:
36092 case MULTI_ARG_1_SI_DI:
36093 case MULTI_ARG_1_HI_DI:
36094 case MULTI_ARG_1_HI_SI:
36095 case MULTI_ARG_1_QI_DI:
36096 case MULTI_ARG_1_QI_SI:
36097 case MULTI_ARG_1_QI_HI:
36098 nargs = 1;
36099 break;
36101 case MULTI_ARG_2_DI_CMP:
36102 case MULTI_ARG_2_SI_CMP:
36103 case MULTI_ARG_2_HI_CMP:
36104 case MULTI_ARG_2_QI_CMP:
36105 nargs = 2;
36106 comparison_p = true;
36107 break;
36109 case MULTI_ARG_2_SF_TF:
36110 case MULTI_ARG_2_DF_TF:
36111 case MULTI_ARG_2_DI_TF:
36112 case MULTI_ARG_2_SI_TF:
36113 case MULTI_ARG_2_HI_TF:
36114 case MULTI_ARG_2_QI_TF:
36115 nargs = 2;
36116 tf_p = true;
36117 break;
36119 default:
36120 gcc_unreachable ();
36123 if (optimize || !target
36124 || GET_MODE (target) != tmode
36125 || !insn_data[icode].operand[0].predicate (target, tmode))
36126 target = gen_reg_rtx (tmode);
36128 gcc_assert (nargs <= 4);
36130 for (i = 0; i < nargs; i++)
36132 tree arg = CALL_EXPR_ARG (exp, i);
36133 rtx op = expand_normal (arg);
36134 int adjust = (comparison_p) ? 1 : 0;
36135 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36137 if (last_arg_constant && i == nargs - 1)
36139 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36141 enum insn_code new_icode = icode;
36142 switch (icode)
36144 case CODE_FOR_xop_vpermil2v2df3:
36145 case CODE_FOR_xop_vpermil2v4sf3:
36146 case CODE_FOR_xop_vpermil2v4df3:
36147 case CODE_FOR_xop_vpermil2v8sf3:
36148 error ("the last argument must be a 2-bit immediate");
36149 return gen_reg_rtx (tmode);
36150 case CODE_FOR_xop_rotlv2di3:
36151 new_icode = CODE_FOR_rotlv2di3;
36152 goto xop_rotl;
36153 case CODE_FOR_xop_rotlv4si3:
36154 new_icode = CODE_FOR_rotlv4si3;
36155 goto xop_rotl;
36156 case CODE_FOR_xop_rotlv8hi3:
36157 new_icode = CODE_FOR_rotlv8hi3;
36158 goto xop_rotl;
36159 case CODE_FOR_xop_rotlv16qi3:
36160 new_icode = CODE_FOR_rotlv16qi3;
36161 xop_rotl:
36162 if (CONST_INT_P (op))
36164 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36165 op = GEN_INT (INTVAL (op) & mask);
36166 gcc_checking_assert
36167 (insn_data[icode].operand[i + 1].predicate (op, mode));
36169 else
36171 gcc_checking_assert
36172 (nargs == 2
36173 && insn_data[new_icode].operand[0].mode == tmode
36174 && insn_data[new_icode].operand[1].mode == tmode
36175 && insn_data[new_icode].operand[2].mode == mode
36176 && insn_data[new_icode].operand[0].predicate
36177 == insn_data[icode].operand[0].predicate
36178 && insn_data[new_icode].operand[1].predicate
36179 == insn_data[icode].operand[1].predicate);
36180 icode = new_icode;
36181 goto non_constant;
36183 break;
36184 default:
36185 gcc_unreachable ();
36189 else
36191 non_constant:
36192 if (VECTOR_MODE_P (mode))
36193 op = safe_vector_operand (op, mode);
36195 /* If we aren't optimizing, only allow one memory operand to be
36196 generated. */
36197 if (memory_operand (op, mode))
36198 num_memory++;
36200 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36202 if (optimize
36203 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36204 || num_memory > 1)
36205 op = force_reg (mode, op);
36208 args[i].op = op;
36209 args[i].mode = mode;
36212 switch (nargs)
36214 case 1:
36215 pat = GEN_FCN (icode) (target, args[0].op);
36216 break;
36218 case 2:
36219 if (tf_p)
36220 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36221 GEN_INT ((int)sub_code));
36222 else if (! comparison_p)
36223 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36224 else
36226 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36227 args[0].op,
36228 args[1].op);
36230 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36232 break;
36234 case 3:
36235 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36236 break;
36238 case 4:
36239 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36240 break;
36242 default:
36243 gcc_unreachable ();
36246 if (! pat)
36247 return 0;
36249 emit_insn (pat);
36250 return target;
36253 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36254 insns with vec_merge. */
36256 static rtx
36257 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36258 rtx target)
36260 rtx pat;
36261 tree arg0 = CALL_EXPR_ARG (exp, 0);
36262 rtx op1, op0 = expand_normal (arg0);
36263 machine_mode tmode = insn_data[icode].operand[0].mode;
36264 machine_mode mode0 = insn_data[icode].operand[1].mode;
36266 if (optimize || !target
36267 || GET_MODE (target) != tmode
36268 || !insn_data[icode].operand[0].predicate (target, tmode))
36269 target = gen_reg_rtx (tmode);
36271 if (VECTOR_MODE_P (mode0))
36272 op0 = safe_vector_operand (op0, mode0);
36274 if ((optimize && !register_operand (op0, mode0))
36275 || !insn_data[icode].operand[1].predicate (op0, mode0))
36276 op0 = copy_to_mode_reg (mode0, op0);
36278 op1 = op0;
36279 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36280 op1 = copy_to_mode_reg (mode0, op1);
36282 pat = GEN_FCN (icode) (target, op0, op1);
36283 if (! pat)
36284 return 0;
36285 emit_insn (pat);
36286 return target;
36289 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36291 static rtx
36292 ix86_expand_sse_compare (const struct builtin_description *d,
36293 tree exp, rtx target, bool swap)
36295 rtx pat;
36296 tree arg0 = CALL_EXPR_ARG (exp, 0);
36297 tree arg1 = CALL_EXPR_ARG (exp, 1);
36298 rtx op0 = expand_normal (arg0);
36299 rtx op1 = expand_normal (arg1);
36300 rtx op2;
36301 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36302 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36303 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36304 enum rtx_code comparison = d->comparison;
36306 if (VECTOR_MODE_P (mode0))
36307 op0 = safe_vector_operand (op0, mode0);
36308 if (VECTOR_MODE_P (mode1))
36309 op1 = safe_vector_operand (op1, mode1);
36311 /* Swap operands if we have a comparison that isn't available in
36312 hardware. */
36313 if (swap)
36314 std::swap (op0, op1);
36316 if (optimize || !target
36317 || GET_MODE (target) != tmode
36318 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36319 target = gen_reg_rtx (tmode);
36321 if ((optimize && !register_operand (op0, mode0))
36322 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36323 op0 = copy_to_mode_reg (mode0, op0);
36324 if ((optimize && !register_operand (op1, mode1))
36325 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36326 op1 = copy_to_mode_reg (mode1, op1);
36328 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36329 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36330 if (! pat)
36331 return 0;
36332 emit_insn (pat);
36333 return target;
36336 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36338 static rtx
36339 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36340 rtx target)
36342 rtx pat;
36343 tree arg0 = CALL_EXPR_ARG (exp, 0);
36344 tree arg1 = CALL_EXPR_ARG (exp, 1);
36345 rtx op0 = expand_normal (arg0);
36346 rtx op1 = expand_normal (arg1);
36347 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36348 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36349 enum rtx_code comparison = d->comparison;
36351 if (VECTOR_MODE_P (mode0))
36352 op0 = safe_vector_operand (op0, mode0);
36353 if (VECTOR_MODE_P (mode1))
36354 op1 = safe_vector_operand (op1, mode1);
36356 /* Swap operands if we have a comparison that isn't available in
36357 hardware. */
36358 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36359 std::swap (op0, op1);
36361 target = gen_reg_rtx (SImode);
36362 emit_move_insn (target, const0_rtx);
36363 target = gen_rtx_SUBREG (QImode, target, 0);
36365 if ((optimize && !register_operand (op0, mode0))
36366 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36367 op0 = copy_to_mode_reg (mode0, op0);
36368 if ((optimize && !register_operand (op1, mode1))
36369 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36370 op1 = copy_to_mode_reg (mode1, op1);
36372 pat = GEN_FCN (d->icode) (op0, op1);
36373 if (! pat)
36374 return 0;
36375 emit_insn (pat);
36376 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36377 gen_rtx_fmt_ee (comparison, QImode,
36378 SET_DEST (pat),
36379 const0_rtx)));
36381 return SUBREG_REG (target);
36384 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
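/* Descriptive note: for the round builtins D->comparison does not hold a
   comparison code; it is reused to carry the rounding-control immediate that
   is appended as the last operand of the pattern.  */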
36386 static rtx
36387 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36388 rtx target)
36390 rtx pat;
36391 tree arg0 = CALL_EXPR_ARG (exp, 0);
36392 rtx op1, op0 = expand_normal (arg0);
36393 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36394 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36396 if (optimize || target == 0
36397 || GET_MODE (target) != tmode
36398 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36399 target = gen_reg_rtx (tmode);
36401 if (VECTOR_MODE_P (mode0))
36402 op0 = safe_vector_operand (op0, mode0);
36404 if ((optimize && !register_operand (op0, mode0))
36405 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36406 op0 = copy_to_mode_reg (mode0, op0);
36408 op1 = GEN_INT (d->comparison);
36410 pat = GEN_FCN (d->icode) (target, op0, op1);
36411 if (! pat)
36412 return 0;
36413 emit_insn (pat);
36414 return target;
36417 static rtx
36418 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36419 tree exp, rtx target)
36421 rtx pat;
36422 tree arg0 = CALL_EXPR_ARG (exp, 0);
36423 tree arg1 = CALL_EXPR_ARG (exp, 1);
36424 rtx op0 = expand_normal (arg0);
36425 rtx op1 = expand_normal (arg1);
36426 rtx op2;
36427 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36428 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36429 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36431 if (optimize || target == 0
36432 || GET_MODE (target) != tmode
36433 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36434 target = gen_reg_rtx (tmode);
36436 op0 = safe_vector_operand (op0, mode0);
36437 op1 = safe_vector_operand (op1, mode1);
36439 if ((optimize && !register_operand (op0, mode0))
36440 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36441 op0 = copy_to_mode_reg (mode0, op0);
36442 if ((optimize && !register_operand (op1, mode1))
36443 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36444 op1 = copy_to_mode_reg (mode1, op1);
36446 op2 = GEN_INT (d->comparison);
36448 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36449 if (! pat)
36450 return 0;
36451 emit_insn (pat);
36452 return target;
36455 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
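/* Descriptive note: like the comi expander above, ptest only sets the flags
   register; the result is read back by applying D->comparison to the
   pattern's destination.  */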
36457 static rtx
36458 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36459 rtx target)
36461 rtx pat;
36462 tree arg0 = CALL_EXPR_ARG (exp, 0);
36463 tree arg1 = CALL_EXPR_ARG (exp, 1);
36464 rtx op0 = expand_normal (arg0);
36465 rtx op1 = expand_normal (arg1);
36466 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36467 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36468 enum rtx_code comparison = d->comparison;
36470 if (VECTOR_MODE_P (mode0))
36471 op0 = safe_vector_operand (op0, mode0);
36472 if (VECTOR_MODE_P (mode1))
36473 op1 = safe_vector_operand (op1, mode1);
36475 target = gen_reg_rtx (SImode);
36476 emit_move_insn (target, const0_rtx);
36477 target = gen_rtx_SUBREG (QImode, target, 0);
36479 if ((optimize && !register_operand (op0, mode0))
36480 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36481 op0 = copy_to_mode_reg (mode0, op0);
36482 if ((optimize && !register_operand (op1, mode1))
36483 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36484 op1 = copy_to_mode_reg (mode1, op1);
36486 pat = GEN_FCN (d->icode) (op0, op1);
36487 if (! pat)
36488 return 0;
36489 emit_insn (pat);
36490 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36491 gen_rtx_fmt_ee (comparison, QImode,
36492 SET_DEST (pat),
36493 const0_rtx)));
36495 return SUBREG_REG (target);
36498 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
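/* Descriptive note: the pcmpestr patterns take the two string fragments with
   their explicit lengths plus an 8-bit control immediate, and produce two
   results: an index (tmode0) and a mask (tmode1).  Which of the two is
   returned depends on D->code; for the flag-reading variants D->flag names
   the flags-register mode and the builtin returns that flag as 0/1.  */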
36500 static rtx
36501 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36502 tree exp, rtx target)
36504 rtx pat;
36505 tree arg0 = CALL_EXPR_ARG (exp, 0);
36506 tree arg1 = CALL_EXPR_ARG (exp, 1);
36507 tree arg2 = CALL_EXPR_ARG (exp, 2);
36508 tree arg3 = CALL_EXPR_ARG (exp, 3);
36509 tree arg4 = CALL_EXPR_ARG (exp, 4);
36510 rtx scratch0, scratch1;
36511 rtx op0 = expand_normal (arg0);
36512 rtx op1 = expand_normal (arg1);
36513 rtx op2 = expand_normal (arg2);
36514 rtx op3 = expand_normal (arg3);
36515 rtx op4 = expand_normal (arg4);
36516 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36518 tmode0 = insn_data[d->icode].operand[0].mode;
36519 tmode1 = insn_data[d->icode].operand[1].mode;
36520 modev2 = insn_data[d->icode].operand[2].mode;
36521 modei3 = insn_data[d->icode].operand[3].mode;
36522 modev4 = insn_data[d->icode].operand[4].mode;
36523 modei5 = insn_data[d->icode].operand[5].mode;
36524 modeimm = insn_data[d->icode].operand[6].mode;
36526 if (VECTOR_MODE_P (modev2))
36527 op0 = safe_vector_operand (op0, modev2);
36528 if (VECTOR_MODE_P (modev4))
36529 op2 = safe_vector_operand (op2, modev4);
36531 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36532 op0 = copy_to_mode_reg (modev2, op0);
36533 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36534 op1 = copy_to_mode_reg (modei3, op1);
36535 if ((optimize && !register_operand (op2, modev4))
36536 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36537 op2 = copy_to_mode_reg (modev4, op2);
36538 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36539 op3 = copy_to_mode_reg (modei5, op3);
36541 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36543 error ("the fifth argument must be an 8-bit immediate");
36544 return const0_rtx;
36547 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36549 if (optimize || !target
36550 || GET_MODE (target) != tmode0
36551 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36552 target = gen_reg_rtx (tmode0);
36554 scratch1 = gen_reg_rtx (tmode1);
36556 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36558 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36560 if (optimize || !target
36561 || GET_MODE (target) != tmode1
36562 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36563 target = gen_reg_rtx (tmode1);
36565 scratch0 = gen_reg_rtx (tmode0);
36567 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36569 else
36571 gcc_assert (d->flag);
36573 scratch0 = gen_reg_rtx (tmode0);
36574 scratch1 = gen_reg_rtx (tmode1);
36576 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36579 if (! pat)
36580 return 0;
36582 emit_insn (pat);
36584 if (d->flag)
36586 target = gen_reg_rtx (SImode);
36587 emit_move_insn (target, const0_rtx);
36588 target = gen_rtx_SUBREG (QImode, target, 0);
36590 emit_insn
36591 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36592 gen_rtx_fmt_ee (EQ, QImode,
36593 gen_rtx_REG ((machine_mode) d->flag,
36594 FLAGS_REG),
36595 const0_rtx)));
36596 return SUBREG_REG (target);
36598 else
36599 return target;
36603 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
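/* Descriptive note: same scheme as the pcmpestr expander above, but the
   pcmpistr forms use implicit (NUL-terminated) string lengths, so only the
   two vectors and the control immediate are passed.  */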
36605 static rtx
36606 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36607 tree exp, rtx target)
36609 rtx pat;
36610 tree arg0 = CALL_EXPR_ARG (exp, 0);
36611 tree arg1 = CALL_EXPR_ARG (exp, 1);
36612 tree arg2 = CALL_EXPR_ARG (exp, 2);
36613 rtx scratch0, scratch1;
36614 rtx op0 = expand_normal (arg0);
36615 rtx op1 = expand_normal (arg1);
36616 rtx op2 = expand_normal (arg2);
36617 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36619 tmode0 = insn_data[d->icode].operand[0].mode;
36620 tmode1 = insn_data[d->icode].operand[1].mode;
36621 modev2 = insn_data[d->icode].operand[2].mode;
36622 modev3 = insn_data[d->icode].operand[3].mode;
36623 modeimm = insn_data[d->icode].operand[4].mode;
36625 if (VECTOR_MODE_P (modev2))
36626 op0 = safe_vector_operand (op0, modev2);
36627 if (VECTOR_MODE_P (modev3))
36628 op1 = safe_vector_operand (op1, modev3);
36630 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36631 op0 = copy_to_mode_reg (modev2, op0);
36632 if ((optimize && !register_operand (op1, modev3))
36633 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36634 op1 = copy_to_mode_reg (modev3, op1);
36636 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36638 error ("the third argument must be an 8-bit immediate");
36639 return const0_rtx;
36642 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36644 if (optimize || !target
36645 || GET_MODE (target) != tmode0
36646 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36647 target = gen_reg_rtx (tmode0);
36649 scratch1 = gen_reg_rtx (tmode1);
36651 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36653 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36655 if (optimize || !target
36656 || GET_MODE (target) != tmode1
36657 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36658 target = gen_reg_rtx (tmode1);
36660 scratch0 = gen_reg_rtx (tmode0);
36662 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36664 else
36666 gcc_assert (d->flag);
36668 scratch0 = gen_reg_rtx (tmode0);
36669 scratch1 = gen_reg_rtx (tmode1);
36671 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36674 if (! pat)
36675 return 0;
36677 emit_insn (pat);
36679 if (d->flag)
36681 target = gen_reg_rtx (SImode);
36682 emit_move_insn (target, const0_rtx);
36683 target = gen_rtx_SUBREG (QImode, target, 0);
36685 emit_insn
36686 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36687 gen_rtx_fmt_ee (EQ, QImode,
36688 gen_rtx_REG ((machine_mode) d->flag,
36689 FLAGS_REG),
36690 const0_rtx)));
36691 return SUBREG_REG (target);
36693 else
36694 return target;
36697 /* Subroutine of ix86_expand_builtin to take care of insns with
36698 variable number of operands. */
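/* Descriptive note: the function-type switch below classifies each builtin
   into its number of arguments (nargs), how many trailing arguments must be
   immediates (nargs_constant), and where a mask argument sits relative to
   them (mask_pos); shapes with dedicated expanders are dispatched directly.  */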
36700 static rtx
36701 ix86_expand_args_builtin (const struct builtin_description *d,
36702 tree exp, rtx target)
36704 rtx pat, real_target;
36705 unsigned int i, nargs;
36706 unsigned int nargs_constant = 0;
36707 unsigned int mask_pos = 0;
36708 int num_memory = 0;
36709 struct
36711 rtx op;
36712 machine_mode mode;
36713 } args[6];
36714 bool last_arg_count = false;
36715 enum insn_code icode = d->icode;
36716 const struct insn_data_d *insn_p = &insn_data[icode];
36717 machine_mode tmode = insn_p->operand[0].mode;
36718 machine_mode rmode = VOIDmode;
36719 bool swap = false;
36720 enum rtx_code comparison = d->comparison;
36722 switch ((enum ix86_builtin_func_type) d->flag)
36724 case V2DF_FTYPE_V2DF_ROUND:
36725 case V4DF_FTYPE_V4DF_ROUND:
36726 case V4SF_FTYPE_V4SF_ROUND:
36727 case V8SF_FTYPE_V8SF_ROUND:
36728 case V4SI_FTYPE_V4SF_ROUND:
36729 case V8SI_FTYPE_V8SF_ROUND:
36730 return ix86_expand_sse_round (d, exp, target);
36731 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36732 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36733 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36734 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36735 case INT_FTYPE_V8SF_V8SF_PTEST:
36736 case INT_FTYPE_V4DI_V4DI_PTEST:
36737 case INT_FTYPE_V4DF_V4DF_PTEST:
36738 case INT_FTYPE_V4SF_V4SF_PTEST:
36739 case INT_FTYPE_V2DI_V2DI_PTEST:
36740 case INT_FTYPE_V2DF_V2DF_PTEST:
36741 return ix86_expand_sse_ptest (d, exp, target);
36742 case FLOAT128_FTYPE_FLOAT128:
36743 case FLOAT_FTYPE_FLOAT:
36744 case INT_FTYPE_INT:
36745 case UINT64_FTYPE_INT:
36746 case UINT16_FTYPE_UINT16:
36747 case INT64_FTYPE_INT64:
36748 case INT64_FTYPE_V4SF:
36749 case INT64_FTYPE_V2DF:
36750 case INT_FTYPE_V16QI:
36751 case INT_FTYPE_V8QI:
36752 case INT_FTYPE_V8SF:
36753 case INT_FTYPE_V4DF:
36754 case INT_FTYPE_V4SF:
36755 case INT_FTYPE_V2DF:
36756 case INT_FTYPE_V32QI:
36757 case V16QI_FTYPE_V16QI:
36758 case V8SI_FTYPE_V8SF:
36759 case V8SI_FTYPE_V4SI:
36760 case V8HI_FTYPE_V8HI:
36761 case V8HI_FTYPE_V16QI:
36762 case V8QI_FTYPE_V8QI:
36763 case V8SF_FTYPE_V8SF:
36764 case V8SF_FTYPE_V8SI:
36765 case V8SF_FTYPE_V4SF:
36766 case V8SF_FTYPE_V8HI:
36767 case V4SI_FTYPE_V4SI:
36768 case V4SI_FTYPE_V16QI:
36769 case V4SI_FTYPE_V4SF:
36770 case V4SI_FTYPE_V8SI:
36771 case V4SI_FTYPE_V8HI:
36772 case V4SI_FTYPE_V4DF:
36773 case V4SI_FTYPE_V2DF:
36774 case V4HI_FTYPE_V4HI:
36775 case V4DF_FTYPE_V4DF:
36776 case V4DF_FTYPE_V4SI:
36777 case V4DF_FTYPE_V4SF:
36778 case V4DF_FTYPE_V2DF:
36779 case V4SF_FTYPE_V4SF:
36780 case V4SF_FTYPE_V4SI:
36781 case V4SF_FTYPE_V8SF:
36782 case V4SF_FTYPE_V4DF:
36783 case V4SF_FTYPE_V8HI:
36784 case V4SF_FTYPE_V2DF:
36785 case V2DI_FTYPE_V2DI:
36786 case V2DI_FTYPE_V16QI:
36787 case V2DI_FTYPE_V8HI:
36788 case V2DI_FTYPE_V4SI:
36789 case V2DF_FTYPE_V2DF:
36790 case V2DF_FTYPE_V4SI:
36791 case V2DF_FTYPE_V4DF:
36792 case V2DF_FTYPE_V4SF:
36793 case V2DF_FTYPE_V2SI:
36794 case V2SI_FTYPE_V2SI:
36795 case V2SI_FTYPE_V4SF:
36796 case V2SI_FTYPE_V2SF:
36797 case V2SI_FTYPE_V2DF:
36798 case V2SF_FTYPE_V2SF:
36799 case V2SF_FTYPE_V2SI:
36800 case V32QI_FTYPE_V32QI:
36801 case V32QI_FTYPE_V16QI:
36802 case V16HI_FTYPE_V16HI:
36803 case V16HI_FTYPE_V8HI:
36804 case V8SI_FTYPE_V8SI:
36805 case V16HI_FTYPE_V16QI:
36806 case V8SI_FTYPE_V16QI:
36807 case V4DI_FTYPE_V16QI:
36808 case V8SI_FTYPE_V8HI:
36809 case V4DI_FTYPE_V8HI:
36810 case V4DI_FTYPE_V4SI:
36811 case V4DI_FTYPE_V2DI:
36812 case HI_FTYPE_HI:
36813 case HI_FTYPE_V16QI:
36814 case SI_FTYPE_V32QI:
36815 case DI_FTYPE_V64QI:
36816 case V16QI_FTYPE_HI:
36817 case V32QI_FTYPE_SI:
36818 case V64QI_FTYPE_DI:
36819 case V8HI_FTYPE_QI:
36820 case V16HI_FTYPE_HI:
36821 case V32HI_FTYPE_SI:
36822 case V4SI_FTYPE_QI:
36823 case V8SI_FTYPE_QI:
36824 case V4SI_FTYPE_HI:
36825 case V8SI_FTYPE_HI:
36826 case QI_FTYPE_V8HI:
36827 case HI_FTYPE_V16HI:
36828 case SI_FTYPE_V32HI:
36829 case QI_FTYPE_V4SI:
36830 case QI_FTYPE_V8SI:
36831 case HI_FTYPE_V16SI:
36832 case QI_FTYPE_V2DI:
36833 case QI_FTYPE_V4DI:
36834 case QI_FTYPE_V8DI:
36835 case UINT_FTYPE_V2DF:
36836 case UINT_FTYPE_V4SF:
36837 case UINT64_FTYPE_V2DF:
36838 case UINT64_FTYPE_V4SF:
36839 case V16QI_FTYPE_V8DI:
36840 case V16HI_FTYPE_V16SI:
36841 case V16SI_FTYPE_HI:
36842 case V2DI_FTYPE_QI:
36843 case V4DI_FTYPE_QI:
36844 case V16SI_FTYPE_V16SI:
36845 case V16SI_FTYPE_INT:
36846 case V16SF_FTYPE_FLOAT:
36847 case V16SF_FTYPE_V8SF:
36848 case V16SI_FTYPE_V8SI:
36849 case V16SF_FTYPE_V4SF:
36850 case V16SI_FTYPE_V4SI:
36851 case V16SF_FTYPE_V16SF:
36852 case V8HI_FTYPE_V8DI:
36853 case V8UHI_FTYPE_V8UHI:
36854 case V8SI_FTYPE_V8DI:
36855 case V8SF_FTYPE_V8DF:
36856 case V8DI_FTYPE_QI:
36857 case V8DI_FTYPE_INT64:
36858 case V8DI_FTYPE_V4DI:
36859 case V8DI_FTYPE_V8DI:
36860 case V8DF_FTYPE_DOUBLE:
36861 case V8DF_FTYPE_V4DF:
36862 case V8DF_FTYPE_V2DF:
36863 case V8DF_FTYPE_V8DF:
36864 case V8DF_FTYPE_V8SI:
36865 nargs = 1;
36866 break;
36867 case V4SF_FTYPE_V4SF_VEC_MERGE:
36868 case V2DF_FTYPE_V2DF_VEC_MERGE:
36869 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36870 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36871 case V16QI_FTYPE_V16QI_V16QI:
36872 case V16QI_FTYPE_V8HI_V8HI:
36873 case V16SI_FTYPE_V16SI_V16SI:
36874 case V16SF_FTYPE_V16SF_V16SF:
36875 case V16SF_FTYPE_V16SF_V16SI:
36876 case V8QI_FTYPE_V8QI_V8QI:
36877 case V8QI_FTYPE_V4HI_V4HI:
36878 case V8HI_FTYPE_V8HI_V8HI:
36879 case V8HI_FTYPE_V16QI_V16QI:
36880 case V8HI_FTYPE_V4SI_V4SI:
36881 case V8SF_FTYPE_V8SF_V8SF:
36882 case V8SF_FTYPE_V8SF_V8SI:
36883 case V8DI_FTYPE_V8DI_V8DI:
36884 case V8DF_FTYPE_V8DF_V8DF:
36885 case V8DF_FTYPE_V8DF_V8DI:
36886 case V4SI_FTYPE_V4SI_V4SI:
36887 case V4SI_FTYPE_V8HI_V8HI:
36888 case V4SI_FTYPE_V4SF_V4SF:
36889 case V4SI_FTYPE_V2DF_V2DF:
36890 case V4HI_FTYPE_V4HI_V4HI:
36891 case V4HI_FTYPE_V8QI_V8QI:
36892 case V4HI_FTYPE_V2SI_V2SI:
36893 case V4DF_FTYPE_V4DF_V4DF:
36894 case V4DF_FTYPE_V4DF_V4DI:
36895 case V4SF_FTYPE_V4SF_V4SF:
36896 case V4SF_FTYPE_V4SF_V4SI:
36897 case V4SF_FTYPE_V4SF_V2SI:
36898 case V4SF_FTYPE_V4SF_V2DF:
36899 case V4SF_FTYPE_V4SF_UINT:
36900 case V4SF_FTYPE_V4SF_UINT64:
36901 case V4SF_FTYPE_V4SF_DI:
36902 case V4SF_FTYPE_V4SF_SI:
36903 case V2DI_FTYPE_V2DI_V2DI:
36904 case V2DI_FTYPE_V16QI_V16QI:
36905 case V2DI_FTYPE_V4SI_V4SI:
36906 case V2UDI_FTYPE_V4USI_V4USI:
36907 case V2DI_FTYPE_V2DI_V16QI:
36908 case V2DI_FTYPE_V2DF_V2DF:
36909 case V2SI_FTYPE_V2SI_V2SI:
36910 case V2SI_FTYPE_V4HI_V4HI:
36911 case V2SI_FTYPE_V2SF_V2SF:
36912 case V2DF_FTYPE_V2DF_V2DF:
36913 case V2DF_FTYPE_V2DF_V4SF:
36914 case V2DF_FTYPE_V2DF_V2DI:
36915 case V2DF_FTYPE_V2DF_DI:
36916 case V2DF_FTYPE_V2DF_SI:
36917 case V2DF_FTYPE_V2DF_UINT:
36918 case V2DF_FTYPE_V2DF_UINT64:
36919 case V2SF_FTYPE_V2SF_V2SF:
36920 case V1DI_FTYPE_V1DI_V1DI:
36921 case V1DI_FTYPE_V8QI_V8QI:
36922 case V1DI_FTYPE_V2SI_V2SI:
36923 case V32QI_FTYPE_V16HI_V16HI:
36924 case V16HI_FTYPE_V8SI_V8SI:
36925 case V32QI_FTYPE_V32QI_V32QI:
36926 case V16HI_FTYPE_V32QI_V32QI:
36927 case V16HI_FTYPE_V16HI_V16HI:
36928 case V8SI_FTYPE_V4DF_V4DF:
36929 case V8SI_FTYPE_V8SI_V8SI:
36930 case V8SI_FTYPE_V16HI_V16HI:
36931 case V4DI_FTYPE_V4DI_V4DI:
36932 case V4DI_FTYPE_V8SI_V8SI:
36933 case V4UDI_FTYPE_V8USI_V8USI:
36934 case QI_FTYPE_V8DI_V8DI:
36935 case V8DI_FTYPE_V64QI_V64QI:
36936 case HI_FTYPE_V16SI_V16SI:
36937 if (comparison == UNKNOWN)
36938 return ix86_expand_binop_builtin (icode, exp, target);
36939 nargs = 2;
36940 break;
36941 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36942 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36943 gcc_assert (comparison != UNKNOWN);
36944 nargs = 2;
36945 swap = true;
36946 break;
36947 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36948 case V16HI_FTYPE_V16HI_SI_COUNT:
36949 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36950 case V8SI_FTYPE_V8SI_SI_COUNT:
36951 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36952 case V4DI_FTYPE_V4DI_INT_COUNT:
36953 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36954 case V8HI_FTYPE_V8HI_SI_COUNT:
36955 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36956 case V4SI_FTYPE_V4SI_SI_COUNT:
36957 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36958 case V4HI_FTYPE_V4HI_SI_COUNT:
36959 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36960 case V2DI_FTYPE_V2DI_SI_COUNT:
36961 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36962 case V2SI_FTYPE_V2SI_SI_COUNT:
36963 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36964 case V1DI_FTYPE_V1DI_SI_COUNT:
36965 nargs = 2;
36966 last_arg_count = true;
36967 break;
36968 case UINT64_FTYPE_UINT64_UINT64:
36969 case UINT_FTYPE_UINT_UINT:
36970 case UINT_FTYPE_UINT_USHORT:
36971 case UINT_FTYPE_UINT_UCHAR:
36972 case UINT16_FTYPE_UINT16_INT:
36973 case UINT8_FTYPE_UINT8_INT:
36974 case HI_FTYPE_HI_HI:
36975 case SI_FTYPE_SI_SI:
36976 case DI_FTYPE_DI_DI:
36977 case V16SI_FTYPE_V8DF_V8DF:
36978 nargs = 2;
36979 break;
36980 case V2DI_FTYPE_V2DI_INT_CONVERT:
36981 nargs = 2;
36982 rmode = V1TImode;
36983 nargs_constant = 1;
36984 break;
36985 case V4DI_FTYPE_V4DI_INT_CONVERT:
36986 nargs = 2;
36987 rmode = V2TImode;
36988 nargs_constant = 1;
36989 break;
36990 case V8DI_FTYPE_V8DI_INT_CONVERT:
36991 nargs = 2;
36992 rmode = V4TImode;
36993 nargs_constant = 1;
36994 break;
36995 case V8HI_FTYPE_V8HI_INT:
36996 case V8HI_FTYPE_V8SF_INT:
36997 case V16HI_FTYPE_V16SF_INT:
36998 case V8HI_FTYPE_V4SF_INT:
36999 case V8SF_FTYPE_V8SF_INT:
37000 case V4SF_FTYPE_V16SF_INT:
37001 case V16SF_FTYPE_V16SF_INT:
37002 case V4SI_FTYPE_V4SI_INT:
37003 case V4SI_FTYPE_V8SI_INT:
37004 case V4HI_FTYPE_V4HI_INT:
37005 case V4DF_FTYPE_V4DF_INT:
37006 case V4DF_FTYPE_V8DF_INT:
37007 case V4SF_FTYPE_V4SF_INT:
37008 case V4SF_FTYPE_V8SF_INT:
37009 case V2DI_FTYPE_V2DI_INT:
37010 case V2DF_FTYPE_V2DF_INT:
37011 case V2DF_FTYPE_V4DF_INT:
37012 case V16HI_FTYPE_V16HI_INT:
37013 case V8SI_FTYPE_V8SI_INT:
37014 case V16SI_FTYPE_V16SI_INT:
37015 case V4SI_FTYPE_V16SI_INT:
37016 case V4DI_FTYPE_V4DI_INT:
37017 case V2DI_FTYPE_V4DI_INT:
37018 case V4DI_FTYPE_V8DI_INT:
37019 case HI_FTYPE_HI_INT:
37020 case QI_FTYPE_V4SF_INT:
37021 case QI_FTYPE_V2DF_INT:
37022 nargs = 2;
37023 nargs_constant = 1;
37024 break;
37025 case V16QI_FTYPE_V16QI_V16QI_V16QI:
37026 case V8SF_FTYPE_V8SF_V8SF_V8SF:
37027 case V4DF_FTYPE_V4DF_V4DF_V4DF:
37028 case V4SF_FTYPE_V4SF_V4SF_V4SF:
37029 case V2DF_FTYPE_V2DF_V2DF_V2DF:
37030 case V32QI_FTYPE_V32QI_V32QI_V32QI:
37031 case HI_FTYPE_V16SI_V16SI_HI:
37032 case QI_FTYPE_V8DI_V8DI_QI:
37033 case V16HI_FTYPE_V16SI_V16HI_HI:
37034 case V16QI_FTYPE_V16SI_V16QI_HI:
37035 case V16QI_FTYPE_V8DI_V16QI_QI:
37036 case V16SF_FTYPE_V16SF_V16SF_HI:
37037 case V16SF_FTYPE_V16SF_V16SF_V16SF:
37038 case V16SF_FTYPE_V16SF_V16SI_V16SF:
37039 case V16SF_FTYPE_V16SI_V16SF_HI:
37040 case V16SF_FTYPE_V16SI_V16SF_V16SF:
37041 case V16SF_FTYPE_V4SF_V16SF_HI:
37042 case V16SI_FTYPE_SI_V16SI_HI:
37043 case V16SI_FTYPE_V16HI_V16SI_HI:
37044 case V16SI_FTYPE_V16QI_V16SI_HI:
37045 case V16SI_FTYPE_V16SF_V16SI_HI:
37046 case V8SF_FTYPE_V4SF_V8SF_QI:
37047 case V4DF_FTYPE_V2DF_V4DF_QI:
37048 case V8SI_FTYPE_V4SI_V8SI_QI:
37049 case V8SI_FTYPE_SI_V8SI_QI:
37050 case V4SI_FTYPE_V4SI_V4SI_QI:
37051 case V4SI_FTYPE_SI_V4SI_QI:
37052 case V4DI_FTYPE_V2DI_V4DI_QI:
37053 case V4DI_FTYPE_DI_V4DI_QI:
37054 case V2DI_FTYPE_V2DI_V2DI_QI:
37055 case V2DI_FTYPE_DI_V2DI_QI:
37056 case V64QI_FTYPE_V64QI_V64QI_DI:
37057 case V64QI_FTYPE_V16QI_V64QI_DI:
37058 case V64QI_FTYPE_QI_V64QI_DI:
37059 case V32QI_FTYPE_V32QI_V32QI_SI:
37060 case V32QI_FTYPE_V16QI_V32QI_SI:
37061 case V32QI_FTYPE_QI_V32QI_SI:
37062 case V16QI_FTYPE_V16QI_V16QI_HI:
37063 case V16QI_FTYPE_QI_V16QI_HI:
37064 case V32HI_FTYPE_V8HI_V32HI_SI:
37065 case V32HI_FTYPE_HI_V32HI_SI:
37066 case V16HI_FTYPE_V8HI_V16HI_HI:
37067 case V16HI_FTYPE_HI_V16HI_HI:
37068 case V8HI_FTYPE_V8HI_V8HI_QI:
37069 case V8HI_FTYPE_HI_V8HI_QI:
37070 case V8SF_FTYPE_V8HI_V8SF_QI:
37071 case V4SF_FTYPE_V8HI_V4SF_QI:
37072 case V8SI_FTYPE_V8SF_V8SI_QI:
37073 case V4SI_FTYPE_V4SF_V4SI_QI:
37074 case V8DI_FTYPE_V8SF_V8DI_QI:
37075 case V4DI_FTYPE_V4SF_V4DI_QI:
37076 case V2DI_FTYPE_V4SF_V2DI_QI:
37077 case V8SF_FTYPE_V8DI_V8SF_QI:
37078 case V4SF_FTYPE_V4DI_V4SF_QI:
37079 case V4SF_FTYPE_V2DI_V4SF_QI:
37080 case V8DF_FTYPE_V8DI_V8DF_QI:
37081 case V4DF_FTYPE_V4DI_V4DF_QI:
37082 case V2DF_FTYPE_V2DI_V2DF_QI:
37083 case V16QI_FTYPE_V8HI_V16QI_QI:
37084 case V16QI_FTYPE_V16HI_V16QI_HI:
37085 case V16QI_FTYPE_V4SI_V16QI_QI:
37086 case V16QI_FTYPE_V8SI_V16QI_QI:
37087 case V8HI_FTYPE_V4SI_V8HI_QI:
37088 case V8HI_FTYPE_V8SI_V8HI_QI:
37089 case V16QI_FTYPE_V2DI_V16QI_QI:
37090 case V16QI_FTYPE_V4DI_V16QI_QI:
37091 case V8HI_FTYPE_V2DI_V8HI_QI:
37092 case V8HI_FTYPE_V4DI_V8HI_QI:
37093 case V4SI_FTYPE_V2DI_V4SI_QI:
37094 case V4SI_FTYPE_V4DI_V4SI_QI:
37095 case V32QI_FTYPE_V32HI_V32QI_SI:
37096 case HI_FTYPE_V16QI_V16QI_HI:
37097 case SI_FTYPE_V32QI_V32QI_SI:
37098 case DI_FTYPE_V64QI_V64QI_DI:
37099 case QI_FTYPE_V8HI_V8HI_QI:
37100 case HI_FTYPE_V16HI_V16HI_HI:
37101 case SI_FTYPE_V32HI_V32HI_SI:
37102 case QI_FTYPE_V4SI_V4SI_QI:
37103 case QI_FTYPE_V8SI_V8SI_QI:
37104 case QI_FTYPE_V2DI_V2DI_QI:
37105 case QI_FTYPE_V4DI_V4DI_QI:
37106 case V4SF_FTYPE_V2DF_V4SF_QI:
37107 case V4SF_FTYPE_V4DF_V4SF_QI:
37108 case V16SI_FTYPE_V16SI_V16SI_HI:
37109 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37110 case V16SI_FTYPE_V4SI_V16SI_HI:
37111 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37112 case V2DI_FTYPE_V4SI_V2DI_QI:
37113 case V2DI_FTYPE_V8HI_V2DI_QI:
37114 case V2DI_FTYPE_V16QI_V2DI_QI:
37115 case V4DI_FTYPE_V4DI_V4DI_QI:
37116 case V4DI_FTYPE_V4SI_V4DI_QI:
37117 case V4DI_FTYPE_V8HI_V4DI_QI:
37118 case V4DI_FTYPE_V16QI_V4DI_QI:
37119 case V8DI_FTYPE_V8DF_V8DI_QI:
37120 case V4DI_FTYPE_V4DF_V4DI_QI:
37121 case V2DI_FTYPE_V2DF_V2DI_QI:
37122 case V4SI_FTYPE_V4DF_V4SI_QI:
37123 case V4SI_FTYPE_V2DF_V4SI_QI:
37124 case V4SI_FTYPE_V8HI_V4SI_QI:
37125 case V4SI_FTYPE_V16QI_V4SI_QI:
37126 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37127 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37128 case V8DF_FTYPE_V2DF_V8DF_QI:
37129 case V8DF_FTYPE_V4DF_V8DF_QI:
37130 case V8DF_FTYPE_V8DF_V8DF_QI:
37131 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37132 case V8SF_FTYPE_V8SF_V8SF_QI:
37133 case V8SF_FTYPE_V8SI_V8SF_QI:
37134 case V4DF_FTYPE_V4DF_V4DF_QI:
37135 case V4SF_FTYPE_V4SF_V4SF_QI:
37136 case V2DF_FTYPE_V2DF_V2DF_QI:
37137 case V2DF_FTYPE_V4SF_V2DF_QI:
37138 case V2DF_FTYPE_V4SI_V2DF_QI:
37139 case V4SF_FTYPE_V4SI_V4SF_QI:
37140 case V4DF_FTYPE_V4SF_V4DF_QI:
37141 case V4DF_FTYPE_V4SI_V4DF_QI:
37142 case V8SI_FTYPE_V8SI_V8SI_QI:
37143 case V8SI_FTYPE_V8HI_V8SI_QI:
37144 case V8SI_FTYPE_V16QI_V8SI_QI:
37145 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37146 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37147 case V8DF_FTYPE_V8SF_V8DF_QI:
37148 case V8DF_FTYPE_V8SI_V8DF_QI:
37149 case V8DI_FTYPE_DI_V8DI_QI:
37150 case V16SF_FTYPE_V8SF_V16SF_HI:
37151 case V16SI_FTYPE_V8SI_V16SI_HI:
37152 case V16HI_FTYPE_V16HI_V16HI_HI:
37153 case V8HI_FTYPE_V16QI_V8HI_QI:
37154 case V16HI_FTYPE_V16QI_V16HI_HI:
37155 case V32HI_FTYPE_V32HI_V32HI_SI:
37156 case V32HI_FTYPE_V32QI_V32HI_SI:
37157 case V8DI_FTYPE_V16QI_V8DI_QI:
37158 case V8DI_FTYPE_V2DI_V8DI_QI:
37159 case V8DI_FTYPE_V4DI_V8DI_QI:
37160 case V8DI_FTYPE_V8DI_V8DI_QI:
37161 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37162 case V8DI_FTYPE_V8HI_V8DI_QI:
37163 case V8DI_FTYPE_V8SI_V8DI_QI:
37164 case V8HI_FTYPE_V8DI_V8HI_QI:
37165 case V8SF_FTYPE_V8DF_V8SF_QI:
37166 case V8SI_FTYPE_V8DF_V8SI_QI:
37167 case V8SI_FTYPE_V8DI_V8SI_QI:
37168 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37169 nargs = 3;
37170 break;
37171 case V32QI_FTYPE_V32QI_V32QI_INT:
37172 case V16HI_FTYPE_V16HI_V16HI_INT:
37173 case V16QI_FTYPE_V16QI_V16QI_INT:
37174 case V4DI_FTYPE_V4DI_V4DI_INT:
37175 case V8HI_FTYPE_V8HI_V8HI_INT:
37176 case V8SI_FTYPE_V8SI_V8SI_INT:
37177 case V8SI_FTYPE_V8SI_V4SI_INT:
37178 case V8SF_FTYPE_V8SF_V8SF_INT:
37179 case V8SF_FTYPE_V8SF_V4SF_INT:
37180 case V4SI_FTYPE_V4SI_V4SI_INT:
37181 case V4DF_FTYPE_V4DF_V4DF_INT:
37182 case V16SF_FTYPE_V16SF_V16SF_INT:
37183 case V16SF_FTYPE_V16SF_V4SF_INT:
37184 case V16SI_FTYPE_V16SI_V4SI_INT:
37185 case V4DF_FTYPE_V4DF_V2DF_INT:
37186 case V4SF_FTYPE_V4SF_V4SF_INT:
37187 case V2DI_FTYPE_V2DI_V2DI_INT:
37188 case V4DI_FTYPE_V4DI_V2DI_INT:
37189 case V2DF_FTYPE_V2DF_V2DF_INT:
37190 case QI_FTYPE_V8DI_V8DI_INT:
37191 case QI_FTYPE_V8DF_V8DF_INT:
37192 case QI_FTYPE_V2DF_V2DF_INT:
37193 case QI_FTYPE_V4SF_V4SF_INT:
37194 case HI_FTYPE_V16SI_V16SI_INT:
37195 case HI_FTYPE_V16SF_V16SF_INT:
37196 nargs = 3;
37197 nargs_constant = 1;
37198 break;
37199 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37200 nargs = 3;
37201 rmode = V4DImode;
37202 nargs_constant = 1;
37203 break;
37204 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37205 nargs = 3;
37206 rmode = V2DImode;
37207 nargs_constant = 1;
37208 break;
37209 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37210 nargs = 3;
37211 rmode = DImode;
37212 nargs_constant = 1;
37213 break;
37214 case V2DI_FTYPE_V2DI_UINT_UINT:
37215 nargs = 3;
37216 nargs_constant = 2;
37217 break;
37218 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37219 nargs = 3;
37220 rmode = V8DImode;
37221 nargs_constant = 1;
37222 break;
37223 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37224 nargs = 5;
37225 rmode = V8DImode;
37226 mask_pos = 2;
37227 nargs_constant = 1;
37228 break;
37229 case QI_FTYPE_V8DF_INT_QI:
37230 case QI_FTYPE_V4DF_INT_QI:
37231 case QI_FTYPE_V2DF_INT_QI:
37232 case HI_FTYPE_V16SF_INT_HI:
37233 case QI_FTYPE_V8SF_INT_QI:
37234 case QI_FTYPE_V4SF_INT_QI:
37235 nargs = 3;
37236 mask_pos = 1;
37237 nargs_constant = 1;
37238 break;
37239 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37240 nargs = 5;
37241 rmode = V4DImode;
37242 mask_pos = 2;
37243 nargs_constant = 1;
37244 break;
37245 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37246 nargs = 5;
37247 rmode = V2DImode;
37248 mask_pos = 2;
37249 nargs_constant = 1;
37250 break;
37251 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37252 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37253 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37254 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37255 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37256 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37257 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37258 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37259 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37260 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37261 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37262 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37263 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37264 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37265 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37266 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37267 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37268 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37269 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37270 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37271 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37272 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37273 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37274 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37275 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37276 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37277 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37278 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37279 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37280 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37281 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37282 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37283 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37284 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37285 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37286 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37287 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37288 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37289 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37290 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37291 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37292 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37293 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37294 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37295 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37296 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37297 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37298 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37299 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37300 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37301 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37302 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37303 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37304 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37305 nargs = 4;
37306 break;
37307 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37308 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37309 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37310 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37311 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37312 nargs = 4;
37313 nargs_constant = 1;
37314 break;
37315 case QI_FTYPE_V4DI_V4DI_INT_QI:
37316 case QI_FTYPE_V8SI_V8SI_INT_QI:
37317 case QI_FTYPE_V4DF_V4DF_INT_QI:
37318 case QI_FTYPE_V8SF_V8SF_INT_QI:
37319 case QI_FTYPE_V2DI_V2DI_INT_QI:
37320 case QI_FTYPE_V4SI_V4SI_INT_QI:
37321 case QI_FTYPE_V2DF_V2DF_INT_QI:
37322 case QI_FTYPE_V4SF_V4SF_INT_QI:
37323 case DI_FTYPE_V64QI_V64QI_INT_DI:
37324 case SI_FTYPE_V32QI_V32QI_INT_SI:
37325 case HI_FTYPE_V16QI_V16QI_INT_HI:
37326 case SI_FTYPE_V32HI_V32HI_INT_SI:
37327 case HI_FTYPE_V16HI_V16HI_INT_HI:
37328 case QI_FTYPE_V8HI_V8HI_INT_QI:
37329 nargs = 4;
37330 mask_pos = 1;
37331 nargs_constant = 1;
37332 break;
37333 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37334 nargs = 4;
37335 nargs_constant = 2;
37336 break;
37337 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37338 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37339 nargs = 4;
37340 break;
37341 case QI_FTYPE_V8DI_V8DI_INT_QI:
37342 case HI_FTYPE_V16SI_V16SI_INT_HI:
37343 case QI_FTYPE_V8DF_V8DF_INT_QI:
37344 case HI_FTYPE_V16SF_V16SF_INT_HI:
37345 mask_pos = 1;
37346 nargs = 4;
37347 nargs_constant = 1;
37348 break;
37349 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37350 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37351 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37352 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37353 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37354 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37355 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37356 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37357 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37358 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37359 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37360 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37361 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37362 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37363 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37364 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37365 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37366 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37367 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37368 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37369 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37370 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37371 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37372 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37373 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37374 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37375 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37376 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37377 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37378 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37379 nargs = 4;
37380 mask_pos = 2;
37381 nargs_constant = 1;
37382 break;
37383 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37384 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37385 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37386 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37387 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37388 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37389 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37390 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37391 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37392 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37393 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37394 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37395 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37396 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37397 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37398 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37399 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37400 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37401 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37402 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37403 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37404 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37405 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37406 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37407 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37408 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37409 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37410 nargs = 5;
37411 mask_pos = 2;
37412 nargs_constant = 1;
37413 break;
37414 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37415 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37416 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37417 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37418 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37419 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37420 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37421 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37422 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37423 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37424 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37425 nargs = 5;
37427 mask_pos = 1;
37428 nargs_constant = 1;
37429 break;
37431 default:
37432 gcc_unreachable ();
37435 gcc_assert (nargs <= ARRAY_SIZE (args));
37437 if (comparison != UNKNOWN)
37439 gcc_assert (nargs == 2);
37440 return ix86_expand_sse_compare (d, exp, target, swap);
37443 if (rmode == VOIDmode || rmode == tmode)
37445 if (optimize
37446 || target == 0
37447 || GET_MODE (target) != tmode
37448 || !insn_p->operand[0].predicate (target, tmode))
37449 target = gen_reg_rtx (tmode);
37450 real_target = target;
37452 else
37454 real_target = gen_reg_rtx (tmode);
37455 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37458 for (i = 0; i < nargs; i++)
37460 tree arg = CALL_EXPR_ARG (exp, i);
37461 rtx op = expand_normal (arg);
37462 machine_mode mode = insn_p->operand[i + 1].mode;
37463 bool match = insn_p->operand[i + 1].predicate (op, mode);
37465 if (last_arg_count && (i + 1) == nargs)
37467 /* SIMD shift insns take either an 8-bit immediate or a
37468 register as the count, but the builtin functions take an int.
37469 If the count operand doesn't match, put it in a register. */
37470 if (!match)
37472 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37473 if (!insn_p->operand[i + 1].predicate (op, mode))
37474 op = copy_to_reg (op);
37477 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37478 (!mask_pos && (nargs - i) <= nargs_constant))
37480 if (!match)
37481 switch (icode)
37483 case CODE_FOR_avx_vinsertf128v4di:
37484 case CODE_FOR_avx_vextractf128v4di:
37485 error ("the last argument must be a 1-bit immediate");
37486 return const0_rtx;
37488 case CODE_FOR_avx512f_cmpv8di3_mask:
37489 case CODE_FOR_avx512f_cmpv16si3_mask:
37490 case CODE_FOR_avx512f_ucmpv8di3_mask:
37491 case CODE_FOR_avx512f_ucmpv16si3_mask:
37492 case CODE_FOR_avx512vl_cmpv4di3_mask:
37493 case CODE_FOR_avx512vl_cmpv8si3_mask:
37494 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37495 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37496 case CODE_FOR_avx512vl_cmpv2di3_mask:
37497 case CODE_FOR_avx512vl_cmpv4si3_mask:
37498 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37499 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37500 error ("the last argument must be a 3-bit immediate");
37501 return const0_rtx;
37503 case CODE_FOR_sse4_1_roundsd:
37504 case CODE_FOR_sse4_1_roundss:
37506 case CODE_FOR_sse4_1_roundpd:
37507 case CODE_FOR_sse4_1_roundps:
37508 case CODE_FOR_avx_roundpd256:
37509 case CODE_FOR_avx_roundps256:
37511 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37512 case CODE_FOR_sse4_1_roundps_sfix:
37513 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37514 case CODE_FOR_avx_roundps_sfix256:
37516 case CODE_FOR_sse4_1_blendps:
37517 case CODE_FOR_avx_blendpd256:
37518 case CODE_FOR_avx_vpermilv4df:
37519 case CODE_FOR_avx_vpermilv4df_mask:
37520 case CODE_FOR_avx512f_getmantv8df_mask:
37521 case CODE_FOR_avx512f_getmantv16sf_mask:
37522 case CODE_FOR_avx512vl_getmantv8sf_mask:
37523 case CODE_FOR_avx512vl_getmantv4df_mask:
37524 case CODE_FOR_avx512vl_getmantv4sf_mask:
37525 case CODE_FOR_avx512vl_getmantv2df_mask:
37526 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37527 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37528 case CODE_FOR_avx512dq_rangepv4df_mask:
37529 case CODE_FOR_avx512dq_rangepv8sf_mask:
37530 case CODE_FOR_avx512dq_rangepv2df_mask:
37531 case CODE_FOR_avx512dq_rangepv4sf_mask:
37532 case CODE_FOR_avx_shufpd256_mask:
37533 error ("the last argument must be a 4-bit immediate");
37534 return const0_rtx;
37536 case CODE_FOR_sha1rnds4:
37537 case CODE_FOR_sse4_1_blendpd:
37538 case CODE_FOR_avx_vpermilv2df:
37539 case CODE_FOR_avx_vpermilv2df_mask:
37540 case CODE_FOR_xop_vpermil2v2df3:
37541 case CODE_FOR_xop_vpermil2v4sf3:
37542 case CODE_FOR_xop_vpermil2v4df3:
37543 case CODE_FOR_xop_vpermil2v8sf3:
37544 case CODE_FOR_avx512f_vinsertf32x4_mask:
37545 case CODE_FOR_avx512f_vinserti32x4_mask:
37546 case CODE_FOR_avx512f_vextractf32x4_mask:
37547 case CODE_FOR_avx512f_vextracti32x4_mask:
37548 case CODE_FOR_sse2_shufpd:
37549 case CODE_FOR_sse2_shufpd_mask:
37550 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37551 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37552 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37553 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37554 error ("the last argument must be a 2-bit immediate");
37555 return const0_rtx;
37557 case CODE_FOR_avx_vextractf128v4df:
37558 case CODE_FOR_avx_vextractf128v8sf:
37559 case CODE_FOR_avx_vextractf128v8si:
37560 case CODE_FOR_avx_vinsertf128v4df:
37561 case CODE_FOR_avx_vinsertf128v8sf:
37562 case CODE_FOR_avx_vinsertf128v8si:
37563 case CODE_FOR_avx512f_vinsertf64x4_mask:
37564 case CODE_FOR_avx512f_vinserti64x4_mask:
37565 case CODE_FOR_avx512f_vextractf64x4_mask:
37566 case CODE_FOR_avx512f_vextracti64x4_mask:
37567 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37568 case CODE_FOR_avx512dq_vinserti32x8_mask:
37569 case CODE_FOR_avx512vl_vinsertv4df:
37570 case CODE_FOR_avx512vl_vinsertv4di:
37571 case CODE_FOR_avx512vl_vinsertv8sf:
37572 case CODE_FOR_avx512vl_vinsertv8si:
37573 error ("the last argument must be a 1-bit immediate");
37574 return const0_rtx;
37576 case CODE_FOR_avx_vmcmpv2df3:
37577 case CODE_FOR_avx_vmcmpv4sf3:
37578 case CODE_FOR_avx_cmpv2df3:
37579 case CODE_FOR_avx_cmpv4sf3:
37580 case CODE_FOR_avx_cmpv4df3:
37581 case CODE_FOR_avx_cmpv8sf3:
37582 case CODE_FOR_avx512f_cmpv8df3_mask:
37583 case CODE_FOR_avx512f_cmpv16sf3_mask:
37584 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37585 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37586 error ("the last argument must be a 5-bit immediate");
37587 return const0_rtx;
37589 default:
37590 switch (nargs_constant)
37592 case 2:
37593 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37594 (!mask_pos && (nargs - i) == nargs_constant))
37596 error ("the next to last argument must be an 8-bit immediate");
37597 break;
37599 case 1:
37600 error ("the last argument must be an 8-bit immediate");
37601 break;
37602 default:
37603 gcc_unreachable ();
37605 return const0_rtx;
37608 else
37610 if (VECTOR_MODE_P (mode))
37611 op = safe_vector_operand (op, mode);
37613 /* If we aren't optimizing, only allow one memory operand to
37614 be generated. */
37615 if (memory_operand (op, mode))
37616 num_memory++;
37618 op = fixup_modeless_constant (op, mode);
37620 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37622 if (optimize || !match || num_memory > 1)
37623 op = copy_to_mode_reg (mode, op);
37625 else
37627 op = copy_to_reg (op);
37628 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37632 args[i].op = op;
37633 args[i].mode = mode;
37636 switch (nargs)
37638 case 1:
37639 pat = GEN_FCN (icode) (real_target, args[0].op);
37640 break;
37641 case 2:
37642 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37643 break;
37644 case 3:
37645 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37646 args[2].op);
37647 break;
37648 case 4:
37649 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37650 args[2].op, args[3].op);
37651 break;
37652 case 5:
37653 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37654 args[2].op, args[3].op, args[4].op);
      break;
37655 case 6:
37656 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37657 args[2].op, args[3].op, args[4].op,
37658 args[5].op);
37659 break;
37660 default:
37661 gcc_unreachable ();
37664 if (! pat)
37665 return 0;
37667 emit_insn (pat);
37668 return target;
37671 /* Transform a pattern of the following layout:
37672 (parallel [
37673 set (A B)
37674 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37676 into:
37677 (set (A B))
      or a pattern of the layout:
37680 (parallel [ A B
      ...
37682 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
      ... ])
37685 into:
37686 (parallel [ A B ... ]) */
37688 static rtx
37689 ix86_erase_embedded_rounding (rtx pat)
37691 if (GET_CODE (pat) == INSN)
37692 pat = PATTERN (pat);
37694 gcc_assert (GET_CODE (pat) == PARALLEL);
37696 if (XVECLEN (pat, 0) == 2)
37698 rtx p0 = XVECEXP (pat, 0, 0);
37699 rtx p1 = XVECEXP (pat, 0, 1);
37701 gcc_assert (GET_CODE (p0) == SET
37702 && GET_CODE (p1) == UNSPEC
37703 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37705 return p0;
37707 else
37709 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37710 int i = 0;
37711 int j = 0;
37713 for (; i < XVECLEN (pat, 0); ++i)
37715 rtx elem = XVECEXP (pat, 0, i);
37716 if (GET_CODE (elem) != UNSPEC
37717 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37718 res [j++] = elem;
37721 /* No more than 1 occurrence was removed. */
37722 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37724 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37728 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37729 with rounding. */
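/* Descriptive note: the third builtin argument is one of the 32 _CMP_*
   predicates from avxintrin.h; the tables below map it to an rtx comparison
   code and record whether the quiet (ucomi) form of the insn is needed.  */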
37730 static rtx
37731 ix86_expand_sse_comi_round (const struct builtin_description *d,
37732 tree exp, rtx target)
37734 rtx pat, set_dst;
37735 tree arg0 = CALL_EXPR_ARG (exp, 0);
37736 tree arg1 = CALL_EXPR_ARG (exp, 1);
37737 tree arg2 = CALL_EXPR_ARG (exp, 2);
37738 tree arg3 = CALL_EXPR_ARG (exp, 3);
37739 rtx op0 = expand_normal (arg0);
37740 rtx op1 = expand_normal (arg1);
37741 rtx op2 = expand_normal (arg2);
37742 rtx op3 = expand_normal (arg3);
37743 enum insn_code icode = d->icode;
37744 const struct insn_data_d *insn_p = &insn_data[icode];
37745 machine_mode mode0 = insn_p->operand[0].mode;
37746 machine_mode mode1 = insn_p->operand[1].mode;
37747 enum rtx_code comparison = UNEQ;
37748 bool need_ucomi = false;
37750 /* See avxintrin.h for values. */
37751 enum rtx_code comi_comparisons[32] =
37753 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37754 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37755 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37757 bool need_ucomi_values[32] =
37759 true, false, false, true, true, false, false, true,
37760 true, false, false, true, true, false, false, true,
37761 false, true, true, false, false, true, true, false,
37762 false, true, true, false, false, true, true, false
37765 if (!CONST_INT_P (op2))
37767 error ("the third argument must be a comparison constant");
37768 return const0_rtx;
37770 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37772 error ("incorrect comparison mode");
37773 return const0_rtx;
37776 if (!insn_p->operand[2].predicate (op3, SImode))
37778 error ("incorrect rounding operand");
37779 return const0_rtx;
37782 comparison = comi_comparisons[INTVAL (op2)];
37783 need_ucomi = need_ucomi_values[INTVAL (op2)];
37785 if (VECTOR_MODE_P (mode0))
37786 op0 = safe_vector_operand (op0, mode0);
37787 if (VECTOR_MODE_P (mode1))
37788 op1 = safe_vector_operand (op1, mode1);
37790 target = gen_reg_rtx (SImode);
37791 emit_move_insn (target, const0_rtx);
37792 target = gen_rtx_SUBREG (QImode, target, 0);
37794 if ((optimize && !register_operand (op0, mode0))
37795 || !insn_p->operand[0].predicate (op0, mode0))
37796 op0 = copy_to_mode_reg (mode0, op0);
37797 if ((optimize && !register_operand (op1, mode1))
37798 || !insn_p->operand[1].predicate (op1, mode1))
37799 op1 = copy_to_mode_reg (mode1, op1);
37801 if (need_ucomi)
37802 icode = icode == CODE_FOR_sse_comi_round
37803 ? CODE_FOR_sse_ucomi_round
37804 : CODE_FOR_sse2_ucomi_round;
37806 pat = GEN_FCN (icode) (op0, op1, op3);
37807 if (! pat)
37808 return 0;
37810 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37811 if (INTVAL (op3) == NO_ROUND)
37813 pat = ix86_erase_embedded_rounding (pat);
37814 if (! pat)
37815 return 0;
37817 set_dst = SET_DEST (pat);
37819 else
37821 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37822 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37825 emit_insn (pat);
37826 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37827 gen_rtx_fmt_ee (comparison, QImode,
37828 set_dst,
37829 const0_rtx)));
37831 return SUBREG_REG (target);
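/* Subroutine of ix86_expand_builtin to take care of insns that carry an
   explicit rounding/SAE immediate as their last argument.  When that
   argument is NO_ROUND, the embedded-rounding unspec is erased again.  */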
37834 static rtx
37835 ix86_expand_round_builtin (const struct builtin_description *d,
37836 tree exp, rtx target)
37838 rtx pat;
37839 unsigned int i, nargs;
37840 struct
37842 rtx op;
37843 machine_mode mode;
37844 } args[6];
37845 enum insn_code icode = d->icode;
37846 const struct insn_data_d *insn_p = &insn_data[icode];
37847 machine_mode tmode = insn_p->operand[0].mode;
37848 unsigned int nargs_constant = 0;
37849 unsigned int redundant_embed_rnd = 0;
37851 switch ((enum ix86_builtin_func_type) d->flag)
37853 case UINT64_FTYPE_V2DF_INT:
37854 case UINT64_FTYPE_V4SF_INT:
37855 case UINT_FTYPE_V2DF_INT:
37856 case UINT_FTYPE_V4SF_INT:
37857 case INT64_FTYPE_V2DF_INT:
37858 case INT64_FTYPE_V4SF_INT:
37859 case INT_FTYPE_V2DF_INT:
37860 case INT_FTYPE_V4SF_INT:
37861 nargs = 2;
37862 break;
37863 case V4SF_FTYPE_V4SF_UINT_INT:
37864 case V4SF_FTYPE_V4SF_UINT64_INT:
37865 case V2DF_FTYPE_V2DF_UINT64_INT:
37866 case V4SF_FTYPE_V4SF_INT_INT:
37867 case V4SF_FTYPE_V4SF_INT64_INT:
37868 case V2DF_FTYPE_V2DF_INT64_INT:
37869 case V4SF_FTYPE_V4SF_V4SF_INT:
37870 case V2DF_FTYPE_V2DF_V2DF_INT:
37871 case V4SF_FTYPE_V4SF_V2DF_INT:
37872 case V2DF_FTYPE_V2DF_V4SF_INT:
37873 nargs = 3;
37874 break;
37875 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37876 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37877 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37878 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37879 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37880 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37881 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37882 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37883 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37884 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37885 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37886 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37887 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37888 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37889 nargs = 4;
37890 break;
37891 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37892 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37893 nargs_constant = 2;
37894 nargs = 4;
37895 break;
37896 case INT_FTYPE_V4SF_V4SF_INT_INT:
37897 case INT_FTYPE_V2DF_V2DF_INT_INT:
37898 return ix86_expand_sse_comi_round (d, exp, target);
37899 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37900 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37901 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37902 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37903 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37904 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37905 nargs = 5;
37906 break;
37907 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37908 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37909 nargs_constant = 4;
37910 nargs = 5;
37911 break;
37912 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37913 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37914 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37915 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37916 nargs_constant = 3;
37917 nargs = 5;
37918 break;
37919 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37920 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37921 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37922 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37923 nargs = 6;
37924 nargs_constant = 4;
37925 break;
37926 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37927 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37928 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37929 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37930 nargs = 6;
37931 nargs_constant = 3;
37932 break;
37933 default:
37934 gcc_unreachable ();
37936 gcc_assert (nargs <= ARRAY_SIZE (args));
37938 if (optimize
37939 || target == 0
37940 || GET_MODE (target) != tmode
37941 || !insn_p->operand[0].predicate (target, tmode))
37942 target = gen_reg_rtx (tmode);
37944 for (i = 0; i < nargs; i++)
37946 tree arg = CALL_EXPR_ARG (exp, i);
37947 rtx op = expand_normal (arg);
37948 machine_mode mode = insn_p->operand[i + 1].mode;
37949 bool match = insn_p->operand[i + 1].predicate (op, mode);
37951 if (i == nargs - nargs_constant)
37953 if (!match)
37955 switch (icode)
37957 case CODE_FOR_avx512f_getmantv8df_mask_round:
37958 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37959 case CODE_FOR_avx512f_vgetmantv2df_round:
37960 case CODE_FOR_avx512f_vgetmantv4sf_round:
37961 error ("the immediate argument must be a 4-bit immediate");
37962 return const0_rtx;
37963 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37964 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37965 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37966 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37967 error ("the immediate argument must be a 5-bit immediate");
37968 return const0_rtx;
37969 default:
37970 error ("the immediate argument must be an 8-bit immediate");
37971 return const0_rtx;
37975 else if (i == nargs-1)
37977 if (!insn_p->operand[nargs].predicate (op, SImode))
37979 error ("incorrect rounding operand");
37980 return const0_rtx;
37983 /* If there is no rounding, use the normal version of the pattern. */
37984 if (INTVAL (op) == NO_ROUND)
37985 redundant_embed_rnd = 1;
37987 else
37989 if (VECTOR_MODE_P (mode))
37990 op = safe_vector_operand (op, mode);
37992 op = fixup_modeless_constant (op, mode);
37994 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37996 if (optimize || !match)
37997 op = copy_to_mode_reg (mode, op);
37999 else
38001 op = copy_to_reg (op);
38002 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38006 args[i].op = op;
38007 args[i].mode = mode;
38010 switch (nargs)
38012 case 1:
38013 pat = GEN_FCN (icode) (target, args[0].op);
38014 break;
38015 case 2:
38016 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38017 break;
38018 case 3:
38019 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38020 args[2].op);
38021 break;
38022 case 4:
38023 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38024 args[2].op, args[3].op);
38025 break;
38026 case 5:
38027 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38028 args[2].op, args[3].op, args[4].op);
      break;
38029 case 6:
38030 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38031 args[2].op, args[3].op, args[4].op,
38032 args[5].op);
38033 break;
38034 default:
38035 gcc_unreachable ();
38038 if (!pat)
38039 return 0;
38041 if (redundant_embed_rnd)
38042 pat = ix86_erase_embedded_rounding (pat);
38044 emit_insn (pat);
38045 return target;
38048 /* Subroutine of ix86_expand_builtin to take care of special insns
38049 with variable number of operands. */
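/* Descriptive note: each case below records whether the builtin is a load or
   a store (KLASS), which argument, if any, is the memory operand, and whether
   the underlying instruction requires aligned memory (non-temporal moves and
   the masked full-vector loads/stores).  */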
38051 static rtx
38052 ix86_expand_special_args_builtin (const struct builtin_description *d,
38053 tree exp, rtx target)
38055 tree arg;
38056 rtx pat, op;
38057 unsigned int i, nargs, arg_adjust, memory;
38058 bool aligned_mem = false;
38059 struct
38061 rtx op;
38062 machine_mode mode;
38063 } args[3];
38064 enum insn_code icode = d->icode;
38065 bool last_arg_constant = false;
38066 const struct insn_data_d *insn_p = &insn_data[icode];
38067 machine_mode tmode = insn_p->operand[0].mode;
38068 enum { load, store } klass;
38070 switch ((enum ix86_builtin_func_type) d->flag)
38072 case VOID_FTYPE_VOID:
38073 emit_insn (GEN_FCN (icode) (target));
38074 return 0;
38075 case VOID_FTYPE_UINT64:
38076 case VOID_FTYPE_UNSIGNED:
38077 nargs = 0;
38078 klass = store;
38079 memory = 0;
38080 break;
38082 case INT_FTYPE_VOID:
38083 case USHORT_FTYPE_VOID:
38084 case UINT64_FTYPE_VOID:
38085 case UNSIGNED_FTYPE_VOID:
38086 nargs = 0;
38087 klass = load;
38088 memory = 0;
38089 break;
38090 case UINT64_FTYPE_PUNSIGNED:
38091 case V2DI_FTYPE_PV2DI:
38092 case V4DI_FTYPE_PV4DI:
38093 case V32QI_FTYPE_PCCHAR:
38094 case V16QI_FTYPE_PCCHAR:
38095 case V8SF_FTYPE_PCV4SF:
38096 case V8SF_FTYPE_PCFLOAT:
38097 case V4SF_FTYPE_PCFLOAT:
38098 case V4DF_FTYPE_PCV2DF:
38099 case V4DF_FTYPE_PCDOUBLE:
38100 case V2DF_FTYPE_PCDOUBLE:
38101 case VOID_FTYPE_PVOID:
38102 case V16SI_FTYPE_PV4SI:
38103 case V16SF_FTYPE_PV4SF:
38104 case V8DI_FTYPE_PV4DI:
38105 case V8DI_FTYPE_PV8DI:
38106 case V8DF_FTYPE_PV4DF:
38107 nargs = 1;
38108 klass = load;
38109 memory = 0;
38110 switch (icode)
38112 case CODE_FOR_sse4_1_movntdqa:
38113 case CODE_FOR_avx2_movntdqa:
38114 case CODE_FOR_avx512f_movntdqa:
38115 aligned_mem = true;
38116 break;
38117 default:
38118 break;
38120 break;
38121 case VOID_FTYPE_PV2SF_V4SF:
38122 case VOID_FTYPE_PV8DI_V8DI:
38123 case VOID_FTYPE_PV4DI_V4DI:
38124 case VOID_FTYPE_PV2DI_V2DI:
38125 case VOID_FTYPE_PCHAR_V32QI:
38126 case VOID_FTYPE_PCHAR_V16QI:
38127 case VOID_FTYPE_PFLOAT_V16SF:
38128 case VOID_FTYPE_PFLOAT_V8SF:
38129 case VOID_FTYPE_PFLOAT_V4SF:
38130 case VOID_FTYPE_PDOUBLE_V8DF:
38131 case VOID_FTYPE_PDOUBLE_V4DF:
38132 case VOID_FTYPE_PDOUBLE_V2DF:
38133 case VOID_FTYPE_PLONGLONG_LONGLONG:
38134 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38135 case VOID_FTYPE_PINT_INT:
38136 nargs = 1;
38137 klass = store;
38138 /* Reserve memory operand for target. */
38139 memory = ARRAY_SIZE (args);
38140 switch (icode)
38142 /* These builtins and instructions require the memory
38143 to be properly aligned. */
38144 case CODE_FOR_avx_movntv4di:
38145 case CODE_FOR_sse2_movntv2di:
38146 case CODE_FOR_avx_movntv8sf:
38147 case CODE_FOR_sse_movntv4sf:
38148 case CODE_FOR_sse4a_vmmovntv4sf:
38149 case CODE_FOR_avx_movntv4df:
38150 case CODE_FOR_sse2_movntv2df:
38151 case CODE_FOR_sse4a_vmmovntv2df:
38152 case CODE_FOR_sse2_movntidi:
38153 case CODE_FOR_sse_movntq:
38154 case CODE_FOR_sse2_movntisi:
38155 case CODE_FOR_avx512f_movntv16sf:
38156 case CODE_FOR_avx512f_movntv8df:
38157 case CODE_FOR_avx512f_movntv8di:
38158 aligned_mem = true;
38159 break;
38160 default:
38161 break;
38163 break;
38164 case V4SF_FTYPE_V4SF_PCV2SF:
38165 case V2DF_FTYPE_V2DF_PCDOUBLE:
38166 nargs = 2;
38167 klass = load;
38168 memory = 1;
38169 break;
38170 case V8SF_FTYPE_PCV8SF_V8SI:
38171 case V4DF_FTYPE_PCV4DF_V4DI:
38172 case V4SF_FTYPE_PCV4SF_V4SI:
38173 case V2DF_FTYPE_PCV2DF_V2DI:
38174 case V8SI_FTYPE_PCV8SI_V8SI:
38175 case V4DI_FTYPE_PCV4DI_V4DI:
38176 case V4SI_FTYPE_PCV4SI_V4SI:
38177 case V2DI_FTYPE_PCV2DI_V2DI:
38178 nargs = 2;
38179 klass = load;
38180 memory = 0;
38181 break;
38182 case VOID_FTYPE_PV8DF_V8DF_QI:
38183 case VOID_FTYPE_PV16SF_V16SF_HI:
38184 case VOID_FTYPE_PV8DI_V8DI_QI:
38185 case VOID_FTYPE_PV4DI_V4DI_QI:
38186 case VOID_FTYPE_PV2DI_V2DI_QI:
38187 case VOID_FTYPE_PV16SI_V16SI_HI:
38188 case VOID_FTYPE_PV8SI_V8SI_QI:
38189 case VOID_FTYPE_PV4SI_V4SI_QI:
38190 switch (icode)
38192 /* These builtins and instructions require the memory
38193 to be properly aligned. */
38194 case CODE_FOR_avx512f_storev16sf_mask:
38195 case CODE_FOR_avx512f_storev16si_mask:
38196 case CODE_FOR_avx512f_storev8df_mask:
38197 case CODE_FOR_avx512f_storev8di_mask:
38198 case CODE_FOR_avx512vl_storev8sf_mask:
38199 case CODE_FOR_avx512vl_storev8si_mask:
38200 case CODE_FOR_avx512vl_storev4df_mask:
38201 case CODE_FOR_avx512vl_storev4di_mask:
38202 case CODE_FOR_avx512vl_storev4sf_mask:
38203 case CODE_FOR_avx512vl_storev4si_mask:
38204 case CODE_FOR_avx512vl_storev2df_mask:
38205 case CODE_FOR_avx512vl_storev2di_mask:
38206 aligned_mem = true;
38207 break;
38208 default:
38209 break;
38211 /* FALLTHRU */
38212 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38213 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38214 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38215 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38216 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38217 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38218 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38219 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38220 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38221 case VOID_FTYPE_PFLOAT_V4SF_QI:
38222 case VOID_FTYPE_PV8SI_V8DI_QI:
38223 case VOID_FTYPE_PV8HI_V8DI_QI:
38224 case VOID_FTYPE_PV16HI_V16SI_HI:
38225 case VOID_FTYPE_PV16QI_V8DI_QI:
38226 case VOID_FTYPE_PV16QI_V16SI_HI:
38227 case VOID_FTYPE_PV4SI_V4DI_QI:
38228 case VOID_FTYPE_PV4SI_V2DI_QI:
38229 case VOID_FTYPE_PV8HI_V4DI_QI:
38230 case VOID_FTYPE_PV8HI_V2DI_QI:
38231 case VOID_FTYPE_PV8HI_V8SI_QI:
38232 case VOID_FTYPE_PV8HI_V4SI_QI:
38233 case VOID_FTYPE_PV16QI_V4DI_QI:
38234 case VOID_FTYPE_PV16QI_V2DI_QI:
38235 case VOID_FTYPE_PV16QI_V8SI_QI:
38236 case VOID_FTYPE_PV16QI_V4SI_QI:
38237 case VOID_FTYPE_PV8HI_V8HI_QI:
38238 case VOID_FTYPE_PV16HI_V16HI_HI:
38239 case VOID_FTYPE_PV32HI_V32HI_SI:
38240 case VOID_FTYPE_PV16QI_V16QI_HI:
38241 case VOID_FTYPE_PV32QI_V32QI_SI:
38242 case VOID_FTYPE_PV64QI_V64QI_DI:
38243 case VOID_FTYPE_PV4DF_V4DF_QI:
38244 case VOID_FTYPE_PV2DF_V2DF_QI:
38245 case VOID_FTYPE_PV8SF_V8SF_QI:
38246 case VOID_FTYPE_PV4SF_V4SF_QI:
38247 nargs = 2;
38248 klass = store;
38249 /* Reserve memory operand for target. */
38250 memory = ARRAY_SIZE (args);
38251 break;
38252 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38253 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38254 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38255 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38256 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38257 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38258 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38259 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38260 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38261 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38262 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38263 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38264 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38265 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38266 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38267 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38268 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38269 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38270 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38271 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38272 nargs = 3;
38273 klass = load;
38274 memory = 0;
38275 switch (icode)
38277 /* These builtins and instructions require the memory
38278 to be properly aligned. */
38279 case CODE_FOR_avx512f_loadv16sf_mask:
38280 case CODE_FOR_avx512f_loadv16si_mask:
38281 case CODE_FOR_avx512f_loadv8df_mask:
38282 case CODE_FOR_avx512f_loadv8di_mask:
38283 case CODE_FOR_avx512vl_loadv8sf_mask:
38284 case CODE_FOR_avx512vl_loadv8si_mask:
38285 case CODE_FOR_avx512vl_loadv4df_mask:
38286 case CODE_FOR_avx512vl_loadv4di_mask:
38287 case CODE_FOR_avx512vl_loadv4sf_mask:
38288 case CODE_FOR_avx512vl_loadv4si_mask:
38289 case CODE_FOR_avx512vl_loadv2df_mask:
38290 case CODE_FOR_avx512vl_loadv2di_mask:
38291 case CODE_FOR_avx512bw_loadv64qi_mask:
38292 case CODE_FOR_avx512vl_loadv32qi_mask:
38293 case CODE_FOR_avx512vl_loadv16qi_mask:
38294 case CODE_FOR_avx512bw_loadv32hi_mask:
38295 case CODE_FOR_avx512vl_loadv16hi_mask:
38296 case CODE_FOR_avx512vl_loadv8hi_mask:
38297 aligned_mem = true;
38298 break;
38299 default:
38300 break;
38302 break;
38303 case VOID_FTYPE_UINT_UINT_UINT:
38304 case VOID_FTYPE_UINT64_UINT_UINT:
38305 case UCHAR_FTYPE_UINT_UINT_UINT:
38306 case UCHAR_FTYPE_UINT64_UINT_UINT:
38307 nargs = 3;
38308 klass = load;
38309 memory = ARRAY_SIZE (args);
38310 last_arg_constant = true;
38311 break;
38312 default:
38313 gcc_unreachable ();
38316 gcc_assert (nargs <= ARRAY_SIZE (args));
38318 if (klass == store)
38320 arg = CALL_EXPR_ARG (exp, 0);
38321 op = expand_normal (arg);
38322 gcc_assert (target == 0);
38323 if (memory)
38325 op = ix86_zero_extend_to_Pmode (op);
38326 target = gen_rtx_MEM (tmode, op);
38327 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38328 on it. Try to improve it using get_pointer_alignment,
38329 and if the special builtin is one that requires strict
38330 mode alignment, also from its GET_MODE_ALIGNMENT.
38331 Failure to do so could lead to ix86_legitimate_combined_insn
38332 rejecting all changes to such insns. */
38333 unsigned int align = get_pointer_alignment (arg);
38334 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38335 align = GET_MODE_ALIGNMENT (tmode);
38336 if (MEM_ALIGN (target) < align)
38337 set_mem_align (target, align);
38339 else
38340 target = force_reg (tmode, op);
38341 arg_adjust = 1;
38343 else
38345 arg_adjust = 0;
38346 if (optimize
38347 || target == 0
38348 || !register_operand (target, tmode)
38349 || GET_MODE (target) != tmode)
38350 target = gen_reg_rtx (tmode);
38353 for (i = 0; i < nargs; i++)
38355 machine_mode mode = insn_p->operand[i + 1].mode;
38356 bool match;
38358 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38359 op = expand_normal (arg);
38360 match = insn_p->operand[i + 1].predicate (op, mode);
38362 if (last_arg_constant && (i + 1) == nargs)
38364 if (!match)
38366 if (icode == CODE_FOR_lwp_lwpvalsi3
38367 || icode == CODE_FOR_lwp_lwpinssi3
38368 || icode == CODE_FOR_lwp_lwpvaldi3
38369 || icode == CODE_FOR_lwp_lwpinsdi3)
38370 error ("the last argument must be a 32-bit immediate");
38371 else
38372 error ("the last argument must be an 8-bit immediate");
38373 return const0_rtx;
38376 else
38378 if (i == memory)
38380 /* This must be the memory operand. */
38381 op = ix86_zero_extend_to_Pmode (op);
38382 op = gen_rtx_MEM (mode, op);
38383 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38384 on it. Try to improve it using get_pointer_alignment,
38385 and if the special builtin is one that requires strict
38386 mode alignment, also from its GET_MODE_ALIGNMENT.
38387 Failure to do so could lead to ix86_legitimate_combined_insn
38388 rejecting all changes to such insns. */
38389 unsigned int align = get_pointer_alignment (arg);
38390 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38391 align = GET_MODE_ALIGNMENT (mode);
38392 if (MEM_ALIGN (op) < align)
38393 set_mem_align (op, align);
38395 else
38397 /* This must be a register. */
38398 if (VECTOR_MODE_P (mode))
38399 op = safe_vector_operand (op, mode);
38401 op = fixup_modeless_constant (op, mode);
38403 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38404 op = copy_to_mode_reg (mode, op);
38405 else
38407 op = copy_to_reg (op);
38408 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38413 args[i].op = op;
38414 args[i].mode = mode;
38417 switch (nargs)
38419 case 0:
38420 pat = GEN_FCN (icode) (target);
38421 break;
38422 case 1:
38423 pat = GEN_FCN (icode) (target, args[0].op);
38424 break;
38425 case 2:
38426 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38427 break;
38428 case 3:
38429 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38430 break;
38431 default:
38432 gcc_unreachable ();
38435 if (! pat)
38436 return 0;
38437 emit_insn (pat);
38438 return klass == store ? 0 : target;
38441 /* Return the integer constant in ARG. Constrain it to be in the range
38442 of the subparts of VEC_TYPE; issue an error if not. */
38444 static int
38445 get_element_number (tree vec_type, tree arg)
38447 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38449 if (!tree_fits_uhwi_p (arg)
38450 || (elt = tree_to_uhwi (arg), elt > max))
38452 error ("selector must be an integer constant in the range 0..%wi", max);
38453 return 0;
38456 return elt;
38459 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38460 ix86_expand_vector_init. We DO have language-level syntax for this, in
38461 the form of (type){ init-list }. Except that since we can't place emms
38462 instructions from inside the compiler, we can't allow the use of MMX
38463 registers unless the user explicitly asks for it. So we do *not* define
38464 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38465 we have builtins invoked by mmintrin.h that give us license to emit
38466 these sorts of instructions. */
38468 static rtx
38469 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38471 machine_mode tmode = TYPE_MODE (type);
38472 machine_mode inner_mode = GET_MODE_INNER (tmode);
38473 int i, n_elt = GET_MODE_NUNITS (tmode);
38474 rtvec v = rtvec_alloc (n_elt);
38476 gcc_assert (VECTOR_MODE_P (tmode));
38477 gcc_assert (call_expr_nargs (exp) == n_elt);
38479 for (i = 0; i < n_elt; ++i)
38481 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38482 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38485 if (!target || !register_operand (target, tmode))
38486 target = gen_reg_rtx (tmode);
38488 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38489 return target;
38492 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38493 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38494 had a language-level syntax for referencing vector elements. */
38496 static rtx
38497 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38499 machine_mode tmode, mode0;
38500 tree arg0, arg1;
38501 int elt;
38502 rtx op0;
38504 arg0 = CALL_EXPR_ARG (exp, 0);
38505 arg1 = CALL_EXPR_ARG (exp, 1);
38507 op0 = expand_normal (arg0);
38508 elt = get_element_number (TREE_TYPE (arg0), arg1);
38510 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38511 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38512 gcc_assert (VECTOR_MODE_P (mode0));
38514 op0 = force_reg (mode0, op0);
38516 if (optimize || !target || !register_operand (target, tmode))
38517 target = gen_reg_rtx (tmode);
38519 ix86_expand_vector_extract (true, target, op0, elt);
38521 return target;
38524 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38525 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38526 a language-level syntax for referencing vector elements. */
38528 static rtx
38529 ix86_expand_vec_set_builtin (tree exp)
38531 machine_mode tmode, mode1;
38532 tree arg0, arg1, arg2;
38533 int elt;
38534 rtx op0, op1, target;
38536 arg0 = CALL_EXPR_ARG (exp, 0);
38537 arg1 = CALL_EXPR_ARG (exp, 1);
38538 arg2 = CALL_EXPR_ARG (exp, 2);
38540 tmode = TYPE_MODE (TREE_TYPE (arg0));
38541 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38542 gcc_assert (VECTOR_MODE_P (tmode));
38544 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38545 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38546 elt = get_element_number (TREE_TYPE (arg0), arg2);
38548 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38549 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38551 op0 = force_reg (tmode, op0);
38552 op1 = force_reg (mode1, op1);
38554 /* OP0 is the source of these builtin functions and shouldn't be
38555 modified. Create a copy, use it and return it as target. */
38556 target = gen_reg_rtx (tmode);
38557 emit_move_insn (target, op0);
38558 ix86_expand_vector_set (true, target, op1, elt);
38560 return target;
38563 /* Emit conditional move of SRC to DST with condition
38564 OP1 CODE OP2. */
38565 static void
38566 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38568 rtx t;
38570 if (TARGET_CMOVE)
38572 t = ix86_expand_compare (code, op1, op2);
38573 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38574 src, dst)));
38576 else
38578 rtx_code_label *nomove = gen_label_rtx ();
38579 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38580 const0_rtx, GET_MODE (op1), 1, nomove);
38581 emit_move_insn (dst, src);
38582 emit_label (nomove);
38586 /* Choose max of DST and SRC and put it to DST. */
38587 static void
38588 ix86_emit_move_max (rtx dst, rtx src)
38590 ix86_emit_cmove (dst, src, LTU, dst, src);
38593 /* Expand an expression EXP that calls a built-in function,
38594 with result going to TARGET if that's convenient
38595 (and in mode MODE if that's convenient).
38596 SUBTARGET may be used as the target for computing one of EXP's operands.
38597 IGNORE is nonzero if the value is to be ignored. */
38599 static rtx
38600 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38601 machine_mode mode, int ignore)
38603 const struct builtin_description *d;
38604 size_t i;
38605 enum insn_code icode;
38606 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38607 tree arg0, arg1, arg2, arg3, arg4;
38608 rtx op0, op1, op2, op3, op4, pat, insn;
38609 machine_mode mode0, mode1, mode2, mode3, mode4;
38610 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38612 /* For CPU builtins that can be folded, fold first and expand the fold. */
38613 switch (fcode)
38615 case IX86_BUILTIN_CPU_INIT:
38617 /* Make it call __cpu_indicator_init in libgcc. */
38618 tree call_expr, fndecl, type;
38619 type = build_function_type_list (integer_type_node, NULL_TREE);
38620 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38621 call_expr = build_call_expr (fndecl, 0);
38622 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38624 case IX86_BUILTIN_CPU_IS:
38625 case IX86_BUILTIN_CPU_SUPPORTS:
38627 tree arg0 = CALL_EXPR_ARG (exp, 0);
38628 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38629 gcc_assert (fold_expr != NULL_TREE);
38630 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38634 /* Determine whether the builtin function is available under the current ISA.
38635 Originally the builtin was not created if it wasn't applicable to the
38636 current ISA based on the command line switches. With function specific
38637 options, we need to check in the context of the function making the call
38638 whether it is supported. */
38639 if (ix86_builtins_isa[fcode].isa
38640 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38642 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38643 NULL, (enum fpmath_unit) 0, false);
38645 if (!opts)
38646 error ("%qE needs unknown isa option", fndecl);
38647 else
38649 gcc_assert (opts != NULL);
38650 error ("%qE needs isa option %s", fndecl, opts);
38651 free (opts);
38653 return const0_rtx;
38656 switch (fcode)
38658 case IX86_BUILTIN_BNDMK:
38659 if (!target
38660 || GET_MODE (target) != BNDmode
38661 || !register_operand (target, BNDmode))
38662 target = gen_reg_rtx (BNDmode);
38664 arg0 = CALL_EXPR_ARG (exp, 0);
38665 arg1 = CALL_EXPR_ARG (exp, 1);
38667 op0 = expand_normal (arg0);
38668 op1 = expand_normal (arg1);
38670 if (!register_operand (op0, Pmode))
38671 op0 = ix86_zero_extend_to_Pmode (op0);
38672 if (!register_operand (op1, Pmode))
38673 op1 = ix86_zero_extend_to_Pmode (op1);
38675 /* Builtin arg1 is the size of the block, but instruction op1 should
38676 be (size - 1). */
38677 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38678 NULL_RTX, 1, OPTAB_DIRECT);
38680 emit_insn (BNDmode == BND64mode
38681 ? gen_bnd64_mk (target, op0, op1)
38682 : gen_bnd32_mk (target, op0, op1));
38683 return target;
38685 case IX86_BUILTIN_BNDSTX:
38686 arg0 = CALL_EXPR_ARG (exp, 0);
38687 arg1 = CALL_EXPR_ARG (exp, 1);
38688 arg2 = CALL_EXPR_ARG (exp, 2);
38690 op0 = expand_normal (arg0);
38691 op1 = expand_normal (arg1);
38692 op2 = expand_normal (arg2);
38694 if (!register_operand (op0, Pmode))
38695 op0 = ix86_zero_extend_to_Pmode (op0);
38696 if (!register_operand (op1, BNDmode))
38697 op1 = copy_to_mode_reg (BNDmode, op1);
38698 if (!register_operand (op2, Pmode))
38699 op2 = ix86_zero_extend_to_Pmode (op2);
38701 emit_insn (BNDmode == BND64mode
38702 ? gen_bnd64_stx (op2, op0, op1)
38703 : gen_bnd32_stx (op2, op0, op1));
38704 return 0;
38706 case IX86_BUILTIN_BNDLDX:
38707 if (!target
38708 || GET_MODE (target) != BNDmode
38709 || !register_operand (target, BNDmode))
38710 target = gen_reg_rtx (BNDmode);
38712 arg0 = CALL_EXPR_ARG (exp, 0);
38713 arg1 = CALL_EXPR_ARG (exp, 1);
38715 op0 = expand_normal (arg0);
38716 op1 = expand_normal (arg1);
38718 if (!register_operand (op0, Pmode))
38719 op0 = ix86_zero_extend_to_Pmode (op0);
38720 if (!register_operand (op1, Pmode))
38721 op1 = ix86_zero_extend_to_Pmode (op1);
38723 emit_insn (BNDmode == BND64mode
38724 ? gen_bnd64_ldx (target, op0, op1)
38725 : gen_bnd32_ldx (target, op0, op1));
38726 return target;
38728 case IX86_BUILTIN_BNDCL:
38729 arg0 = CALL_EXPR_ARG (exp, 0);
38730 arg1 = CALL_EXPR_ARG (exp, 1);
38732 op0 = expand_normal (arg0);
38733 op1 = expand_normal (arg1);
38735 if (!register_operand (op0, Pmode))
38736 op0 = ix86_zero_extend_to_Pmode (op0);
38737 if (!register_operand (op1, BNDmode))
38738 op1 = copy_to_mode_reg (BNDmode, op1);
38740 emit_insn (BNDmode == BND64mode
38741 ? gen_bnd64_cl (op1, op0)
38742 : gen_bnd32_cl (op1, op0));
38743 return 0;
38745 case IX86_BUILTIN_BNDCU:
38746 arg0 = CALL_EXPR_ARG (exp, 0);
38747 arg1 = CALL_EXPR_ARG (exp, 1);
38749 op0 = expand_normal (arg0);
38750 op1 = expand_normal (arg1);
38752 if (!register_operand (op0, Pmode))
38753 op0 = ix86_zero_extend_to_Pmode (op0);
38754 if (!register_operand (op1, BNDmode))
38755 op1 = copy_to_mode_reg (BNDmode, op1);
38757 emit_insn (BNDmode == BND64mode
38758 ? gen_bnd64_cu (op1, op0)
38759 : gen_bnd32_cu (op1, op0));
38760 return 0;
38762 case IX86_BUILTIN_BNDRET:
38763 arg0 = CALL_EXPR_ARG (exp, 0);
38764 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38765 target = chkp_get_rtl_bounds (arg0);
38767 /* If no bounds were specified for the returned value,
38768 then use INIT bounds. This usually happens when
38769 some built-in function is expanded. */
38770 if (!target)
38772 rtx t1 = gen_reg_rtx (Pmode);
38773 rtx t2 = gen_reg_rtx (Pmode);
38774 target = gen_reg_rtx (BNDmode);
38775 emit_move_insn (t1, const0_rtx);
38776 emit_move_insn (t2, constm1_rtx);
38777 emit_insn (BNDmode == BND64mode
38778 ? gen_bnd64_mk (target, t1, t2)
38779 : gen_bnd32_mk (target, t1, t2));
38782 gcc_assert (target && REG_P (target));
38783 return target;
38785 case IX86_BUILTIN_BNDNARROW:
38787 rtx m1, m1h1, m1h2, lb, ub, t1;
38789 /* Return value and lb. */
38790 arg0 = CALL_EXPR_ARG (exp, 0);
38791 /* Bounds. */
38792 arg1 = CALL_EXPR_ARG (exp, 1);
38793 /* Size. */
38794 arg2 = CALL_EXPR_ARG (exp, 2);
38796 lb = expand_normal (arg0);
38797 op1 = expand_normal (arg1);
38798 op2 = expand_normal (arg2);
38800 /* Size was passed but we need to use (size - 1) as for bndmk. */
38801 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38802 NULL_RTX, 1, OPTAB_DIRECT);
38804 /* Add LB to the size and invert the result to get UB. */
38805 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38806 op2, 1, OPTAB_DIRECT);
38807 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38809 if (!register_operand (lb, Pmode))
38810 lb = ix86_zero_extend_to_Pmode (lb);
38811 if (!register_operand (ub, Pmode))
38812 ub = ix86_zero_extend_to_Pmode (ub);
38814 /* We need to move bounds to memory before any computations. */
38815 if (MEM_P (op1))
38816 m1 = op1;
38817 else
38819 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38820 emit_move_insn (m1, op1);
38823 /* Generate the mem expressions used to access LB and UB. */
38824 m1h1 = adjust_address (m1, Pmode, 0);
38825 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38827 t1 = gen_reg_rtx (Pmode);
38829 /* Compute LB. */
38830 emit_move_insn (t1, m1h1);
38831 ix86_emit_move_max (t1, lb);
38832 emit_move_insn (m1h1, t1);
38834 /* Compute UB. UB is stored in 1's complement form. Therefore
38835 we also use max here. */
38836 emit_move_insn (t1, m1h2);
38837 ix86_emit_move_max (t1, ub);
38838 emit_move_insn (m1h2, t1);
38840 op2 = gen_reg_rtx (BNDmode);
38841 emit_move_insn (op2, m1);
38843 return chkp_join_splitted_slot (lb, op2);
38846 case IX86_BUILTIN_BNDINT:
38848 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38850 if (!target
38851 || GET_MODE (target) != BNDmode
38852 || !register_operand (target, BNDmode))
38853 target = gen_reg_rtx (BNDmode);
38855 arg0 = CALL_EXPR_ARG (exp, 0);
38856 arg1 = CALL_EXPR_ARG (exp, 1);
38858 op0 = expand_normal (arg0);
38859 op1 = expand_normal (arg1);
38861 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38862 rh1 = adjust_address (res, Pmode, 0);
38863 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38865 /* Put the first bounds into temporaries. */
38866 lb1 = gen_reg_rtx (Pmode);
38867 ub1 = gen_reg_rtx (Pmode);
38868 if (MEM_P (op0))
38870 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38871 emit_move_insn (ub1, adjust_address (op0, Pmode,
38872 GET_MODE_SIZE (Pmode)));
38874 else
38876 emit_move_insn (res, op0);
38877 emit_move_insn (lb1, rh1);
38878 emit_move_insn (ub1, rh2);
38881 /* Put the second bounds into temporaries. */
38882 lb2 = gen_reg_rtx (Pmode);
38883 ub2 = gen_reg_rtx (Pmode);
38884 if (MEM_P (op1))
38886 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38887 emit_move_insn (ub2, adjust_address (op1, Pmode,
38888 GET_MODE_SIZE (Pmode)));
38890 else
38892 emit_move_insn (res, op1);
38893 emit_move_insn (lb2, rh1);
38894 emit_move_insn (ub2, rh2);
38897 /* Compute LB. */
38898 ix86_emit_move_max (lb1, lb2);
38899 emit_move_insn (rh1, lb1);
38901 /* Compute UB. UB is stored in 1's complement form. Therefore
38902 we also use max here. */
38903 ix86_emit_move_max (ub1, ub2);
38904 emit_move_insn (rh2, ub1);
38906 emit_move_insn (target, res);
38908 return target;
38911 case IX86_BUILTIN_SIZEOF:
38913 tree name;
38914 rtx symbol;
38916 if (!target
38917 || GET_MODE (target) != Pmode
38918 || !register_operand (target, Pmode))
38919 target = gen_reg_rtx (Pmode);
38921 arg0 = CALL_EXPR_ARG (exp, 0);
38922 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38924 name = DECL_ASSEMBLER_NAME (arg0);
38925 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38927 emit_insn (Pmode == SImode
38928 ? gen_move_size_reloc_si (target, symbol)
38929 : gen_move_size_reloc_di (target, symbol));
38931 return target;
38934 case IX86_BUILTIN_BNDLOWER:
38936 rtx mem, hmem;
38938 if (!target
38939 || GET_MODE (target) != Pmode
38940 || !register_operand (target, Pmode))
38941 target = gen_reg_rtx (Pmode);
38943 arg0 = CALL_EXPR_ARG (exp, 0);
38944 op0 = expand_normal (arg0);
38946 /* We need to move bounds to memory first. */
38947 if (MEM_P (op0))
38948 mem = op0;
38949 else
38951 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38952 emit_move_insn (mem, op0);
38955 /* Generate mem expression to access LB and load it. */
38956 hmem = adjust_address (mem, Pmode, 0);
38957 emit_move_insn (target, hmem);
38959 return target;
38962 case IX86_BUILTIN_BNDUPPER:
38964 rtx mem, hmem, res;
38966 if (!target
38967 || GET_MODE (target) != Pmode
38968 || !register_operand (target, Pmode))
38969 target = gen_reg_rtx (Pmode);
38971 arg0 = CALL_EXPR_ARG (exp, 0);
38972 op0 = expand_normal (arg0);
38974 /* We need to move bounds to memory first. */
38975 if (MEM_P (op0))
38976 mem = op0;
38977 else
38979 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38980 emit_move_insn (mem, op0);
38983 /* Generate mem expression to access UB. */
38984 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38986 /* We need to invert all bits of UB. */
38987 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38989 if (res != target)
38990 emit_move_insn (target, res);
38992 return target;
38995 case IX86_BUILTIN_MASKMOVQ:
38996 case IX86_BUILTIN_MASKMOVDQU:
38997 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38998 ? CODE_FOR_mmx_maskmovq
38999 : CODE_FOR_sse2_maskmovdqu);
39000 /* Note the arg order is different from the operand order. */
39001 arg1 = CALL_EXPR_ARG (exp, 0);
39002 arg2 = CALL_EXPR_ARG (exp, 1);
39003 arg0 = CALL_EXPR_ARG (exp, 2);
39004 op0 = expand_normal (arg0);
39005 op1 = expand_normal (arg1);
39006 op2 = expand_normal (arg2);
39007 mode0 = insn_data[icode].operand[0].mode;
39008 mode1 = insn_data[icode].operand[1].mode;
39009 mode2 = insn_data[icode].operand[2].mode;
39011 op0 = ix86_zero_extend_to_Pmode (op0);
39012 op0 = gen_rtx_MEM (mode1, op0);
39014 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39015 op0 = copy_to_mode_reg (mode0, op0);
39016 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39017 op1 = copy_to_mode_reg (mode1, op1);
39018 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39019 op2 = copy_to_mode_reg (mode2, op2);
39020 pat = GEN_FCN (icode) (op0, op1, op2);
39021 if (! pat)
39022 return 0;
39023 emit_insn (pat);
39024 return 0;
39026 case IX86_BUILTIN_LDMXCSR:
39027 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39028 target = assign_386_stack_local (SImode, SLOT_TEMP);
39029 emit_move_insn (target, op0);
39030 emit_insn (gen_sse_ldmxcsr (target));
39031 return 0;
39033 case IX86_BUILTIN_STMXCSR:
39034 target = assign_386_stack_local (SImode, SLOT_TEMP);
39035 emit_insn (gen_sse_stmxcsr (target));
39036 return copy_to_mode_reg (SImode, target);
39038 case IX86_BUILTIN_CLFLUSH:
39039 arg0 = CALL_EXPR_ARG (exp, 0);
39040 op0 = expand_normal (arg0);
39041 icode = CODE_FOR_sse2_clflush;
39042 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39043 op0 = ix86_zero_extend_to_Pmode (op0);
39045 emit_insn (gen_sse2_clflush (op0));
39046 return 0;
39048 case IX86_BUILTIN_CLWB:
39049 arg0 = CALL_EXPR_ARG (exp, 0);
39050 op0 = expand_normal (arg0);
39051 icode = CODE_FOR_clwb;
39052 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39053 op0 = ix86_zero_extend_to_Pmode (op0);
39055 emit_insn (gen_clwb (op0));
39056 return 0;
39058 case IX86_BUILTIN_CLFLUSHOPT:
39059 arg0 = CALL_EXPR_ARG (exp, 0);
39060 op0 = expand_normal (arg0);
39061 icode = CODE_FOR_clflushopt;
39062 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39063 op0 = ix86_zero_extend_to_Pmode (op0);
39065 emit_insn (gen_clflushopt (op0));
39066 return 0;
39068 case IX86_BUILTIN_MONITOR:
39069 case IX86_BUILTIN_MONITORX:
39070 arg0 = CALL_EXPR_ARG (exp, 0);
39071 arg1 = CALL_EXPR_ARG (exp, 1);
39072 arg2 = CALL_EXPR_ARG (exp, 2);
39073 op0 = expand_normal (arg0);
39074 op1 = expand_normal (arg1);
39075 op2 = expand_normal (arg2);
39076 if (!REG_P (op0))
39077 op0 = ix86_zero_extend_to_Pmode (op0);
39078 if (!REG_P (op1))
39079 op1 = copy_to_mode_reg (SImode, op1);
39080 if (!REG_P (op2))
39081 op2 = copy_to_mode_reg (SImode, op2);
39083 emit_insn (fcode == IX86_BUILTIN_MONITOR
39084 ? ix86_gen_monitor (op0, op1, op2)
39085 : ix86_gen_monitorx (op0, op1, op2));
39086 return 0;
39088 case IX86_BUILTIN_MWAIT:
39089 arg0 = CALL_EXPR_ARG (exp, 0);
39090 arg1 = CALL_EXPR_ARG (exp, 1);
39091 op0 = expand_normal (arg0);
39092 op1 = expand_normal (arg1);
39093 if (!REG_P (op0))
39094 op0 = copy_to_mode_reg (SImode, op0);
39095 if (!REG_P (op1))
39096 op1 = copy_to_mode_reg (SImode, op1);
39097 emit_insn (gen_sse3_mwait (op0, op1));
39098 return 0;
39100 case IX86_BUILTIN_MWAITX:
39101 arg0 = CALL_EXPR_ARG (exp, 0);
39102 arg1 = CALL_EXPR_ARG (exp, 1);
39103 arg2 = CALL_EXPR_ARG (exp, 2);
39104 op0 = expand_normal (arg0);
39105 op1 = expand_normal (arg1);
39106 op2 = expand_normal (arg2);
39107 if (!REG_P (op0))
39108 op0 = copy_to_mode_reg (SImode, op0);
39109 if (!REG_P (op1))
39110 op1 = copy_to_mode_reg (SImode, op1);
39111 if (!REG_P (op2))
39112 op2 = copy_to_mode_reg (SImode, op2);
39113 emit_insn (gen_mwaitx (op0, op1, op2));
39114 return 0;
39116 case IX86_BUILTIN_VEC_INIT_V2SI:
39117 case IX86_BUILTIN_VEC_INIT_V4HI:
39118 case IX86_BUILTIN_VEC_INIT_V8QI:
39119 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39121 case IX86_BUILTIN_VEC_EXT_V2DF:
39122 case IX86_BUILTIN_VEC_EXT_V2DI:
39123 case IX86_BUILTIN_VEC_EXT_V4SF:
39124 case IX86_BUILTIN_VEC_EXT_V4SI:
39125 case IX86_BUILTIN_VEC_EXT_V8HI:
39126 case IX86_BUILTIN_VEC_EXT_V2SI:
39127 case IX86_BUILTIN_VEC_EXT_V4HI:
39128 case IX86_BUILTIN_VEC_EXT_V16QI:
39129 return ix86_expand_vec_ext_builtin (exp, target);
39131 case IX86_BUILTIN_VEC_SET_V2DI:
39132 case IX86_BUILTIN_VEC_SET_V4SF:
39133 case IX86_BUILTIN_VEC_SET_V4SI:
39134 case IX86_BUILTIN_VEC_SET_V8HI:
39135 case IX86_BUILTIN_VEC_SET_V4HI:
39136 case IX86_BUILTIN_VEC_SET_V16QI:
39137 return ix86_expand_vec_set_builtin (exp);
39139 case IX86_BUILTIN_INFQ:
39140 case IX86_BUILTIN_HUGE_VALQ:
39142 REAL_VALUE_TYPE inf;
39143 rtx tmp;
39145 real_inf (&inf);
39146 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39148 tmp = validize_mem (force_const_mem (mode, tmp));
39150 if (target == 0)
39151 target = gen_reg_rtx (mode);
39153 emit_move_insn (target, tmp);
39154 return target;
39157 case IX86_BUILTIN_RDPMC:
39158 case IX86_BUILTIN_RDTSC:
39159 case IX86_BUILTIN_RDTSCP:
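/* RDPMC, RDTSC and RDTSCP all produce a 64-bit result. On 64-bit
targets the two 32-bit halves come back separately and are
recombined below; RDTSCP additionally stores its auxiliary value
through the pointer argument. */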
39161 op0 = gen_reg_rtx (DImode);
39162 op1 = gen_reg_rtx (DImode);
39164 if (fcode == IX86_BUILTIN_RDPMC)
39166 arg0 = CALL_EXPR_ARG (exp, 0);
39167 op2 = expand_normal (arg0);
39168 if (!register_operand (op2, SImode))
39169 op2 = copy_to_mode_reg (SImode, op2);
39171 insn = (TARGET_64BIT
39172 ? gen_rdpmc_rex64 (op0, op1, op2)
39173 : gen_rdpmc (op0, op2));
39174 emit_insn (insn);
39176 else if (fcode == IX86_BUILTIN_RDTSC)
39178 insn = (TARGET_64BIT
39179 ? gen_rdtsc_rex64 (op0, op1)
39180 : gen_rdtsc (op0));
39181 emit_insn (insn);
39183 else
39185 op2 = gen_reg_rtx (SImode);
39187 insn = (TARGET_64BIT
39188 ? gen_rdtscp_rex64 (op0, op1, op2)
39189 : gen_rdtscp (op0, op2));
39190 emit_insn (insn);
39192 arg0 = CALL_EXPR_ARG (exp, 0);
39193 op4 = expand_normal (arg0);
39194 if (!address_operand (op4, VOIDmode))
39196 op4 = convert_memory_address (Pmode, op4);
39197 op4 = copy_addr_to_reg (op4);
39199 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39202 if (target == 0)
39204 /* mode is VOIDmode if __builtin_rd* has been called
39205 without lhs. */
39206 if (mode == VOIDmode)
39207 return target;
39208 target = gen_reg_rtx (mode);
39211 if (TARGET_64BIT)
39213 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39214 op1, 1, OPTAB_DIRECT);
39215 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39216 op0, 1, OPTAB_DIRECT);
39219 emit_move_insn (target, op0);
39220 return target;
39222 case IX86_BUILTIN_FXSAVE:
39223 case IX86_BUILTIN_FXRSTOR:
39224 case IX86_BUILTIN_FXSAVE64:
39225 case IX86_BUILTIN_FXRSTOR64:
39226 case IX86_BUILTIN_FNSTENV:
39227 case IX86_BUILTIN_FLDENV:
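/* Each of these builtins takes a single pointer argument. Wrap the
(possibly converted) address in a BLKmode MEM covering the whole
save/restore area before generating the pattern. */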
39228 mode0 = BLKmode;
39229 switch (fcode)
39231 case IX86_BUILTIN_FXSAVE:
39232 icode = CODE_FOR_fxsave;
39233 break;
39234 case IX86_BUILTIN_FXRSTOR:
39235 icode = CODE_FOR_fxrstor;
39236 break;
39237 case IX86_BUILTIN_FXSAVE64:
39238 icode = CODE_FOR_fxsave64;
39239 break;
39240 case IX86_BUILTIN_FXRSTOR64:
39241 icode = CODE_FOR_fxrstor64;
39242 break;
39243 case IX86_BUILTIN_FNSTENV:
39244 icode = CODE_FOR_fnstenv;
39245 break;
39246 case IX86_BUILTIN_FLDENV:
39247 icode = CODE_FOR_fldenv;
39248 break;
39249 default:
39250 gcc_unreachable ();
39253 arg0 = CALL_EXPR_ARG (exp, 0);
39254 op0 = expand_normal (arg0);
39256 if (!address_operand (op0, VOIDmode))
39258 op0 = convert_memory_address (Pmode, op0);
39259 op0 = copy_addr_to_reg (op0);
39261 op0 = gen_rtx_MEM (mode0, op0);
39263 pat = GEN_FCN (icode) (op0);
39264 if (pat)
39265 emit_insn (pat);
39266 return 0;
39268 case IX86_BUILTIN_XSAVE:
39269 case IX86_BUILTIN_XRSTOR:
39270 case IX86_BUILTIN_XSAVE64:
39271 case IX86_BUILTIN_XRSTOR64:
39272 case IX86_BUILTIN_XSAVEOPT:
39273 case IX86_BUILTIN_XSAVEOPT64:
39274 case IX86_BUILTIN_XSAVES:
39275 case IX86_BUILTIN_XRSTORS:
39276 case IX86_BUILTIN_XSAVES64:
39277 case IX86_BUILTIN_XRSTORS64:
39278 case IX86_BUILTIN_XSAVEC:
39279 case IX86_BUILTIN_XSAVEC64:
39280 arg0 = CALL_EXPR_ARG (exp, 0);
39281 arg1 = CALL_EXPR_ARG (exp, 1);
39282 op0 = expand_normal (arg0);
39283 op1 = expand_normal (arg1);
39285 if (!address_operand (op0, VOIDmode))
39287 op0 = convert_memory_address (Pmode, op0);
39288 op0 = copy_addr_to_reg (op0);
39290 op0 = gen_rtx_MEM (BLKmode, op0);
39292 op1 = force_reg (DImode, op1);
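/* The feature mask is a 64-bit value. On 64-bit targets split it
into two SImode halves for the patterns selected below; in 32-bit
mode the pattern consumes the DImode mask directly. */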
39294 if (TARGET_64BIT)
39296 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39297 NULL, 1, OPTAB_DIRECT);
39298 switch (fcode)
39300 case IX86_BUILTIN_XSAVE:
39301 icode = CODE_FOR_xsave_rex64;
39302 break;
39303 case IX86_BUILTIN_XRSTOR:
39304 icode = CODE_FOR_xrstor_rex64;
39305 break;
39306 case IX86_BUILTIN_XSAVE64:
39307 icode = CODE_FOR_xsave64;
39308 break;
39309 case IX86_BUILTIN_XRSTOR64:
39310 icode = CODE_FOR_xrstor64;
39311 break;
39312 case IX86_BUILTIN_XSAVEOPT:
39313 icode = CODE_FOR_xsaveopt_rex64;
39314 break;
39315 case IX86_BUILTIN_XSAVEOPT64:
39316 icode = CODE_FOR_xsaveopt64;
39317 break;
39318 case IX86_BUILTIN_XSAVES:
39319 icode = CODE_FOR_xsaves_rex64;
39320 break;
39321 case IX86_BUILTIN_XRSTORS:
39322 icode = CODE_FOR_xrstors_rex64;
39323 break;
39324 case IX86_BUILTIN_XSAVES64:
39325 icode = CODE_FOR_xsaves64;
39326 break;
39327 case IX86_BUILTIN_XRSTORS64:
39328 icode = CODE_FOR_xrstors64;
39329 break;
39330 case IX86_BUILTIN_XSAVEC:
39331 icode = CODE_FOR_xsavec_rex64;
39332 break;
39333 case IX86_BUILTIN_XSAVEC64:
39334 icode = CODE_FOR_xsavec64;
39335 break;
39336 default:
39337 gcc_unreachable ();
39340 op2 = gen_lowpart (SImode, op2);
39341 op1 = gen_lowpart (SImode, op1);
39342 pat = GEN_FCN (icode) (op0, op1, op2);
39344 else
39346 switch (fcode)
39348 case IX86_BUILTIN_XSAVE:
39349 icode = CODE_FOR_xsave;
39350 break;
39351 case IX86_BUILTIN_XRSTOR:
39352 icode = CODE_FOR_xrstor;
39353 break;
39354 case IX86_BUILTIN_XSAVEOPT:
39355 icode = CODE_FOR_xsaveopt;
39356 break;
39357 case IX86_BUILTIN_XSAVES:
39358 icode = CODE_FOR_xsaves;
39359 break;
39360 case IX86_BUILTIN_XRSTORS:
39361 icode = CODE_FOR_xrstors;
39362 break;
39363 case IX86_BUILTIN_XSAVEC:
39364 icode = CODE_FOR_xsavec;
39365 break;
39366 default:
39367 gcc_unreachable ();
39369 pat = GEN_FCN (icode) (op0, op1);
39372 if (pat)
39373 emit_insn (pat);
39374 return 0;
39376 case IX86_BUILTIN_LLWPCB:
39377 arg0 = CALL_EXPR_ARG (exp, 0);
39378 op0 = expand_normal (arg0);
39379 icode = CODE_FOR_lwp_llwpcb;
39380 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39381 op0 = ix86_zero_extend_to_Pmode (op0);
39382 emit_insn (gen_lwp_llwpcb (op0));
39383 return 0;
39385 case IX86_BUILTIN_SLWPCB:
39386 icode = CODE_FOR_lwp_slwpcb;
39387 if (!target
39388 || !insn_data[icode].operand[0].predicate (target, Pmode))
39389 target = gen_reg_rtx (Pmode);
39390 emit_insn (gen_lwp_slwpcb (target));
39391 return target;
39393 case IX86_BUILTIN_BEXTRI32:
39394 case IX86_BUILTIN_BEXTRI64:
39395 arg0 = CALL_EXPR_ARG (exp, 0);
39396 arg1 = CALL_EXPR_ARG (exp, 1);
39397 op0 = expand_normal (arg0);
39398 op1 = expand_normal (arg1);
39399 icode = (fcode == IX86_BUILTIN_BEXTRI32
39400 ? CODE_FOR_tbm_bextri_si
39401 : CODE_FOR_tbm_bextri_di);
39402 if (!CONST_INT_P (op1))
39404 error ("last argument must be an immediate");
39405 return const0_rtx;
39407 else
39409 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39410 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39411 op1 = GEN_INT (length);
39412 op2 = GEN_INT (lsb_index);
39413 pat = GEN_FCN (icode) (target, op0, op1, op2);
39414 if (pat)
39415 emit_insn (pat);
39416 return target;
39419 case IX86_BUILTIN_RDRAND16_STEP:
39420 icode = CODE_FOR_rdrandhi_1;
39421 mode0 = HImode;
39422 goto rdrand_step;
39424 case IX86_BUILTIN_RDRAND32_STEP:
39425 icode = CODE_FOR_rdrandsi_1;
39426 mode0 = SImode;
39427 goto rdrand_step;
39429 case IX86_BUILTIN_RDRAND64_STEP:
39430 icode = CODE_FOR_rdranddi_1;
39431 mode0 = DImode;
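/* Common code for the rdrand*_step builtins: emit the rdrand
pattern, store the generated value through the pointer argument and
derive the builtin's status return value from the carry flag left by
the instruction. The rdseed*_step builtins below follow the same
scheme. */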
39433 rdrand_step:
39434 op0 = gen_reg_rtx (mode0);
39435 emit_insn (GEN_FCN (icode) (op0));
39437 arg0 = CALL_EXPR_ARG (exp, 0);
39438 op1 = expand_normal (arg0);
39439 if (!address_operand (op1, VOIDmode))
39441 op1 = convert_memory_address (Pmode, op1);
39442 op1 = copy_addr_to_reg (op1);
39444 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39446 op1 = gen_reg_rtx (SImode);
39447 emit_move_insn (op1, CONST1_RTX (SImode));
39449 /* Emit SImode conditional move. */
39450 if (mode0 == HImode)
39452 op2 = gen_reg_rtx (SImode);
39453 emit_insn (gen_zero_extendhisi2 (op2, op0));
39455 else if (mode0 == SImode)
39456 op2 = op0;
39457 else
39458 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39460 if (target == 0
39461 || !register_operand (target, SImode))
39462 target = gen_reg_rtx (SImode);
39464 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39465 const0_rtx);
39466 emit_insn (gen_rtx_SET (target,
39467 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39468 return target;
39470 case IX86_BUILTIN_RDSEED16_STEP:
39471 icode = CODE_FOR_rdseedhi_1;
39472 mode0 = HImode;
39473 goto rdseed_step;
39475 case IX86_BUILTIN_RDSEED32_STEP:
39476 icode = CODE_FOR_rdseedsi_1;
39477 mode0 = SImode;
39478 goto rdseed_step;
39480 case IX86_BUILTIN_RDSEED64_STEP:
39481 icode = CODE_FOR_rdseeddi_1;
39482 mode0 = DImode;
39484 rdseed_step:
39485 op0 = gen_reg_rtx (mode0);
39486 emit_insn (GEN_FCN (icode) (op0));
39488 arg0 = CALL_EXPR_ARG (exp, 0);
39489 op1 = expand_normal (arg0);
39490 if (!address_operand (op1, VOIDmode))
39492 op1 = convert_memory_address (Pmode, op1);
39493 op1 = copy_addr_to_reg (op1);
39495 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39497 op2 = gen_reg_rtx (QImode);
39499 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39500 const0_rtx);
39501 emit_insn (gen_rtx_SET (op2, pat));
39503 if (target == 0
39504 || !register_operand (target, SImode))
39505 target = gen_reg_rtx (SImode);
39507 emit_insn (gen_zero_extendqisi2 (target, op2));
39508 return target;
39510 case IX86_BUILTIN_SBB32:
39511 icode = CODE_FOR_subsi3_carry;
39512 mode0 = SImode;
39513 goto addcarryx;
39515 case IX86_BUILTIN_SBB64:
39516 icode = CODE_FOR_subdi3_carry;
39517 mode0 = DImode;
39518 goto addcarryx;
39520 case IX86_BUILTIN_ADDCARRYX32:
39521 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39522 mode0 = SImode;
39523 goto addcarryx;
39525 case IX86_BUILTIN_ADDCARRYX64:
39526 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39527 mode0 = DImode;
39529 addcarryx:
39530 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39531 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39532 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39533 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39535 op0 = gen_reg_rtx (QImode);
39537 /* Generate CF from input operand. */
39538 op1 = expand_normal (arg0);
39539 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39540 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39542 /* Gen ADCX instruction to compute X+Y+CF. */
39543 op2 = expand_normal (arg1);
39544 op3 = expand_normal (arg2);
39546 if (!REG_P (op2))
39547 op2 = copy_to_mode_reg (mode0, op2);
39548 if (!REG_P (op3))
39549 op3 = copy_to_mode_reg (mode0, op3);
39551 op0 = gen_reg_rtx (mode0);
39553 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39554 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39555 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39557 /* Store the result. */
39558 op4 = expand_normal (arg3);
39559 if (!address_operand (op4, VOIDmode))
39561 op4 = convert_memory_address (Pmode, op4);
39562 op4 = copy_addr_to_reg (op4);
39564 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39566 /* Return current CF value. */
39567 if (target == 0)
39568 target = gen_reg_rtx (QImode);
39570 PUT_MODE (pat, QImode);
39571 emit_insn (gen_rtx_SET (target, pat));
39572 return target;
39574 case IX86_BUILTIN_READ_FLAGS:
39575 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39577 if (optimize
39578 || target == NULL_RTX
39579 || !nonimmediate_operand (target, word_mode)
39580 || GET_MODE (target) != word_mode)
39581 target = gen_reg_rtx (word_mode);
39583 emit_insn (gen_pop (target));
39584 return target;
39586 case IX86_BUILTIN_WRITE_FLAGS:
39588 arg0 = CALL_EXPR_ARG (exp, 0);
39589 op0 = expand_normal (arg0);
39590 if (!general_no_elim_operand (op0, word_mode))
39591 op0 = copy_to_mode_reg (word_mode, op0);
39593 emit_insn (gen_push (op0));
39594 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39595 return 0;
39597 case IX86_BUILTIN_KORTESTC16:
39598 icode = CODE_FOR_kortestchi;
39599 mode0 = HImode;
39600 mode1 = CCCmode;
39601 goto kortest;
39603 case IX86_BUILTIN_KORTESTZ16:
39604 icode = CODE_FOR_kortestzhi;
39605 mode0 = HImode;
39606 mode1 = CCZmode;
39608 kortest:
39609 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39610 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39611 op0 = expand_normal (arg0);
39612 op1 = expand_normal (arg1);
39614 op0 = copy_to_reg (op0);
39615 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39616 op1 = copy_to_reg (op1);
39617 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39619 target = gen_reg_rtx (QImode);
39620 emit_insn (gen_rtx_SET (target, const0_rtx));
39622 /* Emit kortest. */
39623 emit_insn (GEN_FCN (icode) (op0, op1));
39624 /* And use setcc to return result from flags. */
39625 ix86_expand_setcc (target, EQ,
39626 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39627 return target;
39629 case IX86_BUILTIN_GATHERSIV2DF:
39630 icode = CODE_FOR_avx2_gathersiv2df;
39631 goto gather_gen;
39632 case IX86_BUILTIN_GATHERSIV4DF:
39633 icode = CODE_FOR_avx2_gathersiv4df;
39634 goto gather_gen;
39635 case IX86_BUILTIN_GATHERDIV2DF:
39636 icode = CODE_FOR_avx2_gatherdiv2df;
39637 goto gather_gen;
39638 case IX86_BUILTIN_GATHERDIV4DF:
39639 icode = CODE_FOR_avx2_gatherdiv4df;
39640 goto gather_gen;
39641 case IX86_BUILTIN_GATHERSIV4SF:
39642 icode = CODE_FOR_avx2_gathersiv4sf;
39643 goto gather_gen;
39644 case IX86_BUILTIN_GATHERSIV8SF:
39645 icode = CODE_FOR_avx2_gathersiv8sf;
39646 goto gather_gen;
39647 case IX86_BUILTIN_GATHERDIV4SF:
39648 icode = CODE_FOR_avx2_gatherdiv4sf;
39649 goto gather_gen;
39650 case IX86_BUILTIN_GATHERDIV8SF:
39651 icode = CODE_FOR_avx2_gatherdiv8sf;
39652 goto gather_gen;
39653 case IX86_BUILTIN_GATHERSIV2DI:
39654 icode = CODE_FOR_avx2_gathersiv2di;
39655 goto gather_gen;
39656 case IX86_BUILTIN_GATHERSIV4DI:
39657 icode = CODE_FOR_avx2_gathersiv4di;
39658 goto gather_gen;
39659 case IX86_BUILTIN_GATHERDIV2DI:
39660 icode = CODE_FOR_avx2_gatherdiv2di;
39661 goto gather_gen;
39662 case IX86_BUILTIN_GATHERDIV4DI:
39663 icode = CODE_FOR_avx2_gatherdiv4di;
39664 goto gather_gen;
39665 case IX86_BUILTIN_GATHERSIV4SI:
39666 icode = CODE_FOR_avx2_gathersiv4si;
39667 goto gather_gen;
39668 case IX86_BUILTIN_GATHERSIV8SI:
39669 icode = CODE_FOR_avx2_gathersiv8si;
39670 goto gather_gen;
39671 case IX86_BUILTIN_GATHERDIV4SI:
39672 icode = CODE_FOR_avx2_gatherdiv4si;
39673 goto gather_gen;
39674 case IX86_BUILTIN_GATHERDIV8SI:
39675 icode = CODE_FOR_avx2_gatherdiv8si;
39676 goto gather_gen;
39677 case IX86_BUILTIN_GATHERALTSIV4DF:
39678 icode = CODE_FOR_avx2_gathersiv4df;
39679 goto gather_gen;
39680 case IX86_BUILTIN_GATHERALTDIV8SF:
39681 icode = CODE_FOR_avx2_gatherdiv8sf;
39682 goto gather_gen;
39683 case IX86_BUILTIN_GATHERALTSIV4DI:
39684 icode = CODE_FOR_avx2_gathersiv4di;
39685 goto gather_gen;
39686 case IX86_BUILTIN_GATHERALTDIV8SI:
39687 icode = CODE_FOR_avx2_gatherdiv8si;
39688 goto gather_gen;
39689 case IX86_BUILTIN_GATHER3SIV16SF:
39690 icode = CODE_FOR_avx512f_gathersiv16sf;
39691 goto gather_gen;
39692 case IX86_BUILTIN_GATHER3SIV8DF:
39693 icode = CODE_FOR_avx512f_gathersiv8df;
39694 goto gather_gen;
39695 case IX86_BUILTIN_GATHER3DIV16SF:
39696 icode = CODE_FOR_avx512f_gatherdiv16sf;
39697 goto gather_gen;
39698 case IX86_BUILTIN_GATHER3DIV8DF:
39699 icode = CODE_FOR_avx512f_gatherdiv8df;
39700 goto gather_gen;
39701 case IX86_BUILTIN_GATHER3SIV16SI:
39702 icode = CODE_FOR_avx512f_gathersiv16si;
39703 goto gather_gen;
39704 case IX86_BUILTIN_GATHER3SIV8DI:
39705 icode = CODE_FOR_avx512f_gathersiv8di;
39706 goto gather_gen;
39707 case IX86_BUILTIN_GATHER3DIV16SI:
39708 icode = CODE_FOR_avx512f_gatherdiv16si;
39709 goto gather_gen;
39710 case IX86_BUILTIN_GATHER3DIV8DI:
39711 icode = CODE_FOR_avx512f_gatherdiv8di;
39712 goto gather_gen;
39713 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39714 icode = CODE_FOR_avx512f_gathersiv8df;
39715 goto gather_gen;
39716 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39717 icode = CODE_FOR_avx512f_gatherdiv16sf;
39718 goto gather_gen;
39719 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39720 icode = CODE_FOR_avx512f_gathersiv8di;
39721 goto gather_gen;
39722 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39723 icode = CODE_FOR_avx512f_gatherdiv16si;
39724 goto gather_gen;
39725 case IX86_BUILTIN_GATHER3SIV2DF:
39726 icode = CODE_FOR_avx512vl_gathersiv2df;
39727 goto gather_gen;
39728 case IX86_BUILTIN_GATHER3SIV4DF:
39729 icode = CODE_FOR_avx512vl_gathersiv4df;
39730 goto gather_gen;
39731 case IX86_BUILTIN_GATHER3DIV2DF:
39732 icode = CODE_FOR_avx512vl_gatherdiv2df;
39733 goto gather_gen;
39734 case IX86_BUILTIN_GATHER3DIV4DF:
39735 icode = CODE_FOR_avx512vl_gatherdiv4df;
39736 goto gather_gen;
39737 case IX86_BUILTIN_GATHER3SIV4SF:
39738 icode = CODE_FOR_avx512vl_gathersiv4sf;
39739 goto gather_gen;
39740 case IX86_BUILTIN_GATHER3SIV8SF:
39741 icode = CODE_FOR_avx512vl_gathersiv8sf;
39742 goto gather_gen;
39743 case IX86_BUILTIN_GATHER3DIV4SF:
39744 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39745 goto gather_gen;
39746 case IX86_BUILTIN_GATHER3DIV8SF:
39747 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39748 goto gather_gen;
39749 case IX86_BUILTIN_GATHER3SIV2DI:
39750 icode = CODE_FOR_avx512vl_gathersiv2di;
39751 goto gather_gen;
39752 case IX86_BUILTIN_GATHER3SIV4DI:
39753 icode = CODE_FOR_avx512vl_gathersiv4di;
39754 goto gather_gen;
39755 case IX86_BUILTIN_GATHER3DIV2DI:
39756 icode = CODE_FOR_avx512vl_gatherdiv2di;
39757 goto gather_gen;
39758 case IX86_BUILTIN_GATHER3DIV4DI:
39759 icode = CODE_FOR_avx512vl_gatherdiv4di;
39760 goto gather_gen;
39761 case IX86_BUILTIN_GATHER3SIV4SI:
39762 icode = CODE_FOR_avx512vl_gathersiv4si;
39763 goto gather_gen;
39764 case IX86_BUILTIN_GATHER3SIV8SI:
39765 icode = CODE_FOR_avx512vl_gathersiv8si;
39766 goto gather_gen;
39767 case IX86_BUILTIN_GATHER3DIV4SI:
39768 icode = CODE_FOR_avx512vl_gatherdiv4si;
39769 goto gather_gen;
39770 case IX86_BUILTIN_GATHER3DIV8SI:
39771 icode = CODE_FOR_avx512vl_gatherdiv8si;
39772 goto gather_gen;
39773 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39774 icode = CODE_FOR_avx512vl_gathersiv4df;
39775 goto gather_gen;
39776 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39777 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39778 goto gather_gen;
39779 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39780 icode = CODE_FOR_avx512vl_gathersiv4di;
39781 goto gather_gen;
39782 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39783 icode = CODE_FOR_avx512vl_gatherdiv8si;
39784 goto gather_gen;
39785 case IX86_BUILTIN_SCATTERSIV16SF:
39786 icode = CODE_FOR_avx512f_scattersiv16sf;
39787 goto scatter_gen;
39788 case IX86_BUILTIN_SCATTERSIV8DF:
39789 icode = CODE_FOR_avx512f_scattersiv8df;
39790 goto scatter_gen;
39791 case IX86_BUILTIN_SCATTERDIV16SF:
39792 icode = CODE_FOR_avx512f_scatterdiv16sf;
39793 goto scatter_gen;
39794 case IX86_BUILTIN_SCATTERDIV8DF:
39795 icode = CODE_FOR_avx512f_scatterdiv8df;
39796 goto scatter_gen;
39797 case IX86_BUILTIN_SCATTERSIV16SI:
39798 icode = CODE_FOR_avx512f_scattersiv16si;
39799 goto scatter_gen;
39800 case IX86_BUILTIN_SCATTERSIV8DI:
39801 icode = CODE_FOR_avx512f_scattersiv8di;
39802 goto scatter_gen;
39803 case IX86_BUILTIN_SCATTERDIV16SI:
39804 icode = CODE_FOR_avx512f_scatterdiv16si;
39805 goto scatter_gen;
39806 case IX86_BUILTIN_SCATTERDIV8DI:
39807 icode = CODE_FOR_avx512f_scatterdiv8di;
39808 goto scatter_gen;
39809 case IX86_BUILTIN_SCATTERSIV8SF:
39810 icode = CODE_FOR_avx512vl_scattersiv8sf;
39811 goto scatter_gen;
39812 case IX86_BUILTIN_SCATTERSIV4SF:
39813 icode = CODE_FOR_avx512vl_scattersiv4sf;
39814 goto scatter_gen;
39815 case IX86_BUILTIN_SCATTERSIV4DF:
39816 icode = CODE_FOR_avx512vl_scattersiv4df;
39817 goto scatter_gen;
39818 case IX86_BUILTIN_SCATTERSIV2DF:
39819 icode = CODE_FOR_avx512vl_scattersiv2df;
39820 goto scatter_gen;
39821 case IX86_BUILTIN_SCATTERDIV8SF:
39822 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39823 goto scatter_gen;
39824 case IX86_BUILTIN_SCATTERDIV4SF:
39825 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39826 goto scatter_gen;
39827 case IX86_BUILTIN_SCATTERDIV4DF:
39828 icode = CODE_FOR_avx512vl_scatterdiv4df;
39829 goto scatter_gen;
39830 case IX86_BUILTIN_SCATTERDIV2DF:
39831 icode = CODE_FOR_avx512vl_scatterdiv2df;
39832 goto scatter_gen;
39833 case IX86_BUILTIN_SCATTERSIV8SI:
39834 icode = CODE_FOR_avx512vl_scattersiv8si;
39835 goto scatter_gen;
39836 case IX86_BUILTIN_SCATTERSIV4SI:
39837 icode = CODE_FOR_avx512vl_scattersiv4si;
39838 goto scatter_gen;
39839 case IX86_BUILTIN_SCATTERSIV4DI:
39840 icode = CODE_FOR_avx512vl_scattersiv4di;
39841 goto scatter_gen;
39842 case IX86_BUILTIN_SCATTERSIV2DI:
39843 icode = CODE_FOR_avx512vl_scattersiv2di;
39844 goto scatter_gen;
39845 case IX86_BUILTIN_SCATTERDIV8SI:
39846 icode = CODE_FOR_avx512vl_scatterdiv8si;
39847 goto scatter_gen;
39848 case IX86_BUILTIN_SCATTERDIV4SI:
39849 icode = CODE_FOR_avx512vl_scatterdiv4si;
39850 goto scatter_gen;
39851 case IX86_BUILTIN_SCATTERDIV4DI:
39852 icode = CODE_FOR_avx512vl_scatterdiv4di;
39853 goto scatter_gen;
39854 case IX86_BUILTIN_SCATTERDIV2DI:
39855 icode = CODE_FOR_avx512vl_scatterdiv2di;
39856 goto scatter_gen;
39857 case IX86_BUILTIN_GATHERPFDPD:
39858 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39859 goto vec_prefetch_gen;
39860 case IX86_BUILTIN_GATHERPFDPS:
39861 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39862 goto vec_prefetch_gen;
39863 case IX86_BUILTIN_GATHERPFQPD:
39864 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39865 goto vec_prefetch_gen;
39866 case IX86_BUILTIN_GATHERPFQPS:
39867 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39868 goto vec_prefetch_gen;
39869 case IX86_BUILTIN_SCATTERPFDPD:
39870 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39871 goto vec_prefetch_gen;
39872 case IX86_BUILTIN_SCATTERPFDPS:
39873 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39874 goto vec_prefetch_gen;
39875 case IX86_BUILTIN_SCATTERPFQPD:
39876 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39877 goto vec_prefetch_gen;
39878 case IX86_BUILTIN_SCATTERPFQPS:
39879 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39880 goto vec_prefetch_gen;
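/* Common expansion for the gather builtins. The five call arguments
are the source/merge vector, the base address, the index vector, the
mask and the scale; the *ALT* variants first reduce an oversized
index, source or mask operand to its low half. */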
39882 gather_gen:
39883 rtx half;
39884 rtx (*gen) (rtx, rtx);
39886 arg0 = CALL_EXPR_ARG (exp, 0);
39887 arg1 = CALL_EXPR_ARG (exp, 1);
39888 arg2 = CALL_EXPR_ARG (exp, 2);
39889 arg3 = CALL_EXPR_ARG (exp, 3);
39890 arg4 = CALL_EXPR_ARG (exp, 4);
39891 op0 = expand_normal (arg0);
39892 op1 = expand_normal (arg1);
39893 op2 = expand_normal (arg2);
39894 op3 = expand_normal (arg3);
39895 op4 = expand_normal (arg4);
39896 /* Note the arg order is different from the operand order. */
39897 mode0 = insn_data[icode].operand[1].mode;
39898 mode2 = insn_data[icode].operand[3].mode;
39899 mode3 = insn_data[icode].operand[4].mode;
39900 mode4 = insn_data[icode].operand[5].mode;
39902 if (target == NULL_RTX
39903 || GET_MODE (target) != insn_data[icode].operand[0].mode
39904 || !insn_data[icode].operand[0].predicate (target,
39905 GET_MODE (target)))
39906 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39907 else
39908 subtarget = target;
39910 switch (fcode)
39912 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39913 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39914 half = gen_reg_rtx (V8SImode);
39915 if (!nonimmediate_operand (op2, V16SImode))
39916 op2 = copy_to_mode_reg (V16SImode, op2);
39917 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39918 op2 = half;
39919 break;
39920 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39921 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39922 case IX86_BUILTIN_GATHERALTSIV4DF:
39923 case IX86_BUILTIN_GATHERALTSIV4DI:
39924 half = gen_reg_rtx (V4SImode);
39925 if (!nonimmediate_operand (op2, V8SImode))
39926 op2 = copy_to_mode_reg (V8SImode, op2);
39927 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39928 op2 = half;
39929 break;
39930 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39931 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39932 half = gen_reg_rtx (mode0);
39933 if (mode0 == V8SFmode)
39934 gen = gen_vec_extract_lo_v16sf;
39935 else
39936 gen = gen_vec_extract_lo_v16si;
39937 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39938 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39939 emit_insn (gen (half, op0));
39940 op0 = half;
39941 if (GET_MODE (op3) != VOIDmode)
39943 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39944 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39945 emit_insn (gen (half, op3));
39946 op3 = half;
39948 break;
39949 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39950 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39951 case IX86_BUILTIN_GATHERALTDIV8SF:
39952 case IX86_BUILTIN_GATHERALTDIV8SI:
39953 half = gen_reg_rtx (mode0);
39954 if (mode0 == V4SFmode)
39955 gen = gen_vec_extract_lo_v8sf;
39956 else
39957 gen = gen_vec_extract_lo_v8si;
39958 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39959 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39960 emit_insn (gen (half, op0));
39961 op0 = half;
39962 if (GET_MODE (op3) != VOIDmode)
39964 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39965 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39966 emit_insn (gen (half, op3));
39967 op3 = half;
39969 break;
39970 default:
39971 break;
39974 /* Force the memory operand to use only a base register here; we
39975 don't want to do this for the memory operands of other builtin
39976 functions. */
39977 op1 = ix86_zero_extend_to_Pmode (op1);
39979 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39980 op0 = copy_to_mode_reg (mode0, op0);
39981 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39982 op1 = copy_to_mode_reg (Pmode, op1);
39983 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39984 op2 = copy_to_mode_reg (mode2, op2);
39986 op3 = fixup_modeless_constant (op3, mode3);
39988 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39990 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39991 op3 = copy_to_mode_reg (mode3, op3);
39993 else
39995 op3 = copy_to_reg (op3);
39996 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39998 if (!insn_data[icode].operand[5].predicate (op4, mode4))
40000 error ("the last argument must be scale 1, 2, 4, 8");
40001 return const0_rtx;
40004 /* Optimize. If mask is known to have all high bits set,
40005 replace op0 with pc_rtx to signal that the instruction
40006 overwrites the whole destination and doesn't use its
40007 previous contents. */
40008 if (optimize)
40010 if (TREE_CODE (arg3) == INTEGER_CST)
40012 if (integer_all_onesp (arg3))
40013 op0 = pc_rtx;
40015 else if (TREE_CODE (arg3) == VECTOR_CST)
40017 unsigned int negative = 0;
40018 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40020 tree cst = VECTOR_CST_ELT (arg3, i);
40021 if (TREE_CODE (cst) == INTEGER_CST
40022 && tree_int_cst_sign_bit (cst))
40023 negative++;
40024 else if (TREE_CODE (cst) == REAL_CST
40025 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40026 negative++;
40028 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40029 op0 = pc_rtx;
40031 else if (TREE_CODE (arg3) == SSA_NAME
40032 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40034 /* Also recognize when the mask is like:
40035 __v2df src = _mm_setzero_pd ();
40036 __v2df mask = _mm_cmpeq_pd (src, src);
40038 __v8sf src = _mm256_setzero_ps ();
40039 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40040 as that is a cheaper way to load all ones into
40041 a register than having to load a constant from
40042 memory. */
40043 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40044 if (is_gimple_call (def_stmt))
40046 tree fndecl = gimple_call_fndecl (def_stmt);
40047 if (fndecl
40048 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40049 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40051 case IX86_BUILTIN_CMPPD:
40052 case IX86_BUILTIN_CMPPS:
40053 case IX86_BUILTIN_CMPPD256:
40054 case IX86_BUILTIN_CMPPS256:
40055 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40056 break;
40057 /* FALLTHRU */
40058 case IX86_BUILTIN_CMPEQPD:
40059 case IX86_BUILTIN_CMPEQPS:
40060 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40061 && initializer_zerop (gimple_call_arg (def_stmt,
40062 1)))
40063 op0 = pc_rtx;
40064 break;
40065 default:
40066 break;
40072 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40073 if (! pat)
40074 return const0_rtx;
40075 emit_insn (pat);
40077 switch (fcode)
40079 case IX86_BUILTIN_GATHER3DIV16SF:
40080 if (target == NULL_RTX)
40081 target = gen_reg_rtx (V8SFmode);
40082 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40083 break;
40084 case IX86_BUILTIN_GATHER3DIV16SI:
40085 if (target == NULL_RTX)
40086 target = gen_reg_rtx (V8SImode);
40087 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40088 break;
40089 case IX86_BUILTIN_GATHER3DIV8SF:
40090 case IX86_BUILTIN_GATHERDIV8SF:
40091 if (target == NULL_RTX)
40092 target = gen_reg_rtx (V4SFmode);
40093 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40094 break;
40095 case IX86_BUILTIN_GATHER3DIV8SI:
40096 case IX86_BUILTIN_GATHERDIV8SI:
40097 if (target == NULL_RTX)
40098 target = gen_reg_rtx (V4SImode);
40099 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40100 break;
40101 default:
40102 target = subtarget;
40103 break;
40105 return target;
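/* A rough illustration of the extraction above (a sketch of the intent,
   not authoritative): the DImode-index gathers produce only half as many
   elements as their 512/256-bit subtarget holds.  E.g. for
   IX86_BUILTIN_GATHER3DIV16SF there are eight DImode indices, so only
   eight SFmode values are gathered; they land in the low V8SF half of
   the V16SF subtarget, hence gen_vec_extract_lo_v16sf copies that half
   into the V8SFmode target.  */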
40107 scatter_gen:
40108 arg0 = CALL_EXPR_ARG (exp, 0);
40109 arg1 = CALL_EXPR_ARG (exp, 1);
40110 arg2 = CALL_EXPR_ARG (exp, 2);
40111 arg3 = CALL_EXPR_ARG (exp, 3);
40112 arg4 = CALL_EXPR_ARG (exp, 4);
40113 op0 = expand_normal (arg0);
40114 op1 = expand_normal (arg1);
40115 op2 = expand_normal (arg2);
40116 op3 = expand_normal (arg3);
40117 op4 = expand_normal (arg4);
40118 mode1 = insn_data[icode].operand[1].mode;
40119 mode2 = insn_data[icode].operand[2].mode;
40120 mode3 = insn_data[icode].operand[3].mode;
40121 mode4 = insn_data[icode].operand[4].mode;
40123 /* Force the memory operand to use only a base register here; we
40124 don't want to do this for the memory operands of other builtin
40125 functions. */
40126 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40128 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40129 op0 = copy_to_mode_reg (Pmode, op0);
40131 op1 = fixup_modeless_constant (op1, mode1);
40133 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40135 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40136 op1 = copy_to_mode_reg (mode1, op1);
40138 else
40140 op1 = copy_to_reg (op1);
40141 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40144 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40145 op2 = copy_to_mode_reg (mode2, op2);
40147 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40148 op3 = copy_to_mode_reg (mode3, op3);
40150 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40152 error ("the last argument must be scale 1, 2, 4, 8");
40153 return const0_rtx;
40156 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40157 if (! pat)
40158 return const0_rtx;
40160 emit_insn (pat);
40161 return 0;
40163 vec_prefetch_gen:
40164 arg0 = CALL_EXPR_ARG (exp, 0);
40165 arg1 = CALL_EXPR_ARG (exp, 1);
40166 arg2 = CALL_EXPR_ARG (exp, 2);
40167 arg3 = CALL_EXPR_ARG (exp, 3);
40168 arg4 = CALL_EXPR_ARG (exp, 4);
40169 op0 = expand_normal (arg0);
40170 op1 = expand_normal (arg1);
40171 op2 = expand_normal (arg2);
40172 op3 = expand_normal (arg3);
40173 op4 = expand_normal (arg4);
40174 mode0 = insn_data[icode].operand[0].mode;
40175 mode1 = insn_data[icode].operand[1].mode;
40176 mode3 = insn_data[icode].operand[3].mode;
40177 mode4 = insn_data[icode].operand[4].mode;
40179 op0 = fixup_modeless_constant (op0, mode0);
40181 if (GET_MODE (op0) == mode0
40182 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40184 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40185 op0 = copy_to_mode_reg (mode0, op0);
40187 else if (op0 != constm1_rtx)
40189 op0 = copy_to_reg (op0);
40190 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40193 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40194 op1 = copy_to_mode_reg (mode1, op1);
40196 /* Force the memory operand to use only a base register here; we
40197 don't want to do this for the memory operands of other builtin
40198 functions. */
40199 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40201 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40202 op2 = copy_to_mode_reg (Pmode, op2);
40204 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40206 error ("the fourth argument must be scale 1, 2, 4, 8");
40207 return const0_rtx;
40210 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40212 error ("incorrect hint operand");
40213 return const0_rtx;
40216 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40217 if (! pat)
40218 return const0_rtx;
40220 emit_insn (pat);
40222 return 0;
40224 case IX86_BUILTIN_XABORT:
40225 icode = CODE_FOR_xabort;
40226 arg0 = CALL_EXPR_ARG (exp, 0);
40227 op0 = expand_normal (arg0);
40228 mode0 = insn_data[icode].operand[0].mode;
40229 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40231 error ("the xabort's argument must be an 8-bit immediate");
40232 return const0_rtx;
40234 emit_insn (gen_xabort (op0));
40235 return 0;
40237 default:
40238 break;
40241 for (i = 0, d = bdesc_special_args;
40242 i < ARRAY_SIZE (bdesc_special_args);
40243 i++, d++)
40244 if (d->code == fcode)
40245 return ix86_expand_special_args_builtin (d, exp, target);
40247 for (i = 0, d = bdesc_args;
40248 i < ARRAY_SIZE (bdesc_args);
40249 i++, d++)
40250 if (d->code == fcode)
40251 switch (fcode)
40253 case IX86_BUILTIN_FABSQ:
40254 case IX86_BUILTIN_COPYSIGNQ:
40255 if (!TARGET_SSE)
40256 /* Emit a normal call if SSE isn't available. */
40257 return expand_call (exp, target, ignore);
40258 default:
40259 return ix86_expand_args_builtin (d, exp, target);
40262 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40263 if (d->code == fcode)
40264 return ix86_expand_sse_comi (d, exp, target);
40266 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40267 if (d->code == fcode)
40268 return ix86_expand_round_builtin (d, exp, target);
40270 for (i = 0, d = bdesc_pcmpestr;
40271 i < ARRAY_SIZE (bdesc_pcmpestr);
40272 i++, d++)
40273 if (d->code == fcode)
40274 return ix86_expand_sse_pcmpestr (d, exp, target);
40276 for (i = 0, d = bdesc_pcmpistr;
40277 i < ARRAY_SIZE (bdesc_pcmpistr);
40278 i++, d++)
40279 if (d->code == fcode)
40280 return ix86_expand_sse_pcmpistr (d, exp, target);
40282 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40283 if (d->code == fcode)
40284 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40285 (enum ix86_builtin_func_type)
40286 d->flag, d->comparison);
40288 gcc_unreachable ();
40291 /* This returns the target-specific builtin with code CODE if
40292 current_function_decl has visibility on this builtin, which is checked
40293 using isa flags. Returns NULL_TREE otherwise. */
40295 static tree ix86_get_builtin (enum ix86_builtins code)
40297 struct cl_target_option *opts;
40298 tree target_tree = NULL_TREE;
40300 /* Determine the isa flags of current_function_decl. */
40302 if (current_function_decl)
40303 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40305 if (target_tree == NULL)
40306 target_tree = target_option_default_node;
40308 opts = TREE_TARGET_OPTION (target_tree);
40310 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40311 return ix86_builtin_decl (code, true);
40312 else
40313 return NULL_TREE;
40316 /* Return the function decl for the target-specific builtin
40317 corresponding to the MPX builtin passed in FCODE. */
40318 static tree
40319 ix86_builtin_mpx_function (unsigned fcode)
40321 switch (fcode)
40323 case BUILT_IN_CHKP_BNDMK:
40324 return ix86_builtins[IX86_BUILTIN_BNDMK];
40326 case BUILT_IN_CHKP_BNDSTX:
40327 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40329 case BUILT_IN_CHKP_BNDLDX:
40330 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40332 case BUILT_IN_CHKP_BNDCL:
40333 return ix86_builtins[IX86_BUILTIN_BNDCL];
40335 case BUILT_IN_CHKP_BNDCU:
40336 return ix86_builtins[IX86_BUILTIN_BNDCU];
40338 case BUILT_IN_CHKP_BNDRET:
40339 return ix86_builtins[IX86_BUILTIN_BNDRET];
40341 case BUILT_IN_CHKP_INTERSECT:
40342 return ix86_builtins[IX86_BUILTIN_BNDINT];
40344 case BUILT_IN_CHKP_NARROW:
40345 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40347 case BUILT_IN_CHKP_SIZEOF:
40348 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40350 case BUILT_IN_CHKP_EXTRACT_LOWER:
40351 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40353 case BUILT_IN_CHKP_EXTRACT_UPPER:
40354 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40356 default:
40357 return NULL_TREE;
40360 gcc_unreachable ();
40363 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40365 Return an address to be used to load/store bounds for pointer
40366 passed in SLOT.
40368 SLOT_NO is an integer constant holding the number of a
40369 target-dependent special slot to be used in case SLOT is not a memory.
40371 SPECIAL_BASE is a pointer to be used as a base of fake address
40372 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40373 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40375 static rtx
40376 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40378 rtx addr = NULL;
40380 /* NULL slot means we pass bounds for pointer not passed to the
40381 function at all. Register slot means we pass pointer in a
40382 register. In both these cases bounds are passed via Bounds
40383 Table. Since we do not have actual pointer stored in memory,
40384 we have to use fake addresses to access Bounds Table. We
40385 start with (special_base - sizeof (void*)) and decrease this
40386 address by pointer size to get addresses for other slots. */
40387 if (!slot || REG_P (slot))
40389 gcc_assert (CONST_INT_P (slot_no));
40390 addr = plus_constant (Pmode, special_base,
40391 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40393 /* If pointer is passed in a memory then its address is used to
40394 access Bounds Table. */
40395 else if (MEM_P (slot))
40397 addr = XEXP (slot, 0);
40398 if (!register_operand (addr, Pmode))
40399 addr = copy_addr_to_reg (addr);
40401 else
40402 gcc_unreachable ();
40404 return addr;
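/* Worked example of the fake-address computation above, assuming
   64-bit Pmode (so GET_MODE_SIZE (Pmode) == 8): special slot 0 maps to
   SPECIAL_BASE - 8, slot 1 to SPECIAL_BASE - 16, and so on, following
   plus_constant (Pmode, special_base, -(slot_no + 1) * 8).  */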
40407 /* Expand pass uses this hook to load bounds for function parameter
40408 PTR passed in SLOT in case its bounds are not passed in a register.
40410 If SLOT is a memory, then bounds are loaded as for regular pointer
40411 loaded from memory. PTR may be NULL in case SLOT is a memory.
40412 In such case value of PTR (if required) may be loaded from SLOT.
40414 If SLOT is NULL or a register, then SLOT_NO is an integer constant
40415 holding the number of the target-dependent special slot which should
40416 be used to obtain bounds.
40418 Return loaded bounds. */
40420 static rtx
40421 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40423 rtx reg = gen_reg_rtx (BNDmode);
40424 rtx addr;
40426 /* Get address to be used to access Bounds Table. Special slots start
40427 at the location of return address of the current function. */
40428 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40430 /* Load pointer value from a memory if we don't have it. */
40431 if (!ptr)
40433 gcc_assert (MEM_P (slot));
40434 ptr = copy_addr_to_reg (slot);
40437 if (!register_operand (ptr, Pmode))
40438 ptr = ix86_zero_extend_to_Pmode (ptr);
40440 emit_insn (BNDmode == BND64mode
40441 ? gen_bnd64_ldx (reg, addr, ptr)
40442 : gen_bnd32_ldx (reg, addr, ptr));
40444 return reg;
40447 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40448 passed in SLOT in case BOUNDS are not passed in a register.
40450 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40451 stored in memory. PTR may be NULL in case SLOT is a memory.
40452 In such case value of PTR (if required) may be loaded from SLOT.
40454 If SLOT is NULL or a register, then SLOT_NO is an integer constant
40455 holding the number of the target-dependent special slot which should
40456 be used to store BOUNDS. */
40458 static void
40459 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40461 rtx addr;
40463 /* Get address to be used to access Bounds Table. Special slots start
40464 at the location of return address of a called function. */
40465 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40467 /* Load pointer value from a memory if we don't have it. */
40468 if (!ptr)
40470 gcc_assert (MEM_P (slot));
40471 ptr = copy_addr_to_reg (slot);
40474 if (!register_operand (ptr, Pmode))
40475 ptr = ix86_zero_extend_to_Pmode (ptr);
40477 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40478 if (!register_operand (bounds, BNDmode))
40479 bounds = copy_to_mode_reg (BNDmode, bounds);
40481 emit_insn (BNDmode == BND64mode
40482 ? gen_bnd64_stx (addr, ptr, bounds)
40483 : gen_bnd32_stx (addr, ptr, bounds));
40486 /* Load and return bounds returned by function in SLOT. */
40488 static rtx
40489 ix86_load_returned_bounds (rtx slot)
40491 rtx res;
40493 gcc_assert (REG_P (slot));
40494 res = gen_reg_rtx (BNDmode);
40495 emit_move_insn (res, slot);
40497 return res;
40500 /* Store BOUNDS returned by function into SLOT. */
40502 static void
40503 ix86_store_returned_bounds (rtx slot, rtx bounds)
40505 gcc_assert (REG_P (slot));
40506 emit_move_insn (slot, bounds);
40509 /* Returns a function decl for a vectorized version of the builtin function
40510 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40511 if it is not available. */
40513 static tree
40514 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40515 tree type_in)
40517 machine_mode in_mode, out_mode;
40518 int in_n, out_n;
40519 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40521 if (TREE_CODE (type_out) != VECTOR_TYPE
40522 || TREE_CODE (type_in) != VECTOR_TYPE
40523 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40524 return NULL_TREE;
40526 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40527 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40528 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40529 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40531 switch (fn)
40533 case BUILT_IN_SQRT:
40534 if (out_mode == DFmode && in_mode == DFmode)
40536 if (out_n == 2 && in_n == 2)
40537 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40538 else if (out_n == 4 && in_n == 4)
40539 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40540 else if (out_n == 8 && in_n == 8)
40541 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40543 break;
40545 case BUILT_IN_EXP2F:
40546 if (out_mode == SFmode && in_mode == SFmode)
40548 if (out_n == 16 && in_n == 16)
40549 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40551 break;
40553 case BUILT_IN_SQRTF:
40554 if (out_mode == SFmode && in_mode == SFmode)
40556 if (out_n == 4 && in_n == 4)
40557 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40558 else if (out_n == 8 && in_n == 8)
40559 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40560 else if (out_n == 16 && in_n == 16)
40561 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40563 break;
40565 case BUILT_IN_IFLOOR:
40566 case BUILT_IN_LFLOOR:
40567 case BUILT_IN_LLFLOOR:
40568 /* The round insn does not trap on denormals. */
40569 if (flag_trapping_math || !TARGET_ROUND)
40570 break;
40572 if (out_mode == SImode && in_mode == DFmode)
40574 if (out_n == 4 && in_n == 2)
40575 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40576 else if (out_n == 8 && in_n == 4)
40577 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40578 else if (out_n == 16 && in_n == 8)
40579 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40581 break;
40583 case BUILT_IN_IFLOORF:
40584 case BUILT_IN_LFLOORF:
40585 case BUILT_IN_LLFLOORF:
40586 /* The round insn does not trap on denormals. */
40587 if (flag_trapping_math || !TARGET_ROUND)
40588 break;
40590 if (out_mode == SImode && in_mode == SFmode)
40592 if (out_n == 4 && in_n == 4)
40593 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40594 else if (out_n == 8 && in_n == 8)
40595 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40597 break;
40599 case BUILT_IN_ICEIL:
40600 case BUILT_IN_LCEIL:
40601 case BUILT_IN_LLCEIL:
40602 /* The round insn does not trap on denormals. */
40603 if (flag_trapping_math || !TARGET_ROUND)
40604 break;
40606 if (out_mode == SImode && in_mode == DFmode)
40608 if (out_n == 4 && in_n == 2)
40609 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40610 else if (out_n == 8 && in_n == 4)
40611 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40612 else if (out_n == 16 && in_n == 8)
40613 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40615 break;
40617 case BUILT_IN_ICEILF:
40618 case BUILT_IN_LCEILF:
40619 case BUILT_IN_LLCEILF:
40620 /* The round insn does not trap on denormals. */
40621 if (flag_trapping_math || !TARGET_ROUND)
40622 break;
40624 if (out_mode == SImode && in_mode == SFmode)
40626 if (out_n == 4 && in_n == 4)
40627 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40628 else if (out_n == 8 && in_n == 8)
40629 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40631 break;
40633 case BUILT_IN_IRINT:
40634 case BUILT_IN_LRINT:
40635 case BUILT_IN_LLRINT:
40636 if (out_mode == SImode && in_mode == DFmode)
40638 if (out_n == 4 && in_n == 2)
40639 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40640 else if (out_n == 8 && in_n == 4)
40641 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40643 break;
40645 case BUILT_IN_IRINTF:
40646 case BUILT_IN_LRINTF:
40647 case BUILT_IN_LLRINTF:
40648 if (out_mode == SImode && in_mode == SFmode)
40650 if (out_n == 4 && in_n == 4)
40651 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40652 else if (out_n == 8 && in_n == 8)
40653 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40655 break;
40657 case BUILT_IN_IROUND:
40658 case BUILT_IN_LROUND:
40659 case BUILT_IN_LLROUND:
40660 /* The round insn does not trap on denormals. */
40661 if (flag_trapping_math || !TARGET_ROUND)
40662 break;
40664 if (out_mode == SImode && in_mode == DFmode)
40666 if (out_n == 4 && in_n == 2)
40667 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40668 else if (out_n == 8 && in_n == 4)
40669 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40670 else if (out_n == 16 && in_n == 8)
40671 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40673 break;
40675 case BUILT_IN_IROUNDF:
40676 case BUILT_IN_LROUNDF:
40677 case BUILT_IN_LLROUNDF:
40678 /* The round insn does not trap on denormals. */
40679 if (flag_trapping_math || !TARGET_ROUND)
40680 break;
40682 if (out_mode == SImode && in_mode == SFmode)
40684 if (out_n == 4 && in_n == 4)
40685 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40686 else if (out_n == 8 && in_n == 8)
40687 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40689 break;
40691 case BUILT_IN_COPYSIGN:
40692 if (out_mode == DFmode && in_mode == DFmode)
40694 if (out_n == 2 && in_n == 2)
40695 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40696 else if (out_n == 4 && in_n == 4)
40697 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40698 else if (out_n == 8 && in_n == 8)
40699 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40701 break;
40703 case BUILT_IN_COPYSIGNF:
40704 if (out_mode == SFmode && in_mode == SFmode)
40706 if (out_n == 4 && in_n == 4)
40707 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40708 else if (out_n == 8 && in_n == 8)
40709 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40710 else if (out_n == 16 && in_n == 16)
40711 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40713 break;
40715 case BUILT_IN_FLOOR:
40716 /* The round insn does not trap on denormals. */
40717 if (flag_trapping_math || !TARGET_ROUND)
40718 break;
40720 if (out_mode == DFmode && in_mode == DFmode)
40722 if (out_n == 2 && in_n == 2)
40723 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40724 else if (out_n == 4 && in_n == 4)
40725 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40727 break;
40729 case BUILT_IN_FLOORF:
40730 /* The round insn does not trap on denormals. */
40731 if (flag_trapping_math || !TARGET_ROUND)
40732 break;
40734 if (out_mode == SFmode && in_mode == SFmode)
40736 if (out_n == 4 && in_n == 4)
40737 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40738 else if (out_n == 8 && in_n == 8)
40739 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40741 break;
40743 case BUILT_IN_CEIL:
40744 /* The round insn does not trap on denormals. */
40745 if (flag_trapping_math || !TARGET_ROUND)
40746 break;
40748 if (out_mode == DFmode && in_mode == DFmode)
40750 if (out_n == 2 && in_n == 2)
40751 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40752 else if (out_n == 4 && in_n == 4)
40753 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40755 break;
40757 case BUILT_IN_CEILF:
40758 /* The round insn does not trap on denormals. */
40759 if (flag_trapping_math || !TARGET_ROUND)
40760 break;
40762 if (out_mode == SFmode && in_mode == SFmode)
40764 if (out_n == 4 && in_n == 4)
40765 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40766 else if (out_n == 8 && in_n == 8)
40767 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40769 break;
40771 case BUILT_IN_TRUNC:
40772 /* The round insn does not trap on denormals. */
40773 if (flag_trapping_math || !TARGET_ROUND)
40774 break;
40776 if (out_mode == DFmode && in_mode == DFmode)
40778 if (out_n == 2 && in_n == 2)
40779 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40780 else if (out_n == 4 && in_n == 4)
40781 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40783 break;
40785 case BUILT_IN_TRUNCF:
40786 /* The round insn does not trap on denormals. */
40787 if (flag_trapping_math || !TARGET_ROUND)
40788 break;
40790 if (out_mode == SFmode && in_mode == SFmode)
40792 if (out_n == 4 && in_n == 4)
40793 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40794 else if (out_n == 8 && in_n == 8)
40795 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40797 break;
40799 case BUILT_IN_RINT:
40800 /* The round insn does not trap on denormals. */
40801 if (flag_trapping_math || !TARGET_ROUND)
40802 break;
40804 if (out_mode == DFmode && in_mode == DFmode)
40806 if (out_n == 2 && in_n == 2)
40807 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40808 else if (out_n == 4 && in_n == 4)
40809 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40811 break;
40813 case BUILT_IN_RINTF:
40814 /* The round insn does not trap on denormals. */
40815 if (flag_trapping_math || !TARGET_ROUND)
40816 break;
40818 if (out_mode == SFmode && in_mode == SFmode)
40820 if (out_n == 4 && in_n == 4)
40821 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40822 else if (out_n == 8 && in_n == 8)
40823 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40825 break;
40827 case BUILT_IN_ROUND:
40828 /* The round insn does not trap on denormals. */
40829 if (flag_trapping_math || !TARGET_ROUND)
40830 break;
40832 if (out_mode == DFmode && in_mode == DFmode)
40834 if (out_n == 2 && in_n == 2)
40835 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40836 else if (out_n == 4 && in_n == 4)
40837 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40839 break;
40841 case BUILT_IN_ROUNDF:
40842 /* The round insn does not trap on denormals. */
40843 if (flag_trapping_math || !TARGET_ROUND)
40844 break;
40846 if (out_mode == SFmode && in_mode == SFmode)
40848 if (out_n == 4 && in_n == 4)
40849 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40850 else if (out_n == 8 && in_n == 8)
40851 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40853 break;
40855 case BUILT_IN_FMA:
40856 if (out_mode == DFmode && in_mode == DFmode)
40858 if (out_n == 2 && in_n == 2)
40859 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40860 if (out_n == 4 && in_n == 4)
40861 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40863 break;
40865 case BUILT_IN_FMAF:
40866 if (out_mode == SFmode && in_mode == SFmode)
40868 if (out_n == 4 && in_n == 4)
40869 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40870 if (out_n == 8 && in_n == 8)
40871 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40873 break;
40875 default:
40876 break;
40879 /* Dispatch to a handler for a vectorization library. */
40880 if (ix86_veclib_handler)
40881 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40882 type_in);
40884 return NULL_TREE;
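/* A rough example of the mapping above: vectorizing lfloor with
   type_out V4SI and type_in V2DF (out_n == 4, in_n == 2) selects
   IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, i.e. two V2DF inputs are floored
   and packed into a single V4SI result; that is why the output vector
   may have twice as many elements as the input vector.  */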
40887 /* Handler for an SVML-style interface to
40888 a library with vectorized intrinsics. */
40890 static tree
40891 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40893 char name[20];
40894 tree fntype, new_fndecl, args;
40895 unsigned arity;
40896 const char *bname;
40897 machine_mode el_mode, in_mode;
40898 int n, in_n;
40900 /* The SVML is suitable for unsafe math only. */
40901 if (!flag_unsafe_math_optimizations)
40902 return NULL_TREE;
40904 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40905 n = TYPE_VECTOR_SUBPARTS (type_out);
40906 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40907 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40908 if (el_mode != in_mode
40909 || n != in_n)
40910 return NULL_TREE;
40912 switch (fn)
40914 case BUILT_IN_EXP:
40915 case BUILT_IN_LOG:
40916 case BUILT_IN_LOG10:
40917 case BUILT_IN_POW:
40918 case BUILT_IN_TANH:
40919 case BUILT_IN_TAN:
40920 case BUILT_IN_ATAN:
40921 case BUILT_IN_ATAN2:
40922 case BUILT_IN_ATANH:
40923 case BUILT_IN_CBRT:
40924 case BUILT_IN_SINH:
40925 case BUILT_IN_SIN:
40926 case BUILT_IN_ASINH:
40927 case BUILT_IN_ASIN:
40928 case BUILT_IN_COSH:
40929 case BUILT_IN_COS:
40930 case BUILT_IN_ACOSH:
40931 case BUILT_IN_ACOS:
40932 if (el_mode != DFmode || n != 2)
40933 return NULL_TREE;
40934 break;
40936 case BUILT_IN_EXPF:
40937 case BUILT_IN_LOGF:
40938 case BUILT_IN_LOG10F:
40939 case BUILT_IN_POWF:
40940 case BUILT_IN_TANHF:
40941 case BUILT_IN_TANF:
40942 case BUILT_IN_ATANF:
40943 case BUILT_IN_ATAN2F:
40944 case BUILT_IN_ATANHF:
40945 case BUILT_IN_CBRTF:
40946 case BUILT_IN_SINHF:
40947 case BUILT_IN_SINF:
40948 case BUILT_IN_ASINHF:
40949 case BUILT_IN_ASINF:
40950 case BUILT_IN_COSHF:
40951 case BUILT_IN_COSF:
40952 case BUILT_IN_ACOSHF:
40953 case BUILT_IN_ACOSF:
40954 if (el_mode != SFmode || n != 4)
40955 return NULL_TREE;
40956 break;
40958 default:
40959 return NULL_TREE;
40962 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40964 if (fn == BUILT_IN_LOGF)
40965 strcpy (name, "vmlsLn4");
40966 else if (fn == BUILT_IN_LOG)
40967 strcpy (name, "vmldLn2");
40968 else if (n == 4)
40970 sprintf (name, "vmls%s", bname+10);
40971 name[strlen (name)-1] = '4';
40973 else
40974 sprintf (name, "vmld%s2", bname+10);
40976 /* Convert to uppercase. */
40977 name[4] &= ~0x20;
40979 arity = 0;
40980 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40981 args;
40982 args = TREE_CHAIN (args))
40983 arity++;
40985 if (arity == 1)
40986 fntype = build_function_type_list (type_out, type_in, NULL);
40987 else
40988 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40990 /* Build a function declaration for the vectorized function. */
40991 new_fndecl = build_decl (BUILTINS_LOCATION,
40992 FUNCTION_DECL, get_identifier (name), fntype);
40993 TREE_PUBLIC (new_fndecl) = 1;
40994 DECL_EXTERNAL (new_fndecl) = 1;
40995 DECL_IS_NOVOPS (new_fndecl) = 1;
40996 TREE_READONLY (new_fndecl) = 1;
40998 return new_fndecl;
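/* Name-mangling sketch for the SVML handler above, assuming the usual
   10-character "__builtin_" prefix: for BUILT_IN_SINF with n == 4,
   bname + 10 is "sinf", "vmls%s" gives "vmlssinf", the trailing
   character becomes '4' ("vmlssin4"), and clearing bit 0x20 of name[4]
   uppercases it to "vmlsSin4".  BUILT_IN_SIN with n == 2 similarly
   yields "vmldSin2".  */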
41001 /* Handler for an ACML-style interface to
41002 a library with vectorized intrinsics. */
41004 static tree
41005 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41007 char name[20] = "__vr.._";
41008 tree fntype, new_fndecl, args;
41009 unsigned arity;
41010 const char *bname;
41011 machine_mode el_mode, in_mode;
41012 int n, in_n;
41014 /* ACML is 64-bit only and suitable for unsafe math only, as it
41015 does not correctly support parts of IEEE with the required
41016 precision, such as denormals. */
41017 if (!TARGET_64BIT
41018 || !flag_unsafe_math_optimizations)
41019 return NULL_TREE;
41021 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41022 n = TYPE_VECTOR_SUBPARTS (type_out);
41023 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41024 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41025 if (el_mode != in_mode
41026 || n != in_n)
41027 return NULL_TREE;
41029 switch (fn)
41031 case BUILT_IN_SIN:
41032 case BUILT_IN_COS:
41033 case BUILT_IN_EXP:
41034 case BUILT_IN_LOG:
41035 case BUILT_IN_LOG2:
41036 case BUILT_IN_LOG10:
41037 name[4] = 'd';
41038 name[5] = '2';
41039 if (el_mode != DFmode
41040 || n != 2)
41041 return NULL_TREE;
41042 break;
41044 case BUILT_IN_SINF:
41045 case BUILT_IN_COSF:
41046 case BUILT_IN_EXPF:
41047 case BUILT_IN_POWF:
41048 case BUILT_IN_LOGF:
41049 case BUILT_IN_LOG2F:
41050 case BUILT_IN_LOG10F:
41051 name[4] = 's';
41052 name[5] = '4';
41053 if (el_mode != SFmode
41054 || n != 4)
41055 return NULL_TREE;
41056 break;
41058 default:
41059 return NULL_TREE;
41062 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41063 sprintf (name + 7, "%s", bname+10);
41065 arity = 0;
41066 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41067 args;
41068 args = TREE_CHAIN (args))
41069 arity++;
41071 if (arity == 1)
41072 fntype = build_function_type_list (type_out, type_in, NULL);
41073 else
41074 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41076 /* Build a function declaration for the vectorized function. */
41077 new_fndecl = build_decl (BUILTINS_LOCATION,
41078 FUNCTION_DECL, get_identifier (name), fntype);
41079 TREE_PUBLIC (new_fndecl) = 1;
41080 DECL_EXTERNAL (new_fndecl) = 1;
41081 DECL_IS_NOVOPS (new_fndecl) = 1;
41082 TREE_READONLY (new_fndecl) = 1;
41084 return new_fndecl;
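/* Name-mangling sketch for the ACML handler above, again assuming the
   10-character "__builtin_" prefix: starting from the template
   "__vr.._", BUILT_IN_SIN fills in 'd' and '2' to give "__vrd2_" and
   appends bname + 10, producing "__vrd2_sin"; BUILT_IN_SINF likewise
   produces "__vrs4_sinf".  */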
41087 /* Returns a decl of a function that implements gather load with
41088 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
41089 Return NULL_TREE if it is not available. */
41091 static tree
41092 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41093 const_tree index_type, int scale)
41095 bool si;
41096 enum ix86_builtins code;
41098 if (! TARGET_AVX2)
41099 return NULL_TREE;
41101 if ((TREE_CODE (index_type) != INTEGER_TYPE
41102 && !POINTER_TYPE_P (index_type))
41103 || (TYPE_MODE (index_type) != SImode
41104 && TYPE_MODE (index_type) != DImode))
41105 return NULL_TREE;
41107 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41108 return NULL_TREE;
41110 /* The v*gather* insns sign-extend the index to pointer mode. */
41111 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41112 && TYPE_UNSIGNED (index_type))
41113 return NULL_TREE;
41115 if (scale <= 0
41116 || scale > 8
41117 || (scale & (scale - 1)) != 0)
41118 return NULL_TREE;
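/* The checks above accept exactly the scales 1, 2, 4 and 8: a scale is
   rejected unless it is a positive power of two no larger than 8.
   For example, 3 fails because (3 & 2) != 0, while 4 passes because
   (4 & 3) == 0.  */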
41120 si = TYPE_MODE (index_type) == SImode;
41121 switch (TYPE_MODE (mem_vectype))
41123 case V2DFmode:
41124 if (TARGET_AVX512VL)
41125 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41126 else
41127 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41128 break;
41129 case V4DFmode:
41130 if (TARGET_AVX512VL)
41131 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41132 else
41133 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41134 break;
41135 case V2DImode:
41136 if (TARGET_AVX512VL)
41137 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41138 else
41139 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41140 break;
41141 case V4DImode:
41142 if (TARGET_AVX512VL)
41143 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41144 else
41145 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41146 break;
41147 case V4SFmode:
41148 if (TARGET_AVX512VL)
41149 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41150 else
41151 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41152 break;
41153 case V8SFmode:
41154 if (TARGET_AVX512VL)
41155 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41156 else
41157 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41158 break;
41159 case V4SImode:
41160 if (TARGET_AVX512VL)
41161 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41162 else
41163 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41164 break;
41165 case V8SImode:
41166 if (TARGET_AVX512VL)
41167 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41168 else
41169 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41170 break;
41171 case V8DFmode:
41172 if (TARGET_AVX512F)
41173 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41174 else
41175 return NULL_TREE;
41176 break;
41177 case V8DImode:
41178 if (TARGET_AVX512F)
41179 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41180 else
41181 return NULL_TREE;
41182 break;
41183 case V16SFmode:
41184 if (TARGET_AVX512F)
41185 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41186 else
41187 return NULL_TREE;
41188 break;
41189 case V16SImode:
41190 if (TARGET_AVX512F)
41191 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41192 else
41193 return NULL_TREE;
41194 break;
41195 default:
41196 return NULL_TREE;
41199 return ix86_get_builtin (code);
41202 /* Returns a code for a target-specific builtin that implements
41203 reciprocal of the function, or NULL_TREE if not available. */
41205 static tree
41206 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41208 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41209 && flag_finite_math_only && !flag_trapping_math
41210 && flag_unsafe_math_optimizations))
41211 return NULL_TREE;
41213 if (md_fn)
41214 /* Machine dependent builtins. */
41215 switch (fn)
41217 /* Vectorized version of sqrt to rsqrt conversion. */
41218 case IX86_BUILTIN_SQRTPS_NR:
41219 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41221 case IX86_BUILTIN_SQRTPS_NR256:
41222 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41224 default:
41225 return NULL_TREE;
41227 else
41228 /* Normal builtins. */
41229 switch (fn)
41231 /* Sqrt to rsqrt conversion. */
41232 case BUILT_IN_SQRTF:
41233 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41235 default:
41236 return NULL_TREE;
41240 /* Helper for avx_vpermilps256_operand et al. This is also used by
41241 the expansion functions to turn the parallel back into a mask.
41242 The return value is 0 for no match and the imm8+1 for a match. */
41245 avx_vpermilp_parallel (rtx par, machine_mode mode)
41247 unsigned i, nelt = GET_MODE_NUNITS (mode);
41248 unsigned mask = 0;
41249 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41251 if (XVECLEN (par, 0) != (int) nelt)
41252 return 0;
41254 /* Validate that all of the elements are constants, and not totally
41255 out of range. Copy the data into an integral array to make the
41256 subsequent checks easier. */
41257 for (i = 0; i < nelt; ++i)
41259 rtx er = XVECEXP (par, 0, i);
41260 unsigned HOST_WIDE_INT ei;
41262 if (!CONST_INT_P (er))
41263 return 0;
41264 ei = INTVAL (er);
41265 if (ei >= nelt)
41266 return 0;
41267 ipar[i] = ei;
41270 switch (mode)
41272 case V8DFmode:
41273 /* In the 512-bit DFmode case, we can only move elements within
41274 a 128-bit lane. First fill the second part of the mask,
41275 then fallthru. */
41276 for (i = 4; i < 6; ++i)
41278 if (ipar[i] < 4 || ipar[i] >= 6)
41279 return 0;
41280 mask |= (ipar[i] - 4) << i;
41282 for (i = 6; i < 8; ++i)
41284 if (ipar[i] < 6)
41285 return 0;
41286 mask |= (ipar[i] - 6) << i;
41288 /* FALLTHRU */
41290 case V4DFmode:
41291 /* In the 256-bit DFmode case, we can only move elements within
41292 a 128-bit lane. */
41293 for (i = 0; i < 2; ++i)
41295 if (ipar[i] >= 2)
41296 return 0;
41297 mask |= ipar[i] << i;
41299 for (i = 2; i < 4; ++i)
41301 if (ipar[i] < 2)
41302 return 0;
41303 mask |= (ipar[i] - 2) << i;
41305 break;
41307 case V16SFmode:
41308 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41309 must mirror the permutation in the lower 256 bits. */
41310 for (i = 0; i < 8; ++i)
41311 if (ipar[i] + 8 != ipar[i + 8])
41312 return 0;
41313 /* FALLTHRU */
41315 case V8SFmode:
41316 /* In the 256-bit SFmode case, we have full freedom of
41317 movement within the low 128-bit lane, but the high 128-bit
41318 lane must mirror the exact same pattern. */
41319 for (i = 0; i < 4; ++i)
41320 if (ipar[i] + 4 != ipar[i + 4])
41321 return 0;
41322 nelt = 4;
41323 /* FALLTHRU */
41325 case V2DFmode:
41326 case V4SFmode:
41327 /* In the 128-bit case, we've full freedom in the placement of
41328 the elements from the source operand. */
41329 for (i = 0; i < nelt; ++i)
41330 mask |= ipar[i] << (i * (nelt / 2));
41331 break;
41333 default:
41334 gcc_unreachable ();
41337 /* Make sure success has a non-zero value by adding one. */
41338 return mask + 1;
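/* Worked example for the encoding above: a V4SFmode parallel (1 0 3 2)
   falls into the 128-bit case and gives
     mask = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6 = 0xb1,
   so the function returns 0xb2, i.e. the vpermilps imm8 0xb1 plus one
   to keep success non-zero.  */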
41341 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41342 the expansion functions to turn the parallel back into a mask.
41343 The return value is 0 for no match and the imm8+1 for a match. */
41346 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41348 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41349 unsigned mask = 0;
41350 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41352 if (XVECLEN (par, 0) != (int) nelt)
41353 return 0;
41355 /* Validate that all of the elements are constants, and not totally
41356 out of range. Copy the data into an integral array to make the
41357 subsequent checks easier. */
41358 for (i = 0; i < nelt; ++i)
41360 rtx er = XVECEXP (par, 0, i);
41361 unsigned HOST_WIDE_INT ei;
41363 if (!CONST_INT_P (er))
41364 return 0;
41365 ei = INTVAL (er);
41366 if (ei >= 2 * nelt)
41367 return 0;
41368 ipar[i] = ei;
41371 /* Validate that the halves of the permute are halves. */
41372 for (i = 0; i < nelt2 - 1; ++i)
41373 if (ipar[i] + 1 != ipar[i + 1])
41374 return 0;
41375 for (i = nelt2; i < nelt - 1; ++i)
41376 if (ipar[i] + 1 != ipar[i + 1])
41377 return 0;
41379 /* Reconstruct the mask. */
41380 for (i = 0; i < 2; ++i)
41382 unsigned e = ipar[i * nelt2];
41383 if (e % nelt2)
41384 return 0;
41385 e /= nelt2;
41386 mask |= e << (i * 4);
41389 /* Make sure success has a non-zero value by adding one. */
41390 return mask + 1;
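/* Worked example for the encoding above: a V4DFmode parallel (2 3 4 5)
   -- the high lane of the first operand followed by the low lane of
   the second -- reconstructs to e == 1 for the low half and e == 2 for
   the high half, so mask = 1 | (2 << 4) = 0x21 and the function
   returns 0x22.  */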
41393 /* Return a register priority for hard reg REGNO. */
41394 static int
41395 ix86_register_priority (int hard_regno)
41397 /* ebp and r13 as the base always want a displacement, and r12 as
41398 the base always wants an index. So discourage their usage in an
41399 address. */
41400 if (hard_regno == R12_REG || hard_regno == R13_REG)
41401 return 0;
41402 if (hard_regno == BP_REG)
41403 return 1;
41404 /* New x86-64 int registers result in bigger code size. Discourage
41405 them. */
41406 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41407 return 2;
41408 /* New x86-64 SSE registers result in bigger code size. Discourage
41409 them. */
41410 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41411 return 2;
41412 /* Usage of AX register results in smaller code. Prefer it. */
41413 if (hard_regno == AX_REG)
41414 return 4;
41415 return 3;
41418 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41420 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41421 QImode must go into class Q_REGS.
41422 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41423 movdf to do mem-to-mem moves through integer regs. */
41425 static reg_class_t
41426 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41428 machine_mode mode = GET_MODE (x);
41430 /* We're only allowed to return a subclass of CLASS. Many of the
41431 following checks fail for NO_REGS, so eliminate that early. */
41432 if (regclass == NO_REGS)
41433 return NO_REGS;
41435 /* All classes can load zeros. */
41436 if (x == CONST0_RTX (mode))
41437 return regclass;
41439 /* Force constants into memory if we are loading a (nonzero) constant into
41440 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41441 instructions to load from a constant. */
41442 if (CONSTANT_P (x)
41443 && (MAYBE_MMX_CLASS_P (regclass)
41444 || MAYBE_SSE_CLASS_P (regclass)
41445 || MAYBE_MASK_CLASS_P (regclass)))
41446 return NO_REGS;
41448 /* Prefer SSE regs only, if we can use them for math. */
41449 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41450 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41452 /* Floating-point constants need more complex checks. */
41453 if (CONST_DOUBLE_P (x))
41455 /* General regs can load everything. */
41456 if (reg_class_subset_p (regclass, GENERAL_REGS))
41457 return regclass;
41459 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41460 zero above. We only want to wind up preferring 80387 registers if
41461 we plan on doing computation with them. */
41462 if (TARGET_80387
41463 && standard_80387_constant_p (x) > 0)
41465 /* Limit class to non-sse. */
41466 if (regclass == FLOAT_SSE_REGS)
41467 return FLOAT_REGS;
41468 if (regclass == FP_TOP_SSE_REGS)
41469 return FP_TOP_REG;
41470 if (regclass == FP_SECOND_SSE_REGS)
41471 return FP_SECOND_REG;
41472 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41473 return regclass;
41476 return NO_REGS;
41479 /* Generally when we see PLUS here, it's the function invariant
41480 (plus soft-fp const_int). Which can only be computed into general
41481 regs. */
41482 if (GET_CODE (x) == PLUS)
41483 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41485 /* QImode constants are easy to load, but non-constant QImode data
41486 must go into Q_REGS. */
41487 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41489 if (reg_class_subset_p (regclass, Q_REGS))
41490 return regclass;
41491 if (reg_class_subset_p (Q_REGS, regclass))
41492 return Q_REGS;
41493 return NO_REGS;
41496 return regclass;
41499 /* Discourage putting floating-point values in SSE registers unless
41500 SSE math is being used, and likewise for the 387 registers. */
41501 static reg_class_t
41502 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41504 machine_mode mode = GET_MODE (x);
41506 /* Restrict the output reload class to the register bank that we are doing
41507 math on. If we would like not to return a subset of CLASS, reject this
41508 alternative: if reload cannot do this, it will still use its choice. */
41509 mode = GET_MODE (x);
41510 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41511 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41513 if (X87_FLOAT_MODE_P (mode))
41515 if (regclass == FP_TOP_SSE_REGS)
41516 return FP_TOP_REG;
41517 else if (regclass == FP_SECOND_SSE_REGS)
41518 return FP_SECOND_REG;
41519 else
41520 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41523 return regclass;
41526 static reg_class_t
41527 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41528 machine_mode mode, secondary_reload_info *sri)
41530 /* Double-word spills from general registers to non-offsettable memory
41531 references (zero-extended addresses) require special handling. */
41532 if (TARGET_64BIT
41533 && MEM_P (x)
41534 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41535 && INTEGER_CLASS_P (rclass)
41536 && !offsettable_memref_p (x))
41538 sri->icode = (in_p
41539 ? CODE_FOR_reload_noff_load
41540 : CODE_FOR_reload_noff_store);
41541 /* Add the cost of moving address to a temporary. */
41542 sri->extra_cost = 1;
41544 return NO_REGS;
41547 /* QImode spills from non-QI registers require an
41548 intermediate register on 32-bit targets. */
41549 if (mode == QImode
41550 && (MAYBE_MASK_CLASS_P (rclass)
41551 || (!TARGET_64BIT && !in_p
41552 && INTEGER_CLASS_P (rclass)
41553 && MAYBE_NON_Q_CLASS_P (rclass))))
41555 int regno;
41557 if (REG_P (x))
41558 regno = REGNO (x);
41559 else
41560 regno = -1;
41562 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41563 regno = true_regnum (x);
41565 /* Return Q_REGS if the operand is in memory. */
41566 if (regno == -1)
41567 return Q_REGS;
41570 /* This condition handles the corner case where an expression involving
41571 pointers gets vectorized. We're trying to use the address of a
41572 stack slot as a vector initializer.
41574 (set (reg:V2DI 74 [ vect_cst_.2 ])
41575 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41577 Eventually frame gets turned into sp+offset like this:
41579 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41580 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41581 (const_int 392 [0x188]))))
41583 That later gets turned into:
41585 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41586 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41587 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41589 We'll have the following reload recorded:
41591 Reload 0: reload_in (DI) =
41592 (plus:DI (reg/f:DI 7 sp)
41593 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41594 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41595 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41596 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41597 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41598 reload_reg_rtx: (reg:V2DI 22 xmm1)
41600 Which isn't going to work since SSE instructions can't handle scalar
41601 additions. Returning GENERAL_REGS forces the addition into integer
41602 register and reload can handle subsequent reloads without problems. */
41604 if (in_p && GET_CODE (x) == PLUS
41605 && SSE_CLASS_P (rclass)
41606 && SCALAR_INT_MODE_P (mode))
41607 return GENERAL_REGS;
41609 return NO_REGS;
41612 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41614 static bool
41615 ix86_class_likely_spilled_p (reg_class_t rclass)
41617 switch (rclass)
41619 case AREG:
41620 case DREG:
41621 case CREG:
41622 case BREG:
41623 case AD_REGS:
41624 case SIREG:
41625 case DIREG:
41626 case SSE_FIRST_REG:
41627 case FP_TOP_REG:
41628 case FP_SECOND_REG:
41629 case BND_REGS:
41630 return true;
41632 default:
41633 break;
41636 return false;
41639 /* If we are copying between general and FP registers, we need a memory
41640 location. The same is true for SSE and MMX registers.
41642 To optimize register_move_cost performance, allow inline variant.
41644 The macro can't work reliably when one of the CLASSES is a class containing
41645 registers from multiple units (SSE, MMX, integer). We avoid this by never
41646 combining those units in single alternative in the machine description.
41647 Ensure that this constraint holds to avoid unexpected surprises.
41649 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41650 enforce these sanity checks. */
41652 static inline bool
41653 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41654 machine_mode mode, int strict)
41656 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41657 return false;
41658 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41659 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41660 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41661 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41662 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41663 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41665 gcc_assert (!strict || lra_in_progress);
41666 return true;
41669 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41670 return true;
41672 /* Between mask and general, we have moves no larger than word size. */
41673 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41674 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41675 return true;
41677 /* ??? This is a lie. We do have moves between mmx/general, and for
41678 mmx/sse2. But by saying we need secondary memory we discourage the
41679 register allocator from using the mmx registers unless needed. */
41680 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41681 return true;
41683 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41685 /* SSE1 doesn't have any direct moves from other classes. */
41686 if (!TARGET_SSE2)
41687 return true;
41689 /* If the target says that inter-unit moves are more expensive
41690 than moving through memory, then don't generate them. */
41691 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41692 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41693 return true;
41695 /* Between SSE and general, we have moves no larger than word size. */
41696 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41697 return true;
41700 return false;
41703 bool
41704 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41705 machine_mode mode, int strict)
41707 return inline_secondary_memory_needed (class1, class2, mode, strict);
41710 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41712 On the 80386, this is the size of MODE in words,
41713 except in the FP regs, where a single reg is always enough. */
41715 static unsigned char
41716 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41718 if (MAYBE_INTEGER_CLASS_P (rclass))
41720 if (mode == XFmode)
41721 return (TARGET_64BIT ? 2 : 3);
41722 else if (mode == XCmode)
41723 return (TARGET_64BIT ? 4 : 6);
41724 else
41725 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41727 else
41729 if (COMPLEX_MODE_P (mode))
41730 return 2;
41731 else
41732 return 1;
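/* For instance, following the code above: XFmode in an integer class
   needs 3 registers on a 32-bit target and 2 on a 64-bit one, while a
   mode held in FP, SSE or MMX registers needs a single register, or
   two for a complex mode.  */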
41736 /* Return true if the registers in CLASS cannot represent the change from
41737 modes FROM to TO. */
41739 bool
41740 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41741 enum reg_class regclass)
41743 if (from == to)
41744 return false;
41746 /* x87 registers can't do subreg at all, as all values are reformatted
41747 to extended precision. */
41748 if (MAYBE_FLOAT_CLASS_P (regclass))
41749 return true;
41751 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41753 /* Vector registers do not support QI or HImode loads. If we don't
41754 disallow a change to these modes, reload will assume it's ok to
41755 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41756 the vec_dupv4hi pattern. */
41757 if (GET_MODE_SIZE (from) < 4)
41758 return true;
41761 return false;
41764 /* Return the cost of moving data of mode M between a
41765 register and memory. A value of 2 is the default; this cost is
41766 relative to those in `REGISTER_MOVE_COST'.
41768 This function is used extensively by register_move_cost, which is used
41769 to build tables at startup. Make it inline in this case.
41770 When IN is 2, return maximum of in and out move cost.
41772 If moving between registers and memory is more expensive than
41773 between two registers, you should define this macro to express the
41774 relative cost.
41776 Also model the increased cost of moving QImode values in
41777 non-Q_REGS classes.
41779 static inline int
41780 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41781 int in)
41783 int cost;
41784 if (FLOAT_CLASS_P (regclass))
41786 int index;
41787 switch (mode)
41789 case SFmode:
41790 index = 0;
41791 break;
41792 case DFmode:
41793 index = 1;
41794 break;
41795 case XFmode:
41796 index = 2;
41797 break;
41798 default:
41799 return 100;
41801 if (in == 2)
41802 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41803 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41805 if (SSE_CLASS_P (regclass))
41807 int index;
41808 switch (GET_MODE_SIZE (mode))
41810 case 4:
41811 index = 0;
41812 break;
41813 case 8:
41814 index = 1;
41815 break;
41816 case 16:
41817 index = 2;
41818 break;
41819 default:
41820 return 100;
41822 if (in == 2)
41823 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41824 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41826 if (MMX_CLASS_P (regclass))
41828 int index;
41829 switch (GET_MODE_SIZE (mode))
41831 case 4:
41832 index = 0;
41833 break;
41834 case 8:
41835 index = 1;
41836 break;
41837 default:
41838 return 100;
41840 if (in == 2)
41841 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41842 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41844 switch (GET_MODE_SIZE (mode))
41846 case 1:
41847 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41849 if (!in)
41850 return ix86_cost->int_store[0];
41851 if (TARGET_PARTIAL_REG_DEPENDENCY
41852 && optimize_function_for_speed_p (cfun))
41853 cost = ix86_cost->movzbl_load;
41854 else
41855 cost = ix86_cost->int_load[0];
41856 if (in == 2)
41857 return MAX (cost, ix86_cost->int_store[0]);
41858 return cost;
41860 else
41862 if (in == 2)
41863 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41864 if (in)
41865 return ix86_cost->movzbl_load;
41866 else
41867 return ix86_cost->int_store[0] + 4;
41869 break;
41870 case 2:
41871 if (in == 2)
41872 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41873 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41874 default:
41875 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41876 if (mode == TFmode)
41877 mode = XFmode;
41878 if (in == 2)
41879 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41880 else if (in)
41881 cost = ix86_cost->int_load[2];
41882 else
41883 cost = ix86_cost->int_store[2];
41884 return (cost * (((int) GET_MODE_SIZE (mode)
41885 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
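/* A small worked example of the default (integer) case above: on a
   32-bit target (UNITS_PER_WORD == 4), loading a DImode value into
   general registers costs ix86_cost->int_load[2] * 2, since two
   word-sized moves are needed; TFmode is first treated as XFmode
   before the same size-based scaling is applied.  */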
41889 static int
41890 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41891 bool in)
41893 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41897 /* Return the cost of moving data from a register in class CLASS1 to
41898 one in class CLASS2.
41900 It is not required that the cost always equal 2 when FROM is the same as TO;
41901 on some machines it is expensive to move between registers if they are not
41902 general registers. */
41904 static int
41905 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41906 reg_class_t class2_i)
41908 enum reg_class class1 = (enum reg_class) class1_i;
41909 enum reg_class class2 = (enum reg_class) class2_i;
41911 /* In case we require secondary memory, compute the cost of the store
41912 followed by the load. In order to avoid bad register allocation choices,
41913 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.
41915 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41917 int cost = 1;
41919 cost += inline_memory_move_cost (mode, class1, 2);
41920 cost += inline_memory_move_cost (mode, class2, 2);
41922 /* In the case of copying from a general-purpose register we may emit
41923 multiple stores followed by a single load, causing a memory size
41924 mismatch stall. Count this as an arbitrarily high cost of 20. */
41925 if (targetm.class_max_nregs (class1, mode)
41926 > targetm.class_max_nregs (class2, mode))
41927 cost += 20;
41929 /* In the case of FP/MMX moves, the registers actually overlap, and we
41930 have to switch modes in order to treat them differently. */
41931 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41932 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41933 cost += 20;
41935 return cost;
41938 /* Moves between SSE/MMX and integer unit are expensive. */
41939 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41940 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41942 /* ??? By keeping the returned value relatively high, we limit the number
41943 of moves between integer and MMX/SSE registers for all targets.
41944 Additionally, a high value prevents problems with x86_modes_tieable_p(),
41945 where integer modes in MMX/SSE registers are not tieable
41946 because of missing QImode and HImode moves to, from or between
41947 MMX/SSE registers. */
41948 return MAX (8, ix86_cost->mmxsse_to_integer);
41950 if (MAYBE_FLOAT_CLASS_P (class1))
41951 return ix86_cost->fp_move;
41952 if (MAYBE_SSE_CLASS_P (class1))
41953 return ix86_cost->sse_move;
41954 if (MAYBE_MMX_CLASS_P (class1))
41955 return ix86_cost->mmx_move;
41956 return 2;
41959 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41960 MODE. */
41962 bool
41963 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41965 /* Flags and only flags can only hold CCmode values. */
41966 if (CC_REGNO_P (regno))
41967 return GET_MODE_CLASS (mode) == MODE_CC;
41968 if (GET_MODE_CLASS (mode) == MODE_CC
41969 || GET_MODE_CLASS (mode) == MODE_RANDOM
41970 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41971 return false;
41972 if (STACK_REGNO_P (regno))
41973 return VALID_FP_MODE_P (mode);
41974 if (MASK_REGNO_P (regno))
41975 return (VALID_MASK_REG_MODE (mode)
41976 || (TARGET_AVX512BW
41977 && VALID_MASK_AVX512BW_MODE (mode)));
41978 if (BND_REGNO_P (regno))
41979 return VALID_BND_REG_MODE (mode);
41980 if (SSE_REGNO_P (regno))
41982 /* We implement the move patterns for all vector modes into and
41983 out of SSE registers, even when no operation instructions
41984 are available. */
41986 /* For AVX-512 we allow, regardless of regno:
41987 - XI mode
41988 - any of 512-bit wide vector mode
41989 - any scalar mode. */
41990 if (TARGET_AVX512F
41991 && (mode == XImode
41992 || VALID_AVX512F_REG_MODE (mode)
41993 || VALID_AVX512F_SCALAR_MODE (mode)))
41994 return true;
41996 /* TODO check for QI/HI scalars. */
41997 /* AVX512VL allows SSE regs 16+ (xmm16-xmm31) for 128/256-bit modes. */
41998 if (TARGET_AVX512VL
41999 && (mode == OImode
42000 || mode == TImode
42001 || VALID_AVX256_REG_MODE (mode)
42002 || VALID_AVX512VL_128_REG_MODE (mode)))
42003 return true;
42005 /* xmm16-xmm31 are only available for AVX-512. */
42006 if (EXT_REX_SSE_REGNO_P (regno))
42007 return false;
42009 /* OImode and AVX modes are available only when AVX is enabled. */
42010 return ((TARGET_AVX
42011 && VALID_AVX256_REG_OR_OI_MODE (mode))
42012 || VALID_SSE_REG_MODE (mode)
42013 || VALID_SSE2_REG_MODE (mode)
42014 || VALID_MMX_REG_MODE (mode)
42015 || VALID_MMX_REG_MODE_3DNOW (mode));
42017 if (MMX_REGNO_P (regno))
42019 /* We implement the move patterns for 3DNOW modes even in MMX mode,
42020 so if the register is available at all, then we can move data of
42021 the given mode into or out of it. */
42022 return (VALID_MMX_REG_MODE (mode)
42023 || VALID_MMX_REG_MODE_3DNOW (mode));
42026 if (mode == QImode)
42028 /* Take care with QImode values - they can be in non-QI regs,
42029 but then they do cause partial register stalls. */
42030 if (ANY_QI_REGNO_P (regno))
42031 return true;
42032 if (!TARGET_PARTIAL_REG_STALL)
42033 return true;
42034 /* LRA checks if the hard register is OK for the given mode.
42035 QImode values can live in non-QI regs, so we allow all
42036 registers here. */
42037 if (lra_in_progress)
42038 return true;
42039 return !can_create_pseudo_p ();
42041 /* We handle both integers and floats in the general purpose registers. */
42042 else if (VALID_INT_MODE_P (mode))
42043 return true;
42044 else if (VALID_FP_MODE_P (mode))
42045 return true;
42046 else if (VALID_DFP_MODE_P (mode))
42047 return true;
42048 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
42049 on to use that value in smaller contexts, this can easily force a
42050 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
42051 supporting DImode, allow it. */
42052 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42053 return true;
42055 return false;
42058 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
42059 tieable integer mode. */
42061 static bool
42062 ix86_tieable_integer_mode_p (machine_mode mode)
42064 switch (mode)
42066 case HImode:
42067 case SImode:
42068 return true;
42070 case QImode:
42071 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42073 case DImode:
42074 return TARGET_64BIT;
42076 default:
42077 return false;
42081 /* Return true if MODE1 is accessible in a register that can hold MODE2
42082 without copying. That is, all register classes that can hold MODE2
42083 can also hold MODE1. */
42085 bool
42086 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42088 if (mode1 == mode2)
42089 return true;
42091 if (ix86_tieable_integer_mode_p (mode1)
42092 && ix86_tieable_integer_mode_p (mode2))
42093 return true;
42095 /* MODE2 being XFmode implies fp stack or general regs, which means we
42096 can tie any smaller floating point modes to it. Note that we do not
42097 tie this with TFmode. */
42098 if (mode2 == XFmode)
42099 return mode1 == SFmode || mode1 == DFmode;
42101 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42102 that we can tie it with SFmode. */
42103 if (mode2 == DFmode)
42104 return mode1 == SFmode;
42106 /* If MODE2 is only appropriate for an SSE register, then tie with
42107 any other mode acceptable to SSE registers. */
42108 if (GET_MODE_SIZE (mode2) == 32
42109 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42110 return (GET_MODE_SIZE (mode1) == 32
42111 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42112 if (GET_MODE_SIZE (mode2) == 16
42113 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42114 return (GET_MODE_SIZE (mode1) == 16
42115 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42117 /* If MODE2 is appropriate for an MMX register, then tie
42118 with any other mode acceptable to MMX registers. */
42119 if (GET_MODE_SIZE (mode2) == 8
42120 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42121 return (GET_MODE_SIZE (mode1) == 8
42122 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42124 return false;
42127 /* Return the cost of moving between two registers of mode MODE. */
42129 static int
42130 ix86_set_reg_reg_cost (machine_mode mode)
42132 unsigned int units = UNITS_PER_WORD;
42134 switch (GET_MODE_CLASS (mode))
42136 default:
42137 break;
42139 case MODE_CC:
42140 units = GET_MODE_SIZE (CCmode);
42141 break;
42143 case MODE_FLOAT:
42144 if ((TARGET_SSE && mode == TFmode)
42145 || (TARGET_80387 && mode == XFmode)
42146 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42147 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42148 units = GET_MODE_SIZE (mode);
42149 break;
42151 case MODE_COMPLEX_FLOAT:
42152 if ((TARGET_SSE && mode == TCmode)
42153 || (TARGET_80387 && mode == XCmode)
42154 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42155 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42156 units = GET_MODE_SIZE (mode);
42157 break;
42159 case MODE_VECTOR_INT:
42160 case MODE_VECTOR_FLOAT:
42161 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42162 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42163 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42164 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42165 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42166 units = GET_MODE_SIZE (mode);
42169 /* Return the cost of moving between two registers of mode MODE,
42170 assuming that the move will be in pieces of at most UNITS bytes. */
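/* For illustration: with TARGET_AVX a 32-byte V8SF copy has units == 32 and
   costs COSTS_N_INSNS (1); when none of the vector conditions above match,
   units stays at UNITS_PER_WORD and the same copy is costed as several
   word-sized pieces. */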
42171 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42174 /* Compute a (partial) cost for rtx X. Return true if the complete
42175 cost has been computed, and false if subexpressions should be
42176 scanned. In either case, *TOTAL contains the cost result. */
42178 static bool
42179 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42180 bool speed)
42182 rtx mask;
42183 enum rtx_code code = (enum rtx_code) code_i;
42184 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42185 machine_mode mode = GET_MODE (x);
42186 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42188 switch (code)
42190 case SET:
42191 if (register_operand (SET_DEST (x), VOIDmode)
42192 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42194 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42195 return true;
42197 return false;
42199 case CONST_INT:
42200 case CONST:
42201 case LABEL_REF:
42202 case SYMBOL_REF:
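/* Illustrative costing (added comment): on x86_64 a constant such as
   0x1234567890 fits neither a sign- nor a zero-extended 32-bit immediate
   and gets cost 3 below (it needs a movabs), while a small constant such
   as 42 falls through to cost 0. */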
42203 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42204 *total = 3;
42205 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42206 *total = 2;
42207 else if (flag_pic && SYMBOLIC_CONST (x)
42208 && !(TARGET_64BIT
42209 && (GET_CODE (x) == LABEL_REF
42210 || (GET_CODE (x) == SYMBOL_REF
42211 && SYMBOL_REF_LOCAL_P (x))))
42212 /* Use 0 cost for CONST to improve its propagation. */
42213 && (TARGET_64BIT || GET_CODE (x) != CONST))
42214 *total = 1;
42215 else
42216 *total = 0;
42217 return true;
42219 case CONST_WIDE_INT:
42220 *total = 0;
42221 return true;
42223 case CONST_DOUBLE:
42224 switch (standard_80387_constant_p (x))
42226 case 1: /* 0.0 */
42227 *total = 1;
42228 return true;
42229 default: /* Other constants */
42230 *total = 2;
42231 return true;
42232 case 0:
42233 case -1:
42234 break;
42236 if (SSE_FLOAT_MODE_P (mode))
42238 case CONST_VECTOR:
42239 switch (standard_sse_constant_p (x))
42241 case 0:
42242 break;
42243 case 1: /* 0: xor eliminates false dependency */
42244 *total = 0;
42245 return true;
42246 default: /* -1: cmp contains false dependency */
42247 *total = 1;
42248 return true;
42251 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42252 it'll probably end up. Add a penalty for size. */
42253 *total = (COSTS_N_INSNS (1)
42254 + (flag_pic != 0 && !TARGET_64BIT)
42255 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42256 return true;
42258 case ZERO_EXTEND:
42259 /* Zero extension is often completely free on x86_64, so make
42260 it as cheap as possible. */
42261 if (TARGET_64BIT && mode == DImode
42262 && GET_MODE (XEXP (x, 0)) == SImode)
42263 *total = 1;
42264 else if (TARGET_ZERO_EXTEND_WITH_AND)
42265 *total = cost->add;
42266 else
42267 *total = cost->movzx;
42268 return false;
42270 case SIGN_EXTEND:
42271 *total = cost->movsx;
42272 return false;
42274 case ASHIFT:
42275 if (SCALAR_INT_MODE_P (mode)
42276 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42277 && CONST_INT_P (XEXP (x, 1)))
42279 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42280 if (value == 1)
42282 *total = cost->add;
42283 return false;
42285 if ((value == 2 || value == 3)
42286 && cost->lea <= cost->shift_const)
42288 *total = cost->lea;
42289 return false;
42292 /* FALLTHRU */
42294 case ROTATE:
42295 case ASHIFTRT:
42296 case LSHIFTRT:
42297 case ROTATERT:
42298 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42300 /* ??? Should be SSE vector operation cost. */
42301 /* At least for published AMD latencies, this really is the same
42302 as the latency for a simple fpu operation like fabs. */
42303 /* V*QImode is emulated with 1-11 insns. */
42304 if (mode == V16QImode || mode == V32QImode)
42306 int count = 11;
42307 if (TARGET_XOP && mode == V16QImode)
42309 /* For XOP we use vpshab, which requires a broadcast of the
42310 value to the variable shift insn. For constants this
42311 means a V16QI constant in memory; even when we can perform the
42312 shift with one insn, set the cost to prefer paddb. */
42313 if (CONSTANT_P (XEXP (x, 1)))
42315 *total = (cost->fabs
42316 + rtx_cost (XEXP (x, 0), code, 0, speed)
42317 + (speed ? 2 : COSTS_N_BYTES (16)));
42318 return true;
42320 count = 3;
42322 else if (TARGET_SSSE3)
42323 count = 7;
42324 *total = cost->fabs * count;
42326 else
42327 *total = cost->fabs;
42329 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42331 if (CONST_INT_P (XEXP (x, 1)))
42333 if (INTVAL (XEXP (x, 1)) > 32)
42334 *total = cost->shift_const + COSTS_N_INSNS (2);
42335 else
42336 *total = cost->shift_const * 2;
42338 else
42340 if (GET_CODE (XEXP (x, 1)) == AND)
42341 *total = cost->shift_var * 2;
42342 else
42343 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42346 else
42348 if (CONST_INT_P (XEXP (x, 1)))
42349 *total = cost->shift_const;
42350 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42351 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42353 /* Return the cost after shift-and truncation. */
42354 *total = cost->shift_var;
42355 return true;
42357 else
42358 *total = cost->shift_var;
42360 return false;
42362 case FMA:
42364 rtx sub;
42366 gcc_assert (FLOAT_MODE_P (mode));
42367 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42369 /* ??? SSE scalar/vector cost should be used here. */
42370 /* ??? Bald assumption that fma has the same cost as fmul. */
42371 *total = cost->fmul;
42372 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42374 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42375 sub = XEXP (x, 0);
42376 if (GET_CODE (sub) == NEG)
42377 sub = XEXP (sub, 0);
42378 *total += rtx_cost (sub, FMA, 0, speed);
42380 sub = XEXP (x, 2);
42381 if (GET_CODE (sub) == NEG)
42382 sub = XEXP (sub, 0);
42383 *total += rtx_cost (sub, FMA, 2, speed);
42384 return true;
42387 case MULT:
42388 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42390 /* ??? SSE scalar cost should be used here. */
42391 *total = cost->fmul;
42392 return false;
42394 else if (X87_FLOAT_MODE_P (mode))
42396 *total = cost->fmul;
42397 return false;
42399 else if (FLOAT_MODE_P (mode))
42401 /* ??? SSE vector cost should be used here. */
42402 *total = cost->fmul;
42403 return false;
42405 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42407 /* V*QImode is emulated with 7-13 insns. */
42408 if (mode == V16QImode || mode == V32QImode)
42410 int extra = 11;
42411 if (TARGET_XOP && mode == V16QImode)
42412 extra = 5;
42413 else if (TARGET_SSSE3)
42414 extra = 6;
42415 *total = cost->fmul * 2 + cost->fabs * extra;
42417 /* V*DImode is emulated with 5-8 insns. */
42418 else if (mode == V2DImode || mode == V4DImode)
42420 if (TARGET_XOP && mode == V2DImode)
42421 *total = cost->fmul * 2 + cost->fabs * 3;
42422 else
42423 *total = cost->fmul * 3 + cost->fabs * 5;
42425 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42426 insns, including two PMULUDQ. */
42427 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42428 *total = cost->fmul * 2 + cost->fabs * 5;
42429 else
42430 *total = cost->fmul;
42431 return false;
42433 else
42435 rtx op0 = XEXP (x, 0);
42436 rtx op1 = XEXP (x, 1);
42437 int nbits;
42438 if (CONST_INT_P (XEXP (x, 1)))
42440 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42441 for (nbits = 0; value != 0; value &= value - 1)
42442 nbits++;
42444 else
42445 /* This is arbitrary. */
42446 nbits = 7;
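/* Worked example (illustrative): a multiplication by the constant 10
   (binary 1010) has two set bits, so nbits == 2 and the cost computed
   below is mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the two
   operand costs. */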
42448 /* Compute costs correctly for widening multiplication. */
42449 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42450 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42451 == GET_MODE_SIZE (mode))
42453 int is_mulwiden = 0;
42454 machine_mode inner_mode = GET_MODE (op0);
42456 if (GET_CODE (op0) == GET_CODE (op1))
42457 is_mulwiden = 1, op1 = XEXP (op1, 0);
42458 else if (CONST_INT_P (op1))
42460 if (GET_CODE (op0) == SIGN_EXTEND)
42461 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42462 == INTVAL (op1);
42463 else
42464 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42467 if (is_mulwiden)
42468 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42471 *total = (cost->mult_init[MODE_INDEX (mode)]
42472 + nbits * cost->mult_bit
42473 + rtx_cost (op0, outer_code, opno, speed)
42474 + rtx_cost (op1, outer_code, opno, speed));
42476 return true;
42479 case DIV:
42480 case UDIV:
42481 case MOD:
42482 case UMOD:
42483 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42484 /* ??? SSE cost should be used here. */
42485 *total = cost->fdiv;
42486 else if (X87_FLOAT_MODE_P (mode))
42487 *total = cost->fdiv;
42488 else if (FLOAT_MODE_P (mode))
42489 /* ??? SSE vector cost should be used here. */
42490 *total = cost->fdiv;
42491 else
42492 *total = cost->divide[MODE_INDEX (mode)];
42493 return false;
42495 case PLUS:
42496 if (GET_MODE_CLASS (mode) == MODE_INT
42497 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
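/* The three patterns matched below are address-style expressions that a
   single lea can compute; e.g. (plus (plus (mult reg 8) reg) const)
   corresponds to an instruction such as "leal 4(%ebx,%eax,8),%ecx", so it
   is costed as one lea plus the operand costs. */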
42499 if (GET_CODE (XEXP (x, 0)) == PLUS
42500 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42501 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42502 && CONSTANT_P (XEXP (x, 1)))
42504 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42505 if (val == 2 || val == 4 || val == 8)
42507 *total = cost->lea;
42508 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42509 outer_code, opno, speed);
42510 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42511 outer_code, opno, speed);
42512 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42513 return true;
42516 else if (GET_CODE (XEXP (x, 0)) == MULT
42517 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42519 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42520 if (val == 2 || val == 4 || val == 8)
42522 *total = cost->lea;
42523 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42524 outer_code, opno, speed);
42525 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42526 return true;
42529 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42531 *total = cost->lea;
42532 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42533 outer_code, opno, speed);
42534 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42535 outer_code, opno, speed);
42536 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42537 return true;
42540 /* FALLTHRU */
42542 case MINUS:
42543 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42545 /* ??? SSE cost should be used here. */
42546 *total = cost->fadd;
42547 return false;
42549 else if (X87_FLOAT_MODE_P (mode))
42551 *total = cost->fadd;
42552 return false;
42554 else if (FLOAT_MODE_P (mode))
42556 /* ??? SSE vector cost should be used here. */
42557 *total = cost->fadd;
42558 return false;
42560 /* FALLTHRU */
42562 case AND:
42563 case IOR:
42564 case XOR:
42565 if (GET_MODE_CLASS (mode) == MODE_INT
42566 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42568 *total = (cost->add * 2
42569 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42570 << (GET_MODE (XEXP (x, 0)) != DImode))
42571 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42572 << (GET_MODE (XEXP (x, 1)) != DImode)));
42573 return true;
42575 /* FALLTHRU */
42577 case NEG:
42578 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42580 /* ??? SSE cost should be used here. */
42581 *total = cost->fchs;
42582 return false;
42584 else if (X87_FLOAT_MODE_P (mode))
42586 *total = cost->fchs;
42587 return false;
42589 else if (FLOAT_MODE_P (mode))
42591 /* ??? SSE vector cost should be used here. */
42592 *total = cost->fchs;
42593 return false;
42595 /* FALLTHRU */
42597 case NOT:
42598 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42600 /* ??? Should be SSE vector operation cost. */
42601 /* At least for published AMD latencies, this really is the same
42602 as the latency for a simple fpu operation like fabs. */
42603 *total = cost->fabs;
42605 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42606 *total = cost->add * 2;
42607 else
42608 *total = cost->add;
42609 return false;
42611 case COMPARE:
42612 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42613 && XEXP (XEXP (x, 0), 1) == const1_rtx
42614 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42615 && XEXP (x, 1) == const0_rtx)
42617 /* This kind of construct is implemented using test[bwl].
42618 Treat it as if we had an AND. */
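/* For illustration: (compare (zero_extract X (const_int 1) (const_int 5))
   (const_int 0)) tests bit 5 of X and can be emitted as "testl $0x20, X". */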
42619 *total = (cost->add
42620 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42621 + rtx_cost (const1_rtx, outer_code, opno, speed));
42622 return true;
42625 /* The embedded comparison operand is completely free. */
42626 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42627 && XEXP (x, 1) == const0_rtx)
42628 *total = 0;
42630 return false;
42632 case FLOAT_EXTEND:
42633 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42634 *total = 0;
42635 return false;
42637 case ABS:
42638 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42639 /* ??? SSE cost should be used here. */
42640 *total = cost->fabs;
42641 else if (X87_FLOAT_MODE_P (mode))
42642 *total = cost->fabs;
42643 else if (FLOAT_MODE_P (mode))
42644 /* ??? SSE vector cost should be used here. */
42645 *total = cost->fabs;
42646 return false;
42648 case SQRT:
42649 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42650 /* ??? SSE cost should be used here. */
42651 *total = cost->fsqrt;
42652 else if (X87_FLOAT_MODE_P (mode))
42653 *total = cost->fsqrt;
42654 else if (FLOAT_MODE_P (mode))
42655 /* ??? SSE vector cost should be used here. */
42656 *total = cost->fsqrt;
42657 return false;
42659 case UNSPEC:
42660 if (XINT (x, 1) == UNSPEC_TP)
42661 *total = 0;
42662 return false;
42664 case VEC_SELECT:
42665 case VEC_CONCAT:
42666 case VEC_DUPLICATE:
42667 /* ??? Assume all of these vector manipulation patterns are
42668 recognizable, in which case they all pretty much have the
42669 same cost. */
42670 *total = cost->fabs;
42671 return true;
42672 case VEC_MERGE:
42673 mask = XEXP (x, 2);
42674 /* This is a masked instruction; assume the same cost
42675 as the non-masked variant. */
42676 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42677 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42678 else
42679 *total = cost->fabs;
42680 return true;
42682 default:
42683 return false;
42687 #if TARGET_MACHO
42689 static int current_machopic_label_num;
42691 /* Given a symbol name and its associated stub, write out the
42692 definition of the stub. */
42694 void
42695 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42697 unsigned int length;
42698 char *binder_name, *symbol_name, lazy_ptr_name[32];
42699 int label = ++current_machopic_label_num;
42701 /* For 64-bit we shouldn't get here. */
42702 gcc_assert (!TARGET_64BIT);
42704 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42705 symb = targetm.strip_name_encoding (symb);
42707 length = strlen (stub);
42708 binder_name = XALLOCAVEC (char, length + 32);
42709 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42711 length = strlen (symb);
42712 symbol_name = XALLOCAVEC (char, length + 32);
42713 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42715 sprintf (lazy_ptr_name, "L%d$lz", label);
42717 if (MACHOPIC_ATT_STUB)
42718 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42719 else if (MACHOPIC_PURE)
42720 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42721 else
42722 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42724 fprintf (file, "%s:\n", stub);
42725 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42727 if (MACHOPIC_ATT_STUB)
42729 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42731 else if (MACHOPIC_PURE)
42733 /* PIC stub. */
42734 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42735 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42736 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42737 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42738 label, lazy_ptr_name, label);
42739 fprintf (file, "\tjmp\t*%%ecx\n");
42741 else
42742 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42744 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42745 it needs no stub-binding-helper. */
42746 if (MACHOPIC_ATT_STUB)
42747 return;
42749 fprintf (file, "%s:\n", binder_name);
42751 if (MACHOPIC_PURE)
42753 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42754 fprintf (file, "\tpushl\t%%ecx\n");
42756 else
42757 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42759 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42761 /* N.B. Keep the correspondence of these
42762 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42763 old-pic/new-pic/non-pic stubs; altering this will break
42764 compatibility with existing dylibs. */
42765 if (MACHOPIC_PURE)
42767 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42768 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42770 else
42771 /* 16-byte -mdynamic-no-pic stub. */
42772 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42774 fprintf (file, "%s:\n", lazy_ptr_name);
42775 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42776 fprintf (file, ASM_LONG "%s\n", binder_name);
42778 #endif /* TARGET_MACHO */
42780 /* Order the registers for the register allocator. */
42782 void
42783 x86_order_regs_for_local_alloc (void)
42785 int pos = 0;
42786 int i;
42788 /* First allocate the local general purpose registers. */
42789 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42790 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42791 reg_alloc_order [pos++] = i;
42793 /* Global general purpose registers. */
42794 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42795 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42796 reg_alloc_order [pos++] = i;
42798 /* x87 registers come first in case we are doing FP math
42799 using them. */
42800 if (!TARGET_SSE_MATH)
42801 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42802 reg_alloc_order [pos++] = i;
42804 /* SSE registers. */
42805 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42806 reg_alloc_order [pos++] = i;
42807 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42808 reg_alloc_order [pos++] = i;
42810 /* Extended REX SSE registers. */
42811 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42812 reg_alloc_order [pos++] = i;
42814 /* Mask registers. */
42815 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42816 reg_alloc_order [pos++] = i;
42818 /* MPX bound registers. */
42819 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42820 reg_alloc_order [pos++] = i;
42822 /* x87 registers. */
42823 if (TARGET_SSE_MATH)
42824 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42825 reg_alloc_order [pos++] = i;
42827 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42828 reg_alloc_order [pos++] = i;
42830 /* Initialize the rest of the array, as we do not allocate some registers
42831 at all. */
42832 while (pos < FIRST_PSEUDO_REGISTER)
42833 reg_alloc_order [pos++] = 0;
42836 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42837 in struct attribute_spec.handler. */
42838 static tree
42839 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42840 tree args,
42841 int,
42842 bool *no_add_attrs)
42844 if (TREE_CODE (*node) != FUNCTION_TYPE
42845 && TREE_CODE (*node) != METHOD_TYPE
42846 && TREE_CODE (*node) != FIELD_DECL
42847 && TREE_CODE (*node) != TYPE_DECL)
42849 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42850 name);
42851 *no_add_attrs = true;
42852 return NULL_TREE;
42854 if (TARGET_64BIT)
42856 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42857 name);
42858 *no_add_attrs = true;
42859 return NULL_TREE;
42861 if (is_attribute_p ("callee_pop_aggregate_return", name))
42863 tree cst;
42865 cst = TREE_VALUE (args);
42866 if (TREE_CODE (cst) != INTEGER_CST)
42868 warning (OPT_Wattributes,
42869 "%qE attribute requires an integer constant argument",
42870 name);
42871 *no_add_attrs = true;
42873 else if (compare_tree_int (cst, 0) != 0
42874 && compare_tree_int (cst, 1) != 0)
42876 warning (OPT_Wattributes,
42877 "argument to %qE attribute is neither zero, nor one",
42878 name);
42879 *no_add_attrs = true;
42882 return NULL_TREE;
42885 return NULL_TREE;
42888 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42889 struct attribute_spec.handler. */
42890 static tree
42891 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42892 bool *no_add_attrs)
42894 if (TREE_CODE (*node) != FUNCTION_TYPE
42895 && TREE_CODE (*node) != METHOD_TYPE
42896 && TREE_CODE (*node) != FIELD_DECL
42897 && TREE_CODE (*node) != TYPE_DECL)
42899 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42900 name);
42901 *no_add_attrs = true;
42902 return NULL_TREE;
42905 /* Can combine regparm with all attributes but fastcall. */
42906 if (is_attribute_p ("ms_abi", name))
42908 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42910 error ("ms_abi and sysv_abi attributes are not compatible");
42913 return NULL_TREE;
42915 else if (is_attribute_p ("sysv_abi", name))
42917 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42919 error ("ms_abi and sysv_abi attributes are not compatible");
42922 return NULL_TREE;
42925 return NULL_TREE;
42928 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42929 struct attribute_spec.handler. */
42930 static tree
42931 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42932 bool *no_add_attrs)
42934 tree *type = NULL;
42935 if (DECL_P (*node))
42937 if (TREE_CODE (*node) == TYPE_DECL)
42938 type = &TREE_TYPE (*node);
42940 else
42941 type = node;
42943 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42945 warning (OPT_Wattributes, "%qE attribute ignored",
42946 name);
42947 *no_add_attrs = true;
42950 else if ((is_attribute_p ("ms_struct", name)
42951 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42952 || ((is_attribute_p ("gcc_struct", name)
42953 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42955 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42956 name);
42957 *no_add_attrs = true;
42960 return NULL_TREE;
42963 static tree
42964 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42965 bool *no_add_attrs)
42967 if (TREE_CODE (*node) != FUNCTION_DECL)
42969 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42970 name);
42971 *no_add_attrs = true;
42973 return NULL_TREE;
42976 static bool
42977 ix86_ms_bitfield_layout_p (const_tree record_type)
42979 return ((TARGET_MS_BITFIELD_LAYOUT
42980 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42981 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42984 /* Returns an expression indicating where the this parameter is
42985 located on entry to the FUNCTION. */
42987 static rtx
42988 x86_this_parameter (tree function)
42990 tree type = TREE_TYPE (function);
42991 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42992 int nregs;
42994 if (TARGET_64BIT)
42996 const int *parm_regs;
42998 if (ix86_function_type_abi (type) == MS_ABI)
42999 parm_regs = x86_64_ms_abi_int_parameter_registers;
43000 else
43001 parm_regs = x86_64_int_parameter_registers;
43002 return gen_rtx_REG (Pmode, parm_regs[aggr]);
43005 nregs = ix86_function_regparm (type, function);
43007 if (nregs > 0 && !stdarg_p (type))
43009 int regno;
43010 unsigned int ccvt = ix86_get_callcvt (type);
43012 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43013 regno = aggr ? DX_REG : CX_REG;
43014 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43016 regno = CX_REG;
43017 if (aggr)
43018 return gen_rtx_MEM (SImode,
43019 plus_constant (Pmode, stack_pointer_rtx, 4));
43021 else
43023 regno = AX_REG;
43024 if (aggr)
43026 regno = DX_REG;
43027 if (nregs == 1)
43028 return gen_rtx_MEM (SImode,
43029 plus_constant (Pmode,
43030 stack_pointer_rtx, 4));
43033 return gen_rtx_REG (SImode, regno);
43036 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43037 aggr ? 8 : 4));
43040 /* Determine whether x86_output_mi_thunk can succeed. */
43042 static bool
43043 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43044 const_tree function)
43046 /* 64-bit can handle anything. */
43047 if (TARGET_64BIT)
43048 return true;
43050 /* For 32-bit, everything's fine if we have one free register. */
43051 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43052 return true;
43054 /* Need a free register for vcall_offset. */
43055 if (vcall_offset)
43056 return false;
43058 /* Need a free register for GOT references. */
43059 if (flag_pic && !targetm.binds_local_p (function))
43060 return false;
43062 /* Otherwise ok. */
43063 return true;
43066 /* Output the assembler code for a thunk function. THUNK_DECL is the
43067 declaration for the thunk function itself, FUNCTION is the decl for
43068 the target function. DELTA is an immediate constant offset to be
43069 added to THIS. If VCALL_OFFSET is nonzero, the word at
43070 *(*this + vcall_offset) should be added to THIS. */
43072 static void
43073 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43074 HOST_WIDE_INT vcall_offset, tree function)
43076 rtx this_param = x86_this_parameter (function);
43077 rtx this_reg, tmp, fnaddr;
43078 unsigned int tmp_regno;
43079 rtx_insn *insn;
43081 if (TARGET_64BIT)
43082 tmp_regno = R10_REG;
43083 else
43085 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43086 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43087 tmp_regno = AX_REG;
43088 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43089 tmp_regno = DX_REG;
43090 else
43091 tmp_regno = CX_REG;
43094 emit_note (NOTE_INSN_PROLOGUE_END);
43096 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43097 pull it in now and let DELTA benefit. */
43098 if (REG_P (this_param))
43099 this_reg = this_param;
43100 else if (vcall_offset)
43102 /* Put the this parameter into %eax. */
43103 this_reg = gen_rtx_REG (Pmode, AX_REG);
43104 emit_move_insn (this_reg, this_param);
43106 else
43107 this_reg = NULL_RTX;
43109 /* Adjust the this parameter by a fixed constant. */
43110 if (delta)
43112 rtx delta_rtx = GEN_INT (delta);
43113 rtx delta_dst = this_reg ? this_reg : this_param;
43115 if (TARGET_64BIT)
43117 if (!x86_64_general_operand (delta_rtx, Pmode))
43119 tmp = gen_rtx_REG (Pmode, tmp_regno);
43120 emit_move_insn (tmp, delta_rtx);
43121 delta_rtx = tmp;
43125 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43128 /* Adjust the this parameter by a value stored in the vtable. */
43129 if (vcall_offset)
43131 rtx vcall_addr, vcall_mem, this_mem;
43133 tmp = gen_rtx_REG (Pmode, tmp_regno);
43135 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43136 if (Pmode != ptr_mode)
43137 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43138 emit_move_insn (tmp, this_mem);
43140 /* Adjust the this parameter. */
43141 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43142 if (TARGET_64BIT
43143 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43145 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43146 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43147 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43150 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43151 if (Pmode != ptr_mode)
43152 emit_insn (gen_addsi_1_zext (this_reg,
43153 gen_rtx_REG (ptr_mode,
43154 REGNO (this_reg)),
43155 vcall_mem));
43156 else
43157 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43160 /* If necessary, drop THIS back to its stack slot. */
43161 if (this_reg && this_reg != this_param)
43162 emit_move_insn (this_param, this_reg);
43164 fnaddr = XEXP (DECL_RTL (function), 0);
43165 if (TARGET_64BIT)
43167 if (!flag_pic || targetm.binds_local_p (function)
43168 || TARGET_PECOFF)
43170 else
43172 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43173 tmp = gen_rtx_CONST (Pmode, tmp);
43174 fnaddr = gen_const_mem (Pmode, tmp);
43177 else
43179 if (!flag_pic || targetm.binds_local_p (function))
43181 #if TARGET_MACHO
43182 else if (TARGET_MACHO)
43184 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43185 fnaddr = XEXP (fnaddr, 0);
43187 #endif /* TARGET_MACHO */
43188 else
43190 tmp = gen_rtx_REG (Pmode, CX_REG);
43191 output_set_got (tmp, NULL_RTX);
43193 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43194 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43195 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43196 fnaddr = gen_const_mem (Pmode, fnaddr);
43200 /* Our sibling call patterns do not allow memories, because we have no
43201 predicate that can distinguish between frame and non-frame memory.
43202 For our purposes here, we can get away with (ab)using a jump pattern,
43203 because we're going to do no optimization. */
43204 if (MEM_P (fnaddr))
43206 if (sibcall_insn_operand (fnaddr, word_mode))
43208 fnaddr = XEXP (DECL_RTL (function), 0);
43209 tmp = gen_rtx_MEM (QImode, fnaddr);
43210 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43211 tmp = emit_call_insn (tmp);
43212 SIBLING_CALL_P (tmp) = 1;
43214 else
43215 emit_jump_insn (gen_indirect_jump (fnaddr));
43217 else
43219 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43221 // CM_LARGE_PIC always uses a pseudo PIC register which is
43222 // uninitialized. Since FUNCTION is local and calling it
43223 // doesn't go through the PLT, we use scratch register %r11 as
43224 // the PIC register and initialize it here.
43225 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43226 ix86_init_large_pic_reg (tmp_regno);
43227 fnaddr = legitimize_pic_address (fnaddr,
43228 gen_rtx_REG (Pmode, tmp_regno));
43231 if (!sibcall_insn_operand (fnaddr, word_mode))
43233 tmp = gen_rtx_REG (word_mode, tmp_regno);
43234 if (GET_MODE (fnaddr) != word_mode)
43235 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43236 emit_move_insn (tmp, fnaddr);
43237 fnaddr = tmp;
43240 tmp = gen_rtx_MEM (QImode, fnaddr);
43241 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43242 tmp = emit_call_insn (tmp);
43243 SIBLING_CALL_P (tmp) = 1;
43245 emit_barrier ();
43247 /* Emit just enough of rest_of_compilation to get the insns emitted.
43248 Note that use_thunk calls assemble_start_function et al. */
43249 insn = get_insns ();
43250 shorten_branches (insn);
43251 final_start_function (insn, file, 1);
43252 final (insn, file, 1);
43253 final_end_function ();
43256 static void
43257 x86_file_start (void)
43259 default_file_start ();
43260 if (TARGET_16BIT)
43261 fputs ("\t.code16gcc\n", asm_out_file);
43262 #if TARGET_MACHO
43263 darwin_file_start ();
43264 #endif
43265 if (X86_FILE_START_VERSION_DIRECTIVE)
43266 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43267 if (X86_FILE_START_FLTUSED)
43268 fputs ("\t.global\t__fltused\n", asm_out_file);
43269 if (ix86_asm_dialect == ASM_INTEL)
43270 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43274 x86_field_alignment (tree field, int computed)
43276 machine_mode mode;
43277 tree type = TREE_TYPE (field);
43279 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43280 return computed;
43281 if (TARGET_IAMCU)
43282 return iamcu_alignment (type, computed);
43283 mode = TYPE_MODE (strip_array_types (type));
43284 if (mode == DFmode || mode == DCmode
43285 || GET_MODE_CLASS (mode) == MODE_INT
43286 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43287 return MIN (32, computed);
43288 return computed;
43291 /* Print call to TARGET to FILE. */
43293 static void
43294 x86_print_call_or_nop (FILE *file, const char *target)
43296 if (flag_nop_mcount)
43297 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43298 else
43299 fprintf (file, "1:\tcall\t%s\n", target);
43302 /* Output assembler code to FILE to increment profiler label # LABELNO
43303 for profiling a function entry. */
43304 void
43305 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43307 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43308 : MCOUNT_NAME);
43309 if (TARGET_64BIT)
43311 #ifndef NO_PROFILE_COUNTERS
43312 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43313 #endif
43315 if (!TARGET_PECOFF && flag_pic)
43316 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43317 else
43318 x86_print_call_or_nop (file, mcount_name);
43320 else if (flag_pic)
43322 #ifndef NO_PROFILE_COUNTERS
43323 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43324 LPREFIX, labelno);
43325 #endif
43326 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43328 else
43330 #ifndef NO_PROFILE_COUNTERS
43331 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43332 LPREFIX, labelno);
43333 #endif
43334 x86_print_call_or_nop (file, mcount_name);
43337 if (flag_record_mcount)
43339 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43340 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43341 fprintf (file, "\t.previous\n");
43345 /* We don't have exact information about the insn sizes, but we may assume
43346 quite safely that we are informed about all 1-byte insns and memory
43347 address sizes. This is enough to eliminate unnecessary padding in
43348 99% of cases. */
43350 static int
43351 min_insn_size (rtx_insn *insn)
43353 int l = 0, len;
43355 if (!INSN_P (insn) || !active_insn_p (insn))
43356 return 0;
43358 /* Discard alignments we've emitted, and jump instructions. */
43359 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43360 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43361 return 0;
43363 /* Important case - calls are always 5 bytes.
43364 It is common to have many calls in a row. */
43365 if (CALL_P (insn)
43366 && symbolic_reference_mentioned_p (PATTERN (insn))
43367 && !SIBLING_CALL_P (insn))
43368 return 5;
43369 len = get_attr_length (insn);
43370 if (len <= 1)
43371 return 1;
43373 /* For normal instructions we rely on get_attr_length being exact,
43374 with a few exceptions. */
43375 if (!JUMP_P (insn))
43377 enum attr_type type = get_attr_type (insn);
43379 switch (type)
43381 case TYPE_MULTI:
43382 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43383 || asm_noperands (PATTERN (insn)) >= 0)
43384 return 0;
43385 break;
43386 case TYPE_OTHER:
43387 case TYPE_FCMP:
43388 break;
43389 default:
43390 /* Otherwise trust get_attr_length. */
43391 return len;
43394 l = get_attr_length_address (insn);
43395 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43396 l = 4;
43398 if (l)
43399 return 1+l;
43400 else
43401 return 2;
43404 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43406 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43407 16-byte window. */
43409 static void
43410 ix86_avoid_jump_mispredicts (void)
43412 rtx_insn *insn, *start = get_insns ();
43413 int nbytes = 0, njumps = 0;
43414 bool isjump = false;
43416 /* Look for all minimal intervals of instructions containing 4 jumps.
43417 The intervals are bounded by START and INSN. NBYTES is the total
43418 size of instructions in the interval including INSN and not including
43419 START. When NBYTES is smaller than 16 bytes, it is possible
43420 that the end of START and INSN ends up in the same 16-byte page.
43422 The smallest offset in the page at which INSN can start is the case where
43423 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43424 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43426 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
43427 have to, since control transfer to the label(s) can be performed through other
43428 means, and we also estimate the minimum length of all asm stmts as 0. */
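/* Worked example (illustrative): with NBYTES == 12 and a 2-byte INSN, the
   padding emitted below uses 15 - 12 + 2 == 5 as the maximum skip, so that
   the fourth jump cannot end up in the same 16-byte window as the previous
   three. */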
43429 for (insn = start; insn; insn = NEXT_INSN (insn))
43431 int min_size;
43433 if (LABEL_P (insn))
43435 int align = label_to_alignment (insn);
43436 int max_skip = label_to_max_skip (insn);
43438 if (max_skip > 15)
43439 max_skip = 15;
43440 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43441 already in the current 16 byte page, because otherwise
43442 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43443 bytes to reach 16 byte boundary. */
43444 if (align <= 0
43445 || (align <= 3 && max_skip != (1 << align) - 1))
43446 max_skip = 0;
43447 if (dump_file)
43448 fprintf (dump_file, "Label %i with max_skip %i\n",
43449 INSN_UID (insn), max_skip);
43450 if (max_skip)
43452 while (nbytes + max_skip >= 16)
43454 start = NEXT_INSN (start);
43455 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43456 || CALL_P (start))
43457 njumps--, isjump = true;
43458 else
43459 isjump = false;
43460 nbytes -= min_insn_size (start);
43463 continue;
43466 min_size = min_insn_size (insn);
43467 nbytes += min_size;
43468 if (dump_file)
43469 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43470 INSN_UID (insn), min_size);
43471 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43472 || CALL_P (insn))
43473 njumps++;
43474 else
43475 continue;
43477 while (njumps > 3)
43479 start = NEXT_INSN (start);
43480 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43481 || CALL_P (start))
43482 njumps--, isjump = true;
43483 else
43484 isjump = false;
43485 nbytes -= min_insn_size (start);
43487 gcc_assert (njumps >= 0);
43488 if (dump_file)
43489 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43490 INSN_UID (start), INSN_UID (insn), nbytes);
43492 if (njumps == 3 && isjump && nbytes < 16)
43494 int padsize = 15 - nbytes + min_insn_size (insn);
43496 if (dump_file)
43497 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43498 INSN_UID (insn), padsize);
43499 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43503 #endif
43505 /* AMD Athlon works faster
43506 when RET is not the destination of a conditional jump or directly preceded
43507 by another jump instruction. We avoid the penalty by inserting a NOP just
43508 before the RET instruction in such cases. */
43509 static void
43510 ix86_pad_returns (void)
43512 edge e;
43513 edge_iterator ei;
43515 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43517 basic_block bb = e->src;
43518 rtx_insn *ret = BB_END (bb);
43519 rtx_insn *prev;
43520 bool replace = false;
43522 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43523 || optimize_bb_for_size_p (bb))
43524 continue;
43525 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43526 if (active_insn_p (prev) || LABEL_P (prev))
43527 break;
43528 if (prev && LABEL_P (prev))
43530 edge e;
43531 edge_iterator ei;
43533 FOR_EACH_EDGE (e, ei, bb->preds)
43534 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43535 && !(e->flags & EDGE_FALLTHRU))
43537 replace = true;
43538 break;
43541 if (!replace)
43543 prev = prev_active_insn (ret);
43544 if (prev
43545 && ((JUMP_P (prev) && any_condjump_p (prev))
43546 || CALL_P (prev)))
43547 replace = true;
43548 /* Empty functions get a branch mispredict even when
43549 the jump destination is not visible to us. */
43550 if (!prev && !optimize_function_for_size_p (cfun))
43551 replace = true;
43553 if (replace)
43555 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43556 delete_insn (ret);
43561 /* Count the minimum number of instructions in BB. Return 4 if the
43562 number of instructions >= 4. */
43564 static int
43565 ix86_count_insn_bb (basic_block bb)
43567 rtx_insn *insn;
43568 int insn_count = 0;
43570 /* Count number of instructions in this block. Return 4 if the number
43571 of instructions >= 4. */
43572 FOR_BB_INSNS (bb, insn)
43574 /* This only happens in exit blocks. */
43575 if (JUMP_P (insn)
43576 && ANY_RETURN_P (PATTERN (insn)))
43577 break;
43579 if (NONDEBUG_INSN_P (insn)
43580 && GET_CODE (PATTERN (insn)) != USE
43581 && GET_CODE (PATTERN (insn)) != CLOBBER)
43583 insn_count++;
43584 if (insn_count >= 4)
43585 return insn_count;
43589 return insn_count;
43593 /* Count the minimum number of instructions in code path in BB.
43594 Return 4 if the number of instructions >= 4. */
43596 static int
43597 ix86_count_insn (basic_block bb)
43599 edge e;
43600 edge_iterator ei;
43601 int min_prev_count;
43603 /* Only bother counting instructions along paths with no
43604 more than 2 basic blocks between entry and exit. Given
43605 that BB has an edge to exit, determine if a predecessor
43606 of BB has an edge from entry. If so, compute the number
43607 of instructions in the predecessor block. If there
43608 happen to be multiple such blocks, compute the minimum. */
43609 min_prev_count = 4;
43610 FOR_EACH_EDGE (e, ei, bb->preds)
43612 edge prev_e;
43613 edge_iterator prev_ei;
43615 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43617 min_prev_count = 0;
43618 break;
43620 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43622 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43624 int count = ix86_count_insn_bb (e->src);
43625 if (count < min_prev_count)
43626 min_prev_count = count;
43627 break;
43632 if (min_prev_count < 4)
43633 min_prev_count += ix86_count_insn_bb (bb);
43635 return min_prev_count;
43638 /* Pad short function to 4 instructions. */
43640 static void
43641 ix86_pad_short_function (void)
43643 edge e;
43644 edge_iterator ei;
43646 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43648 rtx_insn *ret = BB_END (e->src);
43649 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43651 int insn_count = ix86_count_insn (e->src);
43653 /* Pad short function. */
43654 if (insn_count < 4)
43656 rtx_insn *insn = ret;
43658 /* Find epilogue. */
43659 while (insn
43660 && (!NOTE_P (insn)
43661 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43662 insn = PREV_INSN (insn);
43664 if (!insn)
43665 insn = ret;
43667 /* Two NOPs count as one instruction. */
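/* For illustration: a body with a single counted instruction needs three
   more, and since two NOPs count as one instruction the code below emits
   2 * (4 - 1) == 6 NOPs before the epilogue. */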
43668 insn_count = 2 * (4 - insn_count);
43669 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43675 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43676 the epilogue, the Windows system unwinder will apply epilogue logic and
43677 produce incorrect offsets. This can be avoided by adding a nop between
43678 the last insn that can throw and the first insn of the epilogue. */
43680 static void
43681 ix86_seh_fixup_eh_fallthru (void)
43683 edge e;
43684 edge_iterator ei;
43686 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43688 rtx_insn *insn, *next;
43690 /* Find the beginning of the epilogue. */
43691 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43692 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43693 break;
43694 if (insn == NULL)
43695 continue;
43697 /* We only care about preceding insns that can throw. */
43698 insn = prev_active_insn (insn);
43699 if (insn == NULL || !can_throw_internal (insn))
43700 continue;
43702 /* Do not separate calls from their debug information. */
43703 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43704 if (NOTE_P (next)
43705 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43706 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43707 insn = next;
43708 else
43709 break;
43711 emit_insn_after (gen_nops (const1_rtx), insn);
43715 /* Implement machine-specific optimizations. We implement padding of returns
43716 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43717 static void
43718 ix86_reorg (void)
43720 /* We are freeing block_for_insn in the toplev to keep compatibility
43721 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43722 compute_bb_for_insn ();
43724 if (TARGET_SEH && current_function_has_exception_handlers ())
43725 ix86_seh_fixup_eh_fallthru ();
43727 if (optimize && optimize_function_for_speed_p (cfun))
43729 if (TARGET_PAD_SHORT_FUNCTION)
43730 ix86_pad_short_function ();
43731 else if (TARGET_PAD_RETURNS)
43732 ix86_pad_returns ();
43733 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43734 if (TARGET_FOUR_JUMP_LIMIT)
43735 ix86_avoid_jump_mispredicts ();
43736 #endif
43740 /* Return nonzero when a QImode register that must be represented via a REX prefix
43741 is used. */
43742 bool
43743 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43745 int i;
43746 extract_insn_cached (insn);
43747 for (i = 0; i < recog_data.n_operands; i++)
43748 if (GENERAL_REG_P (recog_data.operand[i])
43749 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43750 return true;
43751 return false;
43754 /* Return true when INSN mentions a register that must be encoded using a REX
43755 prefix. */
43756 bool
43757 x86_extended_reg_mentioned_p (rtx insn)
43759 subrtx_iterator::array_type array;
43760 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43762 const_rtx x = *iter;
43763 if (REG_P (x)
43764 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43765 return true;
43767 return false;
43770 /* If profitable, negate (without causing overflow) integer constant
43771 of mode MODE at location LOC. Return true in this case. */
43772 bool
43773 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43775 HOST_WIDE_INT val;
43777 if (!CONST_INT_P (*loc))
43778 return false;
43780 switch (mode)
43782 case DImode:
43783 /* DImode x86_64 constants must fit in 32 bits. */
43784 gcc_assert (x86_64_immediate_operand (*loc, mode));
43786 mode = SImode;
43787 break;
43789 case SImode:
43790 case HImode:
43791 case QImode:
43792 break;
43794 default:
43795 gcc_unreachable ();
43798 /* Avoid overflows. */
43799 if (mode_signbit_p (mode, *loc))
43800 return false;
43802 val = INTVAL (*loc);
43804 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43805 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43806 if ((val < 0 && val != -128)
43807 || val == 128)
43809 *loc = GEN_INT (-val);
43810 return true;
43813 return false;
43816 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43817 optabs would emit if we didn't have TFmode patterns. */
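/* Sketch of the expansion below (added comment): a non-negative input is
   converted directly; otherwise the value is halved as (in >> 1) | (in & 1),
   converted as a signed number, and doubled with f0 + f0. OR-ing in the
   low bit keeps the final rounding correct after the doubling. */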
43819 void
43820 x86_emit_floatuns (rtx operands[2])
43822 rtx_code_label *neglab, *donelab;
43823 rtx i0, i1, f0, in, out;
43824 machine_mode mode, inmode;
43826 inmode = GET_MODE (operands[1]);
43827 gcc_assert (inmode == SImode || inmode == DImode);
43829 out = operands[0];
43830 in = force_reg (inmode, operands[1]);
43831 mode = GET_MODE (out);
43832 neglab = gen_label_rtx ();
43833 donelab = gen_label_rtx ();
43834 f0 = gen_reg_rtx (mode);
43836 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43838 expand_float (out, in, 0);
43840 emit_jump_insn (gen_jump (donelab));
43841 emit_barrier ();
43843 emit_label (neglab);
43845 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43846 1, OPTAB_DIRECT);
43847 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43848 1, OPTAB_DIRECT);
43849 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43851 expand_float (f0, i0, 0);
43853 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43855 emit_label (donelab);
43858 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43859 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43860 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43861 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43863 /* Get a vector mode of the same size as the original but with elements
43864 twice as wide. This is only guaranteed to apply to integral vectors. */
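/* For example, V16QImode maps to V8HImode: the 16-byte vector size is
   preserved while each element doubles in width and the element count
   halves, which the asserts below check. */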
43866 static inline machine_mode
43867 get_mode_wider_vector (machine_mode o)
43869 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43870 machine_mode n = GET_MODE_WIDER_MODE (o);
43871 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43872 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43873 return n;
43876 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43877 fill TARGET with VAL via vec_duplicate. */
43879 static bool
43880 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43882 bool ok;
43883 rtx_insn *insn;
43884 rtx dup;
43886 /* First attempt to recognize VAL as-is. */
43887 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43888 insn = emit_insn (gen_rtx_SET (target, dup));
43889 if (recog_memoized (insn) < 0)
43891 rtx_insn *seq;
43892 /* If that fails, force VAL into a register. */
43894 start_sequence ();
43895 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43896 seq = get_insns ();
43897 end_sequence ();
43898 if (seq)
43899 emit_insn_before (seq, insn);
43901 ok = recog_memoized (insn) >= 0;
43902 gcc_assert (ok);
43904 return true;
43907 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43908 with all elements equal to VAR. Return true if successful. */
43910 static bool
43911 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43912 rtx target, rtx val)
43914 bool ok;
43916 switch (mode)
43918 case V2SImode:
43919 case V2SFmode:
43920 if (!mmx_ok)
43921 return false;
43922 /* FALLTHRU */
43924 case V4DFmode:
43925 case V4DImode:
43926 case V8SFmode:
43927 case V8SImode:
43928 case V2DFmode:
43929 case V2DImode:
43930 case V4SFmode:
43931 case V4SImode:
43932 case V16SImode:
43933 case V8DImode:
43934 case V16SFmode:
43935 case V8DFmode:
43936 return ix86_vector_duplicate_value (mode, target, val);
43938 case V4HImode:
43939 if (!mmx_ok)
43940 return false;
43941 if (TARGET_SSE || TARGET_3DNOW_A)
43943 rtx x;
43945 val = gen_lowpart (SImode, val);
43946 x = gen_rtx_TRUNCATE (HImode, val);
43947 x = gen_rtx_VEC_DUPLICATE (mode, x);
43948 emit_insn (gen_rtx_SET (target, x));
43949 return true;
43951 goto widen;
43953 case V8QImode:
43954 if (!mmx_ok)
43955 return false;
43956 goto widen;
43958 case V8HImode:
43959 if (TARGET_AVX2)
43960 return ix86_vector_duplicate_value (mode, target, val);
43962 if (TARGET_SSE2)
43964 struct expand_vec_perm_d dperm;
43965 rtx tmp1, tmp2;
43967 permute:
43968 memset (&dperm, 0, sizeof (dperm));
43969 dperm.target = target;
43970 dperm.vmode = mode;
43971 dperm.nelt = GET_MODE_NUNITS (mode);
43972 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43973 dperm.one_operand_p = true;
43975 /* Extend to SImode using a paradoxical SUBREG. */
43976 tmp1 = gen_reg_rtx (SImode);
43977 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43979 /* Insert the SImode value as low element of a V4SImode vector. */
43980 tmp2 = gen_reg_rtx (V4SImode);
43981 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43982 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43984 ok = (expand_vec_perm_1 (&dperm)
43985 || expand_vec_perm_broadcast_1 (&dperm));
43986 gcc_assert (ok);
43987 return ok;
43989 goto widen;
43991 case V16QImode:
43992 if (TARGET_AVX2)
43993 return ix86_vector_duplicate_value (mode, target, val);
43995 if (TARGET_SSE2)
43996 goto permute;
43997 goto widen;
43999 widen:
44000 /* Replicate the value once into the next wider mode and recurse. */
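/* For illustration: a QImode value 0xab is first widened to the HImode
   value 0xabab (the shift by 8 and IOR below), the recursive call then
   broadcasts that into the wider vector, and the result is viewed back
   in the original mode via gen_lowpart. */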
44002 machine_mode smode, wsmode, wvmode;
44003 rtx x;
44005 smode = GET_MODE_INNER (mode);
44006 wvmode = get_mode_wider_vector (mode);
44007 wsmode = GET_MODE_INNER (wvmode);
44009 val = convert_modes (wsmode, smode, val, true);
44010 x = expand_simple_binop (wsmode, ASHIFT, val,
44011 GEN_INT (GET_MODE_BITSIZE (smode)),
44012 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44013 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44015 x = gen_reg_rtx (wvmode);
44016 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44017 gcc_assert (ok);
44018 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44019 return ok;
44022 case V16HImode:
44023 case V32QImode:
44024 if (TARGET_AVX2)
44025 return ix86_vector_duplicate_value (mode, target, val);
44026 else
44028 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44029 rtx x = gen_reg_rtx (hvmode);
44031 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44032 gcc_assert (ok);
44034 x = gen_rtx_VEC_CONCAT (mode, x, x);
44035 emit_insn (gen_rtx_SET (target, x));
44037 return true;
44039 case V64QImode:
44040 case V32HImode:
44041 if (TARGET_AVX512BW)
44042 return ix86_vector_duplicate_value (mode, target, val);
44043 else
44045 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44046 rtx x = gen_reg_rtx (hvmode);
44048 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44049 gcc_assert (ok);
44051 x = gen_rtx_VEC_CONCAT (mode, x, x);
44052 emit_insn (gen_rtx_SET (target, x));
44054 return true;
44056 default:
44057 return false;
44061 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44062 whose ONE_VAR element is VAR, and other elements are zero. Return true
44063 if successful. */
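/* Illustrative sketch (commentary added here, not in the original
   sources): for V4SFmode with VAR = x and ONE_VAR = 2 this routine
   builds { 0, 0, x, 0 }.  On SSE4.1 that is a zeroing move followed by
   ix86_expand_vector_set; otherwise x is broadcast, merged with zero
   into element 0, and then shuffled into element 2 (shufps for
   V4SFmode, pshufd for the integer modes when SSE2 is available).  */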
44065 static bool
44066 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44067 rtx target, rtx var, int one_var)
44069 machine_mode vsimode;
44070 rtx new_target;
44071 rtx x, tmp;
44072 bool use_vector_set = false;
44074 switch (mode)
44076 case V2DImode:
44077 /* For SSE4.1, we normally use vector set. But if the second
44078 element is zero and inter-unit moves are OK, we use movq
44079 instead. */
44080 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44081 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
44082 && one_var == 0));
44083 break;
44084 case V16QImode:
44085 case V4SImode:
44086 case V4SFmode:
44087 use_vector_set = TARGET_SSE4_1;
44088 break;
44089 case V8HImode:
44090 use_vector_set = TARGET_SSE2;
44091 break;
44092 case V4HImode:
44093 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44094 break;
44095 case V32QImode:
44096 case V16HImode:
44097 case V8SImode:
44098 case V8SFmode:
44099 case V4DFmode:
44100 use_vector_set = TARGET_AVX;
44101 break;
44102 case V4DImode:
44103 /* Use ix86_expand_vector_set in 64bit mode only. */
44104 use_vector_set = TARGET_AVX && TARGET_64BIT;
44105 break;
44106 default:
44107 break;
44110 if (use_vector_set)
44112 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44113 var = force_reg (GET_MODE_INNER (mode), var);
44114 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44115 return true;
44118 switch (mode)
44120 case V2SFmode:
44121 case V2SImode:
44122 if (!mmx_ok)
44123 return false;
44124 /* FALLTHRU */
44126 case V2DFmode:
44127 case V2DImode:
44128 if (one_var != 0)
44129 return false;
44130 var = force_reg (GET_MODE_INNER (mode), var);
44131 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44132 emit_insn (gen_rtx_SET (target, x));
44133 return true;
44135 case V4SFmode:
44136 case V4SImode:
44137 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44138 new_target = gen_reg_rtx (mode);
44139 else
44140 new_target = target;
44141 var = force_reg (GET_MODE_INNER (mode), var);
44142 x = gen_rtx_VEC_DUPLICATE (mode, var);
44143 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44144 emit_insn (gen_rtx_SET (new_target, x));
44145 if (one_var != 0)
44147 /* We need to shuffle the value to the correct position, so
44148 create a new pseudo to store the intermediate result. */
44150 /* With SSE2, we can use the integer shuffle insns. */
44151 if (mode != V4SFmode && TARGET_SSE2)
44153 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44154 const1_rtx,
44155 GEN_INT (one_var == 1 ? 0 : 1),
44156 GEN_INT (one_var == 2 ? 0 : 1),
44157 GEN_INT (one_var == 3 ? 0 : 1)));
44158 if (target != new_target)
44159 emit_move_insn (target, new_target);
44160 return true;
44163 /* Otherwise convert the intermediate result to V4SFmode and
44164 use the SSE1 shuffle instructions. */
44165 if (mode != V4SFmode)
44167 tmp = gen_reg_rtx (V4SFmode);
44168 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44170 else
44171 tmp = new_target;
44173 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44174 const1_rtx,
44175 GEN_INT (one_var == 1 ? 0 : 1),
44176 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44177 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44179 if (mode != V4SFmode)
44180 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44181 else if (tmp != target)
44182 emit_move_insn (target, tmp);
44184 else if (target != new_target)
44185 emit_move_insn (target, new_target);
44186 return true;
44188 case V8HImode:
44189 case V16QImode:
44190 vsimode = V4SImode;
44191 goto widen;
44192 case V4HImode:
44193 case V8QImode:
44194 if (!mmx_ok)
44195 return false;
44196 vsimode = V2SImode;
44197 goto widen;
44198 widen:
44199 if (one_var != 0)
44200 return false;
44202 /* Zero extend the variable element to SImode and recurse. */
44203 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44205 x = gen_reg_rtx (vsimode);
44206 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44207 var, one_var))
44208 gcc_unreachable ();
44210 emit_move_insn (target, gen_lowpart (mode, x));
44211 return true;
44213 default:
44214 return false;
44218 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44219 consisting of the values in VALS. It is known that all elements
44220 except ONE_VAR are constants. Return true if successful. */
44222 static bool
44223 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44224 rtx target, rtx vals, int one_var)
44226 rtx var = XVECEXP (vals, 0, one_var);
44227 machine_mode wmode;
44228 rtx const_vec, x;
44230 const_vec = copy_rtx (vals);
44231 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44232 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44234 switch (mode)
44236 case V2DFmode:
44237 case V2DImode:
44238 case V2SFmode:
44239 case V2SImode:
44240 /* For the two element vectors, it's just as easy to use
44241 the general case. */
44242 return false;
44244 case V4DImode:
44245 /* Use ix86_expand_vector_set in 64bit mode only. */
44246 if (!TARGET_64BIT)
44247 return false;
44248 case V4DFmode:
44249 case V8SFmode:
44250 case V8SImode:
44251 case V16HImode:
44252 case V32QImode:
44253 case V4SFmode:
44254 case V4SImode:
44255 case V8HImode:
44256 case V4HImode:
44257 break;
44259 case V16QImode:
44260 if (TARGET_SSE4_1)
44261 break;
44262 wmode = V8HImode;
44263 goto widen;
44264 case V8QImode:
44265 wmode = V4HImode;
44266 goto widen;
44267 widen:
44268 /* There's no way to set one QImode entry easily. Combine
44269 the variable value with its adjacent constant value, and
44270 promote to an HImode set. */
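/* Worked example (illustrative commentary): to set byte 3 of a
   V8QImode vector, the code pairs it with constant byte 2.  Since 3 is
   odd, VAR is zero-extended to HImode and shifted left by 8 so that it
   occupies the high half of the 16-bit word, the adjacent constant is
   masked to the low half, the two are IORed, and the combined value is
   stored as element 3 >> 1 == 1 of the V4HImode view of the vector.  */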
44271 x = XVECEXP (vals, 0, one_var ^ 1);
44272 if (one_var & 1)
44274 var = convert_modes (HImode, QImode, var, true);
44275 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44276 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44277 x = GEN_INT (INTVAL (x) & 0xff);
44279 else
44281 var = convert_modes (HImode, QImode, var, true);
44282 x = gen_int_mode (INTVAL (x) << 8, HImode);
44284 if (x != const0_rtx)
44285 var = expand_simple_binop (HImode, IOR, var, x, var,
44286 1, OPTAB_LIB_WIDEN);
44288 x = gen_reg_rtx (wmode);
44289 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44290 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44292 emit_move_insn (target, gen_lowpart (mode, x));
44293 return true;
44295 default:
44296 return false;
44299 emit_move_insn (target, const_vec);
44300 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44301 return true;
44304 /* A subroutine of ix86_expand_vector_init_general. Use vector
44305 concatenate to handle the most general case: all values variable,
44306 and none identical. */
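/* Sketch of the recursion below (illustrative commentary): a V8SFmode
   vector built from eight scalar operands first combines adjacent
   pairs into four V2SFmode registers, concatenates those pairwise into
   two V4SFmode registers, and finally emits one VEC_CONCAT to form the
   V8SFmode result.  The 16-element AVX-512 modes add one more level
   through GMODE.  */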
44308 static void
44309 ix86_expand_vector_init_concat (machine_mode mode,
44310 rtx target, rtx *ops, int n)
44312 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44313 rtx first[16], second[8], third[4];
44314 rtvec v;
44315 int i, j;
44317 switch (n)
44319 case 2:
44320 switch (mode)
44322 case V16SImode:
44323 cmode = V8SImode;
44324 break;
44325 case V16SFmode:
44326 cmode = V8SFmode;
44327 break;
44328 case V8DImode:
44329 cmode = V4DImode;
44330 break;
44331 case V8DFmode:
44332 cmode = V4DFmode;
44333 break;
44334 case V8SImode:
44335 cmode = V4SImode;
44336 break;
44337 case V8SFmode:
44338 cmode = V4SFmode;
44339 break;
44340 case V4DImode:
44341 cmode = V2DImode;
44342 break;
44343 case V4DFmode:
44344 cmode = V2DFmode;
44345 break;
44346 case V4SImode:
44347 cmode = V2SImode;
44348 break;
44349 case V4SFmode:
44350 cmode = V2SFmode;
44351 break;
44352 case V2DImode:
44353 cmode = DImode;
44354 break;
44355 case V2SImode:
44356 cmode = SImode;
44357 break;
44358 case V2DFmode:
44359 cmode = DFmode;
44360 break;
44361 case V2SFmode:
44362 cmode = SFmode;
44363 break;
44364 default:
44365 gcc_unreachable ();
44368 if (!register_operand (ops[1], cmode))
44369 ops[1] = force_reg (cmode, ops[1]);
44370 if (!register_operand (ops[0], cmode))
44371 ops[0] = force_reg (cmode, ops[0]);
44372 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44373 ops[1])));
44374 break;
44376 case 4:
44377 switch (mode)
44379 case V4DImode:
44380 cmode = V2DImode;
44381 break;
44382 case V4DFmode:
44383 cmode = V2DFmode;
44384 break;
44385 case V4SImode:
44386 cmode = V2SImode;
44387 break;
44388 case V4SFmode:
44389 cmode = V2SFmode;
44390 break;
44391 default:
44392 gcc_unreachable ();
44394 goto half;
44396 case 8:
44397 switch (mode)
44399 case V8DImode:
44400 cmode = V2DImode;
44401 hmode = V4DImode;
44402 break;
44403 case V8DFmode:
44404 cmode = V2DFmode;
44405 hmode = V4DFmode;
44406 break;
44407 case V8SImode:
44408 cmode = V2SImode;
44409 hmode = V4SImode;
44410 break;
44411 case V8SFmode:
44412 cmode = V2SFmode;
44413 hmode = V4SFmode;
44414 break;
44415 default:
44416 gcc_unreachable ();
44418 goto half;
44420 case 16:
44421 switch (mode)
44423 case V16SImode:
44424 cmode = V2SImode;
44425 hmode = V4SImode;
44426 gmode = V8SImode;
44427 break;
44428 case V16SFmode:
44429 cmode = V2SFmode;
44430 hmode = V4SFmode;
44431 gmode = V8SFmode;
44432 break;
44433 default:
44434 gcc_unreachable ();
44436 goto half;
44438 half:
44439 /* FIXME: We process inputs backward to help RA. PR 36222. */
44440 i = n - 1;
44441 j = (n >> 1) - 1;
44442 for (; i > 0; i -= 2, j--)
44444 first[j] = gen_reg_rtx (cmode);
44445 v = gen_rtvec (2, ops[i - 1], ops[i]);
44446 ix86_expand_vector_init (false, first[j],
44447 gen_rtx_PARALLEL (cmode, v));
44450 n >>= 1;
44451 if (n > 4)
44453 gcc_assert (hmode != VOIDmode);
44454 gcc_assert (gmode != VOIDmode);
44455 for (i = j = 0; i < n; i += 2, j++)
44457 second[j] = gen_reg_rtx (hmode);
44458 ix86_expand_vector_init_concat (hmode, second [j],
44459 &first [i], 2);
44461 n >>= 1;
44462 for (i = j = 0; i < n; i += 2, j++)
44464 third[j] = gen_reg_rtx (gmode);
44465 ix86_expand_vector_init_concat (gmode, third[j],
44466 &second[i], 2);
44468 n >>= 1;
44469 ix86_expand_vector_init_concat (mode, target, third, n);
44471 else if (n > 2)
44473 gcc_assert (hmode != VOIDmode);
44474 for (i = j = 0; i < n; i += 2, j++)
44476 second[j] = gen_reg_rtx (hmode);
44477 ix86_expand_vector_init_concat (hmode, second [j],
44478 &first [i], 2);
44480 n >>= 1;
44481 ix86_expand_vector_init_concat (mode, target, second, n);
44483 else
44484 ix86_expand_vector_init_concat (mode, target, first, n);
44485 break;
44487 default:
44488 gcc_unreachable ();
44492 /* A subroutine of ix86_expand_vector_init_general. Use vector
44493 interleave to handle the most general case: all values variable,
44494 and none identical. */
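/* Sketch (illustrative commentary): for V8HImode the operands are
   consumed two at a time -- ops[2*i] is inserted as the low SImode
   element of a V4SImode vector and ops[2*i + 1] is placed into element
   1 with the vec_set pattern.  The resulting vectors are then combined
   with interleave-low operations at successively wider element widths
   (V4SImode, then V2DImode) until the whole vector is assembled.  */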
44496 static void
44497 ix86_expand_vector_init_interleave (machine_mode mode,
44498 rtx target, rtx *ops, int n)
44500 machine_mode first_imode, second_imode, third_imode, inner_mode;
44501 int i, j;
44502 rtx op0, op1;
44503 rtx (*gen_load_even) (rtx, rtx, rtx);
44504 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44505 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44507 switch (mode)
44509 case V8HImode:
44510 gen_load_even = gen_vec_setv8hi;
44511 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44512 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44513 inner_mode = HImode;
44514 first_imode = V4SImode;
44515 second_imode = V2DImode;
44516 third_imode = VOIDmode;
44517 break;
44518 case V16QImode:
44519 gen_load_even = gen_vec_setv16qi;
44520 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44521 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44522 inner_mode = QImode;
44523 first_imode = V8HImode;
44524 second_imode = V4SImode;
44525 third_imode = V2DImode;
44526 break;
44527 default:
44528 gcc_unreachable ();
44531 for (i = 0; i < n; i++)
44533 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44534 op0 = gen_reg_rtx (SImode);
44535 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44537 /* Insert the SImode value as low element of V4SImode vector. */
44538 op1 = gen_reg_rtx (V4SImode);
44539 op0 = gen_rtx_VEC_MERGE (V4SImode,
44540 gen_rtx_VEC_DUPLICATE (V4SImode,
44541 op0),
44542 CONST0_RTX (V4SImode),
44543 const1_rtx);
44544 emit_insn (gen_rtx_SET (op1, op0));
44546 /* Cast the V4SImode vector back to a vector in the original mode. */
44547 op0 = gen_reg_rtx (mode);
44548 emit_move_insn (op0, gen_lowpart (mode, op1));
44550 /* Load even elements into the second position. */
44551 emit_insn (gen_load_even (op0,
44552 force_reg (inner_mode,
44553 ops [i + i + 1]),
44554 const1_rtx));
44556 /* Cast vector to FIRST_IMODE vector. */
44557 ops[i] = gen_reg_rtx (first_imode);
44558 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44561 /* Interleave low FIRST_IMODE vectors. */
44562 for (i = j = 0; i < n; i += 2, j++)
44564 op0 = gen_reg_rtx (first_imode);
44565 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44567 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44568 ops[j] = gen_reg_rtx (second_imode);
44569 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44572 /* Interleave low SECOND_IMODE vectors. */
44573 switch (second_imode)
44575 case V4SImode:
44576 for (i = j = 0; i < n / 2; i += 2, j++)
44578 op0 = gen_reg_rtx (second_imode);
44579 emit_insn (gen_interleave_second_low (op0, ops[i],
44580 ops[i + 1]));
44582 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44583 vector. */
44584 ops[j] = gen_reg_rtx (third_imode);
44585 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44587 second_imode = V2DImode;
44588 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44589 /* FALLTHRU */
44591 case V2DImode:
44592 op0 = gen_reg_rtx (second_imode);
44593 emit_insn (gen_interleave_second_low (op0, ops[0],
44594 ops[1]));
44596 /* Cast the SECOND_IMODE vector back to a vector in the original
44597 mode. */
44598 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44599 break;
44601 default:
44602 gcc_unreachable ();
44606 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44607 all values variable, and none identical. */
44609 static void
44610 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44611 rtx target, rtx vals)
44613 rtx ops[64], op0, op1, op2, op3, op4, op5;
44614 machine_mode half_mode = VOIDmode;
44615 machine_mode quarter_mode = VOIDmode;
44616 int n, i;
44618 switch (mode)
44620 case V2SFmode:
44621 case V2SImode:
44622 if (!mmx_ok && !TARGET_SSE)
44623 break;
44624 /* FALLTHRU */
44626 case V16SImode:
44627 case V16SFmode:
44628 case V8DFmode:
44629 case V8DImode:
44630 case V8SFmode:
44631 case V8SImode:
44632 case V4DFmode:
44633 case V4DImode:
44634 case V4SFmode:
44635 case V4SImode:
44636 case V2DFmode:
44637 case V2DImode:
44638 n = GET_MODE_NUNITS (mode);
44639 for (i = 0; i < n; i++)
44640 ops[i] = XVECEXP (vals, 0, i);
44641 ix86_expand_vector_init_concat (mode, target, ops, n);
44642 return;
44644 case V32QImode:
44645 half_mode = V16QImode;
44646 goto half;
44648 case V16HImode:
44649 half_mode = V8HImode;
44650 goto half;
44652 half:
44653 n = GET_MODE_NUNITS (mode);
44654 for (i = 0; i < n; i++)
44655 ops[i] = XVECEXP (vals, 0, i);
44656 op0 = gen_reg_rtx (half_mode);
44657 op1 = gen_reg_rtx (half_mode);
44658 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44659 n >> 2);
44660 ix86_expand_vector_init_interleave (half_mode, op1,
44661 &ops [n >> 1], n >> 2);
44662 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44663 return;
44665 case V64QImode:
44666 quarter_mode = V16QImode;
44667 half_mode = V32QImode;
44668 goto quarter;
44670 case V32HImode:
44671 quarter_mode = V8HImode;
44672 half_mode = V16HImode;
44673 goto quarter;
44675 quarter:
44676 n = GET_MODE_NUNITS (mode);
44677 for (i = 0; i < n; i++)
44678 ops[i] = XVECEXP (vals, 0, i);
44679 op0 = gen_reg_rtx (quarter_mode);
44680 op1 = gen_reg_rtx (quarter_mode);
44681 op2 = gen_reg_rtx (quarter_mode);
44682 op3 = gen_reg_rtx (quarter_mode);
44683 op4 = gen_reg_rtx (half_mode);
44684 op5 = gen_reg_rtx (half_mode);
44685 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44686 n >> 3);
44687 ix86_expand_vector_init_interleave (quarter_mode, op1,
44688 &ops [n >> 2], n >> 3);
44689 ix86_expand_vector_init_interleave (quarter_mode, op2,
44690 &ops [n >> 1], n >> 3);
44691 ix86_expand_vector_init_interleave (quarter_mode, op3,
44692 &ops [(n >> 1) | (n >> 2)], n >> 3);
44693 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44694 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44695 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44696 return;
44698 case V16QImode:
44699 if (!TARGET_SSE4_1)
44700 break;
44701 /* FALLTHRU */
44703 case V8HImode:
44704 if (!TARGET_SSE2)
44705 break;
44707 /* Don't use ix86_expand_vector_init_interleave if we can't
44708 move from GPR to SSE register directly. */
44709 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44710 break;
44712 n = GET_MODE_NUNITS (mode);
44713 for (i = 0; i < n; i++)
44714 ops[i] = XVECEXP (vals, 0, i);
44715 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44716 return;
44718 case V4HImode:
44719 case V8QImode:
44720 break;
44722 default:
44723 gcc_unreachable ();
44727 int i, j, n_elts, n_words, n_elt_per_word;
44728 machine_mode inner_mode;
44729 rtx words[4], shift;
44731 inner_mode = GET_MODE_INNER (mode);
44732 n_elts = GET_MODE_NUNITS (mode);
44733 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44734 n_elt_per_word = n_elts / n_words;
44735 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44737 for (i = 0; i < n_words; ++i)
44739 rtx word = NULL_RTX;
44741 for (j = 0; j < n_elt_per_word; ++j)
44743 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44744 elt = convert_modes (word_mode, inner_mode, elt, true);
44746 if (j == 0)
44747 word = elt;
44748 else
44750 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44751 word, 1, OPTAB_LIB_WIDEN);
44752 word = expand_simple_binop (word_mode, IOR, word, elt,
44753 word, 1, OPTAB_LIB_WIDEN);
44757 words[i] = word;
44760 if (n_words == 1)
44761 emit_move_insn (target, gen_lowpart (mode, words[0]));
44762 else if (n_words == 2)
44764 rtx tmp = gen_reg_rtx (mode);
44765 emit_clobber (tmp);
44766 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44767 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44768 emit_move_insn (target, tmp);
44770 else if (n_words == 4)
44772 rtx tmp = gen_reg_rtx (V4SImode);
44773 gcc_assert (word_mode == SImode);
44774 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44775 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44776 emit_move_insn (target, gen_lowpart (mode, tmp));
44778 else
44779 gcc_unreachable ();
44783 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44784 instructions unless MMX_OK is true. */
44786 void
44787 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44789 machine_mode mode = GET_MODE (target);
44790 machine_mode inner_mode = GET_MODE_INNER (mode);
44791 int n_elts = GET_MODE_NUNITS (mode);
44792 int n_var = 0, one_var = -1;
44793 bool all_same = true, all_const_zero = true;
44794 int i;
44795 rtx x;
44797 for (i = 0; i < n_elts; ++i)
44799 x = XVECEXP (vals, 0, i);
44800 if (!(CONST_SCALAR_INT_P (x)
44801 || CONST_DOUBLE_P (x)
44802 || CONST_FIXED_P (x)))
44803 n_var++, one_var = i;
44804 else if (x != CONST0_RTX (inner_mode))
44805 all_const_zero = false;
44806 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44807 all_same = false;
44810 /* Constants are best loaded from the constant pool. */
44811 if (n_var == 0)
44813 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44814 return;
44817 /* If all values are identical, broadcast the value. */
44818 if (all_same
44819 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44820 XVECEXP (vals, 0, 0)))
44821 return;
44823 /* Values where only one field is non-constant are best loaded from
44824 the pool and overwritten via move later. */
44825 if (n_var == 1)
44827 if (all_const_zero
44828 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44829 XVECEXP (vals, 0, one_var),
44830 one_var))
44831 return;
44833 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44834 return;
44837 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44840 void
44841 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44843 machine_mode mode = GET_MODE (target);
44844 machine_mode inner_mode = GET_MODE_INNER (mode);
44845 machine_mode half_mode;
44846 bool use_vec_merge = false;
44847 rtx tmp;
44848 static rtx (*gen_extract[6][2]) (rtx, rtx)
44850 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44851 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44852 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44853 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44854 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44855 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44857 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44859 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44860 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44861 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44862 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44863 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44864 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44866 int i, j, n;
44867 machine_mode mmode = VOIDmode;
44868 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44870 switch (mode)
44872 case V2SFmode:
44873 case V2SImode:
44874 if (mmx_ok)
44876 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44877 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44878 if (elt == 0)
44879 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44880 else
44881 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44882 emit_insn (gen_rtx_SET (target, tmp));
44883 return;
44885 break;
44887 case V2DImode:
44888 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44889 if (use_vec_merge)
44890 break;
44892 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44893 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44894 if (elt == 0)
44895 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44896 else
44897 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44898 emit_insn (gen_rtx_SET (target, tmp));
44899 return;
44901 case V2DFmode:
44903 rtx op0, op1;
44905 /* For the two element vectors, we implement a VEC_CONCAT with
44906 the extraction of the other element. */
44908 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44909 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44911 if (elt == 0)
44912 op0 = val, op1 = tmp;
44913 else
44914 op0 = tmp, op1 = val;
44916 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44917 emit_insn (gen_rtx_SET (target, tmp));
44919 return;
44921 case V4SFmode:
44922 use_vec_merge = TARGET_SSE4_1;
44923 if (use_vec_merge)
44924 break;
44926 switch (elt)
44928 case 0:
44929 use_vec_merge = true;
44930 break;
44932 case 1:
44933 /* tmp = target = A B C D */
44934 tmp = copy_to_reg (target);
44935 /* target = A A B B */
44936 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44937 /* target = X A B B */
44938 ix86_expand_vector_set (false, target, val, 0);
44939 /* target = A X C D */
44940 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44941 const1_rtx, const0_rtx,
44942 GEN_INT (2+4), GEN_INT (3+4)));
44943 return;
44945 case 2:
44946 /* tmp = target = A B C D */
44947 tmp = copy_to_reg (target);
44948 /* tmp = X B C D */
44949 ix86_expand_vector_set (false, tmp, val, 0);
44950 /* target = A B X D */
44951 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44952 const0_rtx, const1_rtx,
44953 GEN_INT (0+4), GEN_INT (3+4)));
44954 return;
44956 case 3:
44957 /* tmp = target = A B C D */
44958 tmp = copy_to_reg (target);
44959 /* tmp = X B C D */
44960 ix86_expand_vector_set (false, tmp, val, 0);
44961 /* target = A B C X */
44962 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44963 const0_rtx, const1_rtx,
44964 GEN_INT (2+4), GEN_INT (0+4)));
44965 return;
44967 default:
44968 gcc_unreachable ();
44970 break;
44972 case V4SImode:
44973 use_vec_merge = TARGET_SSE4_1;
44974 if (use_vec_merge)
44975 break;
44977 /* Element 0 handled by vec_merge below. */
44978 if (elt == 0)
44980 use_vec_merge = true;
44981 break;
44984 if (TARGET_SSE2)
44986 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44987 store into element 0, then shuffle them back. */
44989 rtx order[4];
44991 order[0] = GEN_INT (elt);
44992 order[1] = const1_rtx;
44993 order[2] = const2_rtx;
44994 order[3] = GEN_INT (3);
44995 order[elt] = const0_rtx;
44997 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44998 order[1], order[2], order[3]));
45000 ix86_expand_vector_set (false, target, val, 0);
45002 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45003 order[1], order[2], order[3]));
45005 else
45007 /* For SSE1, we have to reuse the V4SF code. */
45008 rtx t = gen_reg_rtx (V4SFmode);
45009 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45010 emit_move_insn (target, gen_lowpart (mode, t));
45012 return;
45014 case V8HImode:
45015 use_vec_merge = TARGET_SSE2;
45016 break;
45017 case V4HImode:
45018 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45019 break;
45021 case V16QImode:
45022 use_vec_merge = TARGET_SSE4_1;
45023 break;
45025 case V8QImode:
45026 break;
45028 case V32QImode:
45029 half_mode = V16QImode;
45030 j = 0;
45031 n = 16;
45032 goto half;
45034 case V16HImode:
45035 half_mode = V8HImode;
45036 j = 1;
45037 n = 8;
45038 goto half;
45040 case V8SImode:
45041 half_mode = V4SImode;
45042 j = 2;
45043 n = 4;
45044 goto half;
45046 case V4DImode:
45047 half_mode = V2DImode;
45048 j = 3;
45049 n = 2;
45050 goto half;
45052 case V8SFmode:
45053 half_mode = V4SFmode;
45054 j = 4;
45055 n = 4;
45056 goto half;
45058 case V4DFmode:
45059 half_mode = V2DFmode;
45060 j = 5;
45061 n = 2;
45062 goto half;
45064 half:
45065 /* Compute offset. */
45066 i = elt / n;
45067 elt %= n;
45069 gcc_assert (i <= 1);
45071 /* Extract the half. */
45072 tmp = gen_reg_rtx (half_mode);
45073 emit_insn (gen_extract[j][i] (tmp, target));
45075 /* Put val in tmp at elt. */
45076 ix86_expand_vector_set (false, tmp, val, elt);
45078 /* Put it back. */
45079 emit_insn (gen_insert[j][i] (target, target, tmp));
45080 return;
45082 case V8DFmode:
45083 if (TARGET_AVX512F)
45085 mmode = QImode;
45086 gen_blendm = gen_avx512f_blendmv8df;
45088 break;
45090 case V8DImode:
45091 if (TARGET_AVX512F)
45093 mmode = QImode;
45094 gen_blendm = gen_avx512f_blendmv8di;
45096 break;
45098 case V16SFmode:
45099 if (TARGET_AVX512F)
45101 mmode = HImode;
45102 gen_blendm = gen_avx512f_blendmv16sf;
45104 break;
45106 case V16SImode:
45107 if (TARGET_AVX512F)
45109 mmode = HImode;
45110 gen_blendm = gen_avx512f_blendmv16si;
45112 break;
45114 case V32HImode:
45115 if (TARGET_AVX512F && TARGET_AVX512BW)
45117 mmode = SImode;
45118 gen_blendm = gen_avx512bw_blendmv32hi;
45120 break;
45122 case V64QImode:
45123 if (TARGET_AVX512F && TARGET_AVX512BW)
45125 mmode = DImode;
45126 gen_blendm = gen_avx512bw_blendmv64qi;
45128 break;
45130 default:
45131 break;
45134 if (mmode != VOIDmode)
45136 tmp = gen_reg_rtx (mode);
45137 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45138 emit_insn (gen_blendm (target, tmp, target,
45139 force_reg (mmode,
45140 gen_int_mode (1 << elt, mmode))));
45142 else if (use_vec_merge)
45144 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45145 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45146 emit_insn (gen_rtx_SET (target, tmp));
45148 else
45150 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45152 emit_move_insn (mem, target);
45154 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45155 emit_move_insn (tmp, val);
45157 emit_move_insn (target, mem);
45161 void
45162 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45164 machine_mode mode = GET_MODE (vec);
45165 machine_mode inner_mode = GET_MODE_INNER (mode);
45166 bool use_vec_extr = false;
45167 rtx tmp;
45169 switch (mode)
45171 case V2SImode:
45172 case V2SFmode:
45173 if (!mmx_ok)
45174 break;
45175 /* FALLTHRU */
45177 case V2DFmode:
45178 case V2DImode:
45179 use_vec_extr = true;
45180 break;
45182 case V4SFmode:
45183 use_vec_extr = TARGET_SSE4_1;
45184 if (use_vec_extr)
45185 break;
45187 switch (elt)
45189 case 0:
45190 tmp = vec;
45191 break;
45193 case 1:
45194 case 3:
45195 tmp = gen_reg_rtx (mode);
45196 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45197 GEN_INT (elt), GEN_INT (elt),
45198 GEN_INT (elt+4), GEN_INT (elt+4)));
45199 break;
45201 case 2:
45202 tmp = gen_reg_rtx (mode);
45203 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45204 break;
45206 default:
45207 gcc_unreachable ();
45209 vec = tmp;
45210 use_vec_extr = true;
45211 elt = 0;
45212 break;
45214 case V4SImode:
45215 use_vec_extr = TARGET_SSE4_1;
45216 if (use_vec_extr)
45217 break;
45219 if (TARGET_SSE2)
45221 switch (elt)
45223 case 0:
45224 tmp = vec;
45225 break;
45227 case 1:
45228 case 3:
45229 tmp = gen_reg_rtx (mode);
45230 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45231 GEN_INT (elt), GEN_INT (elt),
45232 GEN_INT (elt), GEN_INT (elt)));
45233 break;
45235 case 2:
45236 tmp = gen_reg_rtx (mode);
45237 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45238 break;
45240 default:
45241 gcc_unreachable ();
45243 vec = tmp;
45244 use_vec_extr = true;
45245 elt = 0;
45247 else
45249 /* For SSE1, we have to reuse the V4SF code. */
45250 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45251 gen_lowpart (V4SFmode, vec), elt);
45252 return;
45254 break;
45256 case V8HImode:
45257 use_vec_extr = TARGET_SSE2;
45258 break;
45259 case V4HImode:
45260 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45261 break;
45263 case V16QImode:
45264 use_vec_extr = TARGET_SSE4_1;
45265 break;
45267 case V8SFmode:
45268 if (TARGET_AVX)
45270 tmp = gen_reg_rtx (V4SFmode);
45271 if (elt < 4)
45272 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45273 else
45274 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45275 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45276 return;
45278 break;
45280 case V4DFmode:
45281 if (TARGET_AVX)
45283 tmp = gen_reg_rtx (V2DFmode);
45284 if (elt < 2)
45285 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45286 else
45287 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45288 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45289 return;
45291 break;
45293 case V32QImode:
45294 if (TARGET_AVX)
45296 tmp = gen_reg_rtx (V16QImode);
45297 if (elt < 16)
45298 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45299 else
45300 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45301 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45302 return;
45304 break;
45306 case V16HImode:
45307 if (TARGET_AVX)
45309 tmp = gen_reg_rtx (V8HImode);
45310 if (elt < 8)
45311 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45312 else
45313 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45314 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45315 return;
45317 break;
45319 case V8SImode:
45320 if (TARGET_AVX)
45322 tmp = gen_reg_rtx (V4SImode);
45323 if (elt < 4)
45324 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45325 else
45326 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45327 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45328 return;
45330 break;
45332 case V4DImode:
45333 if (TARGET_AVX)
45335 tmp = gen_reg_rtx (V2DImode);
45336 if (elt < 2)
45337 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45338 else
45339 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45340 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45341 return;
45343 break;
45345 case V32HImode:
45346 if (TARGET_AVX512BW)
45348 tmp = gen_reg_rtx (V16HImode);
45349 if (elt < 16)
45350 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45351 else
45352 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45353 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45354 return;
45356 break;
45358 case V64QImode:
45359 if (TARGET_AVX512BW)
45361 tmp = gen_reg_rtx (V32QImode);
45362 if (elt < 32)
45363 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45364 else
45365 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45366 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45367 return;
45369 break;
45371 case V16SFmode:
45372 tmp = gen_reg_rtx (V8SFmode);
45373 if (elt < 8)
45374 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45375 else
45376 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45377 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45378 return;
45380 case V8DFmode:
45381 tmp = gen_reg_rtx (V4DFmode);
45382 if (elt < 4)
45383 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45384 else
45385 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45386 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45387 return;
45389 case V16SImode:
45390 tmp = gen_reg_rtx (V8SImode);
45391 if (elt < 8)
45392 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45393 else
45394 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45395 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45396 return;
45398 case V8DImode:
45399 tmp = gen_reg_rtx (V4DImode);
45400 if (elt < 4)
45401 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45402 else
45403 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45404 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45405 return;
45407 case V8QImode:
45408 /* ??? Could extract the appropriate HImode element and shift. */
45409 default:
45410 break;
45413 if (use_vec_extr)
45415 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45416 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45418 /* Let the rtl optimizers know about the zero extension performed. */
45419 if (inner_mode == QImode || inner_mode == HImode)
45421 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45422 target = gen_lowpart (SImode, target);
45425 emit_insn (gen_rtx_SET (target, tmp));
45427 else
45429 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45431 emit_move_insn (mem, vec);
45433 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45434 emit_move_insn (target, tmp);
45438 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45439 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45440 The upper bits of DEST are undefined, though they shouldn't cause
45441 exceptions (some bits from src or all zeros are ok). */
45443 static void
45444 emit_reduc_half (rtx dest, rtx src, int i)
45446 rtx tem, d = dest;
45447 switch (GET_MODE (src))
45449 case V4SFmode:
45450 if (i == 128)
45451 tem = gen_sse_movhlps (dest, src, src);
45452 else
45453 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45454 GEN_INT (1 + 4), GEN_INT (1 + 4));
45455 break;
45456 case V2DFmode:
45457 tem = gen_vec_interleave_highv2df (dest, src, src);
45458 break;
45459 case V16QImode:
45460 case V8HImode:
45461 case V4SImode:
45462 case V2DImode:
45463 d = gen_reg_rtx (V1TImode);
45464 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45465 GEN_INT (i / 2));
45466 break;
45467 case V8SFmode:
45468 if (i == 256)
45469 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45470 else
45471 tem = gen_avx_shufps256 (dest, src, src,
45472 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45473 break;
45474 case V4DFmode:
45475 if (i == 256)
45476 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45477 else
45478 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45479 break;
45480 case V32QImode:
45481 case V16HImode:
45482 case V8SImode:
45483 case V4DImode:
45484 if (i == 256)
45486 if (GET_MODE (dest) != V4DImode)
45487 d = gen_reg_rtx (V4DImode);
45488 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45489 gen_lowpart (V4DImode, src),
45490 const1_rtx);
45492 else
45494 d = gen_reg_rtx (V2TImode);
45495 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45496 GEN_INT (i / 2));
45498 break;
45499 case V64QImode:
45500 case V32HImode:
45501 case V16SImode:
45502 case V16SFmode:
45503 case V8DImode:
45504 case V8DFmode:
45505 if (i > 128)
45506 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45507 gen_lowpart (V16SImode, src),
45508 gen_lowpart (V16SImode, src),
45509 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45510 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45511 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45512 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45513 GEN_INT (0xC), GEN_INT (0xD),
45514 GEN_INT (0xE), GEN_INT (0xF),
45515 GEN_INT (0x10), GEN_INT (0x11),
45516 GEN_INT (0x12), GEN_INT (0x13),
45517 GEN_INT (0x14), GEN_INT (0x15),
45518 GEN_INT (0x16), GEN_INT (0x17));
45519 else
45520 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45521 gen_lowpart (V16SImode, src),
45522 GEN_INT (i == 128 ? 0x2 : 0x1),
45523 GEN_INT (0x3),
45524 GEN_INT (0x3),
45525 GEN_INT (0x3),
45526 GEN_INT (i == 128 ? 0x6 : 0x5),
45527 GEN_INT (0x7),
45528 GEN_INT (0x7),
45529 GEN_INT (0x7),
45530 GEN_INT (i == 128 ? 0xA : 0x9),
45531 GEN_INT (0xB),
45532 GEN_INT (0xB),
45533 GEN_INT (0xB),
45534 GEN_INT (i == 128 ? 0xE : 0xD),
45535 GEN_INT (0xF),
45536 GEN_INT (0xF),
45537 GEN_INT (0xF));
45538 break;
45539 default:
45540 gcc_unreachable ();
45542 emit_insn (tem);
45543 if (d != dest)
45544 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45547 /* Expand a vector reduction. FN is the binary pattern to reduce;
45548 DEST is the destination; IN is the input vector. */
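/* Sketch of the loop below (illustrative commentary): for a V8SImode
   reduction I starts at 256 bits and halves down to 64.  Each step
   emit_reduc_half moves the upper half of the still-live bits into the
   lower half and FN combines the two, so after log2(nelts) steps
   element 0 of DEST holds the reduction of all elements; the remaining
   elements are not meaningful, as the callers expect.  */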
45550 void
45551 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45553 rtx half, dst, vec = in;
45554 machine_mode mode = GET_MODE (in);
45555 int i;
45557 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45558 if (TARGET_SSE4_1
45559 && mode == V8HImode
45560 && fn == gen_uminv8hi3)
45562 emit_insn (gen_sse4_1_phminposuw (dest, in));
45563 return;
45566 for (i = GET_MODE_BITSIZE (mode);
45567 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45568 i >>= 1)
45570 half = gen_reg_rtx (mode);
45571 emit_reduc_half (half, vec, i);
45572 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45573 dst = dest;
45574 else
45575 dst = gen_reg_rtx (mode);
45576 emit_insn (fn (dst, half, vec));
45577 vec = dst;
45581 /* Target hook for scalar_mode_supported_p. */
45582 static bool
45583 ix86_scalar_mode_supported_p (machine_mode mode)
45585 if (DECIMAL_FLOAT_MODE_P (mode))
45586 return default_decimal_float_supported_p ();
45587 else if (mode == TFmode)
45588 return true;
45589 else
45590 return default_scalar_mode_supported_p (mode);
45593 /* Implements target hook vector_mode_supported_p. */
45594 static bool
45595 ix86_vector_mode_supported_p (machine_mode mode)
45597 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45598 return true;
45599 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45600 return true;
45601 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45602 return true;
45603 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45604 return true;
45605 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45606 return true;
45607 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45608 return true;
45609 return false;
45612 /* Implement target hook libgcc_floating_mode_supported_p. */
45613 static bool
45614 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45616 switch (mode)
45618 case SFmode:
45619 case DFmode:
45620 case XFmode:
45621 return true;
45623 case TFmode:
45624 #ifdef IX86_NO_LIBGCC_TFMODE
45625 return false;
45626 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45627 return TARGET_LONG_DOUBLE_128;
45628 #else
45629 return true;
45630 #endif
45632 default:
45633 return false;
45637 /* Target hook for c_mode_for_suffix. */
45638 static machine_mode
45639 ix86_c_mode_for_suffix (char suffix)
45641 if (suffix == 'q')
45642 return TFmode;
45643 if (suffix == 'w')
45644 return XFmode;
45646 return VOIDmode;
45649 /* Worker function for TARGET_MD_ASM_ADJUST.
45651 We implement asm flag outputs, and maintain source compatibility
45652 with the old cc0-based compiler. */
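/* Usage sketch (illustrative only, variable names are hypothetical):
   the "=@cc<cond>" constraints handled below let inline asm expose a
   flags-register condition directly, e.g.

       char c;
       asm ("bt %2, %1" : "=@ccc" (c) : "r" (x), "r" (bit));

   where the trailing 'c' selects the carry flag (CCCmode/EQ below) and
   an 'n' prefix, as in "=@ccnc", inverts the condition.  */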
45654 static rtx_insn *
45655 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
45656 vec<const char *> &constraints,
45657 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45659 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45660 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45662 bool saw_asm_flag = false;
45664 start_sequence ();
45665 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
45667 const char *con = constraints[i];
45668 if (strncmp (con, "=@cc", 4) != 0)
45669 continue;
45670 con += 4;
45671 if (strchr (con, ',') != NULL)
45673 error ("alternatives not allowed in asm flag output");
45674 continue;
45677 bool invert = false;
45678 if (con[0] == 'n')
45679 invert = true, con++;
45681 machine_mode mode = CCmode;
45682 rtx_code code = UNKNOWN;
45684 switch (con[0])
45686 case 'a':
45687 if (con[1] == 0)
45688 mode = CCAmode, code = EQ;
45689 else if (con[1] == 'e' && con[2] == 0)
45690 mode = CCCmode, code = EQ;
45691 break;
45692 case 'b':
45693 if (con[1] == 0)
45694 mode = CCCmode, code = EQ;
45695 else if (con[1] == 'e' && con[2] == 0)
45696 mode = CCAmode, code = NE;
45697 break;
45698 case 'c':
45699 if (con[1] == 0)
45700 mode = CCCmode, code = EQ;
45701 break;
45702 case 'e':
45703 if (con[1] == 0)
45704 mode = CCZmode, code = EQ;
45705 break;
45706 case 'g':
45707 if (con[1] == 0)
45708 mode = CCGCmode, code = GT;
45709 else if (con[1] == 'e' && con[2] == 0)
45710 mode = CCGCmode, code = GE;
45711 break;
45712 case 'l':
45713 if (con[1] == 0)
45714 mode = CCGCmode, code = LT;
45715 else if (con[1] == 'e' && con[2] == 0)
45716 mode = CCGCmode, code = LE;
45717 break;
45718 case 'o':
45719 if (con[1] == 0)
45720 mode = CCOmode, code = EQ;
45721 break;
45722 case 'p':
45723 if (con[1] == 0)
45724 mode = CCPmode, code = EQ;
45725 break;
45726 case 's':
45727 if (con[1] == 0)
45728 mode = CCSmode, code = EQ;
45729 break;
45730 case 'z':
45731 if (con[1] == 0)
45732 mode = CCZmode, code = EQ;
45733 break;
45735 if (code == UNKNOWN)
45737 error ("unknown asm flag output %qs", constraints[i]);
45738 continue;
45740 if (invert)
45741 code = reverse_condition (code);
45743 rtx dest = outputs[i];
45744 if (!saw_asm_flag)
45746 /* This is the first asm flag output. Here we put the flags
45747 register in as the real output and adjust the condition to
45748 allow it. */
45749 constraints[i] = "=Bf";
45750 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
45751 saw_asm_flag = true;
45753 else
45755 /* We don't need the flags register as output twice. */
45756 constraints[i] = "=X";
45757 outputs[i] = gen_rtx_SCRATCH (SImode);
45760 rtx x = gen_rtx_REG (mode, FLAGS_REG);
45761 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
45763 machine_mode dest_mode = GET_MODE (dest);
45764 if (!SCALAR_INT_MODE_P (dest_mode))
45766 error ("invalid type for asm flag output");
45767 continue;
45769 if (dest_mode != QImode)
45771 rtx destqi = gen_reg_rtx (QImode);
45772 emit_insn (gen_rtx_SET (destqi, x));
45773 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
45775 emit_insn (gen_rtx_SET (dest, x));
45777 rtx_insn *seq = get_insns ();
45778 end_sequence ();
45780 if (saw_asm_flag)
45781 return seq;
45782 else
45784 /* If we had no asm flag outputs, clobber the flags. */
45785 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45786 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45787 return NULL;
45791 /* Implements target vector targetm.asm.encode_section_info. */
45793 static void ATTRIBUTE_UNUSED
45794 ix86_encode_section_info (tree decl, rtx rtl, int first)
45796 default_encode_section_info (decl, rtl, first);
45798 if (ix86_in_large_data_p (decl))
45799 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45802 /* Worker function for REVERSE_CONDITION. */
45804 enum rtx_code
45805 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45807 return (mode != CCFPmode && mode != CCFPUmode
45808 ? reverse_condition (code)
45809 : reverse_condition_maybe_unordered (code));
45812 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45813 to OPERANDS[0]. */
45815 const char *
45816 output_387_reg_move (rtx insn, rtx *operands)
45818 if (REG_P (operands[0]))
45820 if (REG_P (operands[1])
45821 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45823 if (REGNO (operands[0]) == FIRST_STACK_REG)
45824 return output_387_ffreep (operands, 0);
45825 return "fstp\t%y0";
45827 if (STACK_TOP_P (operands[0]))
45828 return "fld%Z1\t%y1";
45829 return "fst\t%y0";
45831 else if (MEM_P (operands[0]))
45833 gcc_assert (REG_P (operands[1]));
45834 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45835 return "fstp%Z0\t%y0";
45836 else
45838 /* There is no non-popping store to memory for XFmode.
45839 So if we need one, follow the store with a load. */
45840 if (GET_MODE (operands[0]) == XFmode)
45841 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45842 else
45843 return "fst%Z0\t%y0";
45846 else
45847 gcc_unreachable();
45850 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
45851 the FP status register is set. */
45853 void
45854 ix86_emit_fp_unordered_jump (rtx label)
45856 rtx reg = gen_reg_rtx (HImode);
45857 rtx temp;
45859 emit_insn (gen_x86_fnstsw_1 (reg));
45861 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45863 emit_insn (gen_x86_sahf_1 (reg));
45865 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45866 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45868 else
45870 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45872 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45873 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45876 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45877 gen_rtx_LABEL_REF (VOIDmode, label),
45878 pc_rtx);
45879 temp = gen_rtx_SET (pc_rtx, temp);
45881 emit_jump_insn (temp);
45882 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45885 /* Output code to perform a log1p XFmode calculation. */
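/* Strategy note (illustrative commentary): fyl2xp1 computes
   y * log2(x + 1) and is meant for small |x| -- the cutoff tested
   below, 0.2928..., is 1 - sqrt(2)/2 -- so for larger inputs the code
   falls back to fyl2x on 1 + op1.  In both branches y is loaded as
   ln(2) via fldln2, which turns the base-2 logarithm into the natural
   logarithm that log1p requires: ln(2) * log2(1 + x) == ln(1 + x).  */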
45887 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45889 rtx_code_label *label1 = gen_label_rtx ();
45890 rtx_code_label *label2 = gen_label_rtx ();
45892 rtx tmp = gen_reg_rtx (XFmode);
45893 rtx tmp2 = gen_reg_rtx (XFmode);
45894 rtx test;
45896 emit_insn (gen_absxf2 (tmp, op1));
45897 test = gen_rtx_GE (VOIDmode, tmp,
45898 CONST_DOUBLE_FROM_REAL_VALUE (
45899 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45900 XFmode));
45901 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45903 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45904 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45905 emit_jump (label2);
45907 emit_label (label1);
45908 emit_move_insn (tmp, CONST1_RTX (XFmode));
45909 emit_insn (gen_addxf3 (tmp, op1, tmp));
45910 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45911 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45913 emit_label (label2);
45916 /* Emit code for round calculation. */
45917 void ix86_emit_i387_round (rtx op0, rtx op1)
45919 machine_mode inmode = GET_MODE (op1);
45920 machine_mode outmode = GET_MODE (op0);
45921 rtx e1, e2, res, tmp, tmp1, half;
45922 rtx scratch = gen_reg_rtx (HImode);
45923 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45924 rtx_code_label *jump_label = gen_label_rtx ();
45925 rtx insn;
45926 rtx (*gen_abs) (rtx, rtx);
45927 rtx (*gen_neg) (rtx, rtx);
45929 switch (inmode)
45931 case SFmode:
45932 gen_abs = gen_abssf2;
45933 break;
45934 case DFmode:
45935 gen_abs = gen_absdf2;
45936 break;
45937 case XFmode:
45938 gen_abs = gen_absxf2;
45939 break;
45940 default:
45941 gcc_unreachable ();
45944 switch (outmode)
45946 case SFmode:
45947 gen_neg = gen_negsf2;
45948 break;
45949 case DFmode:
45950 gen_neg = gen_negdf2;
45951 break;
45952 case XFmode:
45953 gen_neg = gen_negxf2;
45954 break;
45955 case HImode:
45956 gen_neg = gen_neghi2;
45957 break;
45958 case SImode:
45959 gen_neg = gen_negsi2;
45960 break;
45961 case DImode:
45962 gen_neg = gen_negdi2;
45963 break;
45964 default:
45965 gcc_unreachable ();
45968 e1 = gen_reg_rtx (inmode);
45969 e2 = gen_reg_rtx (inmode);
45970 res = gen_reg_rtx (outmode);
45972 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45974 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
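/* Illustrative note: with this formula halfway cases round away from
   zero, e.g. round(2.5) == 3.0 and round(-2.5) == -3.0.  The sign is
   read from the fxam result (the 0x02 test below), |a| + 0.5 is
   floored in the requested output mode, and the result is negated
   afterwards when the sign bit was set.  */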
45976 /* scratch = fxam(op1) */
45977 emit_insn (gen_rtx_SET (scratch,
45978 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45979 UNSPEC_FXAM)));
45980 /* e1 = fabs(op1) */
45981 emit_insn (gen_abs (e1, op1));
45983 /* e2 = e1 + 0.5 */
45984 half = force_reg (inmode, half);
45985 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
45987 /* res = floor(e2) */
45988 if (inmode != XFmode)
45990 tmp1 = gen_reg_rtx (XFmode);
45992 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45994 else
45995 tmp1 = e2;
45997 switch (outmode)
45999 case SFmode:
46000 case DFmode:
46002 rtx tmp0 = gen_reg_rtx (XFmode);
46004 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46006 emit_insn (gen_rtx_SET (res,
46007 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46008 UNSPEC_TRUNC_NOOP)));
46010 break;
46011 case XFmode:
46012 emit_insn (gen_frndintxf2_floor (res, tmp1));
46013 break;
46014 case HImode:
46015 emit_insn (gen_lfloorxfhi2 (res, tmp1));
46016 break;
46017 case SImode:
46018 emit_insn (gen_lfloorxfsi2 (res, tmp1));
46019 break;
46020 case DImode:
46021 emit_insn (gen_lfloorxfdi2 (res, tmp1));
46022 break;
46023 default:
46024 gcc_unreachable ();
46027 /* flags = signbit(a) */
46028 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46030 /* if (flags) then res = -res */
46031 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46032 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46033 gen_rtx_LABEL_REF (VOIDmode, jump_label),
46034 pc_rtx);
46035 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46036 predict_jump (REG_BR_PROB_BASE * 50 / 100);
46037 JUMP_LABEL (insn) = jump_label;
46039 emit_insn (gen_neg (res, res));
46041 emit_label (jump_label);
46042 LABEL_NUSES (jump_label) = 1;
46044 emit_move_insn (op0, res);
46047 /* Output code to perform a Newton-Raphson approximation of a single precision
46048 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
46050 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46052 rtx x0, x1, e0, e1;
46054 x0 = gen_reg_rtx (mode);
46055 e0 = gen_reg_rtx (mode);
46056 e1 = gen_reg_rtx (mode);
46057 x1 = gen_reg_rtx (mode);
46059 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
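/* Derivation (commentary only): the bracketed term is one
   Newton-Raphson step for 1/b starting from x0 = rcp(b),

       x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0),

   which is exactly e1 - e0 as computed below; multiplying by a then
   approximates a / b with roughly twice the precision of the rcpps
   estimate.  */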
46061 b = force_reg (mode, b);
46063 /* x0 = rcp(b) estimate */
46064 if (mode == V16SFmode || mode == V8DFmode)
46065 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46066 UNSPEC_RCP14)));
46067 else
46068 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46069 UNSPEC_RCP)));
46071 /* e0 = x0 * b */
46072 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
46074 /* e0 = x0 * e0 */
46075 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
46077 /* e1 = x0 + x0 */
46078 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
46080 /* x1 = e1 - e0 */
46081 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
46083 /* res = a * x1 */
46084 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
46087 /* Output code to perform a Newton-Raphson approximation of a
46088 single precision floating point [reciprocal] square root. */
46090 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46091 bool recip)
46093 rtx x0, e0, e1, e2, e3, mthree, mhalf;
46094 REAL_VALUE_TYPE r;
46095 int unspec;
46097 x0 = gen_reg_rtx (mode);
46098 e0 = gen_reg_rtx (mode);
46099 e1 = gen_reg_rtx (mode);
46100 e2 = gen_reg_rtx (mode);
46101 e3 = gen_reg_rtx (mode);
46103 real_from_integer (&r, VOIDmode, -3, SIGNED);
46104 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46106 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46107 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46108 unspec = UNSPEC_RSQRT;
46110 if (VECTOR_MODE_P (mode))
46112 mthree = ix86_build_const_vector (mode, true, mthree);
46113 mhalf = ix86_build_const_vector (mode, true, mhalf);
46114 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46115 if (GET_MODE_SIZE (mode) == 64)
46116 unspec = UNSPEC_RSQRT14;
46119 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46120 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
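/* Derivation (commentary only): both lines are one Newton-Raphson step
   for 1/sqrt(a) starting from x0 = rsqrtss(a),

       x1 = 0.5 * x0 * (3 - a * x0 * x0),

   written with negated factors so it maps directly onto the e0..e3
   temporaries below; multiplying the reciprocal refinement by a gives
   the sqrt form.  */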
46122 a = force_reg (mode, a);
46124 /* x0 = rsqrt(a) estimate */
46125 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46126 unspec)));
46128 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
46129 if (!recip)
46131 rtx zero, mask;
46133 zero = gen_reg_rtx (mode);
46134 mask = gen_reg_rtx (mode);
46136 zero = force_reg (mode, CONST0_RTX(mode));
46138 /* Handle masked compare. */
46139 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46141 mask = gen_reg_rtx (HImode);
46142 /* Imm value 0x4 corresponds to not-equal comparison. */
46143 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46144 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46146 else
46148 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
46150 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
46154 /* e0 = x0 * a */
46155 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
46156 /* e1 = e0 * x0 */
46157 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
46159 /* e2 = e1 - 3. */
46160 mthree = force_reg (mode, mthree);
46161 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
46163 mhalf = force_reg (mode, mhalf);
46164 if (recip)
46165 /* e3 = -.5 * x0 */
46166 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
46167 else
46168 /* e3 = -.5 * e0 */
46169 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
46170 /* ret = e2 * e3 */
46171 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
46174 #ifdef TARGET_SOLARIS
46175 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
46177 static void
46178 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46179 tree decl)
46181 /* With Binutils 2.15, the "@unwind" marker must be specified on
46182 every occurrence of the ".eh_frame" section, not just the first
46183 one. */
46184 if (TARGET_64BIT
46185 && strcmp (name, ".eh_frame") == 0)
46187 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46188 flags & SECTION_WRITE ? "aw" : "a");
46189 return;
46192 #ifndef USE_GAS
46193 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46195 solaris_elf_asm_comdat_section (name, flags, decl);
46196 return;
46198 #endif
46200 default_elf_asm_named_section (name, flags, decl);
46202 #endif /* TARGET_SOLARIS */
46204 /* Return the mangling of TYPE if it is an extended fundamental type. */
46206 static const char *
46207 ix86_mangle_type (const_tree type)
46209 type = TYPE_MAIN_VARIANT (type);
46211 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46212 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46213 return NULL;
46215 switch (TYPE_MODE (type))
46217 case TFmode:
46218 /* __float128 is "g". */
46219 return "g";
46220 case XFmode:
46221 /* "long double" or __float80 is "e". */
46222 return "e";
46223 default:
46224 return NULL;
46228 /* For 32-bit code we can save PIC register setup by using the
46229 hidden __stack_chk_fail_local function instead of calling
46230 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
46231 register, so it is better to call __stack_chk_fail directly. */
46233 static tree ATTRIBUTE_UNUSED
46234 ix86_stack_protect_fail (void)
46236 return TARGET_64BIT
46237 ? default_external_stack_protect_fail ()
46238 : default_hidden_stack_protect_fail ();
46241 /* Select a format to encode pointers in exception handling data. CODE
46242 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46243 true if the symbol may be affected by dynamic relocations.
46245 ??? All x86 object file formats are capable of representing this.
46246 After all, the relocation needed is the same as for the call insn.
46247 Whether or not a particular assembler allows us to enter such, I
46248 guess we'll have to see. */
46249 int
46250 asm_preferred_eh_data_format (int code, int global)
46252 if (flag_pic)
46254 int type = DW_EH_PE_sdata8;
46255 if (!TARGET_64BIT
46256 || ix86_cmodel == CM_SMALL_PIC
46257 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46258 type = DW_EH_PE_sdata4;
46259 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46261 if (ix86_cmodel == CM_SMALL
46262 || (ix86_cmodel == CM_MEDIUM && code))
46263 return DW_EH_PE_udata4;
46264 return DW_EH_PE_absptr;
46267 /* Expand copysign: copy the sign of SIGN onto the positive value ABS_VALUE,
46268 storing the result in RESULT.  If MASK is non-null, it shall be a mask that
46269 masks out the sign-bit. */
46270 static void
46271 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46273 machine_mode mode = GET_MODE (sign);
46274 rtx sgn = gen_reg_rtx (mode);
46275 if (mask == NULL_RTX)
46277 machine_mode vmode;
46279 if (mode == SFmode)
46280 vmode = V4SFmode;
46281 else if (mode == DFmode)
46282 vmode = V2DFmode;
46283 else
46284 vmode = mode;
46286 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46287 if (!VECTOR_MODE_P (mode))
46289 /* We need to generate a scalar mode mask in this case. */
46290 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46291 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46292 mask = gen_reg_rtx (mode);
46293 emit_insn (gen_rtx_SET (mask, tmp));
46296 else
46297 mask = gen_rtx_NOT (mode, mask);
46298 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46299 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
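/* I.e. RESULT = ABS_VALUE | (SIGN & sign-bit-mask).  ABS_VALUE is assumed
   to already have a clear sign bit, so OR-ing in just the sign bit taken
   from SIGN implements copysign without any branching.  */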
46302 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46303 mask for masking out the sign-bit is stored in *SMASK, if that is
46304 non-null. */
46305 static rtx
46306 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46308 machine_mode vmode, mode = GET_MODE (op0);
46309 rtx xa, mask;
46311 xa = gen_reg_rtx (mode);
46312 if (mode == SFmode)
46313 vmode = V4SFmode;
46314 else if (mode == DFmode)
46315 vmode = V2DFmode;
46316 else
46317 vmode = mode;
46318 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46319 if (!VECTOR_MODE_P (mode))
46321 /* We need to generate a scalar mode mask in this case. */
46322 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46323 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46324 mask = gen_reg_rtx (mode);
46325 emit_insn (gen_rtx_SET (mask, tmp));
46327 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46329 if (smask)
46330 *smask = mask;
46332 return xa;
46335 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46336 swapping the operands if SWAP_OPERANDS is true. The expanded
46337 code is a forward jump to a newly created label in case the
46338 comparison is true. The generated label rtx is returned. */
46339 static rtx_code_label *
46340 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46341 bool swap_operands)
46343 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46344 rtx_code_label *label;
46345 rtx tmp;
46347 if (swap_operands)
46348 std::swap (op0, op1);
46350 label = gen_label_rtx ();
46351 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46352 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46353 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46354 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46355 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46356 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46357 JUMP_LABEL (tmp) = label;
46359 return label;
46362 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46363 using comparison code CODE. Operands are swapped for the comparison if
46364 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46365 static rtx
46366 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46367 bool swap_operands)
46369 rtx (*insn)(rtx, rtx, rtx, rtx);
46370 machine_mode mode = GET_MODE (op0);
46371 rtx mask = gen_reg_rtx (mode);
46373 if (swap_operands)
46374 std::swap (op0, op1);
46376 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46378 emit_insn (insn (mask, op0, op1,
46379 gen_rtx_fmt_ee (code, mode, op0, op1)));
46380 return mask;
46383 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46384 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46385 static rtx
46386 ix86_gen_TWO52 (machine_mode mode)
46388 REAL_VALUE_TYPE TWO52r;
46389 rtx TWO52;
46391 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46392 TWO52 = const_double_from_real_value (TWO52r, mode);
46393 TWO52 = force_reg (mode, TWO52);
46395 return TWO52;
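/* 2**52 (2**23 for SFmode) is the smallest magnitude at which every
   representable value is already an integer, so adding TWO52 to a smaller
   nonnegative value and subtracting it again rounds that value to an
   integer in the current rounding mode.  The expanders below build floor,
   ceil, trunc, round and rint out of this property.  */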
46398 /* Expand SSE sequence for computing lround from OP1 storing
46399 into OP0. */
46400 void
46401 ix86_expand_lround (rtx op0, rtx op1)
46403 /* C code for the stuff we're doing below:
46404 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46405 return (long)tmp;
46407 machine_mode mode = GET_MODE (op1);
46408 const struct real_format *fmt;
46409 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46410 rtx adj;
46412 /* load nextafter (0.5, 0.0) */
46413 fmt = REAL_MODE_FORMAT (mode);
46414 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46415 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46417 /* adj = copysign (0.5, op1) */
46418 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46419 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46421 /* adj = op1 + adj */
46422 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46424 /* op0 = (imode)adj */
46425 expand_fix (op0, adj, 0);
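/* nextafter (0.5, 0.0) is used instead of 0.5 to avoid a double rounding:
   e.g. for the largest double below 0.5 (0.5 - 2**-54), op1 + 0.5 would
   round up to 1.0 and the truncation would yield 1 instead of 0, whereas
   op1 + pred(0.5) stays below 1.0 and truncates to 0 as required.  */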
46428 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
46429 into OP0. */
46430 void
46431 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46433 /* C code for the stuff we're doing below (for do_floor):
46434 xi = (long)op1;
46435 xi -= (double)xi > op1 ? 1 : 0;
46436 return xi;
46438 machine_mode fmode = GET_MODE (op1);
46439 machine_mode imode = GET_MODE (op0);
46440 rtx ireg, freg, tmp;
46441 rtx_code_label *label;
46443 /* reg = (long)op1 */
46444 ireg = gen_reg_rtx (imode);
46445 expand_fix (ireg, op1, 0);
46447 /* freg = (double)reg */
46448 freg = gen_reg_rtx (fmode);
46449 expand_float (freg, ireg, 0);
46451 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46452 label = ix86_expand_sse_compare_and_jump (UNLE,
46453 freg, op1, !do_floor);
46454 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46455 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46456 emit_move_insn (ireg, tmp);
46458 emit_label (label);
46459 LABEL_NUSES (label) = 1;
46461 emit_move_insn (op0, ireg);
46464 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46465 result in OPERAND0. */
46466 void
46467 ix86_expand_rint (rtx operand0, rtx operand1)
46469 /* C code for the stuff we're doing below:
46470 xa = fabs (operand1);
46471 if (!isless (xa, 2**52))
46472 return operand1;
46473 xa = xa + 2**52 - 2**52;
46474 return copysign (xa, operand1);
46476 machine_mode mode = GET_MODE (operand0);
46477 rtx res, xa, TWO52, mask;
46478 rtx_code_label *label;
46480 res = gen_reg_rtx (mode);
46481 emit_move_insn (res, operand1);
46483 /* xa = abs (operand1) */
46484 xa = ix86_expand_sse_fabs (res, &mask);
46486 /* if (!isless (xa, TWO52)) goto label; */
46487 TWO52 = ix86_gen_TWO52 (mode);
46488 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46490 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46491 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46493 ix86_sse_copysign_to_positive (res, xa, res, mask);
46495 emit_label (label);
46496 LABEL_NUSES (label) = 1;
46498 emit_move_insn (operand0, res);
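/* Unlike the floor/ceil/round expanders below, no compensation step is
   needed here: rounding in the current rounding mode is exactly the
   semantics rint requires, and the copysign merely restores the sign
   (so that e.g. -0.0 and small negative inputs keep their sign).  */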
46501 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46502 into OPERAND0, without relying on the 64-bit-only cvttsd2siq DImode truncation. */
46503 void
46504 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46506 /* C code for the stuff we expand below.
46507 double xa = fabs (x), x2;
46508 if (!isless (xa, TWO52))
46509 return x;
46510 xa = xa + TWO52 - TWO52;
46511 x2 = copysign (xa, x);
46512 Compensate. Floor:
46513 if (x2 > x)
46514 x2 -= 1;
46515 Compensate. Ceil:
46516 if (x2 < x)
46517 x2 -= -1;
46518 return x2;
46520 machine_mode mode = GET_MODE (operand0);
46521 rtx xa, TWO52, tmp, one, res, mask;
46522 rtx_code_label *label;
46524 TWO52 = ix86_gen_TWO52 (mode);
46526 /* Temporary for holding the result, initialized to the input
46527 operand to ease control flow. */
46528 res = gen_reg_rtx (mode);
46529 emit_move_insn (res, operand1);
46531 /* xa = abs (operand1) */
46532 xa = ix86_expand_sse_fabs (res, &mask);
46534 /* if (!isless (xa, TWO52)) goto label; */
46535 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46537 /* xa = xa + TWO52 - TWO52; */
46538 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46539 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46541 /* xa = copysign (xa, operand1) */
46542 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46544 /* generate 1.0 or -1.0 */
46545 one = force_reg (mode,
46546 const_double_from_real_value (do_floor
46547 ? dconst1 : dconstm1, mode));
46549 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46550 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46551 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46552 /* We always need to subtract here to preserve signed zero. */
46553 tmp = expand_simple_binop (mode, MINUS,
46554 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46555 emit_move_insn (res, tmp);
46557 emit_label (label);
46558 LABEL_NUSES (label) = 1;
46560 emit_move_insn (operand0, res);
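/* E.g. floor (2.7): xa = 2.7 + 2**52 - 2**52 = 3.0; since 3.0 > 2.7 the
   compare mask selects 1.0 and the result is 3.0 - 1.0 = 2.0.  For ceil
   ONE is -1.0 and the comparison operands are swapped, so subtracting
   the masked value adds 1.0 whenever xa < operand1.  */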
46563 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46564 into OPERAND0. */
46565 void
46566 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46568 /* C code for the stuff we expand below.
46569 double xa = fabs (x), x2;
46570 if (!isless (xa, TWO52))
46571 return x;
46572 x2 = (double)(long)x;
46573 Compensate. Floor:
46574 if (x2 > x)
46575 x2 -= 1;
46576 Compensate. Ceil:
46577 if (x2 < x)
46578 x2 += 1;
46579 if (HONOR_SIGNED_ZEROS (mode))
46580 return copysign (x2, x);
46581 return x2;
46583 machine_mode mode = GET_MODE (operand0);
46584 rtx xa, xi, TWO52, tmp, one, res, mask;
46585 rtx_code_label *label;
46587 TWO52 = ix86_gen_TWO52 (mode);
46589 /* Temporary for holding the result, initialized to the input
46590 operand to ease control flow. */
46591 res = gen_reg_rtx (mode);
46592 emit_move_insn (res, operand1);
46594 /* xa = abs (operand1) */
46595 xa = ix86_expand_sse_fabs (res, &mask);
46597 /* if (!isless (xa, TWO52)) goto label; */
46598 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46600 /* xa = (double)(long)x */
46601 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46602 expand_fix (xi, res, 0);
46603 expand_float (xa, xi, 0);
46605 /* generate 1.0 */
46606 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46608 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46609 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46610 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46611 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46612 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46613 emit_move_insn (res, tmp);
46615 if (HONOR_SIGNED_ZEROS (mode))
46616 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46618 emit_label (label);
46619 LABEL_NUSES (label) = 1;
46621 emit_move_insn (operand0, res);
46624 /* Expand SSE sequence for computing round from OPERAND1 storing
46625 into OPERAND0. Sequence that works without relying on DImode truncation
46626 via cvttsd2siq that is only available on 64bit targets. */
46627 void
46628 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46630 /* C code for the stuff we expand below.
46631 double xa = fabs (x), xa2, x2;
46632 if (!isless (xa, TWO52))
46633 return x;
46634 Using the absolute value and copying back sign makes
46635 -0.0 -> -0.0 correct.
46636 xa2 = xa + TWO52 - TWO52;
46637 Compensate.
46638 dxa = xa2 - xa;
46639 if (dxa <= -0.5)
46640 xa2 += 1;
46641 else if (dxa > 0.5)
46642 xa2 -= 1;
46643 x2 = copysign (xa2, x);
46644 return x2;
46646 machine_mode mode = GET_MODE (operand0);
46647 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46648 rtx_code_label *label;
46650 TWO52 = ix86_gen_TWO52 (mode);
46652 /* Temporary for holding the result, initialized to the input
46653 operand to ease control flow. */
46654 res = gen_reg_rtx (mode);
46655 emit_move_insn (res, operand1);
46657 /* xa = abs (operand1) */
46658 xa = ix86_expand_sse_fabs (res, &mask);
46660 /* if (!isless (xa, TWO52)) goto label; */
46661 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46663 /* xa2 = xa + TWO52 - TWO52; */
46664 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46665 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46667 /* dxa = xa2 - xa; */
46668 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46670 /* generate 0.5, 1.0 and -0.5 */
46671 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46672 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46673 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46674 0, OPTAB_DIRECT);
46676 /* Compensate. */
46677 tmp = gen_reg_rtx (mode);
46678 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46679 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46680 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46681 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46682 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46683 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46684 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46685 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46687 /* res = copysign (xa2, operand1) */
46688 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46690 emit_label (label);
46691 LABEL_NUSES (label) = 1;
46693 emit_move_insn (operand0, res);
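/* E.g. round (2.5): xa2 = 2.5 + 2**52 - 2**52 = 2.0 (ties to even), so
   dxa = -0.5 and the dxa <= -0.5 mask adds 1.0, giving 3.0, the
   round-half-away-from-zero result.  For 3.5, xa2 = 4.0 and dxa = 0.5;
   neither mask fires and 4.0 is already correct.  */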
46696 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46697 into OPERAND0. */
46698 void
46699 ix86_expand_trunc (rtx operand0, rtx operand1)
46701 /* C code for SSE variant we expand below.
46702 double xa = fabs (x), x2;
46703 if (!isless (xa, TWO52))
46704 return x;
46705 x2 = (double)(long)x;
46706 if (HONOR_SIGNED_ZEROS (mode))
46707 return copysign (x2, x);
46708 return x2;
46710 machine_mode mode = GET_MODE (operand0);
46711 rtx xa, xi, TWO52, res, mask;
46712 rtx_code_label *label;
46714 TWO52 = ix86_gen_TWO52 (mode);
46716 /* Temporary for holding the result, initialized to the input
46717 operand to ease control flow. */
46718 res = gen_reg_rtx (mode);
46719 emit_move_insn (res, operand1);
46721 /* xa = abs (operand1) */
46722 xa = ix86_expand_sse_fabs (res, &mask);
46724 /* if (!isless (xa, TWO52)) goto label; */
46725 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46727 /* x = (double)(long)x */
46728 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46729 expand_fix (xi, res, 0);
46730 expand_float (res, xi, 0);
46732 if (HONOR_SIGNED_ZEROS (mode))
46733 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46735 emit_label (label);
46736 LABEL_NUSES (label) = 1;
46738 emit_move_insn (operand0, res);
46741 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46742 into OPERAND0, without relying on the 64-bit-only cvttsd2siq DImode truncation. */
46743 void
46744 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46746 machine_mode mode = GET_MODE (operand0);
46747 rtx xa, mask, TWO52, one, res, smask, tmp;
46748 rtx_code_label *label;
46750 /* C code for SSE variant we expand below.
46751 double xa = fabs (x), x2;
46752 if (!isless (xa, TWO52))
46753 return x;
46754 xa2 = xa + TWO52 - TWO52;
46755 Compensate:
46756 if (xa2 > xa)
46757 xa2 -= 1.0;
46758 x2 = copysign (xa2, x);
46759 return x2;
46762 TWO52 = ix86_gen_TWO52 (mode);
46764 /* Temporary for holding the result, initialized to the input
46765 operand to ease control flow. */
46766 res = gen_reg_rtx (mode);
46767 emit_move_insn (res, operand1);
46769 /* xa = abs (operand1) */
46770 xa = ix86_expand_sse_fabs (res, &smask);
46772 /* if (!isless (xa, TWO52)) goto label; */
46773 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46775 /* res = xa + TWO52 - TWO52; */
46776 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46777 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46778 emit_move_insn (res, tmp);
46780 /* generate 1.0 */
46781 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46783 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46784 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46785 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46786 tmp = expand_simple_binop (mode, MINUS,
46787 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46788 emit_move_insn (res, tmp);
46790 /* res = copysign (res, operand1) */
46791 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46793 emit_label (label);
46794 LABEL_NUSES (label) = 1;
46796 emit_move_insn (operand0, res);
46799 /* Expand SSE sequence for computing round from OPERAND1 storing
46800 into OPERAND0. */
46801 void
46802 ix86_expand_round (rtx operand0, rtx operand1)
46804 /* C code for the stuff we're doing below:
46805 double xa = fabs (x);
46806 if (!isless (xa, TWO52))
46807 return x;
46808 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46809 return copysign (xa, x);
46811 machine_mode mode = GET_MODE (operand0);
46812 rtx res, TWO52, xa, xi, half, mask;
46813 rtx_code_label *label;
46814 const struct real_format *fmt;
46815 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46817 /* Temporary for holding the result, initialized to the input
46818 operand to ease control flow. */
46819 res = gen_reg_rtx (mode);
46820 emit_move_insn (res, operand1);
46822 TWO52 = ix86_gen_TWO52 (mode);
46823 xa = ix86_expand_sse_fabs (res, &mask);
46824 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46826 /* load nextafter (0.5, 0.0) */
46827 fmt = REAL_MODE_FORMAT (mode);
46828 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46829 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46831 /* xa = xa + 0.5 */
46832 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46833 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46835 /* xa = (double)(int64_t)xa */
46836 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46837 expand_fix (xi, xa, 0);
46838 expand_float (xa, xi, 0);
46840 /* res = copysign (xa, operand1) */
46841 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46843 emit_label (label);
46844 LABEL_NUSES (label) = 1;
46846 emit_move_insn (operand0, res);
46849 /* Expand SSE sequence for computing round
46850 from OP1 storing into OP0 using sse4 round insn. */
46851 void
46852 ix86_expand_round_sse4 (rtx op0, rtx op1)
46854 machine_mode mode = GET_MODE (op0);
46855 rtx e1, e2, res, half;
46856 const struct real_format *fmt;
46857 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46858 rtx (*gen_copysign) (rtx, rtx, rtx);
46859 rtx (*gen_round) (rtx, rtx, rtx);
46861 switch (mode)
46863 case SFmode:
46864 gen_copysign = gen_copysignsf3;
46865 gen_round = gen_sse4_1_roundsf2;
46866 break;
46867 case DFmode:
46868 gen_copysign = gen_copysigndf3;
46869 gen_round = gen_sse4_1_rounddf2;
46870 break;
46871 default:
46872 gcc_unreachable ();
46875 /* round (a) = trunc (a + copysign (0.5, a)) */
46877 /* load nextafter (0.5, 0.0) */
46878 fmt = REAL_MODE_FORMAT (mode);
46879 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46880 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46881 half = const_double_from_real_value (pred_half, mode);
46883 /* e1 = copysign (0.5, op1) */
46884 e1 = gen_reg_rtx (mode);
46885 emit_insn (gen_copysign (e1, half, op1));
46887 /* e2 = op1 + e1 */
46888 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46890 /* res = trunc (e2) */
46891 res = gen_reg_rtx (mode);
46892 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46894 emit_move_insn (op0, res);
46898 /* Table of valid machine attributes. */
46899 static const struct attribute_spec ix86_attribute_table[] =
46901 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46902 affects_type_identity } */
46903 /* Stdcall attribute says callee is responsible for popping arguments
46904 if they are not variable. */
46905 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46906 true },
46907 /* Fastcall attribute says callee is responsible for popping arguments
46908 if they are not variable. */
46909 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46910 true },
46911 /* Thiscall attribute says callee is responsible for popping arguments
46912 if they are not variable. */
46913 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46914 true },
46915 /* Cdecl attribute says the callee is a normal C declaration. */
46916 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46917 true },
46918 /* Regparm attribute specifies how many integer arguments are to be
46919 passed in registers. */
46920 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46921 true },
46922 /* Sseregparm attribute says we are using x86_64 calling conventions
46923 for FP arguments. */
46924 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46925 true },
46926 /* The transactional memory builtins are implicitly regparm or fastcall
46927 depending on the ABI. Override the generic do-nothing attribute that
46928 these builtins were declared with. */
46929 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46930 true },
46931 /* force_align_arg_pointer says this function realigns the stack at entry. */
46932 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46933 false, true, true, ix86_handle_cconv_attribute, false },
46934 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46935 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46936 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46937 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46938 false },
46939 #endif
46940 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46941 false },
46942 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46943 false },
46944 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46945 SUBTARGET_ATTRIBUTE_TABLE,
46946 #endif
46947 /* ms_abi and sysv_abi calling convention function attributes. */
46948 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46949 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46950 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46951 false },
46952 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46953 ix86_handle_callee_pop_aggregate_return, true },
46954 /* End element. */
46955 { NULL, 0, 0, false, false, false, NULL, false }
46958 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46959 static int
46960 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46961 tree vectype, int)
46963 unsigned elements;
46965 switch (type_of_cost)
46967 case scalar_stmt:
46968 return ix86_cost->scalar_stmt_cost;
46970 case scalar_load:
46971 return ix86_cost->scalar_load_cost;
46973 case scalar_store:
46974 return ix86_cost->scalar_store_cost;
46976 case vector_stmt:
46977 return ix86_cost->vec_stmt_cost;
46979 case vector_load:
46980 return ix86_cost->vec_align_load_cost;
46982 case vector_store:
46983 return ix86_cost->vec_store_cost;
46985 case vec_to_scalar:
46986 return ix86_cost->vec_to_scalar_cost;
46988 case scalar_to_vec:
46989 return ix86_cost->scalar_to_vec_cost;
46991 case unaligned_load:
46992 case unaligned_store:
46993 return ix86_cost->vec_unalign_load_cost;
46995 case cond_branch_taken:
46996 return ix86_cost->cond_taken_branch_cost;
46998 case cond_branch_not_taken:
46999 return ix86_cost->cond_not_taken_branch_cost;
47001 case vec_perm:
47002 case vec_promote_demote:
47003 return ix86_cost->vec_stmt_cost;
47005 case vec_construct:
47006 elements = TYPE_VECTOR_SUBPARTS (vectype);
47007 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
47009 default:
47010 gcc_unreachable ();
47014 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47015 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47016 insn every time. */
47018 static GTY(()) rtx_insn *vselect_insn;
47020 /* Initialize vselect_insn. */
47022 static void
47023 init_vselect_insn (void)
47025 unsigned i;
47026 rtx x;
47028 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47029 for (i = 0; i < MAX_VECT_LEN; ++i)
47030 XVECEXP (x, 0, i) = const0_rtx;
47031 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47032 const0_rtx), x);
47033 x = gen_rtx_SET (const0_rtx, x);
47034 start_sequence ();
47035 vselect_insn = emit_insn (x);
47036 end_sequence ();
47039 /* Construct (set target (vec_select op0 (parallel perm))) and
47040 return true if that's a valid instruction in the active ISA. */
47042 static bool
47043 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47044 unsigned nelt, bool testing_p)
47046 unsigned int i;
47047 rtx x, save_vconcat;
47048 int icode;
47050 if (vselect_insn == NULL_RTX)
47051 init_vselect_insn ();
47053 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47054 PUT_NUM_ELEM (XVEC (x, 0), nelt);
47055 for (i = 0; i < nelt; ++i)
47056 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47057 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47058 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47059 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47060 SET_DEST (PATTERN (vselect_insn)) = target;
47061 icode = recog_memoized (vselect_insn);
47063 if (icode >= 0 && !testing_p)
47064 emit_insn (copy_rtx (PATTERN (vselect_insn)));
47066 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47067 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47068 INSN_CODE (vselect_insn) = -1;
47070 return icode >= 0;
47073 /* Similar, but generate a vec_concat from op0 and op1 as well. */
47075 static bool
47076 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47077 const unsigned char *perm, unsigned nelt,
47078 bool testing_p)
47080 machine_mode v2mode;
47081 rtx x;
47082 bool ok;
47084 if (vselect_insn == NULL_RTX)
47085 init_vselect_insn ();
47087 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47088 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47089 PUT_MODE (x, v2mode);
47090 XEXP (x, 0) = op0;
47091 XEXP (x, 1) = op1;
47092 ok = expand_vselect (target, x, perm, nelt, testing_p);
47093 XEXP (x, 0) = const0_rtx;
47094 XEXP (x, 1) = const0_rtx;
47095 return ok;
47098 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47099 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
47101 static bool
47102 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47104 machine_mode mmode, vmode = d->vmode;
47105 unsigned i, mask, nelt = d->nelt;
47106 rtx target, op0, op1, maskop, x;
47107 rtx rperm[32], vperm;
47109 if (d->one_operand_p)
47110 return false;
47111 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47112 && (TARGET_AVX512BW
47113 || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
47115 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47117 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47119 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47121 else
47122 return false;
47124 /* This is a blend, not a permute. Elements must stay in their
47125 respective lanes. */
47126 for (i = 0; i < nelt; ++i)
47128 unsigned e = d->perm[i];
47129 if (!(e == i || e == i + nelt))
47130 return false;
47133 if (d->testing_p)
47134 return true;
47136 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
47137 decision should be extracted elsewhere, so that we only try that
47138 sequence once all budget==3 options have been tried. */
47139 target = d->target;
47140 op0 = d->op0;
47141 op1 = d->op1;
47142 mask = 0;
47144 switch (vmode)
47146 case V8DFmode:
47147 case V16SFmode:
47148 case V4DFmode:
47149 case V8SFmode:
47150 case V2DFmode:
47151 case V4SFmode:
47152 case V8HImode:
47153 case V8SImode:
47154 case V32HImode:
47155 case V64QImode:
47156 case V16SImode:
47157 case V8DImode:
47158 for (i = 0; i < nelt; ++i)
47159 mask |= (d->perm[i] >= nelt) << i;
47160 break;
47162 case V2DImode:
47163 for (i = 0; i < 2; ++i)
47164 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47165 vmode = V8HImode;
47166 goto do_subreg;
47168 case V4SImode:
47169 for (i = 0; i < 4; ++i)
47170 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
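/* E.g. a V4SImode blend {0, 5, 2, 7} takes elements 1 and 3 from op1,
   giving mask = (3 << 2) | (3 << 6) = 0xcc, which selects the
   corresponding HImode pairs once the operands are viewed as V8HImode
   below.  */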
47171 vmode = V8HImode;
47172 goto do_subreg;
47174 case V16QImode:
47175 /* See if bytes move in pairs so we can use pblendw with
47176 an immediate argument, rather than pblendvb with a vector
47177 argument. */
47178 for (i = 0; i < 16; i += 2)
47179 if (d->perm[i] + 1 != d->perm[i + 1])
47181 use_pblendvb:
47182 for (i = 0; i < nelt; ++i)
47183 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47185 finish_pblendvb:
47186 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47187 vperm = force_reg (vmode, vperm);
47189 if (GET_MODE_SIZE (vmode) == 16)
47190 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47191 else
47192 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47193 if (target != d->target)
47194 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47195 return true;
47198 for (i = 0; i < 8; ++i)
47199 mask |= (d->perm[i * 2] >= 16) << i;
47200 vmode = V8HImode;
47201 /* FALLTHRU */
47203 do_subreg:
47204 target = gen_reg_rtx (vmode);
47205 op0 = gen_lowpart (vmode, op0);
47206 op1 = gen_lowpart (vmode, op1);
47207 break;
47209 case V32QImode:
47210 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47211 for (i = 0; i < 32; i += 2)
47212 if (d->perm[i] + 1 != d->perm[i + 1])
47213 goto use_pblendvb;
47214 /* See if bytes move in quadruplets. If yes, vpblendd
47215 with immediate can be used. */
47216 for (i = 0; i < 32; i += 4)
47217 if (d->perm[i] + 2 != d->perm[i + 2])
47218 break;
47219 if (i < 32)
47221 /* See if bytes move the same in both lanes. If yes,
47222 vpblendw with immediate can be used. */
47223 for (i = 0; i < 16; i += 2)
47224 if (d->perm[i] + 16 != d->perm[i + 16])
47225 goto use_pblendvb;
47227 /* Use vpblendw. */
47228 for (i = 0; i < 16; ++i)
47229 mask |= (d->perm[i * 2] >= 32) << i;
47230 vmode = V16HImode;
47231 goto do_subreg;
47234 /* Use vpblendd. */
47235 for (i = 0; i < 8; ++i)
47236 mask |= (d->perm[i * 4] >= 32) << i;
47237 vmode = V8SImode;
47238 goto do_subreg;
47240 case V16HImode:
47241 /* See if words move in pairs. If yes, vpblendd can be used. */
47242 for (i = 0; i < 16; i += 2)
47243 if (d->perm[i] + 1 != d->perm[i + 1])
47244 break;
47245 if (i < 16)
47247 /* See if words move the same in both lanes. If not,
47248 vpblendvb must be used. */
47249 for (i = 0; i < 8; i++)
47250 if (d->perm[i] + 8 != d->perm[i + 8])
47252 /* Use vpblendvb. */
47253 for (i = 0; i < 32; ++i)
47254 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47256 vmode = V32QImode;
47257 nelt = 32;
47258 target = gen_reg_rtx (vmode);
47259 op0 = gen_lowpart (vmode, op0);
47260 op1 = gen_lowpart (vmode, op1);
47261 goto finish_pblendvb;
47264 /* Use vpblendw. */
47265 for (i = 0; i < 16; ++i)
47266 mask |= (d->perm[i] >= 16) << i;
47267 break;
47270 /* Use vpblendd. */
47271 for (i = 0; i < 8; ++i)
47272 mask |= (d->perm[i * 2] >= 16) << i;
47273 vmode = V8SImode;
47274 goto do_subreg;
47276 case V4DImode:
47277 /* Use vpblendd. */
47278 for (i = 0; i < 4; ++i)
47279 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47280 vmode = V8SImode;
47281 goto do_subreg;
47283 default:
47284 gcc_unreachable ();
47287 switch (vmode)
47289 case V8DFmode:
47290 case V8DImode:
47291 mmode = QImode;
47292 break;
47293 case V16SFmode:
47294 case V16SImode:
47295 mmode = HImode;
47296 break;
47297 case V32HImode:
47298 mmode = SImode;
47299 break;
47300 case V64QImode:
47301 mmode = DImode;
47302 break;
47303 default:
47304 mmode = VOIDmode;
47307 if (mmode != VOIDmode)
47308 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47309 else
47310 maskop = GEN_INT (mask);
47312 /* This matches five different patterns with the different modes. */
47313 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47314 x = gen_rtx_SET (target, x);
47315 emit_insn (x);
47316 if (target != d->target)
47317 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47319 return true;
47322 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47323 in terms of the variable form of vpermilps.
47325 Note that we will have already failed the immediate input vpermilps,
47326 which requires that the high and low part shuffle be identical; the
47327 variable form doesn't require that. */
47329 static bool
47330 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47332 rtx rperm[8], vperm;
47333 unsigned i;
47335 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47336 return false;
47338 /* We can only permute within the 128-bit lane. */
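/* E.g. {1 0 3 2 5 4 7 6} stays within each 128-bit lane and is accepted,
   while {4 5 6 7 0 1 2 3} crosses lanes and is rejected here.  */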
47339 for (i = 0; i < 8; ++i)
47341 unsigned e = d->perm[i];
47342 if (i < 4 ? e >= 4 : e < 4)
47343 return false;
47346 if (d->testing_p)
47347 return true;
47349 for (i = 0; i < 8; ++i)
47351 unsigned e = d->perm[i];
47353 /* Within each 128-bit lane, the elements of op0 are numbered
47354 from 0 and the elements of op1 are numbered from 4. */
47355 if (e >= 8 + 4)
47356 e -= 8;
47357 else if (e >= 4)
47358 e -= 4;
47360 rperm[i] = GEN_INT (e);
47363 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47364 vperm = force_reg (V8SImode, vperm);
47365 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47367 return true;
47370 /* Return true if the permutation D can be performed as a VMODE
47371 permutation instead. */
47373 static bool
47374 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47376 unsigned int i, j, chunk;
47378 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47379 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47380 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47381 return false;
47383 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47384 return true;
47386 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47387 for (i = 0; i < d->nelt; i += chunk)
47388 if (d->perm[i] & (chunk - 1))
47389 return false;
47390 else
47391 for (j = 1; j < chunk; ++j)
47392 if (d->perm[i] + j != d->perm[i + j])
47393 return false;
47395 return true;
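/* E.g. the V16QImode permutation {4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11}
   moves whole 4-byte chunks, so it is also the V4SImode permutation
   {1 0 3 2} and can be tested or expanded in that mode instead.  */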
47398 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47399 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47401 static bool
47402 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47404 unsigned i, nelt, eltsz, mask;
47405 unsigned char perm[64];
47406 machine_mode vmode = V16QImode;
47407 rtx rperm[64], vperm, target, op0, op1;
47409 nelt = d->nelt;
47411 if (!d->one_operand_p)
47413 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47415 if (TARGET_AVX2
47416 && valid_perm_using_mode_p (V2TImode, d))
47418 if (d->testing_p)
47419 return true;
47421 /* Use vperm2i128 insn. The pattern uses
47422 V4DImode instead of V2TImode. */
47423 target = d->target;
47424 if (d->vmode != V4DImode)
47425 target = gen_reg_rtx (V4DImode);
47426 op0 = gen_lowpart (V4DImode, d->op0);
47427 op1 = gen_lowpart (V4DImode, d->op1);
47428 rperm[0]
47429 = GEN_INT ((d->perm[0] / (nelt / 2))
47430 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47431 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47432 if (target != d->target)
47433 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47434 return true;
47436 return false;
47439 else
47441 if (GET_MODE_SIZE (d->vmode) == 16)
47443 if (!TARGET_SSSE3)
47444 return false;
47446 else if (GET_MODE_SIZE (d->vmode) == 32)
47448 if (!TARGET_AVX2)
47449 return false;
47451 /* V4DImode should already be handled through
47452 expand_vselect by the vpermq instruction. */
47453 gcc_assert (d->vmode != V4DImode);
47455 vmode = V32QImode;
47456 if (d->vmode == V8SImode
47457 || d->vmode == V16HImode
47458 || d->vmode == V32QImode)
47460 /* First see if vpermq can be used for
47461 V8SImode/V16HImode/V32QImode. */
47462 if (valid_perm_using_mode_p (V4DImode, d))
47464 for (i = 0; i < 4; i++)
47465 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47466 if (d->testing_p)
47467 return true;
47468 target = gen_reg_rtx (V4DImode);
47469 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47470 perm, 4, false))
47472 emit_move_insn (d->target,
47473 gen_lowpart (d->vmode, target));
47474 return true;
47476 return false;
47479 /* Next see if vpermd can be used. */
47480 if (valid_perm_using_mode_p (V8SImode, d))
47481 vmode = V8SImode;
47483 /* Or if vpermps can be used. */
47484 else if (d->vmode == V8SFmode)
47485 vmode = V8SImode;
47487 if (vmode == V32QImode)
47489 /* vpshufb only works intra lanes; it is not
47490 possible to shuffle bytes in between the lanes. */
47491 for (i = 0; i < nelt; ++i)
47492 if ((d->perm[i] ^ i) & (nelt / 2))
47493 return false;
47496 else if (GET_MODE_SIZE (d->vmode) == 64)
47498 if (!TARGET_AVX512BW)
47499 return false;
47501 /* If vpermq didn't work, vpshufb won't work either. */
47502 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47503 return false;
47505 vmode = V64QImode;
47506 if (d->vmode == V16SImode
47507 || d->vmode == V32HImode
47508 || d->vmode == V64QImode)
47510 /* First see if vpermq can be used for
47511 V16SImode/V32HImode/V64QImode. */
47512 if (valid_perm_using_mode_p (V8DImode, d))
47514 for (i = 0; i < 8; i++)
47515 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47516 if (d->testing_p)
47517 return true;
47518 target = gen_reg_rtx (V8DImode);
47519 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47520 perm, 8, false))
47522 emit_move_insn (d->target,
47523 gen_lowpart (d->vmode, target));
47524 return true;
47526 return false;
47529 /* Next see if vpermd can be used. */
47530 if (valid_perm_using_mode_p (V16SImode, d))
47531 vmode = V16SImode;
47533 /* Or if vpermps can be used. */
47534 else if (d->vmode == V16SFmode)
47535 vmode = V16SImode;
47536 if (vmode == V64QImode)
47538 /* vpshufb only works intra lanes; it is not
47539 possible to shuffle bytes in between the lanes. */
47540 for (i = 0; i < nelt; ++i)
47541 if ((d->perm[i] ^ i) & (nelt / 4))
47542 return false;
47545 else
47546 return false;
47549 if (d->testing_p)
47550 return true;
47552 if (vmode == V8SImode)
47553 for (i = 0; i < 8; ++i)
47554 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47555 else if (vmode == V16SImode)
47556 for (i = 0; i < 16; ++i)
47557 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47558 else
47560 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47561 if (!d->one_operand_p)
47562 mask = 2 * nelt - 1;
47563 else if (vmode == V16QImode)
47564 mask = nelt - 1;
47565 else if (vmode == V64QImode)
47566 mask = nelt / 4 - 1;
47567 else
47568 mask = nelt / 2 - 1;
47570 for (i = 0; i < nelt; ++i)
47572 unsigned j, e = d->perm[i] & mask;
47573 for (j = 0; j < eltsz; ++j)
47574 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47578 vperm = gen_rtx_CONST_VECTOR (vmode,
47579 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47580 vperm = force_reg (vmode, vperm);
47582 target = d->target;
47583 if (d->vmode != vmode)
47584 target = gen_reg_rtx (vmode);
47585 op0 = gen_lowpart (vmode, d->op0);
47586 if (d->one_operand_p)
47588 if (vmode == V16QImode)
47589 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47590 else if (vmode == V32QImode)
47591 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47592 else if (vmode == V64QImode)
47593 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47594 else if (vmode == V8SFmode)
47595 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47596 else if (vmode == V8SImode)
47597 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47598 else if (vmode == V16SFmode)
47599 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47600 else if (vmode == V16SImode)
47601 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47602 else
47603 gcc_unreachable ();
47605 else
47607 op1 = gen_lowpart (vmode, d->op1);
47608 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47610 if (target != d->target)
47611 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47613 return true;
47616 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47617 in a single instruction. */
47619 static bool
47620 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47622 unsigned i, nelt = d->nelt;
47623 unsigned char perm2[MAX_VECT_LEN];
47625 /* Check plain VEC_SELECT first, because AVX has instructions that could
47626 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47627 input where SEL+CONCAT may not. */
47628 if (d->one_operand_p)
47630 int mask = nelt - 1;
47631 bool identity_perm = true;
47632 bool broadcast_perm = true;
47634 for (i = 0; i < nelt; i++)
47636 perm2[i] = d->perm[i] & mask;
47637 if (perm2[i] != i)
47638 identity_perm = false;
47639 if (perm2[i])
47640 broadcast_perm = false;
47643 if (identity_perm)
47645 if (!d->testing_p)
47646 emit_move_insn (d->target, d->op0);
47647 return true;
47649 else if (broadcast_perm && TARGET_AVX2)
47651 /* Use vpbroadcast{b,w,d}. */
47652 rtx (*gen) (rtx, rtx) = NULL;
47653 switch (d->vmode)
47655 case V64QImode:
47656 if (TARGET_AVX512BW)
47657 gen = gen_avx512bw_vec_dupv64qi_1;
47658 break;
47659 case V32QImode:
47660 gen = gen_avx2_pbroadcastv32qi_1;
47661 break;
47662 case V32HImode:
47663 if (TARGET_AVX512BW)
47664 gen = gen_avx512bw_vec_dupv32hi_1;
47665 break;
47666 case V16HImode:
47667 gen = gen_avx2_pbroadcastv16hi_1;
47668 break;
47669 case V16SImode:
47670 if (TARGET_AVX512F)
47671 gen = gen_avx512f_vec_dupv16si_1;
47672 break;
47673 case V8SImode:
47674 gen = gen_avx2_pbroadcastv8si_1;
47675 break;
47676 case V16QImode:
47677 gen = gen_avx2_pbroadcastv16qi;
47678 break;
47679 case V8HImode:
47680 gen = gen_avx2_pbroadcastv8hi;
47681 break;
47682 case V16SFmode:
47683 if (TARGET_AVX512F)
47684 gen = gen_avx512f_vec_dupv16sf_1;
47685 break;
47686 case V8SFmode:
47687 gen = gen_avx2_vec_dupv8sf_1;
47688 break;
47689 case V8DFmode:
47690 if (TARGET_AVX512F)
47691 gen = gen_avx512f_vec_dupv8df_1;
47692 break;
47693 case V8DImode:
47694 if (TARGET_AVX512F)
47695 gen = gen_avx512f_vec_dupv8di_1;
47696 break;
47697 /* For other modes prefer other shuffles this function creates. */
47698 default: break;
47700 if (gen != NULL)
47702 if (!d->testing_p)
47703 emit_insn (gen (d->target, d->op0));
47704 return true;
47708 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47709 return true;
47711 /* There are plenty of patterns in sse.md that are written for
47712 SEL+CONCAT and are not replicated for a single op. Perhaps
47713 that should be changed, to avoid the nastiness here. */
47715 /* Recognize interleave style patterns, which means incrementing
47716 every other permutation operand. */
47717 for (i = 0; i < nelt; i += 2)
47719 perm2[i] = d->perm[i] & mask;
47720 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47722 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47723 d->testing_p))
47724 return true;
47726 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47727 if (nelt >= 4)
47729 for (i = 0; i < nelt; i += 4)
47731 perm2[i + 0] = d->perm[i + 0] & mask;
47732 perm2[i + 1] = d->perm[i + 1] & mask;
47733 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47734 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47737 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47738 d->testing_p))
47739 return true;
47743 /* Finally, try the fully general two operand permute. */
47744 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47745 d->testing_p))
47746 return true;
47748 /* Recognize interleave style patterns with reversed operands. */
47749 if (!d->one_operand_p)
47751 for (i = 0; i < nelt; ++i)
47753 unsigned e = d->perm[i];
47754 if (e >= nelt)
47755 e -= nelt;
47756 else
47757 e += nelt;
47758 perm2[i] = e;
47761 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47762 d->testing_p))
47763 return true;
47766 /* Try the SSE4.1 blend variable merge instructions. */
47767 if (expand_vec_perm_blend (d))
47768 return true;
47770 /* Try one of the AVX vpermil variable permutations. */
47771 if (expand_vec_perm_vpermil (d))
47772 return true;
47774 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47775 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47776 if (expand_vec_perm_pshufb (d))
47777 return true;
47779 /* Try the AVX2 vpalignr instruction. */
47780 if (expand_vec_perm_palignr (d, true))
47781 return true;
47783 /* Try the AVX512F vpermi2 instructions. */
47784 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47785 return true;
47787 return false;
47790 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47791 in terms of a pair of pshuflw + pshufhw instructions. */
47793 static bool
47794 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47796 unsigned char perm2[MAX_VECT_LEN];
47797 unsigned i;
47798 bool ok;
47800 if (d->vmode != V8HImode || !d->one_operand_p)
47801 return false;
47803 /* The two permutations only operate in 64-bit lanes. */
47804 for (i = 0; i < 4; ++i)
47805 if (d->perm[i] >= 4)
47806 return false;
47807 for (i = 4; i < 8; ++i)
47808 if (d->perm[i] < 4)
47809 return false;
47811 if (d->testing_p)
47812 return true;
47814 /* Emit the pshuflw. */
47815 memcpy (perm2, d->perm, 4);
47816 for (i = 4; i < 8; ++i)
47817 perm2[i] = i;
47818 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47819 gcc_assert (ok);
47821 /* Emit the pshufhw. */
47822 memcpy (perm2 + 4, d->perm + 4, 4);
47823 for (i = 0; i < 4; ++i)
47824 perm2[i] = i;
47825 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47826 gcc_assert (ok);
47828 return true;
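/* E.g. {3 2 1 0 5 4 7 6}: the pshuflw reverses the low quadword's words
   (selector {3 2 1 0 4 5 6 7}) and the pshufhw then swaps the pairs in
   the high quadword (selector {0 1 2 3 5 4 7 6}).  */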
47831 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47832 the permutation using the SSSE3 palignr instruction. This succeeds
47833 when all of the elements in PERM fit within one vector and we merely
47834 need to shift them down so that a single vector permutation has a
47835 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47836 the vpalignr instruction itself can perform the requested permutation. */
47838 static bool
47839 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47841 unsigned i, nelt = d->nelt;
47842 unsigned min, max, minswap, maxswap;
47843 bool in_order, ok, swap = false;
47844 rtx shift, target;
47845 struct expand_vec_perm_d dcopy;
47847 /* Even with AVX, palignr only operates on 128-bit vectors;
47848 in AVX2 palignr operates on both 128-bit lanes.
47849 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47850 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47851 return false;
47853 min = 2 * nelt;
47854 max = 0;
47855 minswap = 2 * nelt;
47856 maxswap = 0;
47857 for (i = 0; i < nelt; ++i)
47859 unsigned e = d->perm[i];
47860 unsigned eswap = d->perm[i] ^ nelt;
47861 if (GET_MODE_SIZE (d->vmode) == 32)
47863 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47864 eswap = e ^ (nelt / 2);
47866 if (e < min)
47867 min = e;
47868 if (e > max)
47869 max = e;
47870 if (eswap < minswap)
47871 minswap = eswap;
47872 if (eswap > maxswap)
47873 maxswap = eswap;
47875 if (min == 0
47876 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47878 if (d->one_operand_p
47879 || minswap == 0
47880 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47881 ? nelt / 2 : nelt))
47882 return false;
47883 swap = true;
47884 min = minswap;
47885 max = maxswap;
47888 /* Given that we have SSSE3, we know we'll be able to implement the
47889 single operand permutation after the palignr with pshufb for
47890 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47891 first. */
47892 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47893 return true;
47895 dcopy = *d;
47896 if (swap)
47898 dcopy.op0 = d->op1;
47899 dcopy.op1 = d->op0;
47900 for (i = 0; i < nelt; ++i)
47901 dcopy.perm[i] ^= nelt;
47904 in_order = true;
47905 for (i = 0; i < nelt; ++i)
47907 unsigned e = dcopy.perm[i];
47908 if (GET_MODE_SIZE (d->vmode) == 32
47909 && e >= nelt
47910 && (e & (nelt / 2 - 1)) < min)
47911 e = e - min - (nelt / 2);
47912 else
47913 e = e - min;
47914 if (e != i)
47915 in_order = false;
47916 dcopy.perm[i] = e;
47918 dcopy.one_operand_p = true;
47920 if (single_insn_only_p && !in_order)
47921 return false;
47923 /* For AVX2, test whether we can permute the result in one instruction. */
47924 if (d->testing_p)
47926 if (in_order)
47927 return true;
47928 dcopy.op1 = dcopy.op0;
47929 return expand_vec_perm_1 (&dcopy);
47932 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47933 if (GET_MODE_SIZE (d->vmode) == 16)
47935 target = gen_reg_rtx (TImode);
47936 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47937 gen_lowpart (TImode, dcopy.op0), shift));
47939 else
47941 target = gen_reg_rtx (V2TImode);
47942 emit_insn (gen_avx2_palignrv2ti (target,
47943 gen_lowpart (V2TImode, dcopy.op1),
47944 gen_lowpart (V2TImode, dcopy.op0),
47945 shift));
47948 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47950 /* Test for the degenerate case where the alignment by itself
47951 produces the desired permutation. */
47952 if (in_order)
47954 emit_move_insn (d->target, dcopy.op0);
47955 return true;
47958 ok = expand_vec_perm_1 (&dcopy);
47959 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47961 return ok;
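/* E.g. the V16QImode permutation {3 4 ... 18} takes 16 consecutive bytes
   starting at byte 3 of the op0/op1 concatenation: min = 3 and max = 18,
   so a single palignr by 3 bytes already leaves the elements in order
   and no follow-up one-operand shuffle is needed.  */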
47964 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47965 the permutation using the SSE4_1 pblendv instruction. Potentially
47966 reduces the permutation from 2 pshufb insns plus an or to 1 pshufb plus a pblendv. */
47968 static bool
47969 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47971 unsigned i, which, nelt = d->nelt;
47972 struct expand_vec_perm_d dcopy, dcopy1;
47973 machine_mode vmode = d->vmode;
47974 bool ok;
47976 /* Use the same checks as in expand_vec_perm_blend. */
47977 if (d->one_operand_p)
47978 return false;
47979 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47981 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47983 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47985 else
47986 return false;
47988 /* Figure out which permutation elements do not stay in their
47989 respective lanes. */
47990 for (i = 0, which = 0; i < nelt; ++i)
47992 unsigned e = d->perm[i];
47993 if (e != i)
47994 which |= (e < nelt ? 1 : 2);
47996 /* We can pblend the part where elements do not stay in their
47997 respective lanes only when these elements all come from the same
47998 half of the permutation.
47999 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not at their respective
48000 positions, but both 8 and 9 are >= 8.
48001 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not at their respective
48002 positions, and 8 is >= 8 but 2 is not. */
48003 if (which != 1 && which != 2)
48004 return false;
48005 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48006 return true;
48008 /* First we apply a one-operand permutation to the part whose
48009 elements do not stay in their respective lanes. */
48010 dcopy = *d;
48011 if (which == 2)
48012 dcopy.op0 = dcopy.op1 = d->op1;
48013 else
48014 dcopy.op0 = dcopy.op1 = d->op0;
48015 if (!d->testing_p)
48016 dcopy.target = gen_reg_rtx (vmode);
48017 dcopy.one_operand_p = true;
48019 for (i = 0; i < nelt; ++i)
48020 dcopy.perm[i] = d->perm[i] & (nelt - 1);
48022 ok = expand_vec_perm_1 (&dcopy);
48023 if (GET_MODE_SIZE (vmode) != 16 && !ok)
48024 return false;
48025 else
48026 gcc_assert (ok);
48027 if (d->testing_p)
48028 return true;
48030 /* Next we put permuted elements into their positions. */
48031 dcopy1 = *d;
48032 if (which == 2)
48033 dcopy1.op1 = dcopy.target;
48034 else
48035 dcopy1.op0 = dcopy.target;
48037 for (i = 0; i < nelt; ++i)
48038 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48040 ok = expand_vec_perm_blend (&dcopy1);
48041 gcc_assert (ok);
48043 return true;
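/* E.g. for the V8HImode permutation {0 1 8 3 4 5 9 7}, which == 2:
   the one-operand permutation rearranges op1 so that its elements 0 and 1
   sit at positions 2 and 6, and the final blend {0 1 10 3 4 5 14 7}
   merges those two positions into op0.  */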
48046 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48048 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48049 a two vector permutation into a single vector permutation by using
48050 an interleave operation to merge the vectors. */
48052 static bool
48053 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48055 struct expand_vec_perm_d dremap, dfinal;
48056 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48057 unsigned HOST_WIDE_INT contents;
48058 unsigned char remap[2 * MAX_VECT_LEN];
48059 rtx_insn *seq;
48060 bool ok, same_halves = false;
48062 if (GET_MODE_SIZE (d->vmode) == 16)
48064 if (d->one_operand_p)
48065 return false;
48067 else if (GET_MODE_SIZE (d->vmode) == 32)
48069 if (!TARGET_AVX)
48070 return false;
48071 /* For 32-byte modes allow even d->one_operand_p.
48072 The lack of cross-lane shuffling in some instructions
48073 might prevent a single insn shuffle. */
48074 dfinal = *d;
48075 dfinal.testing_p = true;
48076 /* If expand_vec_perm_interleave3 can expand this into
48077 a 3 insn sequence, give up and let it be expanded as
48078 a 3 insn sequence instead.  While that is one insn longer,
48079 it doesn't need a memory operand, and in the common
48080 case where the interleave low and high permutations
48081 with the same operands are adjacent it needs only 4 insns
48082 for both after CSE.  */
48083 if (expand_vec_perm_interleave3 (&dfinal))
48084 return false;
48086 else
48087 return false;
48089 /* Examine from whence the elements come. */
48090 contents = 0;
48091 for (i = 0; i < nelt; ++i)
48092 contents |= HOST_WIDE_INT_1U << d->perm[i];
48094 memset (remap, 0xff, sizeof (remap));
48095 dremap = *d;
48097 if (GET_MODE_SIZE (d->vmode) == 16)
48099 unsigned HOST_WIDE_INT h1, h2, h3, h4;
48101 /* Split the two input vectors into 4 halves. */
48102 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
48103 h2 = h1 << nelt2;
48104 h3 = h2 << nelt2;
48105 h4 = h3 << nelt2;
48107 /* If the elements all come from the low halves, use interleave low, and
48108 similarly interleave high for the high halves.  For elements from
48109 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
48110 if ((contents & (h1 | h3)) == contents)
48112 /* punpckl* */
48113 for (i = 0; i < nelt2; ++i)
48115 remap[i] = i * 2;
48116 remap[i + nelt] = i * 2 + 1;
48117 dremap.perm[i * 2] = i;
48118 dremap.perm[i * 2 + 1] = i + nelt;
48120 if (!TARGET_SSE2 && d->vmode == V4SImode)
48121 dremap.vmode = V4SFmode;
48123 else if ((contents & (h2 | h4)) == contents)
48125 /* punpckh* */
48126 for (i = 0; i < nelt2; ++i)
48128 remap[i + nelt2] = i * 2;
48129 remap[i + nelt + nelt2] = i * 2 + 1;
48130 dremap.perm[i * 2] = i + nelt2;
48131 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48133 if (!TARGET_SSE2 && d->vmode == V4SImode)
48134 dremap.vmode = V4SFmode;
48136 else if ((contents & (h1 | h4)) == contents)
48138 /* shufps */
48139 for (i = 0; i < nelt2; ++i)
48141 remap[i] = i;
48142 remap[i + nelt + nelt2] = i + nelt2;
48143 dremap.perm[i] = i;
48144 dremap.perm[i + nelt2] = i + nelt + nelt2;
48146 if (nelt != 4)
48148 /* shufpd */
48149 dremap.vmode = V2DImode;
48150 dremap.nelt = 2;
48151 dremap.perm[0] = 0;
48152 dremap.perm[1] = 3;
48155 else if ((contents & (h2 | h3)) == contents)
48157 /* shufps */
48158 for (i = 0; i < nelt2; ++i)
48160 remap[i + nelt2] = i;
48161 remap[i + nelt] = i + nelt2;
48162 dremap.perm[i] = i + nelt2;
48163 dremap.perm[i + nelt2] = i + nelt;
48165 if (nelt != 4)
48167 /* shufpd */
48168 dremap.vmode = V2DImode;
48169 dremap.nelt = 2;
48170 dremap.perm[0] = 1;
48171 dremap.perm[1] = 2;
48174 else
48175 return false;
48177 else
48179 unsigned int nelt4 = nelt / 4, nzcnt = 0;
48180 unsigned HOST_WIDE_INT q[8];
48181 unsigned int nonzero_halves[4];
48183 /* Split the two input vectors into 8 quarters. */
48184 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
48185 for (i = 1; i < 8; ++i)
48186 q[i] = q[0] << (nelt4 * i);
48187 for (i = 0; i < 4; ++i)
48188 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48190 nonzero_halves[nzcnt] = i;
48191 ++nzcnt;
48194 if (nzcnt == 1)
48196 gcc_assert (d->one_operand_p);
48197 nonzero_halves[1] = nonzero_halves[0];
48198 same_halves = true;
48200 else if (d->one_operand_p)
48202 gcc_assert (nonzero_halves[0] == 0);
48203 gcc_assert (nonzero_halves[1] == 1);
48206 if (nzcnt <= 2)
48208 if (d->perm[0] / nelt2 == nonzero_halves[1])
48210 /* Attempt to increase the likelihood that dfinal
48211 shuffle will be intra-lane. */
48212 std::swap (nonzero_halves[0], nonzero_halves[1]);
48215 /* vperm2f128 or vperm2i128. */
48216 for (i = 0; i < nelt2; ++i)
48218 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48219 remap[i + nonzero_halves[0] * nelt2] = i;
48220 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48221 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48224 if (d->vmode != V8SFmode
48225 && d->vmode != V4DFmode
48226 && d->vmode != V8SImode)
48228 dremap.vmode = V8SImode;
48229 dremap.nelt = 8;
48230 for (i = 0; i < 4; ++i)
48232 dremap.perm[i] = i + nonzero_halves[0] * 4;
48233 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48237 else if (d->one_operand_p)
48238 return false;
48239 else if (TARGET_AVX2
48240 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48242 /* vpunpckl* */
48243 for (i = 0; i < nelt4; ++i)
48245 remap[i] = i * 2;
48246 remap[i + nelt] = i * 2 + 1;
48247 remap[i + nelt2] = i * 2 + nelt2;
48248 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48249 dremap.perm[i * 2] = i;
48250 dremap.perm[i * 2 + 1] = i + nelt;
48251 dremap.perm[i * 2 + nelt2] = i + nelt2;
48252 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48255 else if (TARGET_AVX2
48256 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48258 /* vpunpckh* */
48259 for (i = 0; i < nelt4; ++i)
48261 remap[i + nelt4] = i * 2;
48262 remap[i + nelt + nelt4] = i * 2 + 1;
48263 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48264 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48265 dremap.perm[i * 2] = i + nelt4;
48266 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48267 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48268 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48271 else
48272 return false;
48275 /* Use the remapping array set up above to move the elements from their
48276 swizzled locations into their final destinations. */
48277 dfinal = *d;
48278 for (i = 0; i < nelt; ++i)
48280 unsigned e = remap[d->perm[i]];
48281 gcc_assert (e < nelt);
48282 /* If same_halves is true, both halves of the remapped vector are the
48283 same. Avoid cross-lane accesses if possible. */
48284 if (same_halves && i >= nelt2)
48286 gcc_assert (e < nelt2);
48287 dfinal.perm[i] = e + nelt2;
48289 else
48290 dfinal.perm[i] = e;
48292 if (!d->testing_p)
48294 dremap.target = gen_reg_rtx (dremap.vmode);
48295 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48297 dfinal.op1 = dfinal.op0;
48298 dfinal.one_operand_p = true;
48300 /* Test if the final remap can be done with a single insn. For V4SFmode or
48301 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48302 start_sequence ();
48303 ok = expand_vec_perm_1 (&dfinal);
48304 seq = get_insns ();
48305 end_sequence ();
48307 if (!ok)
48308 return false;
48310 if (d->testing_p)
48311 return true;
48313 if (dremap.vmode != dfinal.vmode)
48315 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48316 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48319 ok = expand_vec_perm_1 (&dremap);
48320 gcc_assert (ok);
48322 emit_insn (seq);
48323 return true;
48326 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48327 a single vector cross-lane permutation into vpermq followed
48328 by any of the single insn permutations. */
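/* Illustrative note (not part of the original source): CONTENTS[0] and
   CONTENTS[1] record which of the four 64-bit quarters of the input each
   half of the result draws from (for V32QImode a quarter holds 8 bytes).
   The loop below rejects the permutation if either half needs more than
   two quarters, since vpermq can place at most two quarters into each
   128-bit half before the in-lane shuffle finishes the job.  */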
48330 static bool
48331 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48333 struct expand_vec_perm_d dremap, dfinal;
48334 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48335 unsigned contents[2];
48336 bool ok;
48338 if (!(TARGET_AVX2
48339 && (d->vmode == V32QImode || d->vmode == V16HImode)
48340 && d->one_operand_p))
48341 return false;
48343 contents[0] = 0;
48344 contents[1] = 0;
48345 for (i = 0; i < nelt2; ++i)
48347 contents[0] |= 1u << (d->perm[i] / nelt4);
48348 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48351 for (i = 0; i < 2; ++i)
48353 unsigned int cnt = 0;
48354 for (j = 0; j < 4; ++j)
48355 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48356 return false;
48359 if (d->testing_p)
48360 return true;
48362 dremap = *d;
48363 dremap.vmode = V4DImode;
48364 dremap.nelt = 4;
48365 dremap.target = gen_reg_rtx (V4DImode);
48366 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48367 dremap.op1 = dremap.op0;
48368 dremap.one_operand_p = true;
48369 for (i = 0; i < 2; ++i)
48371 unsigned int cnt = 0;
48372 for (j = 0; j < 4; ++j)
48373 if ((contents[i] & (1u << j)) != 0)
48374 dremap.perm[2 * i + cnt++] = j;
48375 for (; cnt < 2; ++cnt)
48376 dremap.perm[2 * i + cnt] = 0;
48379 dfinal = *d;
48380 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48381 dfinal.op1 = dfinal.op0;
48382 dfinal.one_operand_p = true;
48383 for (i = 0, j = 0; i < nelt; ++i)
48385 if (i == nelt2)
48386 j = 2;
48387 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48388 if ((d->perm[i] / nelt4) == dremap.perm[j])
48390 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48391 dfinal.perm[i] |= nelt4;
48392 else
48393 gcc_unreachable ();
48396 ok = expand_vec_perm_1 (&dremap);
48397 gcc_assert (ok);
48399 ok = expand_vec_perm_1 (&dfinal);
48400 gcc_assert (ok);
48402 return true;
48405 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
48406 a vector permutation using two instructions, vperm2f128 or
48407 vperm2i128, followed by any single in-lane permutation. */
48409 static bool
48410 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48412 struct expand_vec_perm_d dfirst, dsecond;
48413 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48414 bool ok;
48416 if (!TARGET_AVX
48417 || GET_MODE_SIZE (d->vmode) != 32
48418 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48419 return false;
48421 dsecond = *d;
48422 dsecond.one_operand_p = false;
48423 dsecond.testing_p = true;
48425 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48426 immediate. For perm < 16 the second permutation uses
48427 d->op0 as first operand, for perm >= 16 it uses d->op1
48428 as first operand. The second operand is the result of
48429 vperm2[fi]128. */
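/* Illustrative note (not part of the original source): PERM encodes the
   source lane for the low result half in bits 0-1 and for the high result
   half in bits 2-3, with bit 4 choosing which original operand feeds the
   second shuffle.  E.g. perm == 6 (low half from lane 2, high half from
   lane 1) gives the immediate ((6 << 2) | 6) & 0x33 == 0x12.  */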
48430 for (perm = 0; perm < 32; perm++)
48432 /* Ignore permutations which do not move anything cross-lane. */
48433 if (perm < 16)
48435 /* The second shuffle for e.g. V4DFmode has
48436 0123 and ABCD operands.
48437 Ignore AB23, as 23 is already in the second lane
48438 of the first operand. */
48439 if ((perm & 0xc) == (1 << 2)) continue;
48440 /* And 01CD, as 01 is in the first lane of the first
48441 operand. */
48442 if ((perm & 3) == 0) continue;
48443 /* And 4567, as then the vperm2[fi]128 doesn't change
48444 anything on the original 4567 second operand. */
48445 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48447 else
48449 /* The second shuffle for e.g. V4DFmode has
48450 4567 and ABCD operands.
48451 Ignore AB67, as 67 is already in the second lane
48452 of the first operand. */
48453 if ((perm & 0xc) == (3 << 2)) continue;
48454 /* And 45CD, as 45 is in the first lane of the first
48455 operand. */
48456 if ((perm & 3) == 2) continue;
48457 /* And 0123, as then the vperm2[fi]128 doesn't change
48458 anything on the original 0123 first operand. */
48459 if ((perm & 0xf) == (1 << 2)) continue;
48462 for (i = 0; i < nelt; i++)
48464 j = d->perm[i] / nelt2;
48465 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48466 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48467 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48468 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48469 else
48470 break;
48473 if (i == nelt)
48475 start_sequence ();
48476 ok = expand_vec_perm_1 (&dsecond);
48477 end_sequence ();
48479 else
48480 ok = false;
48482 if (ok)
48484 if (d->testing_p)
48485 return true;
48487 /* Found a usable second shuffle. dfirst will be
48488 vperm2f128 on d->op0 and d->op1. */
48489 dsecond.testing_p = false;
48490 dfirst = *d;
48491 dfirst.target = gen_reg_rtx (d->vmode);
48492 for (i = 0; i < nelt; i++)
48493 dfirst.perm[i] = (i & (nelt2 - 1))
48494 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48496 canonicalize_perm (&dfirst);
48497 ok = expand_vec_perm_1 (&dfirst);
48498 gcc_assert (ok);
48500 /* And dsecond is some single insn shuffle, taking
48501 d->op0 and result of vperm2f128 (if perm < 16) or
48502 d->op1 and result of vperm2f128 (otherwise). */
48503 if (perm >= 16)
48504 dsecond.op0 = dsecond.op1;
48505 dsecond.op1 = dfirst.target;
48507 ok = expand_vec_perm_1 (&dsecond);
48508 gcc_assert (ok);
48510 return true;
48513 /* For one operand, the only useful vperm2f128 permutation is 0x01
48514 aka lanes swap. */
48515 if (d->one_operand_p)
48516 return false;
48519 return false;
48522 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48523 a two vector permutation using 2 intra-lane interleave insns
48524 and cross-lane shuffle for 32-byte vectors. */
48526 static bool
48527 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48529 unsigned i, nelt;
48530 rtx (*gen) (rtx, rtx, rtx);
48532 if (d->one_operand_p)
48533 return false;
48534 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48536 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48538 else
48539 return false;
48541 nelt = d->nelt;
48542 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48543 return false;
48544 for (i = 0; i < nelt; i += 2)
48545 if (d->perm[i] != d->perm[0] + i / 2
48546 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48547 return false;
48549 if (d->testing_p)
48550 return true;
48552 switch (d->vmode)
48554 case V32QImode:
48555 if (d->perm[0])
48556 gen = gen_vec_interleave_highv32qi;
48557 else
48558 gen = gen_vec_interleave_lowv32qi;
48559 break;
48560 case V16HImode:
48561 if (d->perm[0])
48562 gen = gen_vec_interleave_highv16hi;
48563 else
48564 gen = gen_vec_interleave_lowv16hi;
48565 break;
48566 case V8SImode:
48567 if (d->perm[0])
48568 gen = gen_vec_interleave_highv8si;
48569 else
48570 gen = gen_vec_interleave_lowv8si;
48571 break;
48572 case V4DImode:
48573 if (d->perm[0])
48574 gen = gen_vec_interleave_highv4di;
48575 else
48576 gen = gen_vec_interleave_lowv4di;
48577 break;
48578 case V8SFmode:
48579 if (d->perm[0])
48580 gen = gen_vec_interleave_highv8sf;
48581 else
48582 gen = gen_vec_interleave_lowv8sf;
48583 break;
48584 case V4DFmode:
48585 if (d->perm[0])
48586 gen = gen_vec_interleave_highv4df;
48587 else
48588 gen = gen_vec_interleave_lowv4df;
48589 break;
48590 default:
48591 gcc_unreachable ();
48594 emit_insn (gen (d->target, d->op0, d->op1));
48595 return true;
48598 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48599 a single vector permutation using a single intra-lane vector
48600 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48601 the non-swapped and swapped vectors together. */
48603 static bool
48604 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48606 struct expand_vec_perm_d dfirst, dsecond;
48607 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48608 rtx_insn *seq;
48609 bool ok;
48610 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48612 if (!TARGET_AVX
48613 || TARGET_AVX2
48614 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48615 || !d->one_operand_p)
48616 return false;
48618 dfirst = *d;
48619 for (i = 0; i < nelt; i++)
48620 dfirst.perm[i] = 0xff;
48621 for (i = 0, msk = 0; i < nelt; i++)
48623 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48624 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48625 return false;
48626 dfirst.perm[j] = d->perm[i];
48627 if (j != i)
48628 msk |= (1 << i);
48630 for (i = 0; i < nelt; i++)
48631 if (dfirst.perm[i] == 0xff)
48632 dfirst.perm[i] = i;
48634 if (!d->testing_p)
48635 dfirst.target = gen_reg_rtx (dfirst.vmode);
48637 start_sequence ();
48638 ok = expand_vec_perm_1 (&dfirst);
48639 seq = get_insns ();
48640 end_sequence ();
48642 if (!ok)
48643 return false;
48645 if (d->testing_p)
48646 return true;
48648 emit_insn (seq);
48650 dsecond = *d;
48651 dsecond.op0 = dfirst.target;
48652 dsecond.op1 = dfirst.target;
48653 dsecond.one_operand_p = true;
48654 dsecond.target = gen_reg_rtx (dsecond.vmode);
48655 for (i = 0; i < nelt; i++)
48656 dsecond.perm[i] = i ^ nelt2;
48658 ok = expand_vec_perm_1 (&dsecond);
48659 gcc_assert (ok);
48661 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48662 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48663 return true;
48666 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48667 permutation using two vperm2f128, followed by a vshufpd insn blending
48668 the two vectors together. */
48670 static bool
48671 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48673 struct expand_vec_perm_d dfirst, dsecond, dthird;
48674 bool ok;
48676 if (!TARGET_AVX || (d->vmode != V4DFmode))
48677 return false;
48679 if (d->testing_p)
48680 return true;
48682 dfirst = *d;
48683 dsecond = *d;
48684 dthird = *d;
48686 dfirst.perm[0] = (d->perm[0] & ~1);
48687 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48688 dfirst.perm[2] = (d->perm[2] & ~1);
48689 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48690 dsecond.perm[0] = (d->perm[1] & ~1);
48691 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48692 dsecond.perm[2] = (d->perm[3] & ~1);
48693 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48694 dthird.perm[0] = (d->perm[0] % 2);
48695 dthird.perm[1] = (d->perm[1] % 2) + 4;
48696 dthird.perm[2] = (d->perm[2] % 2) + 2;
48697 dthird.perm[3] = (d->perm[3] % 2) + 6;
48699 dfirst.target = gen_reg_rtx (dfirst.vmode);
48700 dsecond.target = gen_reg_rtx (dsecond.vmode);
48701 dthird.op0 = dfirst.target;
48702 dthird.op1 = dsecond.target;
48703 dthird.one_operand_p = false;
48705 canonicalize_perm (&dfirst);
48706 canonicalize_perm (&dsecond);
48708 ok = expand_vec_perm_1 (&dfirst)
48709 && expand_vec_perm_1 (&dsecond)
48710 && expand_vec_perm_1 (&dthird);
48712 gcc_assert (ok);
48714 return true;
48717 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48718 permutation with two pshufb insns and an ior. We should have already
48719 failed all two instruction sequences. */
48721 static bool
48722 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48724 rtx rperm[2][16], vperm, l, h, op, m128;
48725 unsigned int i, nelt, eltsz;
48727 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48728 return false;
48729 gcc_assert (!d->one_operand_p);
48731 if (d->testing_p)
48732 return true;
48734 nelt = d->nelt;
48735 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48737 /* Generate two permutation masks. If the required element is within
48738 the given vector it is shuffled into the proper lane. If the required
48739 element is in the other vector, force a zero into the lane by setting
48740 bit 7 in the permutation mask. */
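/* Illustrative note (not part of the original source): for V16QImode, if
   d->perm[0] is 17 (byte 1 of op1), E becomes 1 with WHICH == 1, so
   rperm[1][0] is set to 1 while rperm[0][0] stays -128; the pshufb on
   op0 therefore writes a zero into byte 0, the pshufb on op1 writes
   op1's byte 1 there, and the ior below combines the two results.  */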
48741 m128 = GEN_INT (-128);
48742 for (i = 0; i < nelt; ++i)
48744 unsigned j, e = d->perm[i];
48745 unsigned which = (e >= nelt);
48746 if (e >= nelt)
48747 e -= nelt;
48749 for (j = 0; j < eltsz; ++j)
48751 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48752 rperm[1-which][i*eltsz + j] = m128;
48756 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48757 vperm = force_reg (V16QImode, vperm);
48759 l = gen_reg_rtx (V16QImode);
48760 op = gen_lowpart (V16QImode, d->op0);
48761 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48763 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48764 vperm = force_reg (V16QImode, vperm);
48766 h = gen_reg_rtx (V16QImode);
48767 op = gen_lowpart (V16QImode, d->op1);
48768 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48770 op = d->target;
48771 if (d->vmode != V16QImode)
48772 op = gen_reg_rtx (V16QImode);
48773 emit_insn (gen_iorv16qi3 (op, l, h));
48774 if (op != d->target)
48775 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48777 return true;
48780 /* Implement arbitrary permutation of one V32QImode or V16QImode operand
48781 with two vpshufb insns, vpermq and vpor. We should have already failed
48782 all two or three instruction sequences. */
48784 static bool
48785 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48787 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48788 unsigned int i, nelt, eltsz;
48790 if (!TARGET_AVX2
48791 || !d->one_operand_p
48792 || (d->vmode != V32QImode && d->vmode != V16HImode))
48793 return false;
48795 if (d->testing_p)
48796 return true;
48798 nelt = d->nelt;
48799 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48801 /* Generate two permutation masks. If the required element is within
48802 the same lane, it is shuffled in. If the required element is from the
48803 other lane, force a zero by setting bit 7 in the permutation mask.
48804 The other mask has non-negative elements where an element is
48805 requested from the other lane, but the element is also moved to the
48806 other lane, so that the result of vpshufb can have the two V2TImode
48807 halves swapped. */
48808 m128 = GEN_INT (-128);
48809 for (i = 0; i < nelt; ++i)
48811 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48812 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48814 for (j = 0; j < eltsz; ++j)
48816 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48817 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48821 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48822 vperm = force_reg (V32QImode, vperm);
48824 h = gen_reg_rtx (V32QImode);
48825 op = gen_lowpart (V32QImode, d->op0);
48826 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48828 /* Swap the 128-bit lanes of h into hp. */
48829 hp = gen_reg_rtx (V4DImode);
48830 op = gen_lowpart (V4DImode, h);
48831 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48832 const1_rtx));
48834 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48835 vperm = force_reg (V32QImode, vperm);
48837 l = gen_reg_rtx (V32QImode);
48838 op = gen_lowpart (V32QImode, d->op0);
48839 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48841 op = d->target;
48842 if (d->vmode != V32QImode)
48843 op = gen_reg_rtx (V32QImode);
48844 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48845 if (op != d->target)
48846 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48848 return true;
48851 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48852 and extract-odd permutations of two V32QImode or V16QImode operands
48853 with two vpshufb insns, vpor and vpermq. We should have already
48854 failed all two or three instruction sequences. */
48856 static bool
48857 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48859 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48860 unsigned int i, nelt, eltsz;
48862 if (!TARGET_AVX2
48863 || d->one_operand_p
48864 || (d->vmode != V32QImode && d->vmode != V16HImode))
48865 return false;
48867 for (i = 0; i < d->nelt; ++i)
48868 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48869 return false;
48871 if (d->testing_p)
48872 return true;
48874 nelt = d->nelt;
48875 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48877 /* Generate two permutation masks. In the first permutation mask
48878 the first quarter will contain indexes for the first half
48879 of the op0, the second quarter will contain bit 7 set, third quarter
48880 will contain indexes for the second half of the op0 and the
48881 last quarter bit 7 set. In the second permutation mask
48882 the first quarter will contain bit 7 set, the second quarter
48883 indexes for the first half of the op1, the third quarter bit 7 set
48884 and last quarter indexes for the second half of the op1.
48885 I.e. the first mask e.g. for V32QImode extract even will be:
48886 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48887 (all values masked with 0xf except for -128) and second mask
48888 for extract even will be
48889 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48890 m128 = GEN_INT (-128);
48891 for (i = 0; i < nelt; ++i)
48893 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48894 unsigned which = d->perm[i] >= nelt;
48895 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48897 for (j = 0; j < eltsz; ++j)
48899 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48900 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48904 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48905 vperm = force_reg (V32QImode, vperm);
48907 l = gen_reg_rtx (V32QImode);
48908 op = gen_lowpart (V32QImode, d->op0);
48909 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48911 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48912 vperm = force_reg (V32QImode, vperm);
48914 h = gen_reg_rtx (V32QImode);
48915 op = gen_lowpart (V32QImode, d->op1);
48916 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48918 ior = gen_reg_rtx (V32QImode);
48919 emit_insn (gen_iorv32qi3 (ior, l, h));
48921 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48922 op = gen_reg_rtx (V4DImode);
48923 ior = gen_lowpart (V4DImode, ior);
48924 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48925 const1_rtx, GEN_INT (3)));
48926 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48928 return true;
48931 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48932 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48933 with two "and" and "pack" or two "shift" and "pack" insns. We should
48934 have already failed all two instruction sequences. */
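/* Illustrative note (not part of the original source): for V16QImode
   extract-even, the inputs viewed as V8HImode are ANDed with 0x00ff,
   clearing the odd bytes, and packuswb then narrows the two inputs back
   to bytes, giving op0's even bytes followed by op1's even bytes (no
   saturation occurs because each word is already in 0..255).  For
   extract-odd a logical right shift by 8 replaces the AND, moving the
   odd bytes into the low byte positions before the pack.  */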
48936 static bool
48937 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48939 rtx op, dop0, dop1, t, rperm[16];
48940 unsigned i, odd, c, s, nelt = d->nelt;
48941 bool end_perm = false;
48942 machine_mode half_mode;
48943 rtx (*gen_and) (rtx, rtx, rtx);
48944 rtx (*gen_pack) (rtx, rtx, rtx);
48945 rtx (*gen_shift) (rtx, rtx, rtx);
48947 if (d->one_operand_p)
48948 return false;
48950 switch (d->vmode)
48952 case V8HImode:
48953 /* Required for "pack". */
48954 if (!TARGET_SSE4_1)
48955 return false;
48956 c = 0xffff;
48957 s = 16;
48958 half_mode = V4SImode;
48959 gen_and = gen_andv4si3;
48960 gen_pack = gen_sse4_1_packusdw;
48961 gen_shift = gen_lshrv4si3;
48962 break;
48963 case V16QImode:
48964 /* No check as all instructions are SSE2. */
48965 c = 0xff;
48966 s = 8;
48967 half_mode = V8HImode;
48968 gen_and = gen_andv8hi3;
48969 gen_pack = gen_sse2_packuswb;
48970 gen_shift = gen_lshrv8hi3;
48971 break;
48972 case V16HImode:
48973 if (!TARGET_AVX2)
48974 return false;
48975 c = 0xffff;
48976 s = 16;
48977 half_mode = V8SImode;
48978 gen_and = gen_andv8si3;
48979 gen_pack = gen_avx2_packusdw;
48980 gen_shift = gen_lshrv8si3;
48981 end_perm = true;
48982 break;
48983 case V32QImode:
48984 if (!TARGET_AVX2)
48985 return false;
48986 c = 0xff;
48987 s = 8;
48988 half_mode = V16HImode;
48989 gen_and = gen_andv16hi3;
48990 gen_pack = gen_avx2_packuswb;
48991 gen_shift = gen_lshrv16hi3;
48992 end_perm = true;
48993 break;
48994 default:
48995 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48996 general shuffles. */
48997 return false;
49000 /* Check that permutation is even or odd. */
49001 odd = d->perm[0];
49002 if (odd > 1)
49003 return false;
49005 for (i = 1; i < nelt; ++i)
49006 if (d->perm[i] != 2 * i + odd)
49007 return false;
49009 if (d->testing_p)
49010 return true;
49012 dop0 = gen_reg_rtx (half_mode);
49013 dop1 = gen_reg_rtx (half_mode);
49014 if (odd == 0)
49016 for (i = 0; i < nelt / 2; i++)
49017 rperm[i] = GEN_INT (c);
49018 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49019 t = force_reg (half_mode, t);
49020 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49021 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49023 else
49025 emit_insn (gen_shift (dop0,
49026 gen_lowpart (half_mode, d->op0),
49027 GEN_INT (s)));
49028 emit_insn (gen_shift (dop1,
49029 gen_lowpart (half_mode, d->op1),
49030 GEN_INT (s)));
49032 /* For the AVX2 256-bit case we need to permute the pack result. */
49033 if (TARGET_AVX2 && end_perm)
49035 op = gen_reg_rtx (d->vmode);
49036 t = gen_reg_rtx (V4DImode);
49037 emit_insn (gen_pack (op, dop0, dop1));
49038 emit_insn (gen_avx2_permv4di_1 (t,
49039 gen_lowpart (V4DImode, op),
49040 const0_rtx,
49041 const2_rtx,
49042 const1_rtx,
49043 GEN_INT (3)));
49044 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49046 else
49047 emit_insn (gen_pack (d->target, dop0, dop1));
49049 return true;
49052 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
49053 and extract-odd permutations. */
49055 static bool
49056 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49058 rtx t1, t2, t3, t4, t5;
49060 switch (d->vmode)
49062 case V4DFmode:
49063 if (d->testing_p)
49064 break;
49065 t1 = gen_reg_rtx (V4DFmode);
49066 t2 = gen_reg_rtx (V4DFmode);
49068 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49069 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49070 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49072 /* Now an unpck[lh]pd will produce the result required. */
49073 if (odd)
49074 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49075 else
49076 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49077 emit_insn (t3);
49078 break;
49080 case V8SFmode:
49082 int mask = odd ? 0xdd : 0x88;
49084 if (d->testing_p)
49085 break;
49086 t1 = gen_reg_rtx (V8SFmode);
49087 t2 = gen_reg_rtx (V8SFmode);
49088 t3 = gen_reg_rtx (V8SFmode);
49090 /* Shuffle within the 128-bit lanes to produce:
49091 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49092 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49093 GEN_INT (mask)));
49095 /* Shuffle the lanes around to produce:
49096 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49097 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49098 GEN_INT (0x3)));
49100 /* Shuffle within the 128-bit lanes to produce:
49101 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49102 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49104 /* Shuffle within the 128-bit lanes to produce:
49105 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49106 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49108 /* Shuffle the lanes around to produce:
49109 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49110 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49111 GEN_INT (0x20)));
49113 break;
49115 case V2DFmode:
49116 case V4SFmode:
49117 case V2DImode:
49118 case V4SImode:
49119 /* These are always directly implementable by expand_vec_perm_1. */
49120 gcc_unreachable ();
49122 case V8HImode:
49123 if (TARGET_SSE4_1)
49124 return expand_vec_perm_even_odd_pack (d);
49125 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49126 return expand_vec_perm_pshufb2 (d);
49127 else
49129 if (d->testing_p)
49130 break;
49131 /* We need 2*log2(N)-1 operations to achieve odd/even
49132 with interleave. */
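/* Illustrative note (not part of the original source): for V8HImode N is
   8, so 2*log2(8)-1 == 5, matching the five interleave insns emitted
   below (two high/low pairs plus the final high or low interleave).  */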
49133 t1 = gen_reg_rtx (V8HImode);
49134 t2 = gen_reg_rtx (V8HImode);
49135 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49136 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49137 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49138 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49139 if (odd)
49140 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49141 else
49142 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49143 emit_insn (t3);
49145 break;
49147 case V16QImode:
49148 return expand_vec_perm_even_odd_pack (d);
49150 case V16HImode:
49151 case V32QImode:
49152 return expand_vec_perm_even_odd_pack (d);
49154 case V4DImode:
49155 if (!TARGET_AVX2)
49157 struct expand_vec_perm_d d_copy = *d;
49158 d_copy.vmode = V4DFmode;
49159 if (d->testing_p)
49160 d_copy.target = gen_lowpart (V4DFmode, d->target);
49161 else
49162 d_copy.target = gen_reg_rtx (V4DFmode);
49163 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49164 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49165 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49167 if (!d->testing_p)
49168 emit_move_insn (d->target,
49169 gen_lowpart (V4DImode, d_copy.target));
49170 return true;
49172 return false;
49175 if (d->testing_p)
49176 break;
49178 t1 = gen_reg_rtx (V4DImode);
49179 t2 = gen_reg_rtx (V4DImode);
49181 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49182 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49183 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49185 /* Now a vpunpck[lh]qdq will produce the result required. */
49186 if (odd)
49187 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49188 else
49189 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49190 emit_insn (t3);
49191 break;
49193 case V8SImode:
49194 if (!TARGET_AVX2)
49196 struct expand_vec_perm_d d_copy = *d;
49197 d_copy.vmode = V8SFmode;
49198 if (d->testing_p)
49199 d_copy.target = gen_lowpart (V8SFmode, d->target);
49200 else
49201 d_copy.target = gen_reg_rtx (V8SFmode);
49202 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49203 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49204 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49206 if (!d->testing_p)
49207 emit_move_insn (d->target,
49208 gen_lowpart (V8SImode, d_copy.target));
49209 return true;
49211 return false;
49214 if (d->testing_p)
49215 break;
49217 t1 = gen_reg_rtx (V8SImode);
49218 t2 = gen_reg_rtx (V8SImode);
49219 t3 = gen_reg_rtx (V4DImode);
49220 t4 = gen_reg_rtx (V4DImode);
49221 t5 = gen_reg_rtx (V4DImode);
49223 /* Shuffle the lanes around into
49224 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49225 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49226 gen_lowpart (V4DImode, d->op1),
49227 GEN_INT (0x20)));
49228 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49229 gen_lowpart (V4DImode, d->op1),
49230 GEN_INT (0x31)));
49232 /* Swap the 2nd and 3rd position in each lane into
49233 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49234 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49235 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49236 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49237 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49239 /* Now a vpunpck[lh]qdq will produce
49240 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f }. */
49241 if (odd)
49242 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49243 gen_lowpart (V4DImode, t2));
49244 else
49245 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49246 gen_lowpart (V4DImode, t2));
49247 emit_insn (t3);
49248 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49249 break;
49251 default:
49252 gcc_unreachable ();
49255 return true;
49258 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49259 extract-even and extract-odd permutations. */
49261 static bool
49262 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49264 unsigned i, odd, nelt = d->nelt;
49266 odd = d->perm[0];
49267 if (odd != 0 && odd != 1)
49268 return false;
49270 for (i = 1; i < nelt; ++i)
49271 if (d->perm[i] != 2 * i + odd)
49272 return false;
49274 return expand_vec_perm_even_odd_1 (d, odd);
49277 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49278 permutations. We assume that expand_vec_perm_1 has already failed. */
49280 static bool
49281 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49283 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49284 machine_mode vmode = d->vmode;
49285 unsigned char perm2[4];
49286 rtx op0 = d->op0, dest;
49287 bool ok;
49289 switch (vmode)
49291 case V4DFmode:
49292 case V8SFmode:
49293 /* These are special-cased in sse.md so that we can optionally
49294 use the vbroadcast instruction. They expand to two insns
49295 if the input happens to be in a register. */
49296 gcc_unreachable ();
49298 case V2DFmode:
49299 case V2DImode:
49300 case V4SFmode:
49301 case V4SImode:
49302 /* These are always implementable using standard shuffle patterns. */
49303 gcc_unreachable ();
49305 case V8HImode:
49306 case V16QImode:
49307 /* These can be implemented via interleave. We save one insn by
49308 stopping once we have promoted to V4SImode and then use pshufd. */
49309 if (d->testing_p)
49310 return true;
49313 rtx dest;
49314 rtx (*gen) (rtx, rtx, rtx)
49315 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49316 : gen_vec_interleave_lowv8hi;
49318 if (elt >= nelt2)
49320 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49321 : gen_vec_interleave_highv8hi;
49322 elt -= nelt2;
49324 nelt2 /= 2;
49326 dest = gen_reg_rtx (vmode);
49327 emit_insn (gen (dest, op0, op0));
49328 vmode = get_mode_wider_vector (vmode);
49329 op0 = gen_lowpart (vmode, dest);
49331 while (vmode != V4SImode);
49333 memset (perm2, elt, 4);
49334 dest = gen_reg_rtx (V4SImode);
49335 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49336 gcc_assert (ok);
49337 if (!d->testing_p)
49338 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49339 return true;
49341 case V64QImode:
49342 case V32QImode:
49343 case V16HImode:
49344 case V8SImode:
49345 case V4DImode:
49346 /* For AVX2 broadcasts of the first element vpbroadcast* or
49347 vpermq should be used by expand_vec_perm_1. */
49348 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49349 return false;
49351 default:
49352 gcc_unreachable ();
49356 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49357 broadcast permutations. */
49359 static bool
49360 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49362 unsigned i, elt, nelt = d->nelt;
49364 if (!d->one_operand_p)
49365 return false;
49367 elt = d->perm[0];
49368 for (i = 1; i < nelt; ++i)
49369 if (d->perm[i] != elt)
49370 return false;
49372 return expand_vec_perm_broadcast_1 (d);
49375 /* Implement arbitrary permutations of two V64QImode operands
49376 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49377 static bool
49378 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49380 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49381 return false;
49383 if (d->testing_p)
49384 return true;
49386 struct expand_vec_perm_d ds[2];
49387 rtx rperm[128], vperm, target0, target1;
49388 unsigned int i, nelt;
49389 machine_mode vmode;
49391 nelt = d->nelt;
49392 vmode = V64QImode;
49394 for (i = 0; i < 2; i++)
49396 ds[i] = *d;
49397 ds[i].vmode = V32HImode;
49398 ds[i].nelt = 32;
49399 ds[i].target = gen_reg_rtx (V32HImode);
49400 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49401 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49404 /* Prepare permutations such that the first one takes care of
49405 putting the even bytes into the right positions or one position
49406 higher (ds[0]) and the second one takes care of
49407 putting the odd bytes into the right positions or one position
49408 lower (ds[1]). */
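/* Illustrative note (not part of the original source): if d->perm[0] is 3,
   ds[0].perm[0] becomes 1, so the first word permutation moves the word
   holding bytes 2-3 into word 0, and rperm[0] becomes (0 & 14) + (3 & 1)
   == 1, so the following vpshufb picks byte 3 out of that word for
   result position 0.  */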
49410 for (i = 0; i < nelt; i++)
49412 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49413 if (i & 1)
49415 rperm[i] = constm1_rtx;
49416 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49418 else
49420 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49421 rperm[i + 64] = constm1_rtx;
49425 bool ok = expand_vec_perm_1 (&ds[0]);
49426 gcc_assert (ok);
49427 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49429 ok = expand_vec_perm_1 (&ds[1]);
49430 gcc_assert (ok);
49431 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49433 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49434 vperm = force_reg (vmode, vperm);
49435 target0 = gen_reg_rtx (V64QImode);
49436 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49438 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49439 vperm = force_reg (vmode, vperm);
49440 target1 = gen_reg_rtx (V64QImode);
49441 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49443 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49444 return true;
49447 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49448 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49449 all the shorter instruction sequences. */
49451 static bool
49452 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49454 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49455 unsigned int i, nelt, eltsz;
49456 bool used[4];
49458 if (!TARGET_AVX2
49459 || d->one_operand_p
49460 || (d->vmode != V32QImode && d->vmode != V16HImode))
49461 return false;
49463 if (d->testing_p)
49464 return true;
49466 nelt = d->nelt;
49467 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49469 /* Generate 4 permutation masks. If the required element is within
49470 the same lane, it is shuffled in. If the required element is from the
49471 other lane, force a zero by setting bit 7 in the permutation mask.
49472 The cross-lane masks have non-negative elements where an element is
49473 requested from the other lane, but the element is also moved to the
49474 other lane, so that the result of vpshufb can have the two V2TImode
49475 halves swapped. */
49476 m128 = GEN_INT (-128);
49477 for (i = 0; i < 32; ++i)
49479 rperm[0][i] = m128;
49480 rperm[1][i] = m128;
49481 rperm[2][i] = m128;
49482 rperm[3][i] = m128;
49484 used[0] = false;
49485 used[1] = false;
49486 used[2] = false;
49487 used[3] = false;
49488 for (i = 0; i < nelt; ++i)
49490 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49491 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49492 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49494 for (j = 0; j < eltsz; ++j)
49495 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49496 used[which] = true;
49499 for (i = 0; i < 2; ++i)
49501 if (!used[2 * i + 1])
49503 h[i] = NULL_RTX;
49504 continue;
49506 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49507 gen_rtvec_v (32, rperm[2 * i + 1]));
49508 vperm = force_reg (V32QImode, vperm);
49509 h[i] = gen_reg_rtx (V32QImode);
49510 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49511 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49514 /* Swap the 128-bit lanes of h[X]. */
49515 for (i = 0; i < 2; ++i)
49517 if (h[i] == NULL_RTX)
49518 continue;
49519 op = gen_reg_rtx (V4DImode);
49520 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49521 const2_rtx, GEN_INT (3), const0_rtx,
49522 const1_rtx));
49523 h[i] = gen_lowpart (V32QImode, op);
49526 for (i = 0; i < 2; ++i)
49528 if (!used[2 * i])
49530 l[i] = NULL_RTX;
49531 continue;
49533 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49534 vperm = force_reg (V32QImode, vperm);
49535 l[i] = gen_reg_rtx (V32QImode);
49536 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49537 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49540 for (i = 0; i < 2; ++i)
49542 if (h[i] && l[i])
49544 op = gen_reg_rtx (V32QImode);
49545 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49546 l[i] = op;
49548 else if (h[i])
49549 l[i] = h[i];
49552 gcc_assert (l[0] && l[1]);
49553 op = d->target;
49554 if (d->vmode != V32QImode)
49555 op = gen_reg_rtx (V32QImode);
49556 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49557 if (op != d->target)
49558 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49559 return true;
49562 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49563 With all of the interface bits taken care of, perform the expansion
49564 in D and return true on success. */
49566 static bool
49567 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49569 /* Try a single instruction expansion. */
49570 if (expand_vec_perm_1 (d))
49571 return true;
49573 /* Try sequences of two instructions. */
49575 if (expand_vec_perm_pshuflw_pshufhw (d))
49576 return true;
49578 if (expand_vec_perm_palignr (d, false))
49579 return true;
49581 if (expand_vec_perm_interleave2 (d))
49582 return true;
49584 if (expand_vec_perm_broadcast (d))
49585 return true;
49587 if (expand_vec_perm_vpermq_perm_1 (d))
49588 return true;
49590 if (expand_vec_perm_vperm2f128 (d))
49591 return true;
49593 if (expand_vec_perm_pblendv (d))
49594 return true;
49596 /* Try sequences of three instructions. */
49598 if (expand_vec_perm_even_odd_pack (d))
49599 return true;
49601 if (expand_vec_perm_2vperm2f128_vshuf (d))
49602 return true;
49604 if (expand_vec_perm_pshufb2 (d))
49605 return true;
49607 if (expand_vec_perm_interleave3 (d))
49608 return true;
49610 if (expand_vec_perm_vperm2f128_vblend (d))
49611 return true;
49613 /* Try sequences of four instructions. */
49615 if (expand_vec_perm_vpshufb2_vpermq (d))
49616 return true;
49618 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49619 return true;
49621 if (expand_vec_perm_vpermi2_vpshub2 (d))
49622 return true;
49624 /* ??? Look for narrow permutations whose element orderings would
49625 allow the promotion to a wider mode. */
49627 /* ??? Look for sequences of interleave or a wider permute that place
49628 the data into the correct lanes for a half-vector shuffle like
49629 pshuf[lh]w or vpermilps. */
49631 /* ??? Look for sequences of interleave that produce the desired results.
49632 The combinatorics of punpck[lh] get pretty ugly... */
49634 if (expand_vec_perm_even_odd (d))
49635 return true;
49637 /* Even longer sequences. */
49638 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49639 return true;
49641 return false;
49644 /* If a permutation only uses one operand, make it clear. Returns true
49645 if the permutation references both operands. */
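/* Illustrative note (not part of the original source): with nelt == 4 and
   d->perm == { 4, 5, 6, 7 }, WHICH ends up as 2, so the indices are
   masked down to { 0, 1, 2, 3 }, op0 is replaced by op1, and the function
   returns false because only one operand is really referenced.  */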
49647 static bool
49648 canonicalize_perm (struct expand_vec_perm_d *d)
49650 int i, which, nelt = d->nelt;
49652 for (i = which = 0; i < nelt; ++i)
49653 which |= (d->perm[i] < nelt ? 1 : 2);
49655 d->one_operand_p = true;
49656 switch (which)
49658 default:
49659 gcc_unreachable();
49661 case 3:
49662 if (!rtx_equal_p (d->op0, d->op1))
49664 d->one_operand_p = false;
49665 break;
49667 /* The elements of PERM do not suggest that only the first operand
49668 is used, but both operands are identical. Allow easier matching
49669 of the permutation by folding the permutation into the single
49670 input vector. */
49671 /* FALLTHRU */
49673 case 2:
49674 for (i = 0; i < nelt; ++i)
49675 d->perm[i] &= nelt - 1;
49676 d->op0 = d->op1;
49677 break;
49679 case 1:
49680 d->op1 = d->op0;
49681 break;
49684 return (which == 3);
49687 bool
49688 ix86_expand_vec_perm_const (rtx operands[4])
49690 struct expand_vec_perm_d d;
49691 unsigned char perm[MAX_VECT_LEN];
49692 int i, nelt;
49693 bool two_args;
49694 rtx sel;
49696 d.target = operands[0];
49697 d.op0 = operands[1];
49698 d.op1 = operands[2];
49699 sel = operands[3];
49701 d.vmode = GET_MODE (d.target);
49702 gcc_assert (VECTOR_MODE_P (d.vmode));
49703 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49704 d.testing_p = false;
49706 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49707 gcc_assert (XVECLEN (sel, 0) == nelt);
49708 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49710 for (i = 0; i < nelt; ++i)
49712 rtx e = XVECEXP (sel, 0, i);
49713 int ei = INTVAL (e) & (2 * nelt - 1);
49714 d.perm[i] = ei;
49715 perm[i] = ei;
49718 two_args = canonicalize_perm (&d);
49720 if (ix86_expand_vec_perm_const_1 (&d))
49721 return true;
49723 /* If the selector says both arguments are needed, but the operands are the
49724 same, the above tried to expand with one_operand_p and flattened selector.
49725 If that didn't work, retry without one_operand_p; we succeeded with that
49726 during testing. */
49727 if (two_args && d.one_operand_p)
49729 d.one_operand_p = false;
49730 memcpy (d.perm, perm, sizeof (perm));
49731 return ix86_expand_vec_perm_const_1 (&d);
49734 return false;
49737 /* Implement targetm.vectorize.vec_perm_const_ok. */
49739 static bool
49740 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49741 const unsigned char *sel)
49743 struct expand_vec_perm_d d;
49744 unsigned int i, nelt, which;
49745 bool ret;
49747 d.vmode = vmode;
49748 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49749 d.testing_p = true;
49751 /* Given sufficient ISA support we can just return true here
49752 for selected vector modes. */
49753 switch (d.vmode)
49755 case V16SFmode:
49756 case V16SImode:
49757 case V8DImode:
49758 case V8DFmode:
49759 if (TARGET_AVX512F)
49760 /* All implementable with a single vpermi2 insn. */
49761 return true;
49762 break;
49763 case V32HImode:
49764 if (TARGET_AVX512BW)
49765 /* All implementable with a single vpermi2 insn. */
49766 return true;
49767 break;
49768 case V64QImode:
49769 if (TARGET_AVX512BW)
49770 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49771 return true;
49772 break;
49773 case V8SImode:
49774 case V8SFmode:
49775 case V4DFmode:
49776 case V4DImode:
49777 if (TARGET_AVX512VL)
49778 /* All implementable with a single vpermi2 insn. */
49779 return true;
49780 break;
49781 case V16HImode:
49782 if (TARGET_AVX2)
49783 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49784 return true;
49785 break;
49786 case V32QImode:
49787 if (TARGET_AVX2)
49788 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49789 return true;
49790 break;
49791 case V4SImode:
49792 case V4SFmode:
49793 case V8HImode:
49794 case V16QImode:
49795 /* All implementable with a single vpperm insn. */
49796 if (TARGET_XOP)
49797 return true;
49798 /* All implementable with 2 pshufb + 1 ior. */
49799 if (TARGET_SSSE3)
49800 return true;
49801 break;
49802 case V2DImode:
49803 case V2DFmode:
49804 /* All implementable with shufpd or unpck[lh]pd. */
49805 return true;
49806 default:
49807 return false;
49810 /* Extract the values from the vector CST into the permutation
49811 array in D. */
49812 memcpy (d.perm, sel, nelt);
49813 for (i = which = 0; i < nelt; ++i)
49815 unsigned char e = d.perm[i];
49816 gcc_assert (e < 2 * nelt);
49817 which |= (e < nelt ? 1 : 2);
49820 /* For all elements from the second vector, fold the elements to the first. */
49821 if (which == 2)
49822 for (i = 0; i < nelt; ++i)
49823 d.perm[i] -= nelt;
49825 /* Check whether the mask can be applied to the vector type. */
49826 d.one_operand_p = (which != 3);
49828 /* Implementable with shufps or pshufd. */
49829 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49830 return true;
49832 /* Otherwise we have to go through the motions and see if we can
49833 figure out how to generate the requested permutation. */
49834 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49835 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49836 if (!d.one_operand_p)
49837 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49839 start_sequence ();
49840 ret = ix86_expand_vec_perm_const_1 (&d);
49841 end_sequence ();
49843 return ret;
49846 void
49847 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49849 struct expand_vec_perm_d d;
49850 unsigned i, nelt;
49852 d.target = targ;
49853 d.op0 = op0;
49854 d.op1 = op1;
49855 d.vmode = GET_MODE (targ);
49856 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49857 d.one_operand_p = false;
49858 d.testing_p = false;
49860 for (i = 0; i < nelt; ++i)
49861 d.perm[i] = i * 2 + odd;
49863 /* We'll either be able to implement the permutation directly... */
49864 if (expand_vec_perm_1 (&d))
49865 return;
49867 /* ... or we use the special-case patterns. */
49868 expand_vec_perm_even_odd_1 (&d, odd);
49871 static void
49872 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49874 struct expand_vec_perm_d d;
49875 unsigned i, nelt, base;
49876 bool ok;
49878 d.target = targ;
49879 d.op0 = op0;
49880 d.op1 = op1;
49881 d.vmode = GET_MODE (targ);
49882 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49883 d.one_operand_p = false;
49884 d.testing_p = false;
49886 base = high_p ? nelt / 2 : 0;
49887 for (i = 0; i < nelt / 2; ++i)
49889 d.perm[i * 2] = i + base;
49890 d.perm[i * 2 + 1] = i + base + nelt;
49893 /* Note that for AVX this isn't one instruction. */
49894 ok = ix86_expand_vec_perm_const_1 (&d);
49895 gcc_assert (ok);
49899 /* Expand a vector operation CODE for a V*QImode in terms of the
49900 same operation on V*HImode. */
49902 void
49903 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49905 machine_mode qimode = GET_MODE (dest);
49906 machine_mode himode;
49907 rtx (*gen_il) (rtx, rtx, rtx);
49908 rtx (*gen_ih) (rtx, rtx, rtx);
49909 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49910 struct expand_vec_perm_d d;
49911 bool ok, full_interleave;
49912 bool uns_p = false;
49913 int i;
49915 switch (qimode)
49917 case V16QImode:
49918 himode = V8HImode;
49919 gen_il = gen_vec_interleave_lowv16qi;
49920 gen_ih = gen_vec_interleave_highv16qi;
49921 break;
49922 case V32QImode:
49923 himode = V16HImode;
49924 gen_il = gen_avx2_interleave_lowv32qi;
49925 gen_ih = gen_avx2_interleave_highv32qi;
49926 break;
49927 case V64QImode:
49928 himode = V32HImode;
49929 gen_il = gen_avx512bw_interleave_lowv64qi;
49930 gen_ih = gen_avx512bw_interleave_highv64qi;
49931 break;
49932 default:
49933 gcc_unreachable ();
49936 op2_l = op2_h = op2;
49937 switch (code)
49939 case MULT:
49940 /* Unpack data such that we've got a source byte in each low byte of
49941 each word. We don't care what goes into the high byte of each word.
49942 Rather than trying to get zero in there, it is most convenient to let
49943 it be a copy of the low byte. */
49944 op2_l = gen_reg_rtx (qimode);
49945 op2_h = gen_reg_rtx (qimode);
49946 emit_insn (gen_il (op2_l, op2, op2));
49947 emit_insn (gen_ih (op2_h, op2, op2));
49948 /* FALLTHRU */
49950 op1_l = gen_reg_rtx (qimode);
49951 op1_h = gen_reg_rtx (qimode);
49952 emit_insn (gen_il (op1_l, op1, op1));
49953 emit_insn (gen_ih (op1_h, op1, op1));
49954 full_interleave = qimode == V16QImode;
49955 break;
49957 case ASHIFT:
49958 case LSHIFTRT:
49959 uns_p = true;
49960 /* FALLTHRU */
49961 case ASHIFTRT:
49962 op1_l = gen_reg_rtx (himode);
49963 op1_h = gen_reg_rtx (himode);
49964 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49965 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49966 full_interleave = true;
49967 break;
49968 default:
49969 gcc_unreachable ();
49972 /* Perform the operation. */
49973 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49974 1, OPTAB_DIRECT);
49975 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49976 1, OPTAB_DIRECT);
49977 gcc_assert (res_l && res_h);
49979 /* Merge the data back into the right place. */
49980 d.target = dest;
49981 d.op0 = gen_lowpart (qimode, res_l);
49982 d.op1 = gen_lowpart (qimode, res_h);
49983 d.vmode = qimode;
49984 d.nelt = GET_MODE_NUNITS (qimode);
49985 d.one_operand_p = false;
49986 d.testing_p = false;
49988 if (full_interleave)
49990 /* For SSE2, we used a full interleave, so the desired
49991 results are in the even elements. */
49992 for (i = 0; i < 64; ++i)
49993 d.perm[i] = i * 2;
49995 else
49997 /* For AVX, the interleave used above was not cross-lane. So the
49998 extraction is evens but with the second and third quarter swapped.
49999 Happily, that is even one insn shorter than even extraction. */
50000 for (i = 0; i < 64; ++i)
50001 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
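/* Illustrative note (not part of the original source): for i == 8 the
   term (i & 24) == 8 adds 16, giving perm value 32, and for i == 16 it
   subtracts 16, giving 16; so for V32QImode the selected elements are
   { 0, 2, ..., 14, 32, 34, ..., 46, 16, 18, ..., 30, 48, ..., 62 },
   i.e. the even elements with the second and third quarters exchanged,
   exactly as described above.  */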
50004 ok = ix86_expand_vec_perm_const_1 (&d);
50005 gcc_assert (ok);
50007 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50008 gen_rtx_fmt_ee (code, qimode, op1, op2));
50011 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
50012 if op is CONST_VECTOR with all odd elements equal to their
50013 preceding element. */
50015 static bool
50016 const_vector_equal_evenodd_p (rtx op)
50018 machine_mode mode = GET_MODE (op);
50019 int i, nunits = GET_MODE_NUNITS (mode);
50020 if (GET_CODE (op) != CONST_VECTOR
50021 || nunits != CONST_VECTOR_NUNITS (op))
50022 return false;
50023 for (i = 0; i < nunits; i += 2)
50024 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50025 return false;
50026 return true;
50029 void
50030 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50031 bool uns_p, bool odd_p)
50033 machine_mode mode = GET_MODE (op1);
50034 machine_mode wmode = GET_MODE (dest);
50035 rtx x;
50036 rtx orig_op1 = op1, orig_op2 = op2;
50038 if (!nonimmediate_operand (op1, mode))
50039 op1 = force_reg (mode, op1);
50040 if (!nonimmediate_operand (op2, mode))
50041 op2 = force_reg (mode, op2);
50043 /* We only play even/odd games with vectors of SImode. */
50044 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50046 /* If we're looking for the odd results, shift those members down to
50047 the even slots. For some cpus this is faster than a PSHUFD. */
50048 if (odd_p)
50050 /* For XOP use vpmacsdqh, but only for smult, as it is only
50051 signed. */
50052 if (TARGET_XOP && mode == V4SImode && !uns_p)
50054 x = force_reg (wmode, CONST0_RTX (wmode));
50055 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50056 return;
50059 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50060 if (!const_vector_equal_evenodd_p (orig_op1))
50061 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50062 x, NULL, 1, OPTAB_DIRECT);
50063 if (!const_vector_equal_evenodd_p (orig_op2))
50064 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50065 x, NULL, 1, OPTAB_DIRECT);
50066 op1 = gen_lowpart (mode, op1);
50067 op2 = gen_lowpart (mode, op2);
50070 if (mode == V16SImode)
50072 if (uns_p)
50073 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50074 else
50075 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50077 else if (mode == V8SImode)
50079 if (uns_p)
50080 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50081 else
50082 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50084 else if (uns_p)
50085 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50086 else if (TARGET_SSE4_1)
50087 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50088 else
50090 rtx s1, s2, t0, t1, t2;
50092 /* The easiest way to implement this without PMULDQ is to go through
50093 the motions as if we are performing a full 64-bit multiply, except
50094 that we need to do less shuffling of the elements. */
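/* Illustrative note (not part of the original source): this uses the
   identity, taken modulo 2^64 for 32-bit inputs A and B,
     (int64) A * B == (uint64) A * B
                      - (A < 0 ? (uint64) (uint32) B << 32 : 0)
                      - (B < 0 ? (uint64) (uint32) A << 32 : 0).
   S1 and S2 below are all-ones masks for the negative inputs; multiplying
   them (as unsigned) by the other operand leaves -B and -A in the low 32
   bits, so adding (t1 + t2) << 32 to the unsigned product applies both
   corrections at once.  */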
50096 /* Compute the sign-extension, aka highparts, of the two operands. */
50097 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50098 op1, pc_rtx, pc_rtx);
50099 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50100 op2, pc_rtx, pc_rtx);
50102 /* Multiply LO(A) * HI(B), and vice-versa. */
50103 t1 = gen_reg_rtx (wmode);
50104 t2 = gen_reg_rtx (wmode);
50105 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50106 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50108 /* Multiply LO(A) * LO(B). */
50109 t0 = gen_reg_rtx (wmode);
50110 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50112 /* Combine and shift the highparts into place. */
50113 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50114 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50115 1, OPTAB_DIRECT);
50117 /* Combine high and low parts. */
50118 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50119 return;
50121 emit_insn (x);
50124 void
50125 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50126 bool uns_p, bool high_p)
50128 machine_mode wmode = GET_MODE (dest);
50129 machine_mode mode = GET_MODE (op1);
50130 rtx t1, t2, t3, t4, mask;
50132 switch (mode)
50134 case V4SImode:
50135 t1 = gen_reg_rtx (mode);
50136 t2 = gen_reg_rtx (mode);
50137 if (TARGET_XOP && !uns_p)
50139 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
50140 shuffle the elements once so that all elements are in the right
50141 place for immediate use: { A C B D }. */
50142 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50143 const1_rtx, GEN_INT (3)));
50144 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50145 const1_rtx, GEN_INT (3)));
50147 else
50149 /* Put the elements into place for the multiply. */
50150 ix86_expand_vec_interleave (t1, op1, op1, high_p);
50151 ix86_expand_vec_interleave (t2, op2, op2, high_p);
50152 high_p = false;
50154 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50155 break;
50157 case V8SImode:
50158 /* Shuffle the elements between the lanes. After this we
50159 have { A B E F | C D G H } for each operand. */
50160 t1 = gen_reg_rtx (V4DImode);
50161 t2 = gen_reg_rtx (V4DImode);
50162 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50163 const0_rtx, const2_rtx,
50164 const1_rtx, GEN_INT (3)));
50165 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50166 const0_rtx, const2_rtx,
50167 const1_rtx, GEN_INT (3)));
50169 /* Shuffle the elements within the lanes. After this we
50170 have { A A B B | C C D D } or { E E F F | G G H H }. */
50171 t3 = gen_reg_rtx (V8SImode);
50172 t4 = gen_reg_rtx (V8SImode);
50173 mask = GEN_INT (high_p
50174 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50175 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
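/* Illustrative note (not part of the original source): the pshufd
   selector computed above is 0x50 (elements 0,0,1,1 of each lane) when
   high_p is false and 0xfa (elements 2,2,3,3) when high_p is true.  */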
50176 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50177 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50179 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50180 break;
50182 case V8HImode:
50183 case V16HImode:
50184 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50185 uns_p, OPTAB_DIRECT);
50186 t2 = expand_binop (mode,
50187 uns_p ? umul_highpart_optab : smul_highpart_optab,
50188 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50189 gcc_assert (t1 && t2);
50191 t3 = gen_reg_rtx (mode);
50192 ix86_expand_vec_interleave (t3, t1, t2, high_p);
50193 emit_move_insn (dest, gen_lowpart (wmode, t3));
50194 break;
50196 case V16QImode:
50197 case V32QImode:
50198 case V32HImode:
50199 case V16SImode:
50200 case V64QImode:
50201 t1 = gen_reg_rtx (wmode);
50202 t2 = gen_reg_rtx (wmode);
50203 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50204 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50206 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
50207 break;
50209 default:
50210 gcc_unreachable ();
50214 void
50215 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50217 rtx res_1, res_2, res_3, res_4;
50219 res_1 = gen_reg_rtx (V4SImode);
50220 res_2 = gen_reg_rtx (V4SImode);
50221 res_3 = gen_reg_rtx (V2DImode);
50222 res_4 = gen_reg_rtx (V2DImode);
50223 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50224 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50226 /* Move the results in element 2 down to element 1; we don't care
50227 what goes in elements 2 and 3. Then we can merge the parts
50228 back together with an interleave.
50230 Note that two other sequences were tried:
50231 (1) Use interleaves at the start instead of psrldq, which allows
50232 us to use a single shufps to merge things back at the end.
50233 (2) Use shufps here to combine the two vectors, then pshufd to
50234 put the elements in the correct order.
50235 In both cases the cost of the reformatting stall was too high
50236 and the overall sequence slower. */
50238 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50239 const0_rtx, const2_rtx,
50240 const0_rtx, const0_rtx));
50241 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50242 const0_rtx, const2_rtx,
50243 const0_rtx, const0_rtx));
50244 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50246 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
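/* For illustration: per element, the sequence above computes just the low
   32 bits of the full product, which the even/odd widening multiplies
   provide (scalar sketch with illustrative names, not compiler code):

     for (int i = 0; i < 4; i++)
       res[i] = (uint32_t) ((uint64_t) (uint32_t) a[i] * (uint32_t) b[i]);  */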
50249 void
50250 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50252 machine_mode mode = GET_MODE (op0);
50253 rtx t1, t2, t3, t4, t5, t6;
50255 if (TARGET_AVX512DQ && mode == V8DImode)
50256 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50257 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50258 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50259 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50260 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50261 else if (TARGET_XOP && mode == V2DImode)
50263 /* op1: A,B,C,D, op2: E,F,G,H */
50264 op1 = gen_lowpart (V4SImode, op1);
50265 op2 = gen_lowpart (V4SImode, op2);
50267 t1 = gen_reg_rtx (V4SImode);
50268 t2 = gen_reg_rtx (V4SImode);
50269 t3 = gen_reg_rtx (V2DImode);
50270 t4 = gen_reg_rtx (V2DImode);
50272 /* t1: B,A,D,C */
50273 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50274 GEN_INT (1),
50275 GEN_INT (0),
50276 GEN_INT (3),
50277 GEN_INT (2)));
50279 /* t2: (B*E),(A*F),(D*G),(C*H) */
50280 emit_insn (gen_mulv4si3 (t2, t1, op2));
50282 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50283 emit_insn (gen_xop_phadddq (t3, t2));
50285 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50286 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50288 /* Multiply lower parts and add everything together. */
50289 t5 = gen_reg_rtx (V2DImode);
50290 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50291 gen_lowpart (V4SImode, op1),
50292 gen_lowpart (V4SImode, op2)));
50293 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50296 else
50298 machine_mode nmode;
50299 rtx (*umul) (rtx, rtx, rtx);
50301 if (mode == V2DImode)
50303 umul = gen_vec_widen_umult_even_v4si;
50304 nmode = V4SImode;
50306 else if (mode == V4DImode)
50308 umul = gen_vec_widen_umult_even_v8si;
50309 nmode = V8SImode;
50311 else if (mode == V8DImode)
50313 umul = gen_vec_widen_umult_even_v16si;
50314 nmode = V16SImode;
50316 else
50317 gcc_unreachable ();
50320 /* Multiply low parts. */
50321 t1 = gen_reg_rtx (mode);
50322 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50324 /* Shift input vectors right 32 bits so we can multiply high parts. */
50325 t6 = GEN_INT (32);
50326 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50327 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50329 /* Multiply high parts by low parts. */
50330 t4 = gen_reg_rtx (mode);
50331 t5 = gen_reg_rtx (mode);
50332 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50333 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50335 /* Combine and shift the highparts back. */
50336 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50337 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50339 /* Combine high and low parts. */
50340 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50343 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50344 gen_rtx_MULT (mode, op1, op2));
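/* For illustration, the generic path above is the usual schoolbook split of
   each 64-bit lane into 32-bit halves (scalar sketch with illustrative
   names; only the low 64 bits of the product are kept):

     uint32_t al = a, ah = a >> 32, bl = b, bh = b >> 32;
     uint64_t lo    = (uint64_t) al * bl;                                // t1
     uint64_t cross = ((uint64_t) ah * bl + (uint64_t) bh * al) << 32;   // t4 + t5
     uint64_t prod  = lo + cross;                   // == a * b mod 2^64  */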
50347 /* Return 1 if control transfer instruction INSN
50348 should be encoded with bnd prefix.
50349 If insn is NULL then return 1 when control
50350 transfer instructions should be prefixed with
50351 bnd by default for the current function. */
50353 bool
50354 ix86_bnd_prefixed_insn_p (rtx insn)
50356 /* For call insns check special flag. */
50357 if (insn && CALL_P (insn))
50359 rtx call = get_call_rtx_from (insn);
50360 if (call)
50361 return CALL_EXPR_WITH_BOUNDS_P (call);
50364 /* All other insns are prefixed only if function is instrumented. */
50365 return chkp_function_instrumented_p (current_function_decl);
50368 /* Calculate integer abs() using only SSE2 instructions. */
50370 void
50371 ix86_expand_sse2_abs (rtx target, rtx input)
50373 machine_mode mode = GET_MODE (target);
50374 rtx tmp0, tmp1, x;
50376 switch (mode)
50378 /* For 32-bit signed integer X, the best way to calculate the absolute
50379 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50380 case V4SImode:
50381 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50382 GEN_INT (GET_MODE_BITSIZE
50383 (GET_MODE_INNER (mode)) - 1),
50384 NULL, 0, OPTAB_DIRECT);
50385 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50386 NULL, 0, OPTAB_DIRECT);
50387 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50388 target, 0, OPTAB_DIRECT);
50389 break;
50391 /* For 16-bit signed integer X, the best way to calculate the absolute
50392 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50393 case V8HImode:
50394 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50396 x = expand_simple_binop (mode, SMAX, tmp0, input,
50397 target, 0, OPTAB_DIRECT);
50398 break;
50400 /* For 8-bit signed integer X, the best way to calculate the absolute
50401 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50402 as SSE2 provides the PMINUB insn. */
50403 case V16QImode:
50404 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50406 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50407 target, 0, OPTAB_DIRECT);
50408 break;
50410 default:
50411 gcc_unreachable ();
50414 if (x != target)
50415 emit_move_insn (target, x);
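/* For illustration, the three idioms above written out for a scalar input
   element X (illustrative sketch, not compiler code):

     int32_t s   = x >> 31;                        // V4SI: arithmetic shift by W-1
     int32_t a32 = (x ^ s) - s;
     int16_t a16 = x > -x ? x : -x;                // V8HI: PMAXSW, max (x, -x)
     uint8_t a8  = (uint8_t) -x < (uint8_t) x      // V16QI: PMINUB
                   ? (uint8_t) -x : (uint8_t) x;  */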
50418 /* Expand an insert into a vector register through pinsr insn.
50419 Return true if successful. */
50421 bool
50422 ix86_expand_pinsr (rtx *operands)
50424 rtx dst = operands[0];
50425 rtx src = operands[3];
50427 unsigned int size = INTVAL (operands[1]);
50428 unsigned int pos = INTVAL (operands[2]);
50430 if (GET_CODE (dst) == SUBREG)
50432 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50433 dst = SUBREG_REG (dst);
50436 if (GET_CODE (src) == SUBREG)
50437 src = SUBREG_REG (src);
50439 switch (GET_MODE (dst))
50441 case V16QImode:
50442 case V8HImode:
50443 case V4SImode:
50444 case V2DImode:
50446 machine_mode srcmode, dstmode;
50447 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50449 srcmode = mode_for_size (size, MODE_INT, 0);
50451 switch (srcmode)
50453 case QImode:
50454 if (!TARGET_SSE4_1)
50455 return false;
50456 dstmode = V16QImode;
50457 pinsr = gen_sse4_1_pinsrb;
50458 break;
50460 case HImode:
50461 if (!TARGET_SSE2)
50462 return false;
50463 dstmode = V8HImode;
50464 pinsr = gen_sse2_pinsrw;
50465 break;
50467 case SImode:
50468 if (!TARGET_SSE4_1)
50469 return false;
50470 dstmode = V4SImode;
50471 pinsr = gen_sse4_1_pinsrd;
50472 break;
50474 case DImode:
50475 gcc_assert (TARGET_64BIT);
50476 if (!TARGET_SSE4_1)
50477 return false;
50478 dstmode = V2DImode;
50479 pinsr = gen_sse4_1_pinsrq;
50480 break;
50482 default:
50483 return false;
50486 rtx d = dst;
50487 if (GET_MODE (dst) != dstmode)
50488 d = gen_reg_rtx (dstmode);
50489 src = gen_lowpart (srcmode, src);
50491 pos /= size;
50493 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50494 GEN_INT (1 << pos)));
50495 if (d != dst)
50496 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50497 return true;
50500 default:
50501 return false;
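/* A worked example with illustrative values: inserting a SIZE = 32 bit
   value at bit position POS = 64 of a V4SImode destination selects
   srcmode = SImode and dstmode = V4SImode; pos /= size gives lane 2 and
   the expander receives GEN_INT (1 << 2) as the element selector for
   pinsrd.  If the destination is a SUBREG, SUBREG_BYTE times
   BITS_PER_UNIT is added to POS first.  */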
50505 /* This function returns the calling-ABI-specific va_list type node.
50506 It returns the FNDECL-specific va_list type. */
50508 static tree
50509 ix86_fn_abi_va_list (tree fndecl)
50511 if (!TARGET_64BIT)
50512 return va_list_type_node;
50513 gcc_assert (fndecl != NULL_TREE);
50515 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50516 return ms_va_list_type_node;
50517 else
50518 return sysv_va_list_type_node;
50521 /* Returns the canonical va_list type specified by TYPE. If there
50522 is no valid TYPE provided, it returns NULL_TREE. */
50524 static tree
50525 ix86_canonical_va_list_type (tree type)
50527 tree wtype, htype;
50529 /* Resolve references and pointers to va_list type. */
50530 if (TREE_CODE (type) == MEM_REF)
50531 type = TREE_TYPE (type);
50532 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50533 type = TREE_TYPE (type);
50534 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50535 type = TREE_TYPE (type);
50537 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50539 wtype = va_list_type_node;
50540 gcc_assert (wtype != NULL_TREE);
50541 htype = type;
50542 if (TREE_CODE (wtype) == ARRAY_TYPE)
50544 /* If va_list is an array type, the argument may have decayed
50545 to a pointer type, e.g. by being passed to another function.
50546 In that case, unwrap both types so that we can compare the
50547 underlying records. */
50548 if (TREE_CODE (htype) == ARRAY_TYPE
50549 || POINTER_TYPE_P (htype))
50551 wtype = TREE_TYPE (wtype);
50552 htype = TREE_TYPE (htype);
50555 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50556 return va_list_type_node;
50557 wtype = sysv_va_list_type_node;
50558 gcc_assert (wtype != NULL_TREE);
50559 htype = type;
50560 if (TREE_CODE (wtype) == ARRAY_TYPE)
50562 /* If va_list is an array type, the argument may have decayed
50563 to a pointer type, e.g. by being passed to another function.
50564 In that case, unwrap both types so that we can compare the
50565 underlying records. */
50566 if (TREE_CODE (htype) == ARRAY_TYPE
50567 || POINTER_TYPE_P (htype))
50569 wtype = TREE_TYPE (wtype);
50570 htype = TREE_TYPE (htype);
50573 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50574 return sysv_va_list_type_node;
50575 wtype = ms_va_list_type_node;
50576 gcc_assert (wtype != NULL_TREE);
50577 htype = type;
50578 if (TREE_CODE (wtype) == ARRAY_TYPE)
50580 /* If va_list is an array type, the argument may have decayed
50581 to a pointer type, e.g. by being passed to another function.
50582 In that case, unwrap both types so that we can compare the
50583 underlying records. */
50584 if (TREE_CODE (htype) == ARRAY_TYPE
50585 || POINTER_TYPE_P (htype))
50587 wtype = TREE_TYPE (wtype);
50588 htype = TREE_TYPE (htype);
50591 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50592 return ms_va_list_type_node;
50593 return NULL_TREE;
50595 return std_canonical_va_list_type (type);
50598 /* Iterate through the target-specific builtin types for va_list.
50599 IDX denotes the iterator, *PTREE is set to the result type of
50600 the va_list builtin, and *PNAME to its internal type.
50601 Returns zero if there is no element for this index, otherwise
50602 IDX should be increased upon the next call.
50603 Note, do not iterate a base builtin's name like __builtin_va_list.
50604 Used from c_common_nodes_and_builtins. */
50606 static int
50607 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50609 if (TARGET_64BIT)
50611 switch (idx)
50613 default:
50614 break;
50616 case 0:
50617 *ptree = ms_va_list_type_node;
50618 *pname = "__builtin_ms_va_list";
50619 return 1;
50621 case 1:
50622 *ptree = sysv_va_list_type_node;
50623 *pname = "__builtin_sysv_va_list";
50624 return 1;
50628 return 0;
50631 #undef TARGET_SCHED_DISPATCH
50632 #define TARGET_SCHED_DISPATCH has_dispatch
50633 #undef TARGET_SCHED_DISPATCH_DO
50634 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50635 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50636 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50637 #undef TARGET_SCHED_REORDER
50638 #define TARGET_SCHED_REORDER ix86_sched_reorder
50639 #undef TARGET_SCHED_ADJUST_PRIORITY
50640 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50641 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50642 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50643 ix86_dependencies_evaluation_hook
50645 /* The size of the dispatch window is the total number of bytes of
50646 object code allowed in a window. */
50647 #define DISPATCH_WINDOW_SIZE 16
50649 /* Number of dispatch windows considered for scheduling. */
50650 #define MAX_DISPATCH_WINDOWS 3
50652 /* Maximum number of instructions in a window. */
50653 #define MAX_INSN 4
50655 /* Maximum number of immediate operands in a window. */
50656 #define MAX_IMM 4
50658 /* Maximum number of immediate bits allowed in a window. */
50659 #define MAX_IMM_SIZE 128
50661 /* Maximum number of 32 bit immediates allowed in a window. */
50662 #define MAX_IMM_32 4
50664 /* Maximum number of 64 bit immediates allowed in a window. */
50665 #define MAX_IMM_64 2
50667 /* Maximum total of loads or prefetches allowed in a window. */
50668 #define MAX_LOAD 2
50670 /* Maximum total of stores allowed in a window. */
50671 #define MAX_STORE 1
50673 #undef BIG
50674 #define BIG 100
50677 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50678 enum dispatch_group {
50679 disp_no_group = 0,
50680 disp_load,
50681 disp_store,
50682 disp_load_store,
50683 disp_prefetch,
50684 disp_imm,
50685 disp_imm_32,
50686 disp_imm_64,
50687 disp_branch,
50688 disp_cmp,
50689 disp_jcc,
50690 disp_last
50693 /* Number of allowable groups in a dispatch window. It is an array
50694 indexed by dispatch_group enum. 100 is used as a big number,
50695 because the number of these kinds of operations does not have any
50696 effect in a dispatch window, but we need them for other reasons in
50697 the table. */
50698 static unsigned int num_allowable_groups[disp_last] = {
50699 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50702 char group_name[disp_last + 1][16] = {
50703 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50704 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50705 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50708 /* Instruction path. */
50709 enum insn_path {
50710 no_path = 0,
50711 path_single, /* Single micro op. */
50712 path_double, /* Double micro op. */
50713 path_multi, /* Instructions with more than 2 micro ops. */
50714 last_path
50717 /* sched_insn_info defines a window to the instructions scheduled in
50718 the basic block. It contains a pointer to the insn_info table and
50719 the instruction scheduled.
50721 Windows are allocated for each basic block and are linked
50722 together. */
50723 typedef struct sched_insn_info_s {
50724 rtx insn;
50725 enum dispatch_group group;
50726 enum insn_path path;
50727 int byte_len;
50728 int imm_bytes;
50729 } sched_insn_info;
50731 /* Linked list of dispatch windows. This is a two way list of
50732 dispatch windows of a basic block. It contains information about
50733 the number of uops in the window and the total number of
50734 instructions and of bytes in the object code for this dispatch
50735 window. */
50736 typedef struct dispatch_windows_s {
50737 int num_insn; /* Number of insn in the window. */
50738 int num_uops; /* Number of uops in the window. */
50739 int window_size; /* Number of bytes in the window. */
50740 int window_num; /* Window number, either 0 or 1. */
50741 int num_imm; /* Number of immediates in an insn. */
50742 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50743 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50744 int imm_size; /* Total immediates in the window. */
50745 int num_loads; /* Total memory loads in the window. */
50746 int num_stores; /* Total memory stores in the window. */
50747 int violation; /* Violation exists in window. */
50748 sched_insn_info *window; /* Pointer to the window. */
50749 struct dispatch_windows_s *next;
50750 struct dispatch_windows_s *prev;
50751 } dispatch_windows;
50753 /* Immediate values used in an insn. */
50754 typedef struct imm_info_s
50756 int imm;
50757 int imm32;
50758 int imm64;
50759 } imm_info;
50761 static dispatch_windows *dispatch_window_list;
50762 static dispatch_windows *dispatch_window_list1;
50764 /* Get dispatch group of insn. */
50766 static enum dispatch_group
50767 get_mem_group (rtx_insn *insn)
50769 enum attr_memory memory;
50771 if (INSN_CODE (insn) < 0)
50772 return disp_no_group;
50773 memory = get_attr_memory (insn);
50774 if (memory == MEMORY_STORE)
50775 return disp_store;
50777 if (memory == MEMORY_LOAD)
50778 return disp_load;
50780 if (memory == MEMORY_BOTH)
50781 return disp_load_store;
50783 return disp_no_group;
50786 /* Return true if insn is a compare instruction. */
50788 static bool
50789 is_cmp (rtx_insn *insn)
50791 enum attr_type type;
50793 type = get_attr_type (insn);
50794 return (type == TYPE_TEST
50795 || type == TYPE_ICMP
50796 || type == TYPE_FCMP
50797 || GET_CODE (PATTERN (insn)) == COMPARE);
50800 /* Return true if a dispatch violation was encountered. */
50802 static bool
50803 dispatch_violation (void)
50805 if (dispatch_window_list->next)
50806 return dispatch_window_list->next->violation;
50807 return dispatch_window_list->violation;
50810 /* Return true if insn is a branch instruction. */
50812 static bool
50813 is_branch (rtx_insn *insn)
50815 return (CALL_P (insn) || JUMP_P (insn));
50818 /* Return true if insn is a prefetch instruction. */
50820 static bool
50821 is_prefetch (rtx_insn *insn)
50823 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50826 /* This function initializes a dispatch window and the list container holding a
50827 pointer to the window. */
50829 static void
50830 init_window (int window_num)
50832 int i;
50833 dispatch_windows *new_list;
50835 if (window_num == 0)
50836 new_list = dispatch_window_list;
50837 else
50838 new_list = dispatch_window_list1;
50840 new_list->num_insn = 0;
50841 new_list->num_uops = 0;
50842 new_list->window_size = 0;
50843 new_list->next = NULL;
50844 new_list->prev = NULL;
50845 new_list->window_num = window_num;
50846 new_list->num_imm = 0;
50847 new_list->num_imm_32 = 0;
50848 new_list->num_imm_64 = 0;
50849 new_list->imm_size = 0;
50850 new_list->num_loads = 0;
50851 new_list->num_stores = 0;
50852 new_list->violation = false;
50854 for (i = 0; i < MAX_INSN; i++)
50856 new_list->window[i].insn = NULL;
50857 new_list->window[i].group = disp_no_group;
50858 new_list->window[i].path = no_path;
50859 new_list->window[i].byte_len = 0;
50860 new_list->window[i].imm_bytes = 0;
50862 return;
50865 /* This function allocates and initializes a dispatch window and the
50866 list container holding a pointer to the window. */
50868 static dispatch_windows *
50869 allocate_window (void)
50871 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50872 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50874 return new_list;
50877 /* This routine initializes the dispatch scheduling information. It
50878 initiates building dispatch scheduler tables and constructs the
50879 first dispatch window. */
50881 static void
50882 init_dispatch_sched (void)
50884 /* Allocate a dispatch list and a window. */
50885 dispatch_window_list = allocate_window ();
50886 dispatch_window_list1 = allocate_window ();
50887 init_window (0);
50888 init_window (1);
50891 /* This function returns true if a branch is detected. End of a basic block
50892 does not have to be a branch, but here we assume only branches end a
50893 window. */
50895 static bool
50896 is_end_basic_block (enum dispatch_group group)
50898 return group == disp_branch;
50901 /* This function is called when the end of a window processing is reached. */
50903 static void
50904 process_end_window (void)
50906 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50907 if (dispatch_window_list->next)
50909 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50910 gcc_assert (dispatch_window_list->window_size
50911 + dispatch_window_list1->window_size <= 48);
50912 init_window (1);
50914 init_window (0);
50917 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50918 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50919 for 48 bytes of instructions. Note that these windows are not dispatch
50920 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50922 static dispatch_windows *
50923 allocate_next_window (int window_num)
50925 if (window_num == 0)
50927 if (dispatch_window_list->next)
50928 init_window (1);
50929 init_window (0);
50930 return dispatch_window_list;
50933 dispatch_window_list->next = dispatch_window_list1;
50934 dispatch_window_list1->prev = dispatch_window_list;
50936 return dispatch_window_list1;
50939 /* Compute number of immediate operands of an instruction. */
50941 static void
50942 find_constant (rtx in_rtx, imm_info *imm_values)
50944 if (INSN_P (in_rtx))
50945 in_rtx = PATTERN (in_rtx);
50946 subrtx_iterator::array_type array;
50947 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50948 if (const_rtx x = *iter)
50949 switch (GET_CODE (x))
50951 case CONST:
50952 case SYMBOL_REF:
50953 case CONST_INT:
50954 (imm_values->imm)++;
50955 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50956 (imm_values->imm32)++;
50957 else
50958 (imm_values->imm64)++;
50959 break;
50961 case CONST_DOUBLE:
50962 case CONST_WIDE_INT:
50963 (imm_values->imm)++;
50964 (imm_values->imm64)++;
50965 break;
50967 case CODE_LABEL:
50968 if (LABEL_KIND (x) == LABEL_NORMAL)
50970 (imm_values->imm)++;
50971 (imm_values->imm32)++;
50973 break;
50975 default:
50976 break;
50980 /* Return total size of immediate operands of an instruction along with number
50981 of corresponding immediate-operands. It initializes its parameters to zero
50982 before calling FIND_CONSTANT.
50983 INSN is the input instruction. IMM is the total of immediates.
50984 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50985 bit immediates. */
50987 static int
50988 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50990 imm_info imm_values = {0, 0, 0};
50992 find_constant (insn, &imm_values);
50993 *imm = imm_values.imm;
50994 *imm32 = imm_values.imm32;
50995 *imm64 = imm_values.imm64;
50996 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
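/* A worked example with illustrative values: an insn containing one
   immediate that fits in 32 bits and one that does not yields
   *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a return value of
   1 * 4 + 1 * 8 = 12 bytes of immediate data.  */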
50999 /* This function indicates whether INSN has any immediate
51000 operands. */
51002 static bool
51003 has_immediate (rtx_insn *insn)
51005 int num_imm_operand;
51006 int num_imm32_operand;
51007 int num_imm64_operand;
51009 if (insn)
51010 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51011 &num_imm64_operand);
51012 return false;
51015 /* Return the decode path (single, double or multi) for INSN. */
51017 static enum insn_path
51018 get_insn_path (rtx_insn *insn)
51020 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51022 if ((int)path == 0)
51023 return path_single;
51025 if ((int)path == 1)
51026 return path_double;
51028 return path_multi;
51031 /* Return insn dispatch group. */
51033 static enum dispatch_group
51034 get_insn_group (rtx_insn *insn)
51036 enum dispatch_group group = get_mem_group (insn);
51037 if (group)
51038 return group;
51040 if (is_branch (insn))
51041 return disp_branch;
51043 if (is_cmp (insn))
51044 return disp_cmp;
51046 if (has_immediate (insn))
51047 return disp_imm;
51049 if (is_prefetch (insn))
51050 return disp_prefetch;
51052 return disp_no_group;
51055 /* Count the number of GROUP-restricted instructions in a dispatch
51056 window WINDOW_LIST. */
51058 static int
51059 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51061 enum dispatch_group group = get_insn_group (insn);
51062 int imm_size;
51063 int num_imm_operand;
51064 int num_imm32_operand;
51065 int num_imm64_operand;
51067 if (group == disp_no_group)
51068 return 0;
51070 if (group == disp_imm)
51072 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51073 &num_imm64_operand);
51074 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51075 || num_imm_operand + window_list->num_imm > MAX_IMM
51076 || (num_imm32_operand > 0
51077 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51078 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51079 || (num_imm64_operand > 0
51080 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51081 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51082 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51083 && num_imm64_operand > 0
51084 && ((window_list->num_imm_64 > 0
51085 && window_list->num_insn >= 2)
51086 || window_list->num_insn >= 3)))
51087 return BIG;
51089 return 1;
51092 if ((group == disp_load_store
51093 && (window_list->num_loads >= MAX_LOAD
51094 || window_list->num_stores >= MAX_STORE))
51095 || ((group == disp_load
51096 || group == disp_prefetch)
51097 && window_list->num_loads >= MAX_LOAD)
51098 || (group == disp_store
51099 && window_list->num_stores >= MAX_STORE))
51100 return BIG;
51102 return 1;
51105 /* This function returns true if insn satisfies dispatch rules on the
51106 last window scheduled. */
51108 static bool
51109 fits_dispatch_window (rtx_insn *insn)
51111 dispatch_windows *window_list = dispatch_window_list;
51112 dispatch_windows *window_list_next = dispatch_window_list->next;
51113 unsigned int num_restrict;
51114 enum dispatch_group group = get_insn_group (insn);
51115 enum insn_path path = get_insn_path (insn);
51116 int sum;
51118 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51119 instructions should be given the lowest priority in the
51120 scheduling process in the Haifa scheduler to make sure they will be
51121 scheduled in the same dispatch window as the reference to them. */
51122 if (group == disp_jcc || group == disp_cmp)
51123 return false;
51125 /* Check nonrestricted. */
51126 if (group == disp_no_group || group == disp_branch)
51127 return true;
51129 /* Get last dispatch window. */
51130 if (window_list_next)
51131 window_list = window_list_next;
51133 if (window_list->window_num == 1)
51135 sum = window_list->prev->window_size + window_list->window_size;
51137 if (sum == 32
51138 || (min_insn_size (insn) + sum) >= 48)
51139 /* Window 1 is full. Go for next window. */
51140 return true;
51143 num_restrict = count_num_restricted (insn, window_list);
51145 if (num_restrict > num_allowable_groups[group])
51146 return false;
51148 /* See if it fits in the first window. */
51149 if (window_list->window_num == 0)
51151 /* The first window should have only single and double path
51152 uops. */
51153 if (path == path_double
51154 && (window_list->num_uops + 2) > MAX_INSN)
51155 return false;
51156 else if (path != path_single)
51157 return false;
51159 return true;
51162 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51163 dispatch window WINDOW_LIST. */
51165 static void
51166 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51168 int byte_len = min_insn_size (insn);
51169 int num_insn = window_list->num_insn;
51170 int imm_size;
51171 sched_insn_info *window = window_list->window;
51172 enum dispatch_group group = get_insn_group (insn);
51173 enum insn_path path = get_insn_path (insn);
51174 int num_imm_operand;
51175 int num_imm32_operand;
51176 int num_imm64_operand;
51178 if (!window_list->violation && group != disp_cmp
51179 && !fits_dispatch_window (insn))
51180 window_list->violation = true;
51182 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51183 &num_imm64_operand);
51185 /* Initialize window with new instruction. */
51186 window[num_insn].insn = insn;
51187 window[num_insn].byte_len = byte_len;
51188 window[num_insn].group = group;
51189 window[num_insn].path = path;
51190 window[num_insn].imm_bytes = imm_size;
51192 window_list->window_size += byte_len;
51193 window_list->num_insn = num_insn + 1;
51194 window_list->num_uops = window_list->num_uops + num_uops;
51195 window_list->imm_size += imm_size;
51196 window_list->num_imm += num_imm_operand;
51197 window_list->num_imm_32 += num_imm32_operand;
51198 window_list->num_imm_64 += num_imm64_operand;
51200 if (group == disp_store)
51201 window_list->num_stores += 1;
51202 else if (group == disp_load
51203 || group == disp_prefetch)
51204 window_list->num_loads += 1;
51205 else if (group == disp_load_store)
51207 window_list->num_stores += 1;
51208 window_list->num_loads += 1;
51212 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51213 If the total bytes of instructions or the number of instructions in
51214 the window exceed the allowed limits, it allocates a new window.
51216 static void
51217 add_to_dispatch_window (rtx_insn *insn)
51219 int byte_len;
51220 dispatch_windows *window_list;
51221 dispatch_windows *next_list;
51222 dispatch_windows *window0_list;
51223 enum insn_path path;
51224 enum dispatch_group insn_group;
51225 bool insn_fits;
51226 int num_insn;
51227 int num_uops;
51228 int window_num;
51229 int insn_num_uops;
51230 int sum;
51232 if (INSN_CODE (insn) < 0)
51233 return;
51235 byte_len = min_insn_size (insn);
51236 window_list = dispatch_window_list;
51237 next_list = window_list->next;
51238 path = get_insn_path (insn);
51239 insn_group = get_insn_group (insn);
51241 /* Get the last dispatch window. */
51242 if (next_list)
51243 window_list = dispatch_window_list->next;
51245 if (path == path_single)
51246 insn_num_uops = 1;
51247 else if (path == path_double)
51248 insn_num_uops = 2;
51249 else
51250 insn_num_uops = (int) path;
51252 /* If the current window is full, get a new window.
51253 Window number zero is full if MAX_INSN uops are scheduled in it.
51254 Window number one is full if window zero's bytes plus window
51255 one's bytes equal 32, or if adding the bytes of the new instruction
51256 makes the total greater than 48, or if it already has MAX_INSN
51257 instructions in it. */
51258 num_insn = window_list->num_insn;
51259 num_uops = window_list->num_uops;
51260 window_num = window_list->window_num;
51261 insn_fits = fits_dispatch_window (insn);
51263 if (num_insn >= MAX_INSN
51264 || num_uops + insn_num_uops > MAX_INSN
51265 || !(insn_fits))
51267 window_num = ~window_num & 1;
51268 window_list = allocate_next_window (window_num);
51271 if (window_num == 0)
51273 add_insn_window (insn, window_list, insn_num_uops);
51274 if (window_list->num_insn >= MAX_INSN
51275 && insn_group == disp_branch)
51277 process_end_window ();
51278 return;
51281 else if (window_num == 1)
51283 window0_list = window_list->prev;
51284 sum = window0_list->window_size + window_list->window_size;
51285 if (sum == 32
51286 || (byte_len + sum) >= 48)
51288 process_end_window ();
51289 window_list = dispatch_window_list;
51292 add_insn_window (insn, window_list, insn_num_uops);
51294 else
51295 gcc_unreachable ();
51297 if (is_end_basic_block (insn_group))
51299 /* End of basic block reached; do end-basic-block processing. */
51300 process_end_window ();
51301 return;
51305 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51307 DEBUG_FUNCTION static void
51308 debug_dispatch_window_file (FILE *file, int window_num)
51310 dispatch_windows *list;
51311 int i;
51313 if (window_num == 0)
51314 list = dispatch_window_list;
51315 else
51316 list = dispatch_window_list1;
51318 fprintf (file, "Window #%d:\n", list->window_num);
51319 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51320 list->num_insn, list->num_uops, list->window_size);
51321 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51322 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51324 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51325 list->num_stores);
51326 fprintf (file, " insn info:\n");
51328 for (i = 0; i < MAX_INSN; i++)
51330 if (!list->window[i].insn)
51331 break;
51332 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51333 i, group_name[list->window[i].group],
51334 i, (void *)list->window[i].insn,
51335 i, list->window[i].path,
51336 i, list->window[i].byte_len,
51337 i, list->window[i].imm_bytes);
51341 /* Print to stdout a dispatch window. */
51343 DEBUG_FUNCTION void
51344 debug_dispatch_window (int window_num)
51346 debug_dispatch_window_file (stdout, window_num);
51349 /* Print INSN dispatch information to FILE. */
51351 DEBUG_FUNCTION static void
51352 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51354 int byte_len;
51355 enum insn_path path;
51356 enum dispatch_group group;
51357 int imm_size;
51358 int num_imm_operand;
51359 int num_imm32_operand;
51360 int num_imm64_operand;
51362 if (INSN_CODE (insn) < 0)
51363 return;
51365 byte_len = min_insn_size (insn);
51366 path = get_insn_path (insn);
51367 group = get_insn_group (insn);
51368 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51369 &num_imm64_operand);
51371 fprintf (file, " insn info:\n");
51372 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51373 group_name[group], path, byte_len);
51374 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51375 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51378 /* Print to STDERR the status of the ready list with respect to
51379 dispatch windows. */
51381 DEBUG_FUNCTION void
51382 debug_ready_dispatch (void)
51384 int i;
51385 int no_ready = number_in_ready ();
51387 fprintf (stdout, "Number of ready: %d\n", no_ready);
51389 for (i = 0; i < no_ready; i++)
51390 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51393 /* This routine is the driver of the dispatch scheduler. */
51395 static void
51396 do_dispatch (rtx_insn *insn, int mode)
51398 if (mode == DISPATCH_INIT)
51399 init_dispatch_sched ();
51400 else if (mode == ADD_TO_DISPATCH_WINDOW)
51401 add_to_dispatch_window (insn);
51404 /* Return TRUE if Dispatch Scheduling is supported. */
51406 static bool
51407 has_dispatch (rtx_insn *insn, int action)
51409 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51410 && flag_dispatch_scheduler)
51411 switch (action)
51413 default:
51414 return false;
51416 case IS_DISPATCH_ON:
51417 return true;
51418 break;
51420 case IS_CMP:
51421 return is_cmp (insn);
51423 case DISPATCH_VIOLATION:
51424 return dispatch_violation ();
51426 case FITS_DISPATCH_WINDOW:
51427 return fits_dispatch_window (insn);
51430 return false;
51433 /* Implementation of reassociation_width target hook used by
51434 reassoc phase to identify parallelism level in reassociated
51435 tree. The statement's tree_code is passed in OPC. The arguments'
51436 type is passed in MODE.
51438 Currently parallel reassociation is enabled for Atom
51439 processors only and we set reassociation width to be 2
51440 because Atom may issue up to 2 instructions per cycle.
51442 Return value should be fixed if parallel reassociation is
51443 enabled for other processors. */
51445 static int
51446 ix86_reassociation_width (unsigned int, machine_mode mode)
51448 /* Vector part. */
51449 if (VECTOR_MODE_P (mode))
51451 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51452 return 2;
51453 else
51454 return 1;
51457 /* Scalar part. */
51458 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51459 return 2;
51460 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51461 return 2;
51462 else
51463 return 1;
51466 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51467 place emms and femms instructions. */
51469 static machine_mode
51470 ix86_preferred_simd_mode (machine_mode mode)
51472 if (!TARGET_SSE)
51473 return word_mode;
51475 switch (mode)
51477 case QImode:
51478 return TARGET_AVX512BW ? V64QImode :
51479 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51480 case HImode:
51481 return TARGET_AVX512BW ? V32HImode :
51482 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51483 case SImode:
51484 return TARGET_AVX512F ? V16SImode :
51485 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51486 case DImode:
51487 return TARGET_AVX512F ? V8DImode :
51488 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51490 case SFmode:
51491 if (TARGET_AVX512F)
51492 return V16SFmode;
51493 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51494 return V8SFmode;
51495 else
51496 return V4SFmode;
51498 case DFmode:
51499 if (!TARGET_VECTORIZE_DOUBLE)
51500 return word_mode;
51501 else if (TARGET_AVX512F)
51502 return V8DFmode;
51503 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51504 return V4DFmode;
51505 else if (TARGET_SSE2)
51506 return V2DFmode;
51507 /* FALLTHRU */
51509 default:
51510 return word_mode;
51514 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51515 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51516 256bit and 128bit vectors. */
51518 static unsigned int
51519 ix86_autovectorize_vector_sizes (void)
51521 return TARGET_AVX512F ? 64 | 32 | 16 :
51522 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51527 /* Return class of registers which could be used for pseudo of MODE
51528 and of class RCLASS for spilling instead of memory. Return NO_REGS
51529 if it is not possible or not profitable. */
51530 static reg_class_t
51531 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51533 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51534 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51535 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51536 return ALL_SSE_REGS;
51537 return NO_REGS;
51540 /* Implement targetm.vectorize.init_cost. */
51542 static void *
51543 ix86_init_cost (struct loop *)
51545 unsigned *cost = XNEWVEC (unsigned, 3);
51546 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51547 return cost;
51550 /* Implement targetm.vectorize.add_stmt_cost. */
51552 static unsigned
51553 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51554 struct _stmt_vec_info *stmt_info, int misalign,
51555 enum vect_cost_model_location where)
51557 unsigned *cost = (unsigned *) data;
51558 unsigned retval = 0;
51560 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51561 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51563 /* Statements in an inner loop relative to the loop being
51564 vectorized are weighted more heavily. The value here is
51565 arbitrary and could potentially be improved with analysis. */
51566 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51567 count *= 50; /* FIXME. */
51569 retval = (unsigned) (count * stmt_cost);
51571 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51572 for Silvermont, as it has an out-of-order integer pipeline and can execute
51573 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51574 if (TARGET_SILVERMONT || TARGET_INTEL)
51575 if (stmt_info && stmt_info->stmt)
51577 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51578 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51579 retval = (retval * 17) / 10;
51582 cost[where] += retval;
51584 return retval;
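/* A worked example with illustrative numbers: a vector statement in the
   inner loop of the loop being vectorized, with COUNT = 1 and a
   per-statement cost of 3, is weighted to 1 * 50 = 50, giving
   retval = 50 * 3 = 150; on Silvermont/Intel with an integer-typed LHS
   this is further scaled to (150 * 17) / 10 = 255 before being added to
   cost[vect_body].  */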
51587 /* Implement targetm.vectorize.finish_cost. */
51589 static void
51590 ix86_finish_cost (void *data, unsigned *prologue_cost,
51591 unsigned *body_cost, unsigned *epilogue_cost)
51593 unsigned *cost = (unsigned *) data;
51594 *prologue_cost = cost[vect_prologue];
51595 *body_cost = cost[vect_body];
51596 *epilogue_cost = cost[vect_epilogue];
51599 /* Implement targetm.vectorize.destroy_cost_data. */
51601 static void
51602 ix86_destroy_cost_data (void *data)
51604 free (data);
51607 /* Validate target specific memory model bits in VAL. */
51609 static unsigned HOST_WIDE_INT
51610 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51612 enum memmodel model = memmodel_from_int (val);
51613 bool strong;
51615 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51616 |MEMMODEL_MASK)
51617 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51619 warning (OPT_Winvalid_memory_model,
51620 "Unknown architecture specific memory model");
51621 return MEMMODEL_SEQ_CST;
51623 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51624 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51626 warning (OPT_Winvalid_memory_model,
51627 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51628 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51630 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51632 warning (OPT_Winvalid_memory_model,
51633 "HLE_RELEASE not used with RELEASE or stronger memory model");
51634 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51636 return val;
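/* For illustration, a sketch of how the HLE bits validated above can be
   combined with the standard memory models from user code, using the
   documented __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros
   (illustrative only, not part of this file):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;                      // spin; lock acquisition may be elided
     ... critical section ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);  */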
51639 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51640 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51641 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51642 or number of vecsize_mangle variants that should be emitted. */
51644 static int
51645 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51646 struct cgraph_simd_clone *clonei,
51647 tree base_type, int num)
51649 int ret = 1;
51651 if (clonei->simdlen
51652 && (clonei->simdlen < 2
51653 || clonei->simdlen > 16
51654 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51656 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51657 "unsupported simdlen %d", clonei->simdlen);
51658 return 0;
51661 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51662 if (TREE_CODE (ret_type) != VOID_TYPE)
51663 switch (TYPE_MODE (ret_type))
51665 case QImode:
51666 case HImode:
51667 case SImode:
51668 case DImode:
51669 case SFmode:
51670 case DFmode:
51671 /* case SCmode: */
51672 /* case DCmode: */
51673 break;
51674 default:
51675 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51676 "unsupported return type %qT for simd\n", ret_type);
51677 return 0;
51680 tree t;
51681 int i;
51683 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51684 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51685 switch (TYPE_MODE (TREE_TYPE (t)))
51687 case QImode:
51688 case HImode:
51689 case SImode:
51690 case DImode:
51691 case SFmode:
51692 case DFmode:
51693 /* case SCmode: */
51694 /* case DCmode: */
51695 break;
51696 default:
51697 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51698 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51699 return 0;
51702 if (clonei->cilk_elemental)
51704 /* Parse the processor clause here. If not present, default to 'b'. */
51705 clonei->vecsize_mangle = 'b';
51707 else if (!TREE_PUBLIC (node->decl))
51709 /* If the function isn't exported, we can pick up just one ISA
51710 for the clones. */
51711 if (TARGET_AVX2)
51712 clonei->vecsize_mangle = 'd';
51713 else if (TARGET_AVX)
51714 clonei->vecsize_mangle = 'c';
51715 else
51716 clonei->vecsize_mangle = 'b';
51717 ret = 1;
51719 else
51721 clonei->vecsize_mangle = "bcd"[num];
51722 ret = 3;
51724 switch (clonei->vecsize_mangle)
51726 case 'b':
51727 clonei->vecsize_int = 128;
51728 clonei->vecsize_float = 128;
51729 break;
51730 case 'c':
51731 clonei->vecsize_int = 128;
51732 clonei->vecsize_float = 256;
51733 break;
51734 case 'd':
51735 clonei->vecsize_int = 256;
51736 clonei->vecsize_float = 256;
51737 break;
51739 if (clonei->simdlen == 0)
51741 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51742 clonei->simdlen = clonei->vecsize_int;
51743 else
51744 clonei->simdlen = clonei->vecsize_float;
51745 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51746 if (clonei->simdlen > 16)
51747 clonei->simdlen = 16;
51749 return ret;
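/* A worked example with illustrative settings: for a non-exported function
   compiled with -mavx (but not -mavx2) and a double base type, the single
   clone gets vecsize_mangle 'c', vecsize_int = 128, vecsize_float = 256,
   and, when simdlen was 0, simdlen = 256 / 64 = 4.  */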
51752 /* Add target attribute to SIMD clone NODE if needed. */
51754 static void
51755 ix86_simd_clone_adjust (struct cgraph_node *node)
51757 const char *str = NULL;
51758 gcc_assert (node->decl == cfun->decl);
51759 switch (node->simdclone->vecsize_mangle)
51761 case 'b':
51762 if (!TARGET_SSE2)
51763 str = "sse2";
51764 break;
51765 case 'c':
51766 if (!TARGET_AVX)
51767 str = "avx";
51768 break;
51769 case 'd':
51770 if (!TARGET_AVX2)
51771 str = "avx2";
51772 break;
51773 default:
51774 gcc_unreachable ();
51776 if (str == NULL)
51777 return;
51778 push_cfun (NULL);
51779 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51780 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51781 gcc_assert (ok);
51782 pop_cfun ();
51783 ix86_reset_previous_fndecl ();
51784 ix86_set_current_function (node->decl);
51787 /* If SIMD clone NODE can't be used in a vectorized loop
51788 in current function, return -1, otherwise return a badness of using it
51789 (0 if it is most desirable from vecsize_mangle point of view, 1
51790 slightly less desirable, etc.). */
51792 static int
51793 ix86_simd_clone_usable (struct cgraph_node *node)
51795 switch (node->simdclone->vecsize_mangle)
51797 case 'b':
51798 if (!TARGET_SSE2)
51799 return -1;
51800 if (!TARGET_AVX)
51801 return 0;
51802 return TARGET_AVX2 ? 2 : 1;
51803 case 'c':
51804 if (!TARGET_AVX)
51805 return -1;
51806 return TARGET_AVX2 ? 1 : 0;
51807 break;
51808 case 'd':
51809 if (!TARGET_AVX2)
51810 return -1;
51811 return 0;
51812 default:
51813 gcc_unreachable ();
51817 /* This function adjusts the unroll factor based on
51818 the hardware capabilities. For example, bdver3 has
51819 a loop buffer which makes unrolling of smaller
51820 loops less important. This function decides the
51821 unroll factor using the number of memory references
51822 (value 32 is used) as a heuristic. */
51824 static unsigned
51825 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51827 basic_block *bbs;
51828 rtx_insn *insn;
51829 unsigned i;
51830 unsigned mem_count = 0;
51832 if (!TARGET_ADJUST_UNROLL)
51833 return nunroll;
51835 /* Count the number of memory references within the loop body.
51836 This value determines the unrolling factor for bdver3 and bdver4
51837 architectures. */
51838 subrtx_iterator::array_type array;
51839 bbs = get_loop_body (loop);
51840 for (i = 0; i < loop->num_nodes; i++)
51841 FOR_BB_INSNS (bbs[i], insn)
51842 if (NONDEBUG_INSN_P (insn))
51843 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51844 if (const_rtx x = *iter)
51845 if (MEM_P (x))
51847 machine_mode mode = GET_MODE (x);
51848 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51849 if (n_words > 4)
51850 mem_count += 2;
51851 else
51852 mem_count += 1;
51854 free (bbs);
51856 if (mem_count && mem_count <= 32)
51857 return 32 / mem_count;
51859 return nunroll;
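/* A worked example with illustrative numbers: on bdver3/bdver4, a loop
   body containing 8 word-sized memory references gives mem_count = 8 and
   an unroll factor of 32 / 8 = 4; with no memory references, or more than
   32 of them, the requested NUNROLL is returned unchanged.  */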
51863 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51865 static bool
51866 ix86_float_exceptions_rounding_supported_p (void)
51868 /* For x87 floating point with standard excess precision handling,
51869 there is no adddf3 pattern (since x87 floating point only has
51870 XFmode operations) so the default hook implementation gets this
51871 wrong. */
51872 return TARGET_80387 || TARGET_SSE_MATH;
51875 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51877 static void
51878 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51880 if (!TARGET_80387 && !TARGET_SSE_MATH)
51881 return;
51882 tree exceptions_var = create_tmp_var (integer_type_node);
51883 if (TARGET_80387)
51885 tree fenv_index_type = build_index_type (size_int (6));
51886 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51887 tree fenv_var = create_tmp_var (fenv_type);
51888 mark_addressable (fenv_var);
51889 tree fenv_ptr = build_pointer_type (fenv_type);
51890 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51891 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51892 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51893 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51894 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51895 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51896 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51897 tree hold_fnclex = build_call_expr (fnclex, 0);
51898 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51899 hold_fnclex);
51900 *clear = build_call_expr (fnclex, 0);
51901 tree sw_var = create_tmp_var (short_unsigned_type_node);
51902 tree fnstsw_call = build_call_expr (fnstsw, 0);
51903 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51904 sw_var, fnstsw_call);
51905 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51906 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51907 exceptions_var, exceptions_x87);
51908 *update = build2 (COMPOUND_EXPR, integer_type_node,
51909 sw_mod, update_mod);
51910 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51911 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51913 if (TARGET_SSE_MATH)
51915 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51916 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51917 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51918 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51919 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51920 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51921 mxcsr_orig_var, stmxcsr_hold_call);
51922 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51923 mxcsr_orig_var,
51924 build_int_cst (unsigned_type_node, 0x1f80));
51925 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51926 build_int_cst (unsigned_type_node, 0xffffffc0));
51927 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51928 mxcsr_mod_var, hold_mod_val);
51929 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51930 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51931 hold_assign_orig, hold_assign_mod);
51932 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51933 ldmxcsr_hold_call);
51934 if (*hold)
51935 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51936 else
51937 *hold = hold_all;
51938 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51939 if (*clear)
51940 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51941 ldmxcsr_clear_call);
51942 else
51943 *clear = ldmxcsr_clear_call;
51944 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51945 tree exceptions_sse = fold_convert (integer_type_node,
51946 stxmcsr_update_call);
51947 if (*update)
51949 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51950 exceptions_var, exceptions_sse);
51951 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51952 exceptions_var, exceptions_mod);
51953 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51954 exceptions_assign);
51956 else
51957 *update = build2 (MODIFY_EXPR, integer_type_node,
51958 exceptions_var, exceptions_sse);
51959 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51960 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51961 ldmxcsr_update_call);
51963 tree atomic_feraiseexcept
51964 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51965 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51966 1, exceptions_var);
51967 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51968 atomic_feraiseexcept_call);
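/* For illustration, an outline of the sequences built above in C-like
   pseudocode (both x87 and SSE math enabled; names are illustrative):

     hold:    fnstenv (&fenv); fnclex ();
              mxcsr_orig = stmxcsr ();
              mxcsr_mod  = (mxcsr_orig | 0x1f80) & 0xffffffc0;
              ldmxcsr (mxcsr_mod);
     clear:   fnclex (); ldmxcsr (mxcsr_mod);
     update:  exceptions  = fnstsw ();  fldenv (&fenv);
              exceptions |= stmxcsr (); ldmxcsr (mxcsr_orig);
              __atomic_feraiseexcept (exceptions);

   0x1f80 sets all SSE exception mask bits, and the & 0xffffffc0 clears any
   pending exception flag bits.  */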
51971 /* Return mode to be used for bounds or VOIDmode
51972 if bounds are not supported. */
51974 static enum machine_mode
51975 ix86_mpx_bound_mode ()
51977 /* Do not support pointer checker if MPX
51978 is not enabled. */
51979 if (!TARGET_MPX)
51981 if (flag_check_pointer_bounds)
51982 warning (0, "Pointer Checker requires MPX support on this target."
51983 " Use -mmpx options to enable MPX.");
51984 return VOIDmode;
51987 return BNDmode;
51990 /* Return constant used to statically initialize constant bounds.
51992 This function is used to create special bound values. For now
51993 only INIT bounds and NONE bounds are expected. More special
51994 values may be added later. */
51996 static tree
51997 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51999 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52000 : build_zero_cst (pointer_sized_int_node);
52001 tree high = ub ? build_zero_cst (pointer_sized_int_node)
52002 : build_minus_one_cst (pointer_sized_int_node);
52004 /* This function is supposed to be used to create INIT and
52005 NONE bounds only. */
52006 gcc_assert ((lb == 0 && ub == -1)
52007 || (lb == -1 && ub == 0));
52009 return build_complex (NULL, low, high);
52012 /* Generate a list of statements STMTS to initialize pointer bounds
52013 variable VAR with bounds LB and UB. Return the number of generated
52014 statements. */
52016 static int
52017 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52019 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52020 tree lhs, modify, var_p;
52022 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52023 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52025 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52026 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52027 append_to_statement_list (modify, stmts);
52029 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52030 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52031 TYPE_SIZE_UNIT (pointer_sized_int_node)));
52032 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52033 append_to_statement_list (modify, stmts);
52035 return 2;
52038 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52039 /* For i386, a common symbol is local only for non-PIE binaries. For
52040 x86-64, a common symbol is local only for non-PIE binaries or when the
52041 linker supports copy relocations in PIE binaries. */
52043 static bool
52044 ix86_binds_local_p (const_tree exp)
52046 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52047 (!flag_pic
52048 || (TARGET_64BIT
52049 && HAVE_LD_PIE_COPYRELOC != 0)));
52051 #endif
52053 /* If MEM is in the form of [base+offset], extract the two parts
52054 of the address into BASE and OFFSET; otherwise return false. */
52056 static bool
52057 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
52059 rtx addr;
52061 gcc_assert (MEM_P (mem));
52063 addr = XEXP (mem, 0);
52065 if (GET_CODE (addr) == CONST)
52066 addr = XEXP (addr, 0);
52068 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
52070 *base = addr;
52071 *offset = const0_rtx;
52072 return true;
52075 if (GET_CODE (addr) == PLUS
52076 && (REG_P (XEXP (addr, 0))
52077 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
52078 && CONST_INT_P (XEXP (addr, 1)))
52080 *base = XEXP (addr, 0);
52081 *offset = XEXP (addr, 1);
52082 return true;
52085 return false;
52088 /* Given OPERANDS of consecutive load/store, check if we can merge
52089 them into move multiple. LOAD is true if they are load instructions.
52090 MODE is the mode of memory operands. */
52092 bool
52093 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
52094                                     enum machine_mode mode)
52095 {
52096   HOST_WIDE_INT offval_1, offval_2, msize;
52097   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
52099   if (load)
52100     {
52101       mem_1 = operands[1];
52102       mem_2 = operands[3];
52103       reg_1 = operands[0];
52104       reg_2 = operands[2];
52105     }
52106   else
52107     {
52108       mem_1 = operands[0];
52109       mem_2 = operands[2];
52110       reg_1 = operands[1];
52111       reg_2 = operands[3];
52112     }
52114   gcc_assert (REG_P (reg_1) && REG_P (reg_2));
52116   if (REGNO (reg_1) != REGNO (reg_2))
52117     return false;
52119   /* Check if the addresses are in the form of [base+offset].  */
52120   if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
52121     return false;
52122   if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
52123     return false;
52125   /* Check if the bases are the same.  */
52126   if (!rtx_equal_p (base_1, base_2))
52127     return false;
52129   offval_1 = INTVAL (offset_1);
52130   offval_2 = INTVAL (offset_2);
52131   msize = GET_MODE_SIZE (mode);
52132   /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
52133   if (offval_1 + msize != offval_2)
52134     return false;
52136   return true;
52137 }
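/* Editorial sketch, not part of the original file: for a load pair the
   expected operand layout is

     operands[0] = reg_1    operands[1] = mem_1
     operands[2] = reg_2    operands[3] = mem_2

   (mems and regs swap roles for stores).  The check succeeds only when both
   destinations have the same register number, both addresses share one base,
   and mem_2 lies exactly GET_MODE_SIZE (mode) bytes above mem_1.  */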
52139 /* Initialize the GCC target structure. */
52140 #undef TARGET_RETURN_IN_MEMORY
52141 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52143 #undef TARGET_LEGITIMIZE_ADDRESS
52144 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52146 #undef TARGET_ATTRIBUTE_TABLE
52147 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52148 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52149 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52151 # undef TARGET_MERGE_DECL_ATTRIBUTES
52152 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52153 #endif
52155 #undef TARGET_COMP_TYPE_ATTRIBUTES
52156 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52158 #undef TARGET_INIT_BUILTINS
52159 #define TARGET_INIT_BUILTINS ix86_init_builtins
52160 #undef TARGET_BUILTIN_DECL
52161 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52162 #undef TARGET_EXPAND_BUILTIN
52163 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52165 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52166 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52167 ix86_builtin_vectorized_function
52169 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52170 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52172 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52173 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52175 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52176 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52178 #undef TARGET_BUILTIN_RECIPROCAL
52179 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52181 #undef TARGET_ASM_FUNCTION_EPILOGUE
52182 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52184 #undef TARGET_ENCODE_SECTION_INFO
52185 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52186 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52187 #else
52188 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52189 #endif
52191 #undef TARGET_ASM_OPEN_PAREN
52192 #define TARGET_ASM_OPEN_PAREN ""
52193 #undef TARGET_ASM_CLOSE_PAREN
52194 #define TARGET_ASM_CLOSE_PAREN ""
52196 #undef TARGET_ASM_BYTE_OP
52197 #define TARGET_ASM_BYTE_OP ASM_BYTE
52199 #undef TARGET_ASM_ALIGNED_HI_OP
52200 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52201 #undef TARGET_ASM_ALIGNED_SI_OP
52202 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52203 #ifdef ASM_QUAD
52204 #undef TARGET_ASM_ALIGNED_DI_OP
52205 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52206 #endif
52208 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52209 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52211 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52212 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52214 #undef TARGET_ASM_UNALIGNED_HI_OP
52215 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52216 #undef TARGET_ASM_UNALIGNED_SI_OP
52217 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52218 #undef TARGET_ASM_UNALIGNED_DI_OP
52219 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52221 #undef TARGET_PRINT_OPERAND
52222 #define TARGET_PRINT_OPERAND ix86_print_operand
52223 #undef TARGET_PRINT_OPERAND_ADDRESS
52224 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52225 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52226 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52227 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52228 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52230 #undef TARGET_SCHED_INIT_GLOBAL
52231 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52232 #undef TARGET_SCHED_ADJUST_COST
52233 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52234 #undef TARGET_SCHED_ISSUE_RATE
52235 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52236 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52237 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52238 ia32_multipass_dfa_lookahead
52239 #undef TARGET_SCHED_MACRO_FUSION_P
52240 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52241 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52242 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52244 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52245 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52247 #undef TARGET_MEMMODEL_CHECK
52248 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52250 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52251 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52253 #ifdef HAVE_AS_TLS
52254 #undef TARGET_HAVE_TLS
52255 #define TARGET_HAVE_TLS true
52256 #endif
52257 #undef TARGET_CANNOT_FORCE_CONST_MEM
52258 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52259 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52260 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52262 #undef TARGET_DELEGITIMIZE_ADDRESS
52263 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52265 #undef TARGET_MS_BITFIELD_LAYOUT_P
52266 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52268 #if TARGET_MACHO
52269 #undef TARGET_BINDS_LOCAL_P
52270 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52271 #else
52272 #undef TARGET_BINDS_LOCAL_P
52273 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52274 #endif
52275 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52276 #undef TARGET_BINDS_LOCAL_P
52277 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52278 #endif
52280 #undef TARGET_ASM_OUTPUT_MI_THUNK
52281 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52282 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52283 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52285 #undef TARGET_ASM_FILE_START
52286 #define TARGET_ASM_FILE_START x86_file_start
52288 #undef TARGET_OPTION_OVERRIDE
52289 #define TARGET_OPTION_OVERRIDE ix86_option_override
52291 #undef TARGET_REGISTER_MOVE_COST
52292 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52293 #undef TARGET_MEMORY_MOVE_COST
52294 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52295 #undef TARGET_RTX_COSTS
52296 #define TARGET_RTX_COSTS ix86_rtx_costs
52297 #undef TARGET_ADDRESS_COST
52298 #define TARGET_ADDRESS_COST ix86_address_cost
52300 #undef TARGET_FIXED_CONDITION_CODE_REGS
52301 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52302 #undef TARGET_CC_MODES_COMPATIBLE
52303 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52305 #undef TARGET_MACHINE_DEPENDENT_REORG
52306 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52308 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52309 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52311 #undef TARGET_BUILD_BUILTIN_VA_LIST
52312 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52314 #undef TARGET_FOLD_BUILTIN
52315 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
52317 #undef TARGET_COMPARE_VERSION_PRIORITY
52318 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52320 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52321 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52322 ix86_generate_version_dispatcher_body
52324 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52325 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52326 ix86_get_function_versions_dispatcher
52328 #undef TARGET_ENUM_VA_LIST_P
52329 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52331 #undef TARGET_FN_ABI_VA_LIST
52332 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52334 #undef TARGET_CANONICAL_VA_LIST_TYPE
52335 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52337 #undef TARGET_EXPAND_BUILTIN_VA_START
52338 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52340 #undef TARGET_MD_ASM_ADJUST
52341 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
52343 #undef TARGET_PROMOTE_PROTOTYPES
52344 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52345 #undef TARGET_SETUP_INCOMING_VARARGS
52346 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52347 #undef TARGET_MUST_PASS_IN_STACK
52348 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52349 #undef TARGET_FUNCTION_ARG_ADVANCE
52350 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52351 #undef TARGET_FUNCTION_ARG
52352 #define TARGET_FUNCTION_ARG ix86_function_arg
52353 #undef TARGET_INIT_PIC_REG
52354 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52355 #undef TARGET_USE_PSEUDO_PIC_REG
52356 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52357 #undef TARGET_FUNCTION_ARG_BOUNDARY
52358 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52359 #undef TARGET_PASS_BY_REFERENCE
52360 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52361 #undef TARGET_INTERNAL_ARG_POINTER
52362 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52363 #undef TARGET_UPDATE_STACK_BOUNDARY
52364 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52365 #undef TARGET_GET_DRAP_RTX
52366 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52367 #undef TARGET_STRICT_ARGUMENT_NAMING
52368 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52369 #undef TARGET_STATIC_CHAIN
52370 #define TARGET_STATIC_CHAIN ix86_static_chain
52371 #undef TARGET_TRAMPOLINE_INIT
52372 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52373 #undef TARGET_RETURN_POPS_ARGS
52374 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52376 #undef TARGET_LEGITIMATE_COMBINED_INSN
52377 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52379 #undef TARGET_ASAN_SHADOW_OFFSET
52380 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52382 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52383 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52385 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52386 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52389 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52391 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52392 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52393 ix86_libgcc_floating_mode_supported_p
52395 #undef TARGET_C_MODE_FOR_SUFFIX
52396 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52398 #ifdef HAVE_AS_TLS
52399 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52400 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52401 #endif
52403 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52404 #undef TARGET_INSERT_ATTRIBUTES
52405 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52406 #endif
52408 #undef TARGET_MANGLE_TYPE
52409 #define TARGET_MANGLE_TYPE ix86_mangle_type
52411 #if !TARGET_MACHO
52412 #undef TARGET_STACK_PROTECT_FAIL
52413 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52414 #endif
52416 #undef TARGET_FUNCTION_VALUE
52417 #define TARGET_FUNCTION_VALUE ix86_function_value
52419 #undef TARGET_FUNCTION_VALUE_REGNO_P
52420 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52422 #undef TARGET_PROMOTE_FUNCTION_MODE
52423 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52425 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52426 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52428 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52429 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52431 #undef TARGET_INSTANTIATE_DECLS
52432 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52434 #undef TARGET_SECONDARY_RELOAD
52435 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52437 #undef TARGET_CLASS_MAX_NREGS
52438 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52440 #undef TARGET_PREFERRED_RELOAD_CLASS
52441 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52442 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52443 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52444 #undef TARGET_CLASS_LIKELY_SPILLED_P
52445 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52447 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52448 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52449 ix86_builtin_vectorization_cost
52450 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52451 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52452 ix86_vectorize_vec_perm_const_ok
52453 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52454 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52455 ix86_preferred_simd_mode
52456 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52457 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52458 ix86_autovectorize_vector_sizes
52459 #undef TARGET_VECTORIZE_INIT_COST
52460 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52461 #undef TARGET_VECTORIZE_ADD_STMT_COST
52462 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52463 #undef TARGET_VECTORIZE_FINISH_COST
52464 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52465 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52466 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52468 #undef TARGET_SET_CURRENT_FUNCTION
52469 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52471 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52472 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52474 #undef TARGET_OPTION_SAVE
52475 #define TARGET_OPTION_SAVE ix86_function_specific_save
52477 #undef TARGET_OPTION_RESTORE
52478 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52480 #undef TARGET_OPTION_POST_STREAM_IN
52481 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52483 #undef TARGET_OPTION_PRINT
52484 #define TARGET_OPTION_PRINT ix86_function_specific_print
52486 #undef TARGET_OPTION_FUNCTION_VERSIONS
52487 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52489 #undef TARGET_CAN_INLINE_P
52490 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52492 #undef TARGET_EXPAND_TO_RTL_HOOK
52493 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52495 #undef TARGET_LEGITIMATE_ADDRESS_P
52496 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52498 #undef TARGET_LRA_P
52499 #define TARGET_LRA_P hook_bool_void_true
52501 #undef TARGET_REGISTER_PRIORITY
52502 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52504 #undef TARGET_REGISTER_USAGE_LEVELING_P
52505 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52507 #undef TARGET_LEGITIMATE_CONSTANT_P
52508 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52510 #undef TARGET_FRAME_POINTER_REQUIRED
52511 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52513 #undef TARGET_CAN_ELIMINATE
52514 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52516 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52517 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52519 #undef TARGET_ASM_CODE_END
52520 #define TARGET_ASM_CODE_END ix86_code_end
52522 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52523 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52525 #if TARGET_MACHO
52526 #undef TARGET_INIT_LIBFUNCS
52527 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52528 #endif
52530 #undef TARGET_LOOP_UNROLL_ADJUST
52531 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52533 #undef TARGET_SPILL_CLASS
52534 #define TARGET_SPILL_CLASS ix86_spill_class
52536 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52537 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52538 ix86_simd_clone_compute_vecsize_and_simdlen
52540 #undef TARGET_SIMD_CLONE_ADJUST
52541 #define TARGET_SIMD_CLONE_ADJUST \
52542 ix86_simd_clone_adjust
52544 #undef TARGET_SIMD_CLONE_USABLE
52545 #define TARGET_SIMD_CLONE_USABLE \
52546 ix86_simd_clone_usable
52548 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52549 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52550 ix86_float_exceptions_rounding_supported_p
52552 #undef TARGET_MODE_EMIT
52553 #define TARGET_MODE_EMIT ix86_emit_mode_set
52555 #undef TARGET_MODE_NEEDED
52556 #define TARGET_MODE_NEEDED ix86_mode_needed
52558 #undef TARGET_MODE_AFTER
52559 #define TARGET_MODE_AFTER ix86_mode_after
52561 #undef TARGET_MODE_ENTRY
52562 #define TARGET_MODE_ENTRY ix86_mode_entry
52564 #undef TARGET_MODE_EXIT
52565 #define TARGET_MODE_EXIT ix86_mode_exit
52567 #undef TARGET_MODE_PRIORITY
52568 #define TARGET_MODE_PRIORITY ix86_mode_priority
52570 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52571 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52573 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52574 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52576 #undef TARGET_STORE_BOUNDS_FOR_ARG
52577 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52579 #undef TARGET_LOAD_RETURNED_BOUNDS
52580 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52582 #undef TARGET_STORE_RETURNED_BOUNDS
52583 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52585 #undef TARGET_CHKP_BOUND_MODE
52586 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52588 #undef TARGET_BUILTIN_CHKP_FUNCTION
52589 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52591 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52592 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52594 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52595 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52597 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52598 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52600 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52601 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52603 #undef TARGET_OFFLOAD_OPTIONS
52604 #define TARGET_OFFLOAD_OPTIONS \
52605 ix86_offload_options
52607 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52608 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52610 struct gcc_target targetm = TARGET_INITIALIZER;
52612 #include "gt-i386.h"