/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                \
  ((mode) == QImode ? 0                 \
   : (mode) == HImode ? 1               \
   : (mode) == SImode ? 2               \
   : (mode) == DImode ? 3               \
   : 4)
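
/* Illustrative sketch (editor's annotation, not part of the original
   source): the per-mode arrays in the processor_costs tables below are
   meant to be indexed with MODE_INDEX, roughly along the lines of

     cost->mult_init[MODE_INDEX (mode)] + nbits * cost->mult_bit

   where `cost' points at one of the tables defined in this file.  The
   field names used here are assumptions for illustration only.  */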

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
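
/* Worked example (editor's annotation, not original): assuming
   COSTS_N_INSNS (N) expands to (N) * 4, an operation rated
   COSTS_N_INSNS (3) costs 12 units, whereas the size-tuned table
   rates the same operation as COSTS_N_BYTES (3) == 6, i.e. roughly
   its encoded length in bytes rather than its latency.  */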

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}

static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
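
/* Editor's annotation (not original): each stringop_algs initializer
   pairs a fallback algorithm for unknown sizes with a list of
   {max_size, algorithm, alignment-flag} triples; a max_size of -1
   terminates the list and covers all remaining sizes.  As I read the
   surrounding code, the two array elements select the 32-bit and the
   64-bit variant of the tuning.  */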

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),    /* cost of an add instruction */
  COSTS_N_BYTES (3),    /* cost of a lea instruction */
  COSTS_N_BYTES (2),    /* variable shift costs */
  COSTS_N_BYTES (3),    /* constant shift costs */
  {COSTS_N_BYTES (3),   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),    /* cost of movsx */
  COSTS_N_BYTES (3),    /* cost of movzx */
  0,                    /* "large" insn */
  2,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_BYTES (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  1,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  1,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (6),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),   /* HI */
   COSTS_N_INSNS (6),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (23),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  4,                    /* size of l1 cache.  486 has 8kB cache
                           shared for code and data, so 4kB is
                           not really precise.  */
  4,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),   /* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  8,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),   /* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks inline loop is still a
   noticeable win, for bigger blocks either rep movsl or rep movsb is
   way to go.  Rep movsb has apparently more expensive startup time in CPU,
   but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (2),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (7),   /* SI */
   COSTS_N_INSNS (7),   /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  1,                    /* cost for loading QImode using movzbl */
  {1, 1, 1},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {1, 1, 1},            /* cost of storing integer registers */
  1,                    /* cost of reg,reg fld/fst */
  {1, 1, 1},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 6, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */

  1,                    /* cost of moving MMX register */
  {1, 1},               /* cost of loading MMX registers
                           in SImode and DImode */
  {1, 1},               /* cost of storing MMX registers
                           in SImode and DImode */
  1,                    /* cost of moving SSE register */
  {1, 1, 1},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {1, 1, 1},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  1,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  128,                  /* size of l2 cache.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),   /* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  32,                   /* size of l2 cache.  Some models
                           have integrated l2 cache, but
                           optimizing for k6 is not important
                           enough to worry about that.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* For some reason, Athlon deals better with REP prefix (relative to loops)
   compared to K8.  Alignment becomes important after 8 bytes for memcpy and
   128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (5),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),   /* HI */
   COSTS_N_INSNS (5),   /* SI */
   COSTS_N_INSNS (5),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  5,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* K8 has optimized REP instruction for medium sized blocks, but for very
   small blocks it is better to use loop.  For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  k8_memcpy,
  k8_memset,
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  5,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  3,                    /* vec_unalign_load_cost.  */
  3,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  2,                    /* cond_not_taken_branch_cost.  */
};

/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  amdfam10_memcpy,
  amdfam10_memset,
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* BDVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
                        /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */

  bdver1_memcpy,
  bdver1_memset,
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* BDVER2 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
                        /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */

  bdver2_memcpy,
  bdver2_memset,
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* BDVER3 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */

  bdver3_memcpy,
  bdver3_memset,
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* BDVER4 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */

  bdver4_memcpy,
  bdver4_memset,
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* BTVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  btver1_memcpy,
  btver1_memset,
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  32,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  btver2_memcpy,
  btver2_memset,
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration lea takes 2 cycles or more. With
1878 this cost, however, our current implementation of synth_mult results in
1879 the use of unnecessary temporary registers, causing regressions on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925 value is increased to the perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration lea takes 2 cycles or more. With
1965 this cost, however, our current implementation of synth_mult results in
1966 the use of unnecessary temporary registers, causing regressions on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
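/* Illustrative sketch, not part of the original source: the two tables
   above are generated from x86-tune.def with the X-macro pattern.  A
   hypothetical entry such as

     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)

   would expand to the string "foo" in ix86_tune_feature_names and to the
   selector mask (m_CORE_ALL | m_GENERIC) in initial_ix86_tune_features,
   keeping the name table and the mask table in sync by construction.  */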
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2119 /* In case the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
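/* Illustrative note, not part of the original source: reading the SVR4
   map above together with the comment preceding it, GCC regno 2 (%ecx)
   maps to DWARF register 1 and GCC regno 1 (%edx) maps to DWARF
   register 2, i.e. svr4_dbx_register_map[2] == 1 and
   svr4_dbx_register_map[1] == 2; the x87 stack registers (GCC regnos
   8-15) map to DWARF registers 11-18.  */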
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2329 [frame] |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2334 struct ix86_frame
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits, as specified on the
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2408 /* Register class used for passing given 64bit part of the argument.
2409 These represent classes as documented by the PS ABI, with the exception
2410 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2411 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2413 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (upper half does contain padding). */
2415 enum x86_64_reg_class
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2430 #define MAX_CLASSES 8
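/* Illustrative sketch, not part of the original source (assumes the
   usual SysV x86-64 psABI classification): a 16-byte aggregate such as

     struct s { double d; long l; };

   has its first eightbyte classified as an SSE class (X86_64_SSEDF_CLASS,
   since that eightbyte is exactly a double) and its second eightbyte as
   X86_64_INTEGER_CLASS, so the struct is passed in one SSE register and
   one integer register.  */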
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2541 int i;
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256-bit registers. The pass
2545 reuses the mode switching infrastructure by re-running the mode
2546 insertion pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2557 namespace {
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
2594 } // anon namespace
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
2604 static inline bool
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options are matched first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2731 if (arch)
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2738 if (tune)
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2810 default:
2811 gcc_unreachable ();
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2836 for (i = 0; i < num; i++)
2838 size_t len2[2];
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2843 if (i != 0)
2845 *ptr++ = ' ';
2846 line_len++;
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2868 return ret;
2871 /* Return true if profiling code should be emitted before the
2872 prologue, and false otherwise.
2873 Note: on x86 this is the case when -mfentry ("hotfix" support) is used. */
2874 static bool
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2889 if (opts)
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2894 else
2895 fputs ("<no options>\n\n", stderr);
2897 return;
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2908 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string has the following form (or is a comma-separated list of such entries):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2917 Examples:
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
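/* Illustrative sketch, not part of the original source: the second
   example above, once parsed by ix86_parse_stringop_strategy_string
   below, would fill input_ranges roughly as

     { 16,   rep_8byte,   true  }    sizes [1, 16],    noalign
     { 2048, vector_loop, false }    sizes [17, 2048], align
     { -1,   libcall,     true  }    sizes > 2048,     noalign

   where each entry is a stringop_size_range {max, alg, noalign}.  */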
2933 struct stringop_size_range
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2969 return;
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2983 if (i == last_alg)
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2988 return;
2991 input_ranges[n].max = maxs;
2992 input_ranges[n].alg = (stringop_alg) i;
2993 if (!strcmp (align, "align"))
2994 input_ranges[n].noalign = false;
2995 else if (!strcmp (align, "noalign"))
2996 input_ranges[n].noalign = true;
2997 else
2999 error ("unknown alignment %s specified for option %s",
3000 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3001 return;
3003 n++;
3004 curr_range_str = next_range_str;
3006 while (curr_range_str);
3008 if (input_ranges[n - 1].max != -1)
3010 error ("the max value for the last size range should be -1"
3011 " for option %s",
3012 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3013 return;
3016 if (n > MAX_STRINGOP_ALGS)
3018 error ("too many size ranges specified in option %s",
3019 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3020 return;
3023 /* Now override the default algs array. */
3024 for (i = 0; i < n; i++)
3026 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3027 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3028 = input_ranges[i].alg;
3029 *const_cast<int *>(&default_algs->size[i].noalign)
3030 = input_ranges[i].noalign;
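/* A minimal, self-contained sketch (editorial illustration only, guarded out
   of the build) of how one "strategy_alg:max_size:[align|noalign]" triple is
   pulled apart with the same sscanf pattern used above.  The helper name
   sketch_parse_one_range and the main driver are hypothetical and not part of
   the compiler.  */
#if 0
#include <stdio.h>

static void
sketch_parse_one_range (const char *range)
{
  char alg_name[128];
  char align[16];
  int maxs;

  /* Same field widths as the real parser: the algorithm name up to the first
     ':', then a signed max size, then "align" or "noalign".  */
  if (sscanf (range, "%20[^:]:%d:%10s", alg_name, &maxs, align) == 3)
    printf ("alg=%s max=%d %s\n", alg_name, maxs, align);
}

int
main (void)
{
  /* Matches one of the examples in the comment before
     ix86_parse_stringop_strategy_string.  */
  sketch_parse_one_range ("vector_loop:2048:align");
  return 0;
}
#endif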
3035 /* Parse the -mtune-ctrl= option. When DUMP is true,
3036 print the features that are explicitly set. */
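/* Editorial note: based on the parsing loop below, the accepted form is a
   comma-separated list of tuning feature names, where a leading '^' clears
   the feature instead of setting it, e.g. (with hypothetical feature names)
   -mtune-ctrl=<feature1>,^<feature2>.  The valid names are the entries of
   ix86_tune_feature_names[].  */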
3038 static void
3039 parse_mtune_ctrl_str (bool dump)
3041 if (!ix86_tune_ctrl_string)
3042 return;
3044 char *next_feature_string = NULL;
3045 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3046 char *orig = curr_feature_string;
3047 int i;
3050 bool clear = false;
3052 next_feature_string = strchr (curr_feature_string, ',');
3053 if (next_feature_string)
3054 *next_feature_string++ = '\0';
3055 if (*curr_feature_string == '^')
3057 curr_feature_string++;
3058 clear = true;
3060 for (i = 0; i < X86_TUNE_LAST; i++)
3062 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3064 ix86_tune_features[i] = !clear;
3065 if (dump)
3066 fprintf (stderr, "Explicitly %s feature %s\n",
3067 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3068 break;
3071 if (i == X86_TUNE_LAST)
3072 error ("unknown parameter to option -mtune-ctrl: %s",
3073 clear ? curr_feature_string - 1 : curr_feature_string);
3074 curr_feature_string = next_feature_string;
3076 while (curr_feature_string);
3077 free (orig);
3080 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3081 processor type. */
3083 static void
3084 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3086 unsigned int ix86_tune_mask = 1u << ix86_tune;
3087 int i;
3089 for (i = 0; i < X86_TUNE_LAST; ++i)
3091 if (ix86_tune_no_default)
3092 ix86_tune_features[i] = 0;
3093 else
3094 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3097 if (dump)
3099 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3100 for (i = 0; i < X86_TUNE_LAST; i++)
3101 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3102 ix86_tune_features[i] ? "on" : "off");
3105 parse_mtune_ctrl_str (dump);
3109 /* Override various settings based on options. If MAIN_ARGS_P, the
3110 options are from the command line, otherwise they are from
3111 attributes. */
3113 static void
3114 ix86_option_override_internal (bool main_args_p,
3115 struct gcc_options *opts,
3116 struct gcc_options *opts_set)
3118 int i;
3119 unsigned int ix86_arch_mask;
3120 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3121 const char *prefix;
3122 const char *suffix;
3123 const char *sw;
3125 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3126 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3127 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3128 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3129 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3130 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3131 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3132 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3133 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3134 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3135 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3136 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3137 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3138 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3139 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3140 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3141 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3142 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3143 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3144 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3145 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3146 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3147 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3148 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3149 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3150 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3151 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3152 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3153 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3154 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3155 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3156 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3157 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3158 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3159 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3160 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3161 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3162 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3163 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3164 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3165 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3166 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3167 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3168 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3169 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3170 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3171 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3172 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3173 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3174 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3175 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3176 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3177 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3178 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3179 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3180 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3181 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3183 #define PTA_CORE2 \
3184 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3185 | PTA_CX16 | PTA_FXSR)
3186 #define PTA_NEHALEM \
3187 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3188 #define PTA_WESTMERE \
3189 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3190 #define PTA_SANDYBRIDGE \
3191 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3192 #define PTA_IVYBRIDGE \
3193 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3194 #define PTA_HASWELL \
3195 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3196 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3197 #define PTA_BROADWELL \
3198 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3199 #define PTA_KNL \
3200 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3201 #define PTA_BONNELL \
3202 (PTA_CORE2 | PTA_MOVBE)
3203 #define PTA_SILVERMONT \
3204 (PTA_WESTMERE | PTA_MOVBE)
3206 /* If this reaches 64, we need to widen the struct pta flags field below. */
3208 static struct pta
3210 const char *const name; /* processor name or nickname. */
3211 const enum processor_type processor;
3212 const enum attr_cpu schedule;
3213 const unsigned HOST_WIDE_INT flags;
3215 const processor_alias_table[] =
3217 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3218 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3219 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3221 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3222 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3223 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3225 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3226 PTA_MMX | PTA_SSE | PTA_FXSR},
3227 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3229 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3230 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3231 PTA_MMX | PTA_SSE | PTA_FXSR},
3232 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3233 PTA_MMX | PTA_SSE | PTA_FXSR},
3234 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3235 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3236 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3237 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3238 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3239 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3240 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3241 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3242 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3245 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3246 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3248 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3249 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3250 PTA_SANDYBRIDGE},
3251 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3252 PTA_SANDYBRIDGE},
3253 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3254 PTA_IVYBRIDGE},
3255 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3256 PTA_IVYBRIDGE},
3257 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3259 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3260 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3262 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3264 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3265 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3266 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3267 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3268 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3269 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3271 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3272 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3273 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3274 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3275 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3276 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3278 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3280 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3281 {"x86-64", PROCESSOR_K8, CPU_K8,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3283 {"k8", PROCESSOR_K8, CPU_K8,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3285 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3286 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3288 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3289 {"opteron", PROCESSOR_K8, CPU_K8,
3290 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3291 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3292 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3293 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3294 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3295 {"athlon64", PROCESSOR_K8, CPU_K8,
3296 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3297 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3298 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3299 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3302 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3305 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3306 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3307 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3308 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3309 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3310 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3312 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3313 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3314 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3315 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3318 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3319 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3320 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3321 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3323 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3324 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3325 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3326 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3327 | PTA_XSAVEOPT | PTA_FSGSBASE},
3328 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3329 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3330 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3331 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3332 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3333 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3334 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3335 | PTA_MOVBE},
3336 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3337 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3339 | PTA_FXSR | PTA_XSAVE},
3340 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3341 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3342 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3343 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3344 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3345 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3347 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3348 PTA_64BIT
3349 | PTA_HLE /* flags are only used for -march switch. */ },
3352 /* -mrecip options. */
3353 static struct
3355 const char *string; /* option name */
3356 unsigned int mask; /* mask bits to set */
3358 const recip_options[] =
3360 { "all", RECIP_MASK_ALL },
3361 { "none", RECIP_MASK_NONE },
3362 { "div", RECIP_MASK_DIV },
3363 { "sqrt", RECIP_MASK_SQRT },
3364 { "vec-div", RECIP_MASK_VEC_DIV },
3365 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3368 int const pta_size = ARRAY_SIZE (processor_alias_table);
3370 /* Set up prefix/suffix so the error messages refer to either the command
3371 line argument, or the attribute(target). */
3372 if (main_args_p)
3374 prefix = "-m";
3375 suffix = "";
3376 sw = "switch";
3378 else
3380 prefix = "option(\"";
3381 suffix = "\")";
3382 sw = "attribute";
3385 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3386 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3387 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3388 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3389 #ifdef TARGET_BI_ARCH
3390 else
3392 #if TARGET_BI_ARCH == 1
3393 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3394 is on and OPTION_MASK_ABI_X32 is off. We turn off
3395 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3396 -mx32. */
3397 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3399 #else
3400 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3401 on and OPTION_MASK_ABI_64 is off. We turn off
3402 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3403 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3404 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3405 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3407 #endif
3409 #endif
3411 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3414 OPTION_MASK_ABI_64 for TARGET_X32. */
3415 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3416 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3418 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3420 | OPTION_MASK_ABI_X32
3421 | OPTION_MASK_ABI_64);
3422 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3424 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3425 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3426 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
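/* Editorial summary of the normalization above: x32 ends up with
   OPTION_MASK_ISA_64BIT set and OPTION_MASK_ABI_64 clear, LP64 (-m64) with
   OPTION_MASK_ISA_64BIT set and OPTION_MASK_ABI_X32 clear, and -m16 with all
   three of OPTION_MASK_ISA_64BIT, OPTION_MASK_ABI_X32 and OPTION_MASK_ABI_64
   clear.  */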
3430 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3431 SUBTARGET_OVERRIDE_OPTIONS;
3432 #endif
3434 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3435 SUBSUBTARGET_OVERRIDE_OPTIONS;
3436 #endif
3438 /* -fPIC is the default for x86_64. */
3439 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3440 opts->x_flag_pic = 2;
3442 /* Need to check -mtune=generic first. */
3443 if (opts->x_ix86_tune_string)
3445 /* As special support for cross compilers we read -mtune=native
3446 as -mtune=generic. With native compilers we won't see the
3447 -mtune=native, as it was changed by the driver. */
3448 if (!strcmp (opts->x_ix86_tune_string, "native"))
3450 opts->x_ix86_tune_string = "generic";
3452 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3453 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3454 "%stune=k8%s or %stune=generic%s instead as appropriate",
3455 prefix, suffix, prefix, suffix, prefix, suffix);
3457 else
3459 if (opts->x_ix86_arch_string)
3460 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3461 if (!opts->x_ix86_tune_string)
3463 opts->x_ix86_tune_string
3464 = processor_target_table[TARGET_CPU_DEFAULT].name;
3465 ix86_tune_defaulted = 1;
3468 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3469 or defaulted. We need to use a sensible tune option. */
3470 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3472 opts->x_ix86_tune_string = "generic";
3476 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3477 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3479 /* rep; movq isn't available in 32-bit code. */
3480 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3481 opts->x_ix86_stringop_alg = no_stringop;
3484 if (!opts->x_ix86_arch_string)
3485 opts->x_ix86_arch_string
3486 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3487 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3488 else
3489 ix86_arch_specified = 1;
3491 if (opts_set->x_ix86_pmode)
3493 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3494 && opts->x_ix86_pmode == PMODE_SI)
3495 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3496 && opts->x_ix86_pmode == PMODE_DI))
3497 error ("address mode %qs not supported in the %s bit mode",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3499 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3501 else
3502 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3503 ? PMODE_DI : PMODE_SI;
3505 if (!opts_set->x_ix86_abi)
3506 opts->x_ix86_abi = DEFAULT_ABI;
3508 /* For targets using the MS ABI, enable MS extensions if they have
3509 not been explicitly turned off. For non-MS ABIs we turn this
3510 option off. */
3511 if (!opts_set->x_flag_ms_extensions)
3512 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3514 if (opts_set->x_ix86_cmodel)
3516 switch (opts->x_ix86_cmodel)
3518 case CM_SMALL:
3519 case CM_SMALL_PIC:
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_SMALL_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "small", "32");
3525 break;
3527 case CM_MEDIUM:
3528 case CM_MEDIUM_PIC:
3529 if (opts->x_flag_pic)
3530 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3531 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3532 error ("code model %qs not supported in the %s bit mode",
3533 "medium", "32");
3534 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3535 error ("code model %qs not supported in x32 mode",
3536 "medium");
3537 break;
3539 case CM_LARGE:
3540 case CM_LARGE_PIC:
3541 if (opts->x_flag_pic)
3542 opts->x_ix86_cmodel = CM_LARGE_PIC;
3543 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3544 error ("code model %qs not supported in the %s bit mode",
3545 "large", "32");
3546 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3547 error ("code model %qs not supported in x32 mode",
3548 "large");
3549 break;
3551 case CM_32:
3552 if (opts->x_flag_pic)
3553 error ("code model %s does not support PIC mode", "32");
3554 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 error ("code model %qs not supported in the %s bit mode",
3556 "32", "64");
3557 break;
3559 case CM_KERNEL:
3560 if (opts->x_flag_pic)
3562 error ("code model %s does not support PIC mode", "kernel");
3563 opts->x_ix86_cmodel = CM_32;
3565 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3566 error ("code model %qs not supported in the %s bit mode",
3567 "kernel", "32");
3568 break;
3570 default:
3571 gcc_unreachable ();
3574 else
3576 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3577 use of rip-relative addressing. This eliminates fixups that
3578 would otherwise be needed if this object is to be placed in a
3579 DLL, and is essentially just as efficient as direct addressing. */
3580 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3581 && (TARGET_RDOS || TARGET_PECOFF))
3582 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3583 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3585 else
3586 opts->x_ix86_cmodel = CM_32;
3588 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3590 error ("-masm=intel not supported in this configuration");
3591 opts->x_ix86_asm_dialect = ASM_ATT;
3593 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3594 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3595 sorry ("%i-bit mode not compiled in",
3596 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3598 for (i = 0; i < pta_size; i++)
3599 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3601 ix86_schedule = processor_alias_table[i].schedule;
3602 ix86_arch = processor_alias_table[i].processor;
3603 /* Default cpu tuning to the architecture. */
3604 ix86_tune = ix86_arch;
3606 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3607 && !(processor_alias_table[i].flags & PTA_64BIT))
3608 error ("CPU you selected does not support x86-64 "
3609 "instruction set");
3611 if (processor_alias_table[i].flags & PTA_MMX
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3614 if (processor_alias_table[i].flags & PTA_3DNOW
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3617 if (processor_alias_table[i].flags & PTA_3DNOW_A
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3620 if (processor_alias_table[i].flags & PTA_SSE
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3623 if (processor_alias_table[i].flags & PTA_SSE2
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3626 if (processor_alias_table[i].flags & PTA_SSE3
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3629 if (processor_alias_table[i].flags & PTA_SSSE3
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3632 if (processor_alias_table[i].flags & PTA_SSE4_1
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3635 if (processor_alias_table[i].flags & PTA_SSE4_2
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3638 if (processor_alias_table[i].flags & PTA_AVX
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3641 if (processor_alias_table[i].flags & PTA_AVX2
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3644 if (processor_alias_table[i].flags & PTA_FMA
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3647 if (processor_alias_table[i].flags & PTA_SSE4A
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3650 if (processor_alias_table[i].flags & PTA_FMA4
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3653 if (processor_alias_table[i].flags & PTA_XOP
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3656 if (processor_alias_table[i].flags & PTA_LWP
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3659 if (processor_alias_table[i].flags & PTA_ABM
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3662 if (processor_alias_table[i].flags & PTA_BMI
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3665 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3668 if (processor_alias_table[i].flags & PTA_TBM
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3671 if (processor_alias_table[i].flags & PTA_BMI2
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3674 if (processor_alias_table[i].flags & PTA_CX16
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3677 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3680 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3681 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3684 if (processor_alias_table[i].flags & PTA_MOVBE
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3687 if (processor_alias_table[i].flags & PTA_AES
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3690 if (processor_alias_table[i].flags & PTA_SHA
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3693 if (processor_alias_table[i].flags & PTA_PCLMUL
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3696 if (processor_alias_table[i].flags & PTA_FSGSBASE
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3699 if (processor_alias_table[i].flags & PTA_RDRND
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3702 if (processor_alias_table[i].flags & PTA_F16C
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3705 if (processor_alias_table[i].flags & PTA_RTM
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3708 if (processor_alias_table[i].flags & PTA_HLE
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3711 if (processor_alias_table[i].flags & PTA_PRFCHW
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3714 if (processor_alias_table[i].flags & PTA_RDSEED
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3717 if (processor_alias_table[i].flags & PTA_ADX
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3720 if (processor_alias_table[i].flags & PTA_FXSR
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3723 if (processor_alias_table[i].flags & PTA_XSAVE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3726 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3729 if (processor_alias_table[i].flags & PTA_AVX512F
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3732 if (processor_alias_table[i].flags & PTA_AVX512ER
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3735 if (processor_alias_table[i].flags & PTA_AVX512PF
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3738 if (processor_alias_table[i].flags & PTA_AVX512CD
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3741 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3744 if (processor_alias_table[i].flags & PTA_PCOMMIT
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3747 if (processor_alias_table[i].flags & PTA_CLWB
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3750 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3753 if (processor_alias_table[i].flags & PTA_XSAVEC
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3756 if (processor_alias_table[i].flags & PTA_XSAVES
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3759 if (processor_alias_table[i].flags & PTA_AVX512DQ
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3762 if (processor_alias_table[i].flags & PTA_AVX512BW
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3765 if (processor_alias_table[i].flags & PTA_AVX512VL
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3768 if (processor_alias_table[i].flags & PTA_MPX
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3771 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3774 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3777 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3778 x86_prefetch_sse = true;
3780 break;
3783 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3784 error ("Intel MPX does not support x32");
3789 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3790 error ("generic CPU can be used only for %stune=%s %s",
3791 prefix, suffix, sw);
3792 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3793 error ("intel CPU can be used only for %stune=%s %s",
3794 prefix, suffix, sw);
3795 else if (i == pta_size)
3796 error ("bad value (%s) for %sarch=%s %s",
3797 opts->x_ix86_arch_string, prefix, suffix, sw);
3799 ix86_arch_mask = 1u << ix86_arch;
3800 for (i = 0; i < X86_ARCH_LAST; ++i)
3801 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3803 for (i = 0; i < pta_size; i++)
3804 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3806 ix86_schedule = processor_alias_table[i].schedule;
3807 ix86_tune = processor_alias_table[i].processor;
3808 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 if (!(processor_alias_table[i].flags & PTA_64BIT))
3812 if (ix86_tune_defaulted)
3814 opts->x_ix86_tune_string = "x86-64";
3815 for (i = 0; i < pta_size; i++)
3816 if (! strcmp (opts->x_ix86_tune_string,
3817 processor_alias_table[i].name))
3818 break;
3819 ix86_schedule = processor_alias_table[i].schedule;
3820 ix86_tune = processor_alias_table[i].processor;
3822 else
3823 error ("CPU you selected does not support x86-64 "
3824 "instruction set");
3827 /* Intel CPUs have always interpreted SSE prefetch instructions as
3828 NOPs; so, we can enable SSE prefetch instructions even when
3829 -mtune (rather than -march) points us to a processor that has them.
3830 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3831 higher processors. */
3832 if (TARGET_CMOV
3833 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3834 x86_prefetch_sse = true;
3835 break;
3838 if (ix86_tune_specified && i == pta_size)
3839 error ("bad value (%s) for %stune=%s %s",
3840 opts->x_ix86_tune_string, prefix, suffix, sw);
3842 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3844 #ifndef USE_IX86_FRAME_POINTER
3845 #define USE_IX86_FRAME_POINTER 0
3846 #endif
3848 #ifndef USE_X86_64_FRAME_POINTER
3849 #define USE_X86_64_FRAME_POINTER 0
3850 #endif
3852 /* Set the default values for switches whose default depends on TARGET_64BIT
3853 in case they weren't overwritten by command line options. */
3854 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3856 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3857 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3858 if (opts->x_flag_asynchronous_unwind_tables
3859 && !opts_set->x_flag_unwind_tables
3860 && TARGET_64BIT_MS_ABI)
3861 opts->x_flag_unwind_tables = 1;
3862 if (opts->x_flag_asynchronous_unwind_tables == 2)
3863 opts->x_flag_unwind_tables
3864 = opts->x_flag_asynchronous_unwind_tables = 1;
3865 if (opts->x_flag_pcc_struct_return == 2)
3866 opts->x_flag_pcc_struct_return = 0;
3868 else
3870 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3871 opts->x_flag_omit_frame_pointer
3872 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3873 if (opts->x_flag_asynchronous_unwind_tables == 2)
3874 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3875 if (opts->x_flag_pcc_struct_return == 2)
3876 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3879 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3880 /* TODO: ix86_cost should be chosen at instruction or function granularity,
3881 so that for cold code we use size_cost even in !optimize_size compilation. */
3882 if (opts->x_optimize_size)
3883 ix86_cost = &ix86_size_cost;
3884 else
3885 ix86_cost = ix86_tune_cost;
3887 /* Arrange to set up i386_stack_locals for all functions. */
3888 init_machine_status = ix86_init_machine_status;
3890 /* Validate -mregparm= value. */
3891 if (opts_set->x_ix86_regparm)
3893 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3894 warning (0, "-mregparm is ignored in 64-bit mode");
3895 if (opts->x_ix86_regparm > REGPARM_MAX)
3897 error ("-mregparm=%d is not between 0 and %d",
3898 opts->x_ix86_regparm, REGPARM_MAX);
3899 opts->x_ix86_regparm = 0;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_regparm = REGPARM_MAX;
3905 /* Default align_* from the processor table. */
3906 if (opts->x_align_loops == 0)
3908 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3909 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3911 if (opts->x_align_jumps == 0)
3913 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3914 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3916 if (opts->x_align_functions == 0)
3918 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3921 /* Provide default for -mbranch-cost= value. */
3922 if (!opts_set->x_ix86_branch_cost)
3923 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3927 opts->x_target_flags
3928 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3930 /* Enable by default the SSE and MMX builtins. Do allow the user to
3931 explicitly disable any of these. In particular, disabling SSE and
3932 MMX for kernel code is extremely useful. */
3933 if (!ix86_arch_specified)
3934 opts->x_ix86_isa_flags
3935 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3936 | TARGET_SUBTARGET64_ISA_DEFAULT)
3937 & ~opts->x_ix86_isa_flags_explicit);
3939 if (TARGET_RTD_P (opts->x_target_flags))
3940 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3942 else
3944 opts->x_target_flags
3945 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3947 if (!ix86_arch_specified)
3948 opts->x_ix86_isa_flags
3949 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3951 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3952 when the programmer takes care to keep the stack from being destroyed. */
3953 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3954 opts->x_target_flags |= MASK_NO_RED_ZONE;
3957 /* Keep nonleaf frame pointers. */
3958 if (opts->x_flag_omit_frame_pointer)
3959 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3960 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3961 opts->x_flag_omit_frame_pointer = 1;
3963 /* If we're doing fast math, we don't care about comparison order
3964 wrt NaNs. This lets us use a shorter comparison sequence. */
3965 if (opts->x_flag_finite_math_only)
3966 opts->x_target_flags &= ~MASK_IEEE_FP;
3968 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3969 since the insns won't need emulation. */
3970 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3971 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3973 /* Likewise, if the target doesn't have a 387, or we've specified
3974 software floating point, don't use 387 inline intrinsics. */
3975 if (!TARGET_80387_P (opts->x_target_flags))
3976 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3978 /* Turn on MMX builtins for -msse. */
3979 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 opts->x_ix86_isa_flags
3981 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3983 /* Enable SSE prefetch. */
3984 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3985 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3986 x86_prefetch_sse = true;
3988 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3989 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3990 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3991 opts->x_ix86_isa_flags
3992 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3994 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3995 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3996 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_isa_flags
3998 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4000 /* Enable lzcnt instruction for -mabm. */
4001 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4002 opts->x_ix86_isa_flags
4003 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4005 /* Validate -mpreferred-stack-boundary= value or default it to
4006 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4007 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4008 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4010 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4011 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4012 int max = (TARGET_SEH ? 4 : 12);
4014 if (opts->x_ix86_preferred_stack_boundary_arg < min
4015 || opts->x_ix86_preferred_stack_boundary_arg > max)
4017 if (min == max)
4018 error ("-mpreferred-stack-boundary is not supported "
4019 "for this target");
4020 else
4021 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4022 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4024 else
4025 ix86_preferred_stack_boundary
4026 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
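/* Editorial example of the conversion above: -mpreferred-stack-boundary=4
   gives 1 << 4 = 16 bytes, i.e. 16 * BITS_PER_UNIT = 128 bits on the usual
   8-bit-unit target, which is the common 16-byte stack alignment.  */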
4029 /* Set the default value for -mstackrealign. */
4030 if (opts->x_ix86_force_align_arg_pointer == -1)
4031 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4033 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4035 /* Validate -mincoming-stack-boundary= value or default it to
4036 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4037 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4038 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4040 if (opts->x_ix86_incoming_stack_boundary_arg
4041 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4042 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4043 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4044 opts->x_ix86_incoming_stack_boundary_arg,
4045 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4046 else
4048 ix86_user_incoming_stack_boundary
4049 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4050 ix86_incoming_stack_boundary
4051 = ix86_user_incoming_stack_boundary;
4055 #ifndef NO_PROFILE_COUNTERS
4056 if (flag_nop_mcount)
4057 error ("-mnop-mcount is not compatible with this target");
4058 #endif
4059 if (flag_nop_mcount && flag_pic)
4060 error ("-mnop-mcount is not implemented for -fPIC");
4062 /* Accept -msseregparm only if at least SSE support is enabled. */
4063 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4064 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4065 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4067 if (opts_set->x_ix86_fpmath)
4069 if (opts->x_ix86_fpmath & FPMATH_SSE)
4071 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4073 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4074 opts->x_ix86_fpmath = FPMATH_387;
4076 else if ((opts->x_ix86_fpmath & FPMATH_387)
4077 && !TARGET_80387_P (opts->x_target_flags))
4079 warning (0, "387 instruction set disabled, using SSE arithmetics");
4080 opts->x_ix86_fpmath = FPMATH_SSE;
4084 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4085 -mfpmath=387. The latter is nevertheless the default on many targets,
4086 since the extra 80-bit precision of temporaries is considered part of the ABI.
4087 Override the default at least for -ffast-math.
4088 TODO: -mfpmath=both seems to produce equally performing code with slightly
4089 smaller binaries. It is however not clear whether register allocation is
4090 ready for this setting.
4091 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4092 codegen. We may switch to 387 with -ffast-math for size-optimized
4093 functions. */
4094 else if (fast_math_flags_set_p (&global_options)
4095 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4096 opts->x_ix86_fpmath = FPMATH_SSE;
4097 else
4098 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4100 /* If the i387 is disabled, then do not return values in it. */
4101 if (!TARGET_80387_P (opts->x_target_flags))
4102 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4104 /* Use external vectorized library in vectorizing intrinsics. */
4105 if (opts_set->x_ix86_veclibabi_type)
4106 switch (opts->x_ix86_veclibabi_type)
4108 case ix86_veclibabi_type_svml:
4109 ix86_veclib_handler = ix86_veclibabi_svml;
4110 break;
4112 case ix86_veclibabi_type_acml:
4113 ix86_veclib_handler = ix86_veclibabi_acml;
4114 break;
4116 default:
4117 gcc_unreachable ();
4120 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4121 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4122 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4124 /* If stack probes are required, the space used for large function
4125 arguments on the stack must also be probed, so enable
4126 -maccumulate-outgoing-args so this happens in the prologue. */
4127 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4128 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4130 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4131 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4132 "for correctness", prefix, suffix);
4133 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4136 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4138 char *p;
4139 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4140 p = strchr (internal_label_prefix, 'X');
4141 internal_label_prefix_len = p - internal_label_prefix;
4142 *p = '\0';
4145 /* When no scheduling description is available, disable the scheduler passes
4146 so they won't slow down the compilation or make x87 code slower. */
4147 if (!TARGET_SCHEDULE)
4148 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4150 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4151 ix86_tune_cost->simultaneous_prefetches,
4152 opts->x_param_values,
4153 opts_set->x_param_values);
4154 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4155 ix86_tune_cost->prefetch_block,
4156 opts->x_param_values,
4157 opts_set->x_param_values);
4158 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4159 ix86_tune_cost->l1_cache_size,
4160 opts->x_param_values,
4161 opts_set->x_param_values);
4162 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4163 ix86_tune_cost->l2_cache_size,
4164 opts->x_param_values,
4165 opts_set->x_param_values);
4167 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4168 if (opts->x_flag_prefetch_loop_arrays < 0
4169 && HAVE_prefetch
4170 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4171 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4172 opts->x_flag_prefetch_loop_arrays = 1;
4174 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4175 can be optimized to ap = __builtin_next_arg (0). */
4176 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4177 targetm.expand_builtin_va_start = NULL;
4179 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4181 ix86_gen_leave = gen_leave_rex64;
4182 if (Pmode == DImode)
4184 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4185 ix86_gen_tls_local_dynamic_base_64
4186 = gen_tls_local_dynamic_base_64_di;
4188 else
4190 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4191 ix86_gen_tls_local_dynamic_base_64
4192 = gen_tls_local_dynamic_base_64_si;
4195 else
4196 ix86_gen_leave = gen_leave;
4198 if (Pmode == DImode)
4200 ix86_gen_add3 = gen_adddi3;
4201 ix86_gen_sub3 = gen_subdi3;
4202 ix86_gen_sub3_carry = gen_subdi3_carry;
4203 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4204 ix86_gen_andsp = gen_anddi3;
4205 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4206 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4207 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4208 ix86_gen_monitor = gen_sse3_monitor_di;
4210 else
4212 ix86_gen_add3 = gen_addsi3;
4213 ix86_gen_sub3 = gen_subsi3;
4214 ix86_gen_sub3_carry = gen_subsi3_carry;
4215 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4216 ix86_gen_andsp = gen_andsi3;
4217 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4218 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4219 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4220 ix86_gen_monitor = gen_sse3_monitor_si;
4223 #ifdef USE_IX86_CLD
4224 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4225 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4226 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4227 #endif
4229 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4231 if (opts->x_flag_fentry > 0)
4232 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4233 "with -fpic");
4234 opts->x_flag_fentry = 0;
4236 else if (TARGET_SEH)
4238 if (opts->x_flag_fentry == 0)
4239 sorry ("-mno-fentry isn%'t compatible with SEH");
4240 opts->x_flag_fentry = 1;
4242 else if (opts->x_flag_fentry < 0)
4244 #if defined(PROFILE_BEFORE_PROLOGUE)
4245 opts->x_flag_fentry = 1;
4246 #else
4247 opts->x_flag_fentry = 0;
4248 #endif
4251 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4252 opts->x_target_flags |= MASK_VZEROUPPER;
4253 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4254 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4255 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4256 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4257 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4258 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4259 /* Enable 128-bit AVX instruction generation
4260 for the auto-vectorizer. */
4261 if (TARGET_AVX128_OPTIMAL
4262 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4263 opts->x_target_flags |= MASK_PREFER_AVX128;
4265 if (opts->x_ix86_recip_name)
4267 char *p = ASTRDUP (opts->x_ix86_recip_name);
4268 char *q;
4269 unsigned int mask, i;
4270 bool invert;
4272 while ((q = strtok (p, ",")) != NULL)
4274 p = NULL;
4275 if (*q == '!')
4277 invert = true;
4278 q++;
4280 else
4281 invert = false;
4283 if (!strcmp (q, "default"))
4284 mask = RECIP_MASK_ALL;
4285 else
4287 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4288 if (!strcmp (q, recip_options[i].string))
4290 mask = recip_options[i].mask;
4291 break;
4294 if (i == ARRAY_SIZE (recip_options))
4296 error ("unknown option for -mrecip=%s", q);
4297 invert = false;
4298 mask = RECIP_MASK_NONE;
4302 opts->x_recip_mask_explicit |= mask;
4303 if (invert)
4304 opts->x_recip_mask &= ~mask;
4305 else
4306 opts->x_recip_mask |= mask;
4310 if (TARGET_RECIP_P (opts->x_target_flags))
4311 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4312 else if (opts_set->x_target_flags & MASK_RECIP)
4313 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
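/* Editorial note on the -mrecip= syntax handled above: the argument is a
   comma-separated list of the names in recip_options[], each optionally
   prefixed with '!' to remove it, and "default" stands for all of them; for
   example -mrecip=all,!sqrt first sets every RECIP_MASK_* bit and then
   clears RECIP_MASK_SQRT.  */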
4315 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4316 for 64-bit Bionic. */
4317 if (TARGET_HAS_BIONIC
4318 && !(opts_set->x_target_flags
4319 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4320 opts->x_target_flags |= (TARGET_64BIT
4321 ? MASK_LONG_DOUBLE_128
4322 : MASK_LONG_DOUBLE_64);
4324 /* Only one of them can be active. */
4325 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4326 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4328 /* Save the initial options in case the user does function specific
4329 options. */
4330 if (main_args_p)
4331 target_option_default_node = target_option_current_node
4332 = build_target_option_node (opts);
4334 /* Handle stack protector */
4335 if (!opts_set->x_ix86_stack_protector_guard)
4336 opts->x_ix86_stack_protector_guard
4337 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4339 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4340 if (opts->x_ix86_tune_memcpy_strategy)
4342 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4343 ix86_parse_stringop_strategy_string (str, false);
4344 free (str);
4347 if (opts->x_ix86_tune_memset_strategy)
4349 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4350 ix86_parse_stringop_strategy_string (str, true);
4351 free (str);
4355 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4357 static void
4358 ix86_option_override (void)
4360 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4361 struct register_pass_info insert_vzeroupper_info
4362 = { pass_insert_vzeroupper, "reload",
4363 1, PASS_POS_INSERT_AFTER
4366 ix86_option_override_internal (true, &global_options, &global_options_set);
4369 /* This needs to be done at start up. It's convenient to do it here. */
4370 register_pass (&insert_vzeroupper_info);
4373 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4374 static char *
4375 ix86_offload_options (void)
4377 if (TARGET_LP64)
4378 return xstrdup ("-foffload-abi=lp64");
4379 return xstrdup ("-foffload-abi=ilp32");
4382 /* Update register usage after having seen the compiler flags. */
4384 static void
4385 ix86_conditional_register_usage (void)
4387 int i, c_mask;
4389 /* For 32-bit targets, squash the REX registers. */
4390 if (! TARGET_64BIT)
4392 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4394 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4395 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4397 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4400 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4401 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4402 : TARGET_64BIT ? (1 << 2)
4403 : (1 << 1));
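/* Editorial example of the masking below: under TARGET_64BIT_MS_ABI, c_mask
   is 1 << 3, so a conditionally defined entry in the CALL_USED_REGISTERS
   initializer is treated as call-used only if its bit 3 is set; the 64-bit
   SysV and 32-bit cases test bits 2 and 1 in the same way.  */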
4405 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4407 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4409 /* Set/reset conditionally defined registers from
4410 CALL_USED_REGISTERS initializer. */
4411 if (call_used_regs[i] > 1)
4412 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4414 /* Calculate registers of CLOBBERED_REGS register set
4415 as call used registers from GENERAL_REGS register set. */
4416 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4417 && call_used_regs[i])
4418 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4421 /* If MMX is disabled, squash the registers. */
4422 if (! TARGET_MMX)
4423 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4424 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427 /* If SSE is disabled, squash the registers. */
4428 if (! TARGET_SSE)
4429 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4430 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4431 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4433 /* If the FPU is disabled, squash the registers. */
4434 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4436 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4437 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4439 /* If AVX512F is disabled, squash the registers. */
4440 if (! TARGET_AVX512F)
4442 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4445 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4446 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4449 /* If MPX is disabled, squash the registers. */
4450 if (! TARGET_MPX)
4451 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4452 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4456 /* Save the current options */
4458 static void
4459 ix86_function_specific_save (struct cl_target_option *ptr,
4460 struct gcc_options *opts)
4462 ptr->arch = ix86_arch;
4463 ptr->schedule = ix86_schedule;
4464 ptr->prefetch_sse = x86_prefetch_sse;
4465 ptr->tune = ix86_tune;
4466 ptr->branch_cost = ix86_branch_cost;
4467 ptr->tune_defaulted = ix86_tune_defaulted;
4468 ptr->arch_specified = ix86_arch_specified;
4469 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4470 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4471 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4472 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4473 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4474 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4475 ptr->x_ix86_abi = opts->x_ix86_abi;
4476 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4477 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4478 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4479 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4480 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4481 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4482 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4483 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4484 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4485 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4486 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4487 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4488 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4489 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4490 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4491 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4492 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4493 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4494 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4495 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4497 /* The fields are char but the variables are not; make sure the
4498 values fit in the fields. */
4499 gcc_assert (ptr->arch == ix86_arch);
4500 gcc_assert (ptr->schedule == ix86_schedule);
4501 gcc_assert (ptr->tune == ix86_tune);
4502 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4505 /* Restore the current options */
4507 static void
4508 ix86_function_specific_restore (struct gcc_options *opts,
4509 struct cl_target_option *ptr)
4511 enum processor_type old_tune = ix86_tune;
4512 enum processor_type old_arch = ix86_arch;
4513 unsigned int ix86_arch_mask;
4514 int i;
4516 /* We don't change -fPIC. */
4517 opts->x_flag_pic = flag_pic;
4519 ix86_arch = (enum processor_type) ptr->arch;
4520 ix86_schedule = (enum attr_cpu) ptr->schedule;
4521 ix86_tune = (enum processor_type) ptr->tune;
4522 x86_prefetch_sse = ptr->prefetch_sse;
4523 opts->x_ix86_branch_cost = ptr->branch_cost;
4524 ix86_tune_defaulted = ptr->tune_defaulted;
4525 ix86_arch_specified = ptr->arch_specified;
4526 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4527 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4528 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4529 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4530 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4531 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4532 opts->x_ix86_abi = ptr->x_ix86_abi;
4533 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4534 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4535 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4536 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4537 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4538 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4539 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4540 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4541 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4542 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4543 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4544 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4545 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4546 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4547 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4548 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4549 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4550 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4551 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4552 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4553 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4554 /* TODO: ix86_cost should be chosen at instruction or function granularity
4555 so for cold code we use size_cost even in !optimize_size compilation. */
4556 if (opts->x_optimize_size)
4557 ix86_cost = &ix86_size_cost;
4558 else
4559 ix86_cost = ix86_tune_cost;
4561 /* Recreate the arch feature tests if the arch changed */
4562 if (old_arch != ix86_arch)
4564 ix86_arch_mask = 1u << ix86_arch;
4565 for (i = 0; i < X86_ARCH_LAST; ++i)
4566 ix86_arch_features[i]
4567 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4570 /* Recreate the tune optimization tests */
4571 if (old_tune != ix86_tune)
4572 set_ix86_tune_features (ix86_tune, false);
4575 /* Adjust target options after streaming them in. This is mainly about
4576 reconciling them with global options. */
4578 static void
4579 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4581 /* flag_pic is a global option, but ix86_cmodel is target saved option
4582 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4583 for PIC, or error out. */
4584 if (flag_pic)
4585 switch (ptr->x_ix86_cmodel)
4587 case CM_SMALL:
4588 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4589 break;
4591 case CM_MEDIUM:
4592 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4593 break;
4595 case CM_LARGE:
4596 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4597 break;
4599 case CM_KERNEL:
4600 error ("code model %s does not support PIC mode", "kernel");
4601 break;
4603 default:
4604 break;
4606 else
4607 switch (ptr->x_ix86_cmodel)
4609 case CM_SMALL_PIC:
4610 ptr->x_ix86_cmodel = CM_SMALL;
4611 break;
4613 case CM_MEDIUM_PIC:
4614 ptr->x_ix86_cmodel = CM_MEDIUM;
4615 break;
4617 case CM_LARGE_PIC:
4618 ptr->x_ix86_cmodel = CM_LARGE;
4619 break;
4621 default:
4622 break;
4626 /* Print the current options */
4628 static void
4629 ix86_function_specific_print (FILE *file, int indent,
4630 struct cl_target_option *ptr)
4632 char *target_string
4633 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4634 NULL, NULL, ptr->x_ix86_fpmath, false);
4636 gcc_assert (ptr->arch < PROCESSOR_max);
4637 fprintf (file, "%*sarch = %d (%s)\n",
4638 indent, "",
4639 ptr->arch, processor_target_table[ptr->arch].name);
4641 gcc_assert (ptr->tune < PROCESSOR_max);
4642 fprintf (file, "%*stune = %d (%s)\n",
4643 indent, "",
4644 ptr->tune, processor_target_table[ptr->tune].name);
4646 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4648 if (target_string)
4650 fprintf (file, "%*s%s\n", indent, "", target_string);
4651 free (target_string);
4656 /* Inner function to process the attribute((target(...))), take an argument and
4657 set the current options from the argument. If we have a list, recursively go
4658 over the list. */
4660 static bool
4661 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4662 struct gcc_options *opts,
4663 struct gcc_options *opts_set,
4664 struct gcc_options *enum_opts_set)
4666 char *next_optstr;
4667 bool ret = true;
4669 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4670 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4671 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4672 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4673 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4675 enum ix86_opt_type
4677 ix86_opt_unknown,
4678 ix86_opt_yes,
4679 ix86_opt_no,
4680 ix86_opt_str,
4681 ix86_opt_enum,
4682 ix86_opt_isa
4685 static const struct
4687 const char *string;
4688 size_t len;
4689 enum ix86_opt_type type;
4690 int opt;
4691 int mask;
4692 } attrs[] = {
4693 /* isa options */
4694 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4695 IX86_ATTR_ISA ("abm", OPT_mabm),
4696 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4697 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4698 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4699 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4700 IX86_ATTR_ISA ("aes", OPT_maes),
4701 IX86_ATTR_ISA ("sha", OPT_msha),
4702 IX86_ATTR_ISA ("avx", OPT_mavx),
4703 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4704 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4705 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4706 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4707 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4708 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4709 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4710 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4711 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4712 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4713 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4714 IX86_ATTR_ISA ("sse", OPT_msse),
4715 IX86_ATTR_ISA ("sse2", OPT_msse2),
4716 IX86_ATTR_ISA ("sse3", OPT_msse3),
4717 IX86_ATTR_ISA ("sse4", OPT_msse4),
4718 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4719 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4720 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4721 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4722 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4723 IX86_ATTR_ISA ("fma", OPT_mfma),
4724 IX86_ATTR_ISA ("xop", OPT_mxop),
4725 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4726 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4727 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4728 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4729 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4730 IX86_ATTR_ISA ("hle", OPT_mhle),
4731 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4732 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4733 IX86_ATTR_ISA ("adx", OPT_madx),
4734 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4735 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4736 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4737 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4738 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4739 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4740 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4741 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4742 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4743 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4744 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4746 /* enum options */
4747 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4749 /* string options */
4750 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4751 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4753 /* flag options */
4754 IX86_ATTR_YES ("cld",
4755 OPT_mcld,
4756 MASK_CLD),
4758 IX86_ATTR_NO ("fancy-math-387",
4759 OPT_mfancy_math_387,
4760 MASK_NO_FANCY_MATH_387),
4762 IX86_ATTR_YES ("ieee-fp",
4763 OPT_mieee_fp,
4764 MASK_IEEE_FP),
4766 IX86_ATTR_YES ("inline-all-stringops",
4767 OPT_minline_all_stringops,
4768 MASK_INLINE_ALL_STRINGOPS),
4770 IX86_ATTR_YES ("inline-stringops-dynamically",
4771 OPT_minline_stringops_dynamically,
4772 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4774 IX86_ATTR_NO ("align-stringops",
4775 OPT_mno_align_stringops,
4776 MASK_NO_ALIGN_STRINGOPS),
4778 IX86_ATTR_YES ("recip",
4779 OPT_mrecip,
4780 MASK_RECIP),
4784 /* If this is a list, recurse to get the options. */
4785 if (TREE_CODE (args) == TREE_LIST)
4787 bool ret = true;
4789 for (; args; args = TREE_CHAIN (args))
4790 if (TREE_VALUE (args)
4791 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4792 p_strings, opts, opts_set,
4793 enum_opts_set))
4794 ret = false;
4796 return ret;
4799 else if (TREE_CODE (args) != STRING_CST)
4801 error ("attribute %<target%> argument not a string");
4802 return false;
4805 /* Handle multiple arguments separated by commas. */
4806 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4808 while (next_optstr && *next_optstr != '\0')
4810 char *p = next_optstr;
4811 char *orig_p = p;
4812 char *comma = strchr (next_optstr, ',');
4813 const char *opt_string;
4814 size_t len, opt_len;
4815 int opt;
4816 bool opt_set_p;
4817 char ch;
4818 unsigned i;
4819 enum ix86_opt_type type = ix86_opt_unknown;
4820 int mask = 0;
4822 if (comma)
4824 *comma = '\0';
4825 len = comma - next_optstr;
4826 next_optstr = comma + 1;
4828 else
4830 len = strlen (p);
4831 next_optstr = NULL;
4834 /* Recognize no-xxx. */
4835 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4837 opt_set_p = false;
4838 p += 3;
4839 len -= 3;
4841 else
4842 opt_set_p = true;
4844 /* Find the option. */
4845 ch = *p;
4846 opt = N_OPTS;
4847 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4849 type = attrs[i].type;
4850 opt_len = attrs[i].len;
4851 if (ch == attrs[i].string[0]
4852 && ((type != ix86_opt_str && type != ix86_opt_enum)
4853 ? len == opt_len
4854 : len > opt_len)
4855 && memcmp (p, attrs[i].string, opt_len) == 0)
4857 opt = attrs[i].opt;
4858 mask = attrs[i].mask;
4859 opt_string = attrs[i].string;
4860 break;
4864 /* Process the option. */
4865 if (opt == N_OPTS)
4867 error ("attribute(target(\"%s\")) is unknown", orig_p);
4868 ret = false;
4871 else if (type == ix86_opt_isa)
4873 struct cl_decoded_option decoded;
4875 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4876 ix86_handle_option (opts, opts_set,
4877 &decoded, input_location);
4880 else if (type == ix86_opt_yes || type == ix86_opt_no)
4882 if (type == ix86_opt_no)
4883 opt_set_p = !opt_set_p;
4885 if (opt_set_p)
4886 opts->x_target_flags |= mask;
4887 else
4888 opts->x_target_flags &= ~mask;
4891 else if (type == ix86_opt_str)
4893 if (p_strings[opt])
4895 error ("option(\"%s\") was already specified", opt_string);
4896 ret = false;
4898 else
4899 p_strings[opt] = xstrdup (p + opt_len);
4902 else if (type == ix86_opt_enum)
4904 bool arg_ok;
4905 int value;
4907 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4908 if (arg_ok)
4909 set_option (opts, enum_opts_set, opt, value,
4910 p + opt_len, DK_UNSPECIFIED, input_location,
4911 global_dc);
4912 else
4914 error ("attribute(target(\"%s\")) is unknown", orig_p);
4915 ret = false;
4919 else
4920 gcc_unreachable ();
4923 return ret;
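/* Illustrative example (not part of the original source; the function and
   variable names below are made up): the parser above splits the attribute
   string on commas and recognizes the "no-" prefix, so a declaration such as

     __attribute__((target ("avx2,no-sse4a,arch=haswell")))
     void hot_loop (float *dst, const float *src, int n);

   enables AVX2, disables SSE4A and overrides -march for this one function;
   an unrecognized token is rejected with the
   attribute(target("...")) is unknown error emitted above.  */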
4926 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4928 tree
4929 ix86_valid_target_attribute_tree (tree args,
4930 struct gcc_options *opts,
4931 struct gcc_options *opts_set)
4933 const char *orig_arch_string = opts->x_ix86_arch_string;
4934 const char *orig_tune_string = opts->x_ix86_tune_string;
4935 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4936 int orig_tune_defaulted = ix86_tune_defaulted;
4937 int orig_arch_specified = ix86_arch_specified;
4938 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4939 tree t = NULL_TREE;
4940 int i;
4941 struct cl_target_option *def
4942 = TREE_TARGET_OPTION (target_option_default_node);
4943 struct gcc_options enum_opts_set;
4945 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4947 /* Process each of the options on the chain. */
4948 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4949 opts_set, &enum_opts_set))
4950 return error_mark_node;
4952 /* If the changed options are different from the default, rerun
4953 ix86_option_override_internal, and then save the options away.
4954 The string options are attribute options, and will be undone
4955 when we copy the save structure. */
4956 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4957 || opts->x_target_flags != def->x_target_flags
4958 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4959 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4960 || enum_opts_set.x_ix86_fpmath)
4962 /* If we are using the default tune= or arch=, undo the string assigned,
4963 and use the default. */
4964 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4965 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4966 else if (!orig_arch_specified)
4967 opts->x_ix86_arch_string = NULL;
4969 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4970 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4971 else if (orig_tune_defaulted)
4972 opts->x_ix86_tune_string = NULL;
4974 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4975 if (enum_opts_set.x_ix86_fpmath)
4976 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4977 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4978 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4980 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4981 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4984 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4985 ix86_option_override_internal (false, opts, opts_set);
4987 /* Add any builtin functions with the new isa if any. */
4988 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4990 /* Save the current options unless we are validating options for
4991 #pragma. */
4992 t = build_target_option_node (opts);
4994 opts->x_ix86_arch_string = orig_arch_string;
4995 opts->x_ix86_tune_string = orig_tune_string;
4996 opts_set->x_ix86_fpmath = orig_fpmath_set;
4998 /* Free up memory allocated to hold the strings */
4999 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5000 free (option_strings[i]);
5003 return t;
5006 /* Hook to validate attribute((target("string"))). */
5008 static bool
5009 ix86_valid_target_attribute_p (tree fndecl,
5010 tree ARG_UNUSED (name),
5011 tree args,
5012 int ARG_UNUSED (flags))
5014 struct gcc_options func_options;
5015 tree new_target, new_optimize;
5016 bool ret = true;
5018 /* attribute((target("default"))) does nothing, beyond
5019 affecting multi-versioning. */
5020 if (TREE_VALUE (args)
5021 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5022 && TREE_CHAIN (args) == NULL_TREE
5023 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5024 return true;
5026 tree old_optimize = build_optimization_node (&global_options);
5028 /* Get the optimization options of the current function. */
5029 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5031 if (!func_optimize)
5032 func_optimize = old_optimize;
5034 /* Init func_options. */
5035 memset (&func_options, 0, sizeof (func_options));
5036 init_options_struct (&func_options, NULL);
5037 lang_hooks.init_options_struct (&func_options);
5039 cl_optimization_restore (&func_options,
5040 TREE_OPTIMIZATION (func_optimize));
5042 /* Initialize func_options to the default before its target options can
5043 be set. */
5044 cl_target_option_restore (&func_options,
5045 TREE_TARGET_OPTION (target_option_default_node));
5047 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5048 &global_options_set);
5050 new_optimize = build_optimization_node (&func_options);
5052 if (new_target == error_mark_node)
5053 ret = false;
5055 else if (fndecl && new_target)
5057 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5059 if (old_optimize != new_optimize)
5060 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5063 return ret;
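/* Illustrative example (not part of the original source; names are made up):
   as the early return above notes, target("default") matters only for
   function multi-versioning, e.g. in C++

     __attribute__((target ("default"))) int dispatch_me (void) { return 0; }
     __attribute__((target ("avx2")))    int dispatch_me (void) { return 1; }

   where the "default" version is the fallback on hardware without AVX2; for
   it the hook returns true without building a new target node.  */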
5067 /* Hook to determine if one function can safely inline another. */
5069 static bool
5070 ix86_can_inline_p (tree caller, tree callee)
5072 bool ret = false;
5073 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5074 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5076 /* If callee has no option attributes, then it is ok to inline. */
5077 if (!callee_tree)
5078 ret = true;
5080 /* If caller has no option attributes, but callee does then it is not ok to
5081 inline. */
5082 else if (!caller_tree)
5083 ret = false;
5085 else
5087 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5088 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5090 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5091 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5092 function. */
5093 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5094 != callee_opts->x_ix86_isa_flags)
5095 ret = false;
5097 /* See if we have the same non-isa options. */
5098 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5099 ret = false;
5101 /* See if arch, tune, etc. are the same. */
5102 else if (caller_opts->arch != callee_opts->arch)
5103 ret = false;
5105 else if (caller_opts->tune != callee_opts->tune)
5106 ret = false;
5108 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5109 ret = false;
5111 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5112 ret = false;
5114 else
5115 ret = true;
5118 return ret;
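/* Illustrative example (not part of the original source; names are made up)
   of the subset rule checked above:

     __attribute__((target ("sse2")))   static int callee (int x) { return x + 1; }
     __attribute__((target ("sse4.2"))) int caller (int x) { return callee (x); }

   callee's ISA flags are a subset of caller's, so inlining is allowed; with
   the attributes swapped the callee would require SSE4.2 that the caller
   does not guarantee, and ix86_can_inline_p refuses.  */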
5122 /* Remember the last target of ix86_set_current_function. */
5123 static GTY(()) tree ix86_previous_fndecl;
5125 /* Set targets globals to the default (or current #pragma GCC target
5126 if active). Invalidate ix86_previous_fndecl cache. */
5128 void
5129 ix86_reset_previous_fndecl (void)
5131 tree new_tree = target_option_current_node;
5132 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5133 if (TREE_TARGET_GLOBALS (new_tree))
5134 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5135 else if (new_tree == target_option_default_node)
5136 restore_target_globals (&default_target_globals);
5137 else
5138 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5139 ix86_previous_fndecl = NULL_TREE;
5142 /* Establish appropriate back-end context for processing the function
5143 FNDECL. The argument might be NULL to indicate processing at top
5144 level, outside of any function scope. */
5145 static void
5146 ix86_set_current_function (tree fndecl)
5148 /* Only change the context if the function changes. This hook is called
5149 several times in the course of compiling a function, and we don't want to
5150 slow things down too much or call target_reinit when it isn't safe. */
5151 if (fndecl == ix86_previous_fndecl)
5152 return;
5154 tree old_tree;
5155 if (ix86_previous_fndecl == NULL_TREE)
5156 old_tree = target_option_current_node;
5157 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5158 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5159 else
5160 old_tree = target_option_default_node;
5162 if (fndecl == NULL_TREE)
5164 if (old_tree != target_option_current_node)
5165 ix86_reset_previous_fndecl ();
5166 return;
5169 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5170 if (new_tree == NULL_TREE)
5171 new_tree = target_option_default_node;
5173 if (old_tree != new_tree)
5175 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5176 if (TREE_TARGET_GLOBALS (new_tree))
5177 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5178 else if (new_tree == target_option_default_node)
5179 restore_target_globals (&default_target_globals);
5180 else
5181 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5183 ix86_previous_fndecl = fndecl;
5187 /* Return true if this goes in large data/bss. */
5189 static bool
5190 ix86_in_large_data_p (tree exp)
5192 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5193 return false;
5195 /* Functions are never large data. */
5196 if (TREE_CODE (exp) == FUNCTION_DECL)
5197 return false;
5199 /* Automatic variables are never large data. */
5200 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5201 return false;
5203 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5205 const char *section = DECL_SECTION_NAME (exp);
5206 if (strcmp (section, ".ldata") == 0
5207 || strcmp (section, ".lbss") == 0)
5208 return true;
5209 return false;
5211 else
5213 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5215 /* If this is an incomplete type with size 0, then we can't put it
5216 in data because it might be too big when completed. Also,
5217 int_size_in_bytes returns -1 if size can vary or is larger than
5218 an integer, in which case it is also safer to assume that it goes in
5219 large data. */
5220 if (size <= 0 || size > ix86_section_threshold)
5221 return true;
5224 return false;
5227 /* Switch to the appropriate section for output of DECL.
5228 DECL is either a `VAR_DECL' node or a constant of some sort.
5229 RELOC indicates whether forming the initial value of DECL requires
5230 link-time relocations. */
5232 ATTRIBUTE_UNUSED static section *
5233 x86_64_elf_select_section (tree decl, int reloc,
5234 unsigned HOST_WIDE_INT align)
5236 if (ix86_in_large_data_p (decl))
5238 const char *sname = NULL;
5239 unsigned int flags = SECTION_WRITE;
5240 switch (categorize_decl_for_section (decl, reloc))
5242 case SECCAT_DATA:
5243 sname = ".ldata";
5244 break;
5245 case SECCAT_DATA_REL:
5246 sname = ".ldata.rel";
5247 break;
5248 case SECCAT_DATA_REL_LOCAL:
5249 sname = ".ldata.rel.local";
5250 break;
5251 case SECCAT_DATA_REL_RO:
5252 sname = ".ldata.rel.ro";
5253 break;
5254 case SECCAT_DATA_REL_RO_LOCAL:
5255 sname = ".ldata.rel.ro.local";
5256 break;
5257 case SECCAT_BSS:
5258 sname = ".lbss";
5259 flags |= SECTION_BSS;
5260 break;
5261 case SECCAT_RODATA:
5262 case SECCAT_RODATA_MERGE_STR:
5263 case SECCAT_RODATA_MERGE_STR_INIT:
5264 case SECCAT_RODATA_MERGE_CONST:
5265 sname = ".lrodata";
5266 flags = 0;
5267 break;
5268 case SECCAT_SRODATA:
5269 case SECCAT_SDATA:
5270 case SECCAT_SBSS:
5271 gcc_unreachable ();
5272 case SECCAT_TEXT:
5273 case SECCAT_TDATA:
5274 case SECCAT_TBSS:
5275 /* We don't split these for the medium model. Place them into
5276 default sections and hope for the best. */
5277 break;
5279 if (sname)
5281 /* We might get called with string constants, but get_named_section
5282 doesn't like them as they are not DECLs. Also, we need to set
5283 flags in that case. */
5284 if (!DECL_P (decl))
5285 return get_section (sname, flags, NULL);
5286 return get_named_section (decl, sname, reloc);
5289 return default_elf_select_section (decl, reloc, align);
5292 /* Select a set of attributes for section NAME based on the properties
5293 of DECL and whether or not RELOC indicates that DECL's initializer
5294 might contain runtime relocations. */
5296 static unsigned int ATTRIBUTE_UNUSED
5297 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5299 unsigned int flags = default_section_type_flags (decl, name, reloc);
5301 if (decl == NULL_TREE
5302 && (strcmp (name, ".ldata.rel.ro") == 0
5303 || strcmp (name, ".ldata.rel.ro.local") == 0))
5304 flags |= SECTION_RELRO;
5306 if (strcmp (name, ".lbss") == 0
5307 || strncmp (name, ".lbss.", 5) == 0
5308 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5309 flags |= SECTION_BSS;
5311 return flags;
5314 /* Build up a unique section name, expressed as a
5315 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5316 RELOC indicates whether the initial value of EXP requires
5317 link-time relocations. */
5319 static void ATTRIBUTE_UNUSED
5320 x86_64_elf_unique_section (tree decl, int reloc)
5322 if (ix86_in_large_data_p (decl))
5324 const char *prefix = NULL;
5325 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5326 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5328 switch (categorize_decl_for_section (decl, reloc))
5330 case SECCAT_DATA:
5331 case SECCAT_DATA_REL:
5332 case SECCAT_DATA_REL_LOCAL:
5333 case SECCAT_DATA_REL_RO:
5334 case SECCAT_DATA_REL_RO_LOCAL:
5335 prefix = one_only ? ".ld" : ".ldata";
5336 break;
5337 case SECCAT_BSS:
5338 prefix = one_only ? ".lb" : ".lbss";
5339 break;
5340 case SECCAT_RODATA:
5341 case SECCAT_RODATA_MERGE_STR:
5342 case SECCAT_RODATA_MERGE_STR_INIT:
5343 case SECCAT_RODATA_MERGE_CONST:
5344 prefix = one_only ? ".lr" : ".lrodata";
5345 break;
5346 case SECCAT_SRODATA:
5347 case SECCAT_SDATA:
5348 case SECCAT_SBSS:
5349 gcc_unreachable ();
5350 case SECCAT_TEXT:
5351 case SECCAT_TDATA:
5352 case SECCAT_TBSS:
5353 /* We don't split these for the medium model. Place them into
5354 default sections and hope for the best. */
5355 break;
5357 if (prefix)
5359 const char *name, *linkonce;
5360 char *string;
5362 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5363 name = targetm.strip_name_encoding (name);
5365 /* If we're using one_only, then there needs to be a .gnu.linkonce
5366 prefix to the section name. */
5367 linkonce = one_only ? ".gnu.linkonce" : "";
5369 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5371 set_decl_section_name (decl, string);
5372 return;
5375 default_unique_section (decl, reloc);
5378 #ifdef COMMON_ASM_OP
5379 /* This says how to output assembler code to declare an
5380 uninitialized external linkage data object.
5382 For medium model x86-64 we need to use .largecomm opcode for
5383 large objects. */
5384 void
5385 x86_elf_aligned_common (FILE *file,
5386 const char *name, unsigned HOST_WIDE_INT size,
5387 int align)
5389 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5390 && size > (unsigned int)ix86_section_threshold)
5391 fputs ("\t.largecomm\t", file);
5392 else
5393 fputs (COMMON_ASM_OP, file);
5394 assemble_name (file, name);
5395 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5396 size, align / BITS_PER_UNIT);
5398 #endif
5400 /* Utility function for targets to use in implementing
5401 ASM_OUTPUT_ALIGNED_BSS. */
5403 void
5404 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5405 unsigned HOST_WIDE_INT size, int align)
5407 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5408 && size > (unsigned int)ix86_section_threshold)
5409 switch_to_section (get_named_section (decl, ".lbss", 0));
5410 else
5411 switch_to_section (bss_section);
5412 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5413 #ifdef ASM_DECLARE_OBJECT_NAME
5414 last_assemble_variable_decl = decl;
5415 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5416 #else
5417 /* Standard thing is just output label for the object. */
5418 ASM_OUTPUT_LABEL (file, name);
5419 #endif /* ASM_DECLARE_OBJECT_NAME */
5420 ASM_OUTPUT_SKIP (file, size ? size : 1);
5423 /* Decide whether we must probe the stack before any space allocation
5424 on this target. It's essentially TARGET_STACK_PROBE except when
5425 -fstack-check causes the stack to be already probed differently. */
5427 bool
5428 ix86_target_stack_probe (void)
5430 /* Do not probe the stack twice if static stack checking is enabled. */
5431 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5432 return false;
5434 return TARGET_STACK_PROBE;
5437 /* Decide whether we can make a sibling call to a function. DECL is the
5438 declaration of the function being targeted by the call and EXP is the
5439 CALL_EXPR representing the call. */
5441 static bool
5442 ix86_function_ok_for_sibcall (tree decl, tree exp)
5444 tree type, decl_or_type;
5445 rtx a, b;
5447 /* If we are generating position-independent code, we cannot sibcall
5448 optimize any indirect call, or a direct call to a global function,
5449 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5450 if (!TARGET_MACHO
5451 && !TARGET_64BIT
5452 && flag_pic
5453 && (!decl || !targetm.binds_local_p (decl)))
5454 return false;
5456 /* If we need to align the outgoing stack, then sibcalling would
5457 unalign the stack, which may break the called function. */
5458 if (ix86_minimum_incoming_stack_boundary (true)
5459 < PREFERRED_STACK_BOUNDARY)
5460 return false;
5462 if (decl)
5464 decl_or_type = decl;
5465 type = TREE_TYPE (decl);
5467 else
5469 /* We're looking at the CALL_EXPR, we need the type of the function. */
5470 type = CALL_EXPR_FN (exp); /* pointer expression */
5471 type = TREE_TYPE (type); /* pointer type */
5472 type = TREE_TYPE (type); /* function type */
5473 decl_or_type = type;
5476 /* Check that the return value locations are the same. Like
5477 if we are returning floats on the 80387 register stack, we cannot
5478 make a sibcall from a function that doesn't return a float to a
5479 function that does or, conversely, from a function that does return
5480 a float to a function that doesn't; the necessary stack adjustment
5481 would not be executed. This is also the place we notice
5482 differences in the return value ABI. Note that it is ok for one
5483 of the functions to have void return type as long as the return
5484 value of the other is passed in a register. */
5485 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5486 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5487 cfun->decl, false);
5488 if (STACK_REG_P (a) || STACK_REG_P (b))
5490 if (!rtx_equal_p (a, b))
5491 return false;
5493 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5495 else if (!rtx_equal_p (a, b))
5496 return false;
5498 if (TARGET_64BIT)
5500 /* The SYSV ABI has more call-clobbered registers;
5501 disallow sibcalls from MS to SYSV. */
5502 if (cfun->machine->call_abi == MS_ABI
5503 && ix86_function_type_abi (type) == SYSV_ABI)
5504 return false;
5506 else
5508 /* If this call is indirect, we'll need to be able to use a
5509 call-clobbered register for the address of the target function.
5510 Make sure that all such registers are not used for passing
5511 parameters. Note that DLLIMPORT functions are indirect. */
5512 if (!decl
5513 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5515 if (ix86_function_regparm (type, NULL) >= 3)
5517 /* ??? Need to count the actual number of registers to be used,
5518 not the possible number of registers. Fix later. */
5519 return false;
5524 /* Otherwise okay. That also includes certain types of indirect calls. */
5525 return true;
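/* Illustrative example (not part of the original source; names are made up):
   a typical case the checks above accept is a plain tail call whose return
   ABI matches the caller's, e.g.

     int helper (int);
     int wrapper (int x) { return helper (x + 1); }

   which can be emitted as a "jmp" instead of "call"/"ret".  A 32-bit PIC
   call to a non-local function, or a caller that must realign the stack,
   takes one of the early "return false" paths instead.  */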
5528 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5529 and "sseregparm" calling convention attributes;
5530 arguments as in struct attribute_spec.handler. */
5532 static tree
5533 ix86_handle_cconv_attribute (tree *node, tree name,
5534 tree args,
5535 int,
5536 bool *no_add_attrs)
5538 if (TREE_CODE (*node) != FUNCTION_TYPE
5539 && TREE_CODE (*node) != METHOD_TYPE
5540 && TREE_CODE (*node) != FIELD_DECL
5541 && TREE_CODE (*node) != TYPE_DECL)
5543 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5544 name);
5545 *no_add_attrs = true;
5546 return NULL_TREE;
5549 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5550 if (is_attribute_p ("regparm", name))
5552 tree cst;
5554 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5556 error ("fastcall and regparm attributes are not compatible");
5559 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5561 error ("regparam and thiscall attributes are not compatible");
5564 cst = TREE_VALUE (args);
5565 if (TREE_CODE (cst) != INTEGER_CST)
5567 warning (OPT_Wattributes,
5568 "%qE attribute requires an integer constant argument",
5569 name);
5570 *no_add_attrs = true;
5572 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5574 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5575 name, REGPARM_MAX);
5576 *no_add_attrs = true;
5579 return NULL_TREE;
5582 if (TARGET_64BIT)
5584 /* Do not warn when emulating the MS ABI. */
5585 if ((TREE_CODE (*node) != FUNCTION_TYPE
5586 && TREE_CODE (*node) != METHOD_TYPE)
5587 || ix86_function_type_abi (*node) != MS_ABI)
5588 warning (OPT_Wattributes, "%qE attribute ignored",
5589 name);
5590 *no_add_attrs = true;
5591 return NULL_TREE;
5594 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5595 if (is_attribute_p ("fastcall", name))
5597 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5599 error ("fastcall and cdecl attributes are not compatible");
5601 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5603 error ("fastcall and stdcall attributes are not compatible");
5605 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5607 error ("fastcall and regparm attributes are not compatible");
5609 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5611 error ("fastcall and thiscall attributes are not compatible");
5615 /* Can combine stdcall with fastcall (redundant), regparm and
5616 sseregparm. */
5617 else if (is_attribute_p ("stdcall", name))
5619 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5621 error ("stdcall and cdecl attributes are not compatible");
5623 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5625 error ("stdcall and fastcall attributes are not compatible");
5627 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5629 error ("stdcall and thiscall attributes are not compatible");
5633 /* Can combine cdecl with regparm and sseregparm. */
5634 else if (is_attribute_p ("cdecl", name))
5636 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5638 error ("stdcall and cdecl attributes are not compatible");
5640 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5642 error ("fastcall and cdecl attributes are not compatible");
5644 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5646 error ("cdecl and thiscall attributes are not compatible");
5649 else if (is_attribute_p ("thiscall", name))
5651 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5652 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5653 name);
5654 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5656 error ("stdcall and thiscall attributes are not compatible");
5658 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5660 error ("fastcall and thiscall attributes are not compatible");
5662 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5664 error ("cdecl and thiscall attributes are not compatible");
5668 /* Can combine sseregparm with all attributes. */
5670 return NULL_TREE;
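/* Illustrative example (not part of the original source; names are made up)
   of the combination rules enforced above:

     __attribute__((stdcall, regparm (2)))  int ok_decl (int a, int b);
     __attribute__((fastcall, regparm (2))) int bad_decl (int a, int b);

   the first declaration is accepted (regparm combines with stdcall, cdecl
   and sseregparm), while the second is rejected because regparm is not
   compatible with fastcall or thiscall; the regparm argument must also be
   an integer constant no larger than REGPARM_MAX.  */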
5673 /* The transactional memory builtins are implicitly regparm or fastcall
5674 depending on the ABI. Override the generic do-nothing attribute that
5675 these builtins were declared with, and replace it with one of the two
5676 attributes that we expect elsewhere. */
5678 static tree
5679 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5680 int flags, bool *no_add_attrs)
5682 tree alt;
5684 /* In no case do we want to add the placeholder attribute. */
5685 *no_add_attrs = true;
5687 /* The 64-bit ABI is unchanged for transactional memory. */
5688 if (TARGET_64BIT)
5689 return NULL_TREE;
5691 /* ??? Is there a better way to validate 32-bit windows? We have
5692 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5693 if (CHECK_STACK_LIMIT > 0)
5694 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5695 else
5697 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5698 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5700 decl_attributes (node, alt, flags);
5702 return NULL_TREE;
5705 /* This function determines from TYPE the calling-convention. */
5707 unsigned int
5708 ix86_get_callcvt (const_tree type)
5710 unsigned int ret = 0;
5711 bool is_stdarg;
5712 tree attrs;
5714 if (TARGET_64BIT)
5715 return IX86_CALLCVT_CDECL;
5717 attrs = TYPE_ATTRIBUTES (type);
5718 if (attrs != NULL_TREE)
5720 if (lookup_attribute ("cdecl", attrs))
5721 ret |= IX86_CALLCVT_CDECL;
5722 else if (lookup_attribute ("stdcall", attrs))
5723 ret |= IX86_CALLCVT_STDCALL;
5724 else if (lookup_attribute ("fastcall", attrs))
5725 ret |= IX86_CALLCVT_FASTCALL;
5726 else if (lookup_attribute ("thiscall", attrs))
5727 ret |= IX86_CALLCVT_THISCALL;
5729 /* Regparm isn't allowed for thiscall and fastcall. */
5730 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5732 if (lookup_attribute ("regparm", attrs))
5733 ret |= IX86_CALLCVT_REGPARM;
5734 if (lookup_attribute ("sseregparm", attrs))
5735 ret |= IX86_CALLCVT_SSEREGPARM;
5738 if (IX86_BASE_CALLCVT(ret) != 0)
5739 return ret;
5742 is_stdarg = stdarg_p (type);
5743 if (TARGET_RTD && !is_stdarg)
5744 return IX86_CALLCVT_STDCALL | ret;
5746 if (ret != 0
5747 || is_stdarg
5748 || TREE_CODE (type) != METHOD_TYPE
5749 || ix86_function_type_abi (type) != MS_ABI)
5750 return IX86_CALLCVT_CDECL | ret;
5752 return IX86_CALLCVT_THISCALL;
5755 /* Return 0 if the attributes for two types are incompatible, 1 if they
5756 are compatible, and 2 if they are nearly compatible (which causes a
5757 warning to be generated). */
5759 static int
5760 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5762 unsigned int ccvt1, ccvt2;
5764 if (TREE_CODE (type1) != FUNCTION_TYPE
5765 && TREE_CODE (type1) != METHOD_TYPE)
5766 return 1;
5768 ccvt1 = ix86_get_callcvt (type1);
5769 ccvt2 = ix86_get_callcvt (type2);
5770 if (ccvt1 != ccvt2)
5771 return 0;
5772 if (ix86_function_regparm (type1, NULL)
5773 != ix86_function_regparm (type2, NULL))
5774 return 0;
5776 return 1;
5779 /* Return the regparm value for a function with the indicated TYPE and DECL.
5780 DECL may be NULL when calling function indirectly
5781 or considering a libcall. */
5783 static int
5784 ix86_function_regparm (const_tree type, const_tree decl)
5786 tree attr;
5787 int regparm;
5788 unsigned int ccvt;
5790 if (TARGET_64BIT)
5791 return (ix86_function_type_abi (type) == SYSV_ABI
5792 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5793 ccvt = ix86_get_callcvt (type);
5794 regparm = ix86_regparm;
5796 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5798 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5799 if (attr)
5801 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5802 return regparm;
5805 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5806 return 2;
5807 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5808 return 1;
5810 /* Use register calling convention for local functions when possible. */
5811 if (decl
5812 && TREE_CODE (decl) == FUNCTION_DECL)
5814 cgraph_node *target = cgraph_node::get (decl);
5815 if (target)
5816 target = target->function_symbol ();
5818 /* Caller and callee must agree on the calling convention, so
5819 checking just optimize here would mean that with
5820 __attribute__((optimize (...))) the caller could use the regparm
5821 convention while the callee does not, or vice versa. Instead look
5822 at whether the callee is optimized or not. */
5823 if (target && opt_for_fn (target->decl, optimize)
5824 && !(profile_flag && !flag_fentry))
5826 cgraph_local_info *i = &target->local;
5827 if (i && i->local && i->can_change_signature)
5829 int local_regparm, globals = 0, regno;
5831 /* Make sure no regparm register is taken by a
5832 fixed register variable. */
5833 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5834 local_regparm++)
5835 if (fixed_regs[local_regparm])
5836 break;
5838 /* We don't want to use regparm(3) for nested functions as
5839 these use a static chain pointer in the third argument. */
5840 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5841 local_regparm = 2;
5843 /* Save a register for the split stack. */
5844 if (local_regparm == 3 && flag_split_stack)
5845 local_regparm = 2;
5847 /* Each fixed register usage increases register pressure,
5848 so fewer registers should be used for argument passing.
5849 This functionality can be overridden by an explicit
5850 regparm value. */
5851 for (regno = AX_REG; regno <= DI_REG; regno++)
5852 if (fixed_regs[regno])
5853 globals++;
5855 local_regparm
5856 = globals < local_regparm ? local_regparm - globals : 0;
5858 if (local_regparm > regparm)
5859 regparm = local_regparm;
5864 return regparm;
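/* Illustrative example (not part of the original source; names are made up)
   of the local-function promotion above, for 32-bit code at -O2:

     register int counter asm ("ebx");   /* makes %ebx a fixed register */
     static int local_add (int a, int b) { return a + b; }

   none of the first REGPARM_MAX candidate registers is fixed, one fixed
   register is counted by the AX..DI scan, so local_add is promoted to a
   regparm(2)-style convention for its arguments.  */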
5867 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5868 DFmode (2) arguments in SSE registers for a function with the
5869 indicated TYPE and DECL. DECL may be NULL when calling function
5870 indirectly or considering a libcall. Otherwise return 0. */
5872 static int
5873 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5875 gcc_assert (!TARGET_64BIT);
5877 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5878 by the sseregparm attribute. */
5879 if (TARGET_SSEREGPARM
5880 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5882 if (!TARGET_SSE)
5884 if (warn)
5886 if (decl)
5887 error ("calling %qD with attribute sseregparm without "
5888 "SSE/SSE2 enabled", decl);
5889 else
5890 error ("calling %qT with attribute sseregparm without "
5891 "SSE/SSE2 enabled", type);
5893 return 0;
5896 return 2;
5899 if (!decl)
5900 return 0;
5902 cgraph_node *target = cgraph_node::get (decl);
5903 if (target)
5904 target = target->function_symbol ();
5906 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5907 (and DFmode for SSE2) arguments in SSE registers. */
5908 if (target
5909 /* TARGET_SSE_MATH */
5910 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5911 && opt_for_fn (target->decl, optimize)
5912 && !(profile_flag && !flag_fentry))
5914 cgraph_local_info *i = &target->local;
5915 if (i && i->local && i->can_change_signature)
5917 /* Refuse to produce wrong code when a local function with SSE enabled
5918 is called from an SSE-disabled function.
5919 We could work hard to handle these scenarios, but hopefully
5920 it does not matter in practice. */
5921 if (!TARGET_SSE && warn)
5923 error ("calling %qD with SSE caling convention without "
5924 "SSE/SSE2 enabled", decl);
5925 return 0;
5927 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5928 ->x_ix86_isa_flags) ? 2 : 1;
5932 return 0;
5935 /* Return true if EAX is live at the start of the function. Used by
5936 ix86_expand_prologue to determine if we need special help before
5937 calling allocate_stack_worker. */
5939 static bool
5940 ix86_eax_live_at_start_p (void)
5942 /* Cheat. Don't bother working forward from ix86_function_regparm
5943 to the function type to whether an actual argument is located in
5944 eax. Instead just look at cfg info, which is still close enough
5945 to correct at this point. This gives false positives for broken
5946 functions that might use uninitialized data that happens to be
5947 allocated in eax, but who cares? */
5948 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5951 static bool
5952 ix86_keep_aggregate_return_pointer (tree fntype)
5954 tree attr;
5956 if (!TARGET_64BIT)
5958 attr = lookup_attribute ("callee_pop_aggregate_return",
5959 TYPE_ATTRIBUTES (fntype));
5960 if (attr)
5961 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5963 /* For 32-bit MS-ABI the default is to keep aggregate
5964 return pointer. */
5965 if (ix86_function_type_abi (fntype) == MS_ABI)
5966 return true;
5968 return KEEP_AGGREGATE_RETURN_POINTER != 0;
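/* Illustrative example (not part of the original source; names are made up):
   a 32-bit SysV function returning an aggregate in memory normally pops the
   hidden return-slot pointer itself ("ret $4"); with

     struct big { int v[4]; };
     __attribute__((callee_pop_aggregate_return (0))) struct big make_big (void);

   the pointer is kept for the caller to pop, which as noted above is also
   the default for the 32-bit MS ABI.  */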
5971 /* Value is the number of bytes of arguments automatically
5972 popped when returning from a subroutine call.
5973 FUNDECL is the declaration node of the function (as a tree),
5974 FUNTYPE is the data type of the function (as a tree),
5975 or for a library call it is an identifier node for the subroutine name.
5976 SIZE is the number of bytes of arguments passed on the stack.
5978 On the 80386, the RTD insn may be used to pop them if the number
5979 of args is fixed, but if the number is variable then the caller
5980 must pop them all. RTD can't be used for library calls now
5981 because the library is compiled with the Unix compiler.
5982 Use of RTD is a selectable option, since it is incompatible with
5983 standard Unix calling sequences. If the option is not selected,
5984 the caller must always pop the args.
5986 The attribute stdcall is equivalent to RTD on a per module basis. */
5988 static int
5989 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5991 unsigned int ccvt;
5993 /* None of the 64-bit ABIs pop arguments. */
5994 if (TARGET_64BIT)
5995 return 0;
5997 ccvt = ix86_get_callcvt (funtype);
5999 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6000 | IX86_CALLCVT_THISCALL)) != 0
6001 && ! stdarg_p (funtype))
6002 return size;
6004 /* Lose any fake structure return argument if it is passed on the stack. */
6005 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6006 && !ix86_keep_aggregate_return_pointer (funtype))
6008 int nregs = ix86_function_regparm (funtype, fundecl);
6009 if (nregs == 0)
6010 return GET_MODE_SIZE (Pmode);
6013 return 0;
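/* Illustrative example (not part of the original source; the name is made
   up): for 32-bit code,

     __attribute__((stdcall)) int sum2 (int a, int b);

   makes this function return 8, so the callee finishes with "ret $8" and
   the caller does not adjust %esp after the call; a varargs or plain cdecl
   function yields 0 and the caller pops its own arguments.  */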
6016 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6018 static bool
6019 ix86_legitimate_combined_insn (rtx_insn *insn)
6021 /* Check operand constraints in case hard registers were propagated
6022 into insn pattern. This check prevents combine pass from
6023 generating insn patterns with invalid hard register operands.
6024 These invalid insns can eventually confuse reload to error out
6025 with a spill failure. See also PRs 46829 and 46843. */
6026 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6028 int i;
6030 extract_insn (insn);
6031 preprocess_constraints (insn);
6033 int n_operands = recog_data.n_operands;
6034 int n_alternatives = recog_data.n_alternatives;
6035 for (i = 0; i < n_operands; i++)
6037 rtx op = recog_data.operand[i];
6038 machine_mode mode = GET_MODE (op);
6039 const operand_alternative *op_alt;
6040 int offset = 0;
6041 bool win;
6042 int j;
6044 /* For pre-AVX disallow unaligned loads/stores where the
6045 instructions don't support it. */
6046 if (!TARGET_AVX
6047 && VECTOR_MODE_P (GET_MODE (op))
6048 && misaligned_operand (op, GET_MODE (op)))
6050 int min_align = get_attr_ssememalign (insn);
6051 if (min_align == 0)
6052 return false;
6055 /* A unary operator may be accepted by the predicate, but it
6056 is irrelevant for matching constraints. */
6057 if (UNARY_P (op))
6058 op = XEXP (op, 0);
6060 if (GET_CODE (op) == SUBREG)
6062 if (REG_P (SUBREG_REG (op))
6063 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6064 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6065 GET_MODE (SUBREG_REG (op)),
6066 SUBREG_BYTE (op),
6067 GET_MODE (op));
6068 op = SUBREG_REG (op);
6071 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6072 continue;
6074 op_alt = recog_op_alt;
6076 /* Operand has no constraints, anything is OK. */
6077 win = !n_alternatives;
6079 alternative_mask preferred = get_preferred_alternatives (insn);
6080 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6082 if (!TEST_BIT (preferred, j))
6083 continue;
6084 if (op_alt[i].anything_ok
6085 || (op_alt[i].matches != -1
6086 && operands_match_p
6087 (recog_data.operand[i],
6088 recog_data.operand[op_alt[i].matches]))
6089 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6091 win = true;
6092 break;
6096 if (!win)
6097 return false;
6101 return true;
6104 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6106 static unsigned HOST_WIDE_INT
6107 ix86_asan_shadow_offset (void)
6109 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6110 : HOST_WIDE_INT_C (0x7fff8000))
6111 : (HOST_WIDE_INT_1 << 29);
6114 /* Argument support functions. */
6116 /* Return true when register may be used to pass function parameters. */
6117 bool
6118 ix86_function_arg_regno_p (int regno)
6120 int i;
6121 const int *parm_regs;
6123 if (TARGET_MPX && BND_REGNO_P (regno))
6124 return true;
6126 if (!TARGET_64BIT)
6128 if (TARGET_MACHO)
6129 return (regno < REGPARM_MAX
6130 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6131 else
6132 return (regno < REGPARM_MAX
6133 || (TARGET_MMX && MMX_REGNO_P (regno)
6134 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6135 || (TARGET_SSE && SSE_REGNO_P (regno)
6136 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6139 if (TARGET_SSE && SSE_REGNO_P (regno)
6140 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6141 return true;
6143 /* TODO: The function should depend on current function ABI but
6144 builtins.c would need updating then. Therefore we use the
6145 default ABI. */
6147 /* RAX is used as hidden argument to va_arg functions. */
6148 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6149 return true;
6151 if (ix86_abi == MS_ABI)
6152 parm_regs = x86_64_ms_abi_int_parameter_registers;
6153 else
6154 parm_regs = x86_64_int_parameter_registers;
6155 for (i = 0; i < (ix86_abi == MS_ABI
6156 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6157 if (regno == parm_regs[i])
6158 return true;
6159 return false;
6162 /* Return if we do not know how to pass TYPE solely in registers. */
6164 static bool
6165 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6167 if (must_pass_in_stack_var_size_or_pad (mode, type))
6168 return true;
6170 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6171 The layout_type routine is crafty and tries to trick us into passing
6172 currently unsupported vector types on the stack by using TImode. */
6173 return (!TARGET_64BIT && mode == TImode
6174 && type && TREE_CODE (type) != VECTOR_TYPE);
6177 /* Return the size, in bytes, of the area reserved for arguments passed
6178 in registers for the function represented by FNDECL, depending on the
6179 ABI used. */
6181 ix86_reg_parm_stack_space (const_tree fndecl)
6183 enum calling_abi call_abi = SYSV_ABI;
6184 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6185 call_abi = ix86_function_abi (fndecl);
6186 else
6187 call_abi = ix86_function_type_abi (fndecl);
6188 if (TARGET_64BIT && call_abi == MS_ABI)
6189 return 32;
6190 return 0;
6193 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6194 call ABI used. */
6195 enum calling_abi
6196 ix86_function_type_abi (const_tree fntype)
6198 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6200 enum calling_abi abi = ix86_abi;
6201 if (abi == SYSV_ABI)
6203 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6205 if (TARGET_X32)
6207 static bool warned = false;
6208 if (!warned)
6210 error ("X32 does not support ms_abi attribute");
6211 warned = true;
6214 abi = MS_ABI;
6217 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6218 abi = SYSV_ABI;
6219 return abi;
6221 return ix86_abi;
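/* Illustrative example (not part of the original source; the name is made
   up): on a 64-bit SysV target,

     __attribute__((ms_abi)) void *alloc_like_win32 (void *, unsigned long);

   switches this one prototype to the Microsoft calling convention, and
   sysv_abi does the reverse on MS targets; X32 rejects ms_abi with the
   error emitted above.  */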
6224 /* We add this as a workaround in order to use libc_has_function
6225 hook in i386.md. */
6226 bool
6227 ix86_libc_has_function (enum function_class fn_class)
6229 return targetm.libc_has_function (fn_class);
6232 static bool
6233 ix86_function_ms_hook_prologue (const_tree fn)
6235 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6237 if (decl_function_context (fn) != NULL_TREE)
6238 error_at (DECL_SOURCE_LOCATION (fn),
6239 "ms_hook_prologue is not compatible with nested function");
6240 else
6241 return true;
6243 return false;
6246 static enum calling_abi
6247 ix86_function_abi (const_tree fndecl)
6249 if (! fndecl)
6250 return ix86_abi;
6251 return ix86_function_type_abi (TREE_TYPE (fndecl));
6254 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6255 call ABI used. */
6256 enum calling_abi
6257 ix86_cfun_abi (void)
6259 if (! cfun)
6260 return ix86_abi;
6261 return cfun->machine->call_abi;
6264 /* Write the extra assembler code needed to declare a function properly. */
6266 void
6267 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6268 tree decl)
6270 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6272 if (is_ms_hook)
6274 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6275 unsigned int filler_cc = 0xcccccccc;
6277 for (i = 0; i < filler_count; i += 4)
6278 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6281 #ifdef SUBTARGET_ASM_UNWIND_INIT
6282 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6283 #endif
6285 ASM_OUTPUT_LABEL (asm_out_file, fname);
6287 /* Output magic byte marker, if hot-patch attribute is set. */
6288 if (is_ms_hook)
6290 if (TARGET_64BIT)
6292 /* leaq [%rsp + 0], %rsp */
6293 asm_fprintf (asm_out_file, ASM_BYTE
6294 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6296 else
6298 /* movl.s %edi, %edi
6299 push %ebp
6300 movl.s %esp, %ebp */
6301 asm_fprintf (asm_out_file, ASM_BYTE
6302 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6307 /* regclass.c */
6308 extern void init_regs (void);
6310 /* Implementation of call abi switching target hook. Specific to FNDECL
6311 the specific call register sets are set. See also
6312 ix86_conditional_register_usage for more details. */
6313 void
6314 ix86_call_abi_override (const_tree fndecl)
6316 if (fndecl == NULL_TREE)
6317 cfun->machine->call_abi = ix86_abi;
6318 else
6319 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6322 /* 64-bit MS and SYSV ABI have different sets of call-used registers. Avoid
6323 expensive re-initialization of init_regs each time we switch function context
6324 since this is needed only during RTL expansion. */
6325 static void
6326 ix86_maybe_switch_abi (void)
6328 if (TARGET_64BIT &&
6329 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6330 reinit_regs ();
6333 /* Return 1 if pseudo register should be created and used to hold
6334 GOT address for PIC code. */
6335 bool
6336 ix86_use_pseudo_pic_reg (void)
6338 if ((TARGET_64BIT
6339 && (ix86_cmodel == CM_SMALL_PIC
6340 || TARGET_PECOFF))
6341 || !flag_pic)
6342 return false;
6343 return true;
6346 /* Initialize large model PIC register. */
6348 static void
6349 ix86_init_large_pic_reg (unsigned int tmp_regno)
6351 rtx_code_label *label;
6352 rtx tmp_reg;
6354 gcc_assert (Pmode == DImode);
6355 label = gen_label_rtx ();
6356 emit_label (label);
6357 LABEL_PRESERVE_P (label) = 1;
6358 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6359 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6360 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6361 label));
6362 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6363 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6364 pic_offset_table_rtx, tmp_reg));
6367 /* Create and initialize PIC register if required. */
6368 static void
6369 ix86_init_pic_reg (void)
6371 edge entry_edge;
6372 rtx_insn *seq;
6374 if (!ix86_use_pseudo_pic_reg ())
6375 return;
6377 start_sequence ();
6379 if (TARGET_64BIT)
6381 if (ix86_cmodel == CM_LARGE_PIC)
6382 ix86_init_large_pic_reg (R11_REG);
6383 else
6384 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6386 else
6388 /* If there is a future mcount call in the function, it is more profitable
6389 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6390 rtx reg = crtl->profile
6391 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6392 : pic_offset_table_rtx;
6393 rtx insn = emit_insn (gen_set_got (reg));
6394 RTX_FRAME_RELATED_P (insn) = 1;
6395 if (crtl->profile)
6396 emit_move_insn (pic_offset_table_rtx, reg);
6397 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6400 seq = get_insns ();
6401 end_sequence ();
6403 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6404 insert_insn_on_edge (seq, entry_edge);
6405 commit_one_edge_insertion (entry_edge);
6408 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6409 for a call to a function whose data type is FNTYPE.
6410 For a library call, FNTYPE is 0. */
6412 void
6413 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6414 tree fntype, /* tree ptr for function decl */
6415 rtx libname, /* SYMBOL_REF of library name or 0 */
6416 tree fndecl,
6417 int caller)
6419 struct cgraph_local_info *i = NULL;
6420 struct cgraph_node *target = NULL;
6422 memset (cum, 0, sizeof (*cum));
6424 if (fndecl)
6426 target = cgraph_node::get (fndecl);
6427 if (target)
6429 target = target->function_symbol ();
6430 i = cgraph_node::local_info (target->decl);
6431 cum->call_abi = ix86_function_abi (target->decl);
6433 else
6434 cum->call_abi = ix86_function_abi (fndecl);
6436 else
6437 cum->call_abi = ix86_function_type_abi (fntype);
6439 cum->caller = caller;
6441 /* Set up the number of registers to use for passing arguments. */
6442 cum->nregs = ix86_regparm;
6443 if (TARGET_64BIT)
6445 cum->nregs = (cum->call_abi == SYSV_ABI
6446 ? X86_64_REGPARM_MAX
6447 : X86_64_MS_REGPARM_MAX);
6449 if (TARGET_SSE)
6451 cum->sse_nregs = SSE_REGPARM_MAX;
6452 if (TARGET_64BIT)
6454 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6455 ? X86_64_SSE_REGPARM_MAX
6456 : X86_64_MS_SSE_REGPARM_MAX);
6459 if (TARGET_MMX)
6460 cum->mmx_nregs = MMX_REGPARM_MAX;
6461 cum->warn_avx512f = true;
6462 cum->warn_avx = true;
6463 cum->warn_sse = true;
6464 cum->warn_mmx = true;
6466 /* Because the type might mismatch between caller and callee, we need to
6467 use the actual type of the function for local calls.
6468 FIXME: cgraph_analyze can be told to actually record whether a function uses
6469 va_start, so for local functions maybe_vaarg can be made more aggressive,
6470 helping K&R code.
6471 FIXME: once the type system is fixed, we won't need this code anymore. */
6472 if (i && i->local && i->can_change_signature)
6473 fntype = TREE_TYPE (target->decl);
6474 cum->stdarg = stdarg_p (fntype);
6475 cum->maybe_vaarg = (fntype
6476 ? (!prototype_p (fntype) || stdarg_p (fntype))
6477 : !libname);
6479 cum->bnd_regno = FIRST_BND_REG;
6480 cum->bnds_in_bt = 0;
6481 cum->force_bnd_pass = 0;
6483 if (!TARGET_64BIT)
6485 /* If there are variable arguments, then we won't pass anything
6486 in registers in 32-bit mode. */
6487 if (stdarg_p (fntype))
6489 cum->nregs = 0;
6490 cum->sse_nregs = 0;
6491 cum->mmx_nregs = 0;
6492 cum->warn_avx512f = false;
6493 cum->warn_avx = false;
6494 cum->warn_sse = false;
6495 cum->warn_mmx = false;
6496 return;
6499 /* Use ecx and edx registers if function has fastcall attribute,
6500 else look for regparm information. */
6501 if (fntype)
6503 unsigned int ccvt = ix86_get_callcvt (fntype);
6504 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6506 cum->nregs = 1;
6507 cum->fastcall = 1; /* Same first register as in fastcall. */
6509 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6511 cum->nregs = 2;
6512 cum->fastcall = 1;
6514 else
6515 cum->nregs = ix86_function_regparm (fntype, fndecl);
6518 /* Set up the number of SSE registers used for passing SFmode
6519 and DFmode arguments. Warn for mismatching ABI. */
6520 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
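/* Worked example (added for illustration; assumes the standard ia32 calling
   conventions handled above): given

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   init_cumulative_args sets nregs to 2 and fastcall to 1, so A is passed in
   %ecx, B in %edx and C on the stack.  With thiscall only the first argument
   goes in %ecx, and with regparm(N) up to N integer arguments use %eax, %edx
   and %ecx in that order.  */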
6524 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6525 But in the case of vector types, it is some vector mode.
6527 When we have only some of our vector isa extensions enabled, then there
6528 are some modes for which vector_mode_supported_p is false. For these
6529 modes, the generic vector support in gcc will choose some non-vector mode
6530 in order to implement the type. By computing the natural mode, we'll
6531 select the proper ABI location for the operand and not depend on whatever
6532 the middle-end decides to do with these vector types.
6534 The middle-end can't deal with vector types > 16 bytes. In this
6535 case, we return the original mode and warn about the ABI change if CUM isn't
6536 NULL.
6538 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6539 available for the function return value. */
6541 static machine_mode
6542 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6543 bool in_return)
6545 machine_mode mode = TYPE_MODE (type);
6547 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6549 HOST_WIDE_INT size = int_size_in_bytes (type);
6550 if ((size == 8 || size == 16 || size == 32 || size == 64)
6551 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6552 && TYPE_VECTOR_SUBPARTS (type) > 1)
6554 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6556 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6557 mode = MIN_MODE_VECTOR_FLOAT;
6558 else
6559 mode = MIN_MODE_VECTOR_INT;
6561 /* Get the mode which has this inner mode and number of units. */
6562 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6563 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6564 && GET_MODE_INNER (mode) == innermode)
6566 if (size == 64 && !TARGET_AVX512F)
6568 static bool warnedavx512f;
6569 static bool warnedavx512f_ret;
6571 if (cum && cum->warn_avx512f && !warnedavx512f)
6573 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6574 "without AVX512F enabled changes the ABI"))
6575 warnedavx512f = true;
6577 else if (in_return && !warnedavx512f_ret)
6579 if (warning (OPT_Wpsabi, "AVX512F vector return "
6580 "without AVX512F enabled changes the ABI"))
6581 warnedavx512f_ret = true;
6584 return TYPE_MODE (type);
6586 else if (size == 32 && !TARGET_AVX)
6588 static bool warnedavx;
6589 static bool warnedavx_ret;
6591 if (cum && cum->warn_avx && !warnedavx)
6593 if (warning (OPT_Wpsabi, "AVX vector argument "
6594 "without AVX enabled changes the ABI"))
6595 warnedavx = true;
6597 else if (in_return && !warnedavx_ret)
6599 if (warning (OPT_Wpsabi, "AVX vector return "
6600 "without AVX enabled changes the ABI"))
6601 warnedavx_ret = true;
6604 return TYPE_MODE (type);
6606 else if (((size == 8 && TARGET_64BIT) || size == 16)
6607 && !TARGET_SSE)
6609 static bool warnedsse;
6610 static bool warnedsse_ret;
6612 if (cum && cum->warn_sse && !warnedsse)
6614 if (warning (OPT_Wpsabi, "SSE vector argument "
6615 "without SSE enabled changes the ABI"))
6616 warnedsse = true;
6618 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6620 if (warning (OPT_Wpsabi, "SSE vector return "
6621 "without SSE enabled changes the ABI"))
6622 warnedsse_ret = true;
6625 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6627 static bool warnedmmx;
6628 static bool warnedmmx_ret;
6630 if (cum && cum->warn_mmx && !warnedmmx)
6632 if (warning (OPT_Wpsabi, "MMX vector argument "
6633 "without MMX enabled changes the ABI"))
6634 warnedmmx = true;
6636 else if (in_return && !warnedmmx_ret)
6638 if (warning (OPT_Wpsabi, "MMX vector return "
6639 "without MMX enabled changes the ABI"))
6640 warnedmmx_ret = true;
6643 return mode;
6646 gcc_unreachable ();
6650 return mode;
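/* Example of the warning above (illustrative, not from the original
   sources): compiling

       typedef float v8sf __attribute__ ((vector_size (32)));
       v8sf add (v8sf a, v8sf b) { return a + b; }

   without -mavx leaves V8SFmode unsupported, so TYPE_MODE is not a vector
   mode; this function then falls back to the original TYPE_MODE and -Wpsabi
   reports "AVX vector argument without AVX enabled changes the ABI".  */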
6653 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6654 this may not agree with the mode that the type system has chosen for the
6655 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6656 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6658 static rtx
6659 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6660 unsigned int regno)
6662 rtx tmp;
6664 if (orig_mode != BLKmode)
6665 tmp = gen_rtx_REG (orig_mode, regno);
6666 else
6668 tmp = gen_rtx_REG (mode, regno);
6669 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6670 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6673 return tmp;
6676 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
6677 of this code is to classify each eightbyte of an incoming argument by register
6678 class and assign registers accordingly. */
6680 /* Return the union class of CLASS1 and CLASS2.
6681 See the x86-64 PS ABI for details. */
6683 static enum x86_64_reg_class
6684 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6686 /* Rule #1: If both classes are equal, this is the resulting class. */
6687 if (class1 == class2)
6688 return class1;
6690 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6691 the other class. */
6692 if (class1 == X86_64_NO_CLASS)
6693 return class2;
6694 if (class2 == X86_64_NO_CLASS)
6695 return class1;
6697 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6698 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6699 return X86_64_MEMORY_CLASS;
6701 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6702 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6703 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6704 return X86_64_INTEGERSI_CLASS;
6705 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6706 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6707 return X86_64_INTEGER_CLASS;
6709 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6710 MEMORY is used. */
6711 if (class1 == X86_64_X87_CLASS
6712 || class1 == X86_64_X87UP_CLASS
6713 || class1 == X86_64_COMPLEX_X87_CLASS
6714 || class2 == X86_64_X87_CLASS
6715 || class2 == X86_64_X87UP_CLASS
6716 || class2 == X86_64_COMPLEX_X87_CLASS)
6717 return X86_64_MEMORY_CLASS;
6719 /* Rule #6: Otherwise class SSE is used. */
6720 return X86_64_SSE_CLASS;
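/* Worked example (an illustrative sketch added for exposition): for

       union u { long l; double d; };

   the two field classes are X86_64_INTEGER_CLASS and X86_64_SSEDF_CLASS;
   rule #4 above merges them to X86_64_INTEGER_CLASS, so the union is passed
   in a general-purpose register rather than an SSE register.  */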
6723 /* Classify the argument of type TYPE and mode MODE.
6724 CLASSES will be filled with the register class used to pass each word
6725 of the operand. The number of words is returned. In case the parameter
6726 should be passed in memory, 0 is returned. As a special case for zero-
6727 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6729 BIT_OFFSET is used internally for handling records and specifies the
6730 offset in bits modulo 512 to avoid overflow cases.
6732 See the x86-64 PS ABI for details.
6735 static int
6736 classify_argument (machine_mode mode, const_tree type,
6737 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6739 HOST_WIDE_INT bytes =
6740 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6741 int words
6742 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6744 /* Variable sized entities are always passed/returned in memory. */
6745 if (bytes < 0)
6746 return 0;
6748 if (mode != VOIDmode
6749 && targetm.calls.must_pass_in_stack (mode, type))
6750 return 0;
6752 /* Special case check for pointer to shared, on 64-bit target. */
6753 if (TARGET_64BIT && mode == TImode
6754 && type && TREE_CODE (type) == POINTER_TYPE
6755 && upc_shared_type_p (TREE_TYPE (type)))
6757 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6758 return 2;
6761 if (type && AGGREGATE_TYPE_P (type))
6763 int i;
6764 tree field;
6765 enum x86_64_reg_class subclasses[MAX_CLASSES];
6767 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6768 if (bytes > 64)
6769 return 0;
6771 for (i = 0; i < words; i++)
6772 classes[i] = X86_64_NO_CLASS;
6774 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6775 signal the memory class, so handle it as a special case. */
6776 if (!words)
6778 classes[0] = X86_64_NO_CLASS;
6779 return 1;
6782 /* Classify each field of record and merge classes. */
6783 switch (TREE_CODE (type))
6785 case RECORD_TYPE:
6786 /* And now merge the fields of structure. */
6787 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6789 if (TREE_CODE (field) == FIELD_DECL)
6791 int num;
6793 if (TREE_TYPE (field) == error_mark_node)
6794 continue;
6796 /* Bitfields are always classified as integer. Handle them
6797 early, since later code would consider them to be
6798 misaligned integers. */
6799 if (DECL_BIT_FIELD (field))
6801 for (i = (int_bit_position (field)
6802 + (bit_offset % 64)) / 8 / 8;
6803 i < ((int_bit_position (field) + (bit_offset % 64))
6804 + tree_to_shwi (DECL_SIZE (field))
6805 + 63) / 8 / 8; i++)
6806 classes[i] =
6807 merge_classes (X86_64_INTEGER_CLASS,
6808 classes[i]);
6810 else
6812 int pos;
6814 type = TREE_TYPE (field);
6816 /* Flexible array member is ignored. */
6817 if (TYPE_MODE (type) == BLKmode
6818 && TREE_CODE (type) == ARRAY_TYPE
6819 && TYPE_SIZE (type) == NULL_TREE
6820 && TYPE_DOMAIN (type) != NULL_TREE
6821 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6822 == NULL_TREE))
6824 static bool warned;
6826 if (!warned && warn_psabi)
6828 warned = true;
6829 inform (input_location,
6830 "the ABI of passing struct with"
6831 " a flexible array member has"
6832 " changed in GCC 4.4");
6834 continue;
6836 num = classify_argument (TYPE_MODE (type), type,
6837 subclasses,
6838 (int_bit_position (field)
6839 + bit_offset) % 512);
6840 if (!num)
6841 return 0;
6842 pos = (int_bit_position (field)
6843 + (bit_offset % 64)) / 8 / 8;
6844 for (i = 0; i < num && (i + pos) < words; i++)
6845 classes[i + pos] =
6846 merge_classes (subclasses[i], classes[i + pos]);
6850 break;
6852 case ARRAY_TYPE:
6853 /* Arrays are handled as small records. */
6855 int num;
6856 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6857 TREE_TYPE (type), subclasses, bit_offset);
6858 if (!num)
6859 return 0;
6861 /* The partial classes are now full classes. */
6862 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6863 subclasses[0] = X86_64_SSE_CLASS;
6864 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6865 && !((bit_offset % 64) == 0 && bytes == 4))
6866 subclasses[0] = X86_64_INTEGER_CLASS;
6868 for (i = 0; i < words; i++)
6869 classes[i] = subclasses[i % num];
6871 break;
6873 case UNION_TYPE:
6874 case QUAL_UNION_TYPE:
6875 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6877 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6879 if (TREE_CODE (field) == FIELD_DECL)
6881 int num;
6883 if (TREE_TYPE (field) == error_mark_node)
6884 continue;
6886 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6887 TREE_TYPE (field), subclasses,
6888 bit_offset);
6889 if (!num)
6890 return 0;
6891 for (i = 0; i < num && i < words; i++)
6892 classes[i] = merge_classes (subclasses[i], classes[i]);
6895 break;
6897 default:
6898 gcc_unreachable ();
6901 if (words > 2)
6903 /* When size > 16 bytes, if the first one isn't
6904 X86_64_SSE_CLASS or any other ones aren't
6905 X86_64_SSEUP_CLASS, everything should be passed in
6906 memory. */
6907 if (classes[0] != X86_64_SSE_CLASS)
6908 return 0;
6910 for (i = 1; i < words; i++)
6911 if (classes[i] != X86_64_SSEUP_CLASS)
6912 return 0;
6915 /* Final merger cleanup. */
6916 for (i = 0; i < words; i++)
6918 /* If one class is MEMORY, everything should be passed in
6919 memory. */
6920 if (classes[i] == X86_64_MEMORY_CLASS)
6921 return 0;
6923 /* The X86_64_SSEUP_CLASS should always be preceded by
6924 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6925 if (classes[i] == X86_64_SSEUP_CLASS
6926 && classes[i - 1] != X86_64_SSE_CLASS
6927 && classes[i - 1] != X86_64_SSEUP_CLASS)
6929 /* The first one should never be X86_64_SSEUP_CLASS. */
6930 gcc_assert (i != 0);
6931 classes[i] = X86_64_SSE_CLASS;
6934 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6935 everything should be passed in memory. */
6936 if (classes[i] == X86_64_X87UP_CLASS
6937 && (classes[i - 1] != X86_64_X87_CLASS))
6939 static bool warned;
6941 /* The first one should never be X86_64_X87UP_CLASS. */
6942 gcc_assert (i != 0);
6943 if (!warned && warn_psabi)
6945 warned = true;
6946 inform (input_location,
6947 "the ABI of passing union with long double"
6948 " has changed in GCC 4.4");
6950 return 0;
6953 return words;
6956 /* Compute the alignment needed. We align all types to their natural boundaries, with
6957 the exception of XFmode, which is aligned to 64 bits. */
6958 if (mode != VOIDmode && mode != BLKmode)
6960 int mode_alignment = GET_MODE_BITSIZE (mode);
6962 if (mode == XFmode)
6963 mode_alignment = 128;
6964 else if (mode == XCmode)
6965 mode_alignment = 256;
6966 if (COMPLEX_MODE_P (mode))
6967 mode_alignment /= 2;
6968 /* Misaligned fields are always returned in memory. */
6969 if (bit_offset % mode_alignment)
6970 return 0;
6973 /* for V1xx modes, just use the base mode */
6974 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6975 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6976 mode = GET_MODE_INNER (mode);
6978 /* Classification of atomic types. */
6979 switch (mode)
6981 case SDmode:
6982 case DDmode:
6983 classes[0] = X86_64_SSE_CLASS;
6984 return 1;
6985 case TDmode:
6986 classes[0] = X86_64_SSE_CLASS;
6987 classes[1] = X86_64_SSEUP_CLASS;
6988 return 2;
6989 case DImode:
6990 case SImode:
6991 case HImode:
6992 case QImode:
6993 case CSImode:
6994 case CHImode:
6995 case CQImode:
6997 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6999 /* Analyze last 128 bits only. */
7000 size = (size - 1) & 0x7f;
7002 if (size < 32)
7004 classes[0] = X86_64_INTEGERSI_CLASS;
7005 return 1;
7007 else if (size < 64)
7009 classes[0] = X86_64_INTEGER_CLASS;
7010 return 1;
7012 else if (size < 64+32)
7014 classes[0] = X86_64_INTEGER_CLASS;
7015 classes[1] = X86_64_INTEGERSI_CLASS;
7016 return 2;
7018 else if (size < 64+64)
7020 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7021 return 2;
7023 else
7024 gcc_unreachable ();
7026 case CDImode:
7027 case TImode:
7028 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7029 return 2;
7030 case COImode:
7031 case OImode:
7032 /* OImode shouldn't be used directly. */
7033 gcc_unreachable ();
7034 case CTImode:
7035 return 0;
7036 case SFmode:
7037 if (!(bit_offset % 64))
7038 classes[0] = X86_64_SSESF_CLASS;
7039 else
7040 classes[0] = X86_64_SSE_CLASS;
7041 return 1;
7042 case DFmode:
7043 classes[0] = X86_64_SSEDF_CLASS;
7044 return 1;
7045 case XFmode:
7046 classes[0] = X86_64_X87_CLASS;
7047 classes[1] = X86_64_X87UP_CLASS;
7048 return 2;
7049 case TFmode:
7050 classes[0] = X86_64_SSE_CLASS;
7051 classes[1] = X86_64_SSEUP_CLASS;
7052 return 2;
7053 case SCmode:
7054 classes[0] = X86_64_SSE_CLASS;
7055 if (!(bit_offset % 64))
7056 return 1;
7057 else
7059 static bool warned;
7061 if (!warned && warn_psabi)
7063 warned = true;
7064 inform (input_location,
7065 "the ABI of passing structure with complex float"
7066 " member has changed in GCC 4.4");
7068 classes[1] = X86_64_SSESF_CLASS;
7069 return 2;
7071 case DCmode:
7072 classes[0] = X86_64_SSEDF_CLASS;
7073 classes[1] = X86_64_SSEDF_CLASS;
7074 return 2;
7075 case XCmode:
7076 classes[0] = X86_64_COMPLEX_X87_CLASS;
7077 return 1;
7078 case TCmode:
7079 /* This mode is larger than 16 bytes. */
7080 return 0;
7081 case V8SFmode:
7082 case V8SImode:
7083 case V32QImode:
7084 case V16HImode:
7085 case V4DFmode:
7086 case V4DImode:
7087 classes[0] = X86_64_SSE_CLASS;
7088 classes[1] = X86_64_SSEUP_CLASS;
7089 classes[2] = X86_64_SSEUP_CLASS;
7090 classes[3] = X86_64_SSEUP_CLASS;
7091 return 4;
7092 case V8DFmode:
7093 case V16SFmode:
7094 case V8DImode:
7095 case V16SImode:
7096 case V32HImode:
7097 case V64QImode:
7098 classes[0] = X86_64_SSE_CLASS;
7099 classes[1] = X86_64_SSEUP_CLASS;
7100 classes[2] = X86_64_SSEUP_CLASS;
7101 classes[3] = X86_64_SSEUP_CLASS;
7102 classes[4] = X86_64_SSEUP_CLASS;
7103 classes[5] = X86_64_SSEUP_CLASS;
7104 classes[6] = X86_64_SSEUP_CLASS;
7105 classes[7] = X86_64_SSEUP_CLASS;
7106 return 8;
7107 case V4SFmode:
7108 case V4SImode:
7109 case V16QImode:
7110 case V8HImode:
7111 case V2DFmode:
7112 case V2DImode:
7113 classes[0] = X86_64_SSE_CLASS;
7114 classes[1] = X86_64_SSEUP_CLASS;
7115 return 2;
7116 case V1TImode:
7117 case V1DImode:
7118 case V2SFmode:
7119 case V2SImode:
7120 case V4HImode:
7121 case V8QImode:
7122 classes[0] = X86_64_SSE_CLASS;
7123 return 1;
7124 case BLKmode:
7125 case VOIDmode:
7126 return 0;
7127 default:
7128 gcc_assert (VECTOR_MODE_P (mode));
7130 if (bytes > 16)
7131 return 0;
7133 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7135 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7136 classes[0] = X86_64_INTEGERSI_CLASS;
7137 else
7138 classes[0] = X86_64_INTEGER_CLASS;
7139 classes[1] = X86_64_INTEGER_CLASS;
7140 return 1 + (bytes > 8);
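/* Worked example of the classification above (added for illustration; the
   struct name is arbitrary): for

       struct s { double d; long l; };

   the first eightbyte is classified X86_64_SSEDF_CLASS and the second
   X86_64_INTEGER_CLASS, so classify_argument returns 2 and, when passed as
   the first argument under the SysV ABI, D travels in %xmm0 and L in %rdi.  */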
7144 /* Examine the argument and set the number of registers required in each
7145 class. Return true iff the parameter should be passed in memory. */
7147 static bool
7148 examine_argument (machine_mode mode, const_tree type, int in_return,
7149 int *int_nregs, int *sse_nregs)
7151 enum x86_64_reg_class regclass[MAX_CLASSES];
7152 int n = classify_argument (mode, type, regclass, 0);
7154 *int_nregs = 0;
7155 *sse_nregs = 0;
7157 if (!n)
7158 return true;
7159 for (n--; n >= 0; n--)
7160 switch (regclass[n])
7162 case X86_64_INTEGER_CLASS:
7163 case X86_64_INTEGERSI_CLASS:
7164 (*int_nregs)++;
7165 break;
7166 case X86_64_SSE_CLASS:
7167 case X86_64_SSESF_CLASS:
7168 case X86_64_SSEDF_CLASS:
7169 (*sse_nregs)++;
7170 break;
7171 case X86_64_NO_CLASS:
7172 case X86_64_SSEUP_CLASS:
7173 break;
7174 case X86_64_X87_CLASS:
7175 case X86_64_X87UP_CLASS:
7176 case X86_64_COMPLEX_X87_CLASS:
7177 if (!in_return)
7178 return true;
7179 break;
7180 case X86_64_MEMORY_CLASS:
7181 gcc_unreachable ();
7184 return false;
7187 /* Construct container for the argument used by GCC interface. See
7188 FUNCTION_ARG for the detailed description. */
7190 static rtx
7191 construct_container (machine_mode mode, machine_mode orig_mode,
7192 const_tree type, int in_return, int nintregs, int nsseregs,
7193 const int *intreg, int sse_regno)
7195 /* The following variables hold the static issued_error state. */
7196 static bool issued_sse_arg_error;
7197 static bool issued_sse_ret_error;
7198 static bool issued_x87_ret_error;
7200 machine_mode tmpmode;
7201 int bytes =
7202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7203 enum x86_64_reg_class regclass[MAX_CLASSES];
7204 int n;
7205 int i;
7206 int nexps = 0;
7207 int needed_sseregs, needed_intregs;
7208 rtx exp[MAX_CLASSES];
7209 rtx ret;
7211 n = classify_argument (mode, type, regclass, 0);
7212 if (!n)
7213 return NULL;
7214 if (examine_argument (mode, type, in_return, &needed_intregs,
7215 &needed_sseregs))
7216 return NULL;
7217 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7218 return NULL;
7220 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7221 some less clueful developer tries to use floating-point anyway. */
7222 if (needed_sseregs && !TARGET_SSE)
7224 if (in_return)
7226 if (!issued_sse_ret_error)
7228 error ("SSE register return with SSE disabled");
7229 issued_sse_ret_error = true;
7232 else if (!issued_sse_arg_error)
7234 error ("SSE register argument with SSE disabled");
7235 issued_sse_arg_error = true;
7237 return NULL;
7240 /* Likewise, error if the ABI requires us to return values in the
7241 x87 registers and the user specified -mno-80387. */
7242 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7243 for (i = 0; i < n; i++)
7244 if (regclass[i] == X86_64_X87_CLASS
7245 || regclass[i] == X86_64_X87UP_CLASS
7246 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7248 if (!issued_x87_ret_error)
7250 error ("x87 register return with x87 disabled");
7251 issued_x87_ret_error = true;
7253 return NULL;
7256 /* First construct the simple cases. Avoid SCmode, since we want to use a
7257 single register to pass this type. */
7258 if (n == 1 && mode != SCmode)
7259 switch (regclass[0])
7261 case X86_64_INTEGER_CLASS:
7262 case X86_64_INTEGERSI_CLASS:
7263 return gen_rtx_REG (mode, intreg[0]);
7264 case X86_64_SSE_CLASS:
7265 case X86_64_SSESF_CLASS:
7266 case X86_64_SSEDF_CLASS:
7267 if (mode != BLKmode)
7268 return gen_reg_or_parallel (mode, orig_mode,
7269 SSE_REGNO (sse_regno));
7270 break;
7271 case X86_64_X87_CLASS:
7272 case X86_64_COMPLEX_X87_CLASS:
7273 return gen_rtx_REG (mode, FIRST_STACK_REG);
7274 case X86_64_NO_CLASS:
7275 /* Zero sized array, struct or class. */
7276 return NULL;
7277 default:
7278 gcc_unreachable ();
7280 if (n == 2
7281 && regclass[0] == X86_64_SSE_CLASS
7282 && regclass[1] == X86_64_SSEUP_CLASS
7283 && mode != BLKmode)
7284 return gen_reg_or_parallel (mode, orig_mode,
7285 SSE_REGNO (sse_regno));
7286 if (n == 4
7287 && regclass[0] == X86_64_SSE_CLASS
7288 && regclass[1] == X86_64_SSEUP_CLASS
7289 && regclass[2] == X86_64_SSEUP_CLASS
7290 && regclass[3] == X86_64_SSEUP_CLASS
7291 && mode != BLKmode)
7292 return gen_reg_or_parallel (mode, orig_mode,
7293 SSE_REGNO (sse_regno));
7294 if (n == 8
7295 && regclass[0] == X86_64_SSE_CLASS
7296 && regclass[1] == X86_64_SSEUP_CLASS
7297 && regclass[2] == X86_64_SSEUP_CLASS
7298 && regclass[3] == X86_64_SSEUP_CLASS
7299 && regclass[4] == X86_64_SSEUP_CLASS
7300 && regclass[5] == X86_64_SSEUP_CLASS
7301 && regclass[6] == X86_64_SSEUP_CLASS
7302 && regclass[7] == X86_64_SSEUP_CLASS
7303 && mode != BLKmode)
7304 return gen_reg_or_parallel (mode, orig_mode,
7305 SSE_REGNO (sse_regno));
7306 if (n == 2
7307 && regclass[0] == X86_64_X87_CLASS
7308 && regclass[1] == X86_64_X87UP_CLASS)
7309 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7311 if (n == 2
7312 && regclass[0] == X86_64_INTEGER_CLASS
7313 && regclass[1] == X86_64_INTEGER_CLASS
7314 && (mode == CDImode || mode == TImode)
7315 && intreg[0] + 1 == intreg[1])
7316 return gen_rtx_REG (mode, intreg[0]);
7318 /* Otherwise figure out the entries of the PARALLEL. */
7319 for (i = 0; i < n; i++)
7321 int pos;
7323 switch (regclass[i])
7325 case X86_64_NO_CLASS:
7326 break;
7327 case X86_64_INTEGER_CLASS:
7328 case X86_64_INTEGERSI_CLASS:
7329 /* Merge TImodes on aligned occasions here too. */
7330 if (i * 8 + 8 > bytes)
7331 tmpmode
7332 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7333 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7334 tmpmode = SImode;
7335 else
7336 tmpmode = DImode;
7337 /* We've requested 24 bytes for
7338 which we don't have a mode. Use DImode. */
7339 if (tmpmode == BLKmode)
7340 tmpmode = DImode;
7341 exp [nexps++]
7342 = gen_rtx_EXPR_LIST (VOIDmode,
7343 gen_rtx_REG (tmpmode, *intreg),
7344 GEN_INT (i*8));
7345 intreg++;
7346 break;
7347 case X86_64_SSESF_CLASS:
7348 exp [nexps++]
7349 = gen_rtx_EXPR_LIST (VOIDmode,
7350 gen_rtx_REG (SFmode,
7351 SSE_REGNO (sse_regno)),
7352 GEN_INT (i*8));
7353 sse_regno++;
7354 break;
7355 case X86_64_SSEDF_CLASS:
7356 exp [nexps++]
7357 = gen_rtx_EXPR_LIST (VOIDmode,
7358 gen_rtx_REG (DFmode,
7359 SSE_REGNO (sse_regno)),
7360 GEN_INT (i*8));
7361 sse_regno++;
7362 break;
7363 case X86_64_SSE_CLASS:
7364 pos = i;
7365 switch (n)
7367 case 1:
7368 tmpmode = DImode;
7369 break;
7370 case 2:
7371 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7373 tmpmode = TImode;
7374 i++;
7376 else
7377 tmpmode = DImode;
7378 break;
7379 case 4:
7380 gcc_assert (i == 0
7381 && regclass[1] == X86_64_SSEUP_CLASS
7382 && regclass[2] == X86_64_SSEUP_CLASS
7383 && regclass[3] == X86_64_SSEUP_CLASS);
7384 tmpmode = OImode;
7385 i += 3;
7386 break;
7387 case 8:
7388 gcc_assert (i == 0
7389 && regclass[1] == X86_64_SSEUP_CLASS
7390 && regclass[2] == X86_64_SSEUP_CLASS
7391 && regclass[3] == X86_64_SSEUP_CLASS
7392 && regclass[4] == X86_64_SSEUP_CLASS
7393 && regclass[5] == X86_64_SSEUP_CLASS
7394 && regclass[6] == X86_64_SSEUP_CLASS
7395 && regclass[7] == X86_64_SSEUP_CLASS);
7396 tmpmode = XImode;
7397 i += 7;
7398 break;
7399 default:
7400 gcc_unreachable ();
7402 exp [nexps++]
7403 = gen_rtx_EXPR_LIST (VOIDmode,
7404 gen_rtx_REG (tmpmode,
7405 SSE_REGNO (sse_regno)),
7406 GEN_INT (pos*8));
7407 sse_regno++;
7408 break;
7409 default:
7410 gcc_unreachable ();
7414 /* Empty aligned struct, union or class. */
7415 if (nexps == 0)
7416 return NULL;
7418 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7419 for (i = 0; i < nexps; i++)
7420 XVECEXP (ret, 0, i) = exp [i];
7421 return ret;
7424 /* Update the data in CUM to advance over an argument of mode MODE
7425 and data type TYPE. (TYPE is null for libcalls where that information
7426 may not be available.)
7428 Return the number of integer registers advanced over. */
7430 static int
7431 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7432 const_tree type, HOST_WIDE_INT bytes,
7433 HOST_WIDE_INT words)
7435 int res = 0;
7437 switch (mode)
7439 default:
7440 break;
7442 case BLKmode:
7443 if (bytes < 0)
7444 break;
7445 /* FALLTHRU */
7447 case DImode:
7448 case SImode:
7449 case HImode:
7450 case QImode:
7451 cum->words += words;
7452 cum->nregs -= words;
7453 cum->regno += words;
7454 if (cum->nregs >= 0)
7455 res = words;
7456 if (cum->nregs <= 0)
7458 cum->nregs = 0;
7459 cum->regno = 0;
7461 break;
7463 case OImode:
7464 /* OImode shouldn't be used directly. */
7465 gcc_unreachable ();
7467 case DFmode:
7468 if (cum->float_in_sse < 2)
7469 break;
7470 case SFmode:
7471 if (cum->float_in_sse < 1)
7472 break;
7473 /* FALLTHRU */
7475 case V8SFmode:
7476 case V8SImode:
7477 case V64QImode:
7478 case V32HImode:
7479 case V16SImode:
7480 case V8DImode:
7481 case V16SFmode:
7482 case V8DFmode:
7483 case V32QImode:
7484 case V16HImode:
7485 case V4DFmode:
7486 case V4DImode:
7487 case TImode:
7488 case V16QImode:
7489 case V8HImode:
7490 case V4SImode:
7491 case V2DImode:
7492 case V4SFmode:
7493 case V2DFmode:
7494 if (!type || !AGGREGATE_TYPE_P (type))
7496 cum->sse_words += words;
7497 cum->sse_nregs -= 1;
7498 cum->sse_regno += 1;
7499 if (cum->sse_nregs <= 0)
7501 cum->sse_nregs = 0;
7502 cum->sse_regno = 0;
7505 break;
7507 case V8QImode:
7508 case V4HImode:
7509 case V2SImode:
7510 case V2SFmode:
7511 case V1TImode:
7512 case V1DImode:
7513 if (!type || !AGGREGATE_TYPE_P (type))
7515 cum->mmx_words += words;
7516 cum->mmx_nregs -= 1;
7517 cum->mmx_regno += 1;
7518 if (cum->mmx_nregs <= 0)
7520 cum->mmx_nregs = 0;
7521 cum->mmx_regno = 0;
7524 break;
7527 return res;
7530 static int
7531 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7532 const_tree type, HOST_WIDE_INT words, bool named)
7534 int int_nregs, sse_nregs;
7536 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7537 if (!named && (VALID_AVX512F_REG_MODE (mode)
7538 || VALID_AVX256_REG_MODE (mode)))
7539 return 0;
7541 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7542 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7544 cum->nregs -= int_nregs;
7545 cum->sse_nregs -= sse_nregs;
7546 cum->regno += int_nregs;
7547 cum->sse_regno += sse_nregs;
7548 return int_nregs;
7550 else
7552 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7553 cum->words = (cum->words + align - 1) & ~(align - 1);
7554 cum->words += words;
7555 return 0;
7559 static int
7560 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7561 HOST_WIDE_INT words)
7563 /* Otherwise, this should be passed indirectly. */
7564 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7566 cum->words += words;
7567 if (cum->nregs > 0)
7569 cum->nregs -= 1;
7570 cum->regno += 1;
7571 return 1;
7573 return 0;
7576 /* Update the data in CUM to advance over an argument of mode MODE and
7577 data type TYPE. (TYPE is null for libcalls where that information
7578 may not be available.) */
7580 static void
7581 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7582 const_tree type, bool named)
7584 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7585 HOST_WIDE_INT bytes, words;
7586 int nregs;
7588 if (mode == BLKmode)
7589 bytes = int_size_in_bytes (type);
7590 else
7591 bytes = GET_MODE_SIZE (mode);
7592 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7594 if (type)
7595 mode = type_natural_mode (type, NULL, false);
7597 if ((type && POINTER_BOUNDS_TYPE_P (type))
7598 || POINTER_BOUNDS_MODE_P (mode))
7600 /* If we pass bounds in the BT then just update the remaining bounds count. */
7601 if (cum->bnds_in_bt)
7603 cum->bnds_in_bt--;
7604 return;
7607 /* Update the remaining number of bounds to force. */
7608 if (cum->force_bnd_pass)
7609 cum->force_bnd_pass--;
7611 cum->bnd_regno++;
7613 return;
7616 /* The first arg not going to Bounds Tables resets this counter. */
7617 cum->bnds_in_bt = 0;
7618 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7619 the passed and received types do not match. If bounds do not follow an
7620 unnamed arg, still pretend the required number of bounds were passed. */
7621 if (cum->force_bnd_pass)
7623 cum->bnd_regno += cum->force_bnd_pass;
7624 cum->force_bnd_pass = 0;
7627 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7628 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7629 else if (TARGET_64BIT)
7630 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7631 else
7632 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7634 /* For stdarg we expect bounds to be passed for each value passed
7635 in register. */
7636 if (cum->stdarg)
7637 cum->force_bnd_pass = nregs;
7638 /* For pointers passed in memory we expect bounds passed in Bounds
7639 Table. */
7640 if (!nregs)
7641 cum->bnds_in_bt = chkp_type_bounds_count (type);
7644 /* Define where to put the arguments to a function.
7645 Value is zero to push the argument on the stack,
7646 or a hard register in which to store the argument.
7648 MODE is the argument's machine mode.
7649 TYPE is the data type of the argument (as a tree).
7650 This is null for libcalls where that information may
7651 not be available.
7652 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7653 the preceding args and about the function being called.
7654 NAMED is nonzero if this argument is a named parameter
7655 (otherwise it is an extra parameter matching an ellipsis). */
7657 static rtx
7658 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7659 machine_mode orig_mode, const_tree type,
7660 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7662 /* Avoid the AL settings for the Unix64 ABI. */
7663 if (mode == VOIDmode)
7664 return constm1_rtx;
7666 switch (mode)
7668 default:
7669 break;
7671 case BLKmode:
7672 if (bytes < 0)
7673 break;
7674 /* FALLTHRU */
7675 case DImode:
7676 case SImode:
7677 case HImode:
7678 case QImode:
7679 if (words <= cum->nregs)
7681 int regno = cum->regno;
7683 /* Fastcall allocates the first two DWORD (SImode) or
7684 smaller arguments to ECX and EDX if the argument isn't an
7685 aggregate type. */
7686 if (cum->fastcall)
7688 if (mode == BLKmode
7689 || mode == DImode
7690 || (type && AGGREGATE_TYPE_P (type)))
7691 break;
7693 /* ECX not EAX is the first allocated register. */
7694 if (regno == AX_REG)
7695 regno = CX_REG;
7697 return gen_rtx_REG (mode, regno);
7699 break;
7701 case DFmode:
7702 if (cum->float_in_sse < 2)
7703 break;
7704 case SFmode:
7705 if (cum->float_in_sse < 1)
7706 break;
7707 /* FALLTHRU */
7708 case TImode:
7709 /* In 32bit, we pass TImode in xmm registers. */
7710 case V16QImode:
7711 case V8HImode:
7712 case V4SImode:
7713 case V2DImode:
7714 case V4SFmode:
7715 case V2DFmode:
7716 if (!type || !AGGREGATE_TYPE_P (type))
7718 if (cum->sse_nregs)
7719 return gen_reg_or_parallel (mode, orig_mode,
7720 cum->sse_regno + FIRST_SSE_REG);
7722 break;
7724 case OImode:
7725 case XImode:
7726 /* OImode and XImode shouldn't be used directly. */
7727 gcc_unreachable ();
7729 case V64QImode:
7730 case V32HImode:
7731 case V16SImode:
7732 case V8DImode:
7733 case V16SFmode:
7734 case V8DFmode:
7735 case V8SFmode:
7736 case V8SImode:
7737 case V32QImode:
7738 case V16HImode:
7739 case V4DFmode:
7740 case V4DImode:
7741 if (!type || !AGGREGATE_TYPE_P (type))
7743 if (cum->sse_nregs)
7744 return gen_reg_or_parallel (mode, orig_mode,
7745 cum->sse_regno + FIRST_SSE_REG);
7747 break;
7749 case V8QImode:
7750 case V4HImode:
7751 case V2SImode:
7752 case V2SFmode:
7753 case V1TImode:
7754 case V1DImode:
7755 if (!type || !AGGREGATE_TYPE_P (type))
7757 if (cum->mmx_nregs)
7758 return gen_reg_or_parallel (mode, orig_mode,
7759 cum->mmx_regno + FIRST_MMX_REG);
7761 break;
7764 return NULL_RTX;
7767 static rtx
7768 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7769 machine_mode orig_mode, const_tree type, bool named)
7771 /* Handle a hidden AL argument containing number of registers
7772 for varargs x86-64 functions. */
7773 if (mode == VOIDmode)
7774 return GEN_INT (cum->maybe_vaarg
7775 ? (cum->sse_nregs < 0
7776 ? X86_64_SSE_REGPARM_MAX
7777 : cum->sse_regno)
7778 : -1);
7780 switch (mode)
7782 default:
7783 break;
7785 case V8SFmode:
7786 case V8SImode:
7787 case V32QImode:
7788 case V16HImode:
7789 case V4DFmode:
7790 case V4DImode:
7791 case V16SFmode:
7792 case V16SImode:
7793 case V64QImode:
7794 case V32HImode:
7795 case V8DFmode:
7796 case V8DImode:
7797 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7798 if (!named)
7799 return NULL;
7800 break;
7803 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7804 cum->sse_nregs,
7805 &x86_64_int_parameter_registers [cum->regno],
7806 cum->sse_regno);
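/* Note on the hidden AL argument above (illustrative; the behaviour is as
   specified by the SysV x86-64 psABI): for a varargs call such as

       printf ("%f\n", 3.14);

   one SSE register (%xmm0) carries the double, so the caller sets %al to 1
   before the call; a varargs callee uses that upper bound to decide how many
   vector registers to dump into the register save area.  */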
7809 static rtx
7810 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7811 machine_mode orig_mode, bool named,
7812 HOST_WIDE_INT bytes)
7814 unsigned int regno;
7816 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7817 We use the value -2 to specify that the current function call is MS ABI. */
7818 if (mode == VOIDmode)
7819 return GEN_INT (-2);
7821 /* If we've run out of registers, it goes on the stack. */
7822 if (cum->nregs == 0)
7823 return NULL_RTX;
7825 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7827 /* Only floating point modes are passed in anything but integer regs. */
7828 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7830 if (named)
7831 regno = cum->regno + FIRST_SSE_REG;
7832 else
7834 rtx t1, t2;
7836 /* Unnamed floating parameters are passed in both the
7837 SSE and integer registers. */
7838 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7839 t2 = gen_rtx_REG (mode, regno);
7840 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7841 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7842 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7845 /* Handle aggregated types passed in register. */
7846 if (orig_mode == BLKmode)
7848 if (bytes > 0 && bytes <= 8)
7849 mode = (bytes > 4 ? DImode : SImode);
7850 if (mode == BLKmode)
7851 mode = DImode;
7854 return gen_reg_or_parallel (mode, orig_mode, regno);
7857 /* Return where to put the arguments to a function.
7858 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7860 MODE is the argument's machine mode. TYPE is the data type of the
7861 argument. It is null for libcalls where that information may not be
7862 available. CUM gives information about the preceding args and about
7863 the function being called. NAMED is nonzero if this argument is a
7864 named parameter (otherwise it is an extra parameter matching an
7865 ellipsis). */
7867 static rtx
7868 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7869 const_tree type, bool named)
7871 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7872 machine_mode mode = omode;
7873 HOST_WIDE_INT bytes, words;
7874 rtx arg;
7876 /* All pointer bounds arguments are handled separately here. */
7877 if ((type && POINTER_BOUNDS_TYPE_P (type))
7878 || POINTER_BOUNDS_MODE_P (mode))
7880 /* Return NULL if bounds are forced to go in Bounds Table. */
7881 if (cum->bnds_in_bt)
7882 arg = NULL;
7883 /* Return the next available bound reg if any. */
7884 else if (cum->bnd_regno <= LAST_BND_REG)
7885 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7886 /* Return the next special slot number otherwise. */
7887 else
7888 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7890 return arg;
7893 if (mode == BLKmode)
7894 bytes = int_size_in_bytes (type);
7895 else
7896 bytes = GET_MODE_SIZE (mode);
7897 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7899 /* To simplify the code below, represent vector types with a vector mode
7900 even if MMX/SSE are not active. */
7901 if (type && TREE_CODE (type) == VECTOR_TYPE)
7902 mode = type_natural_mode (type, cum, false);
7904 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7905 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7906 else if (TARGET_64BIT)
7907 arg = function_arg_64 (cum, mode, omode, type, named);
7908 else
7909 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7911 return arg;
7914 /* A C expression that indicates when an argument must be passed by
7915 reference. If nonzero for an argument, a copy of that argument is
7916 made in memory and a pointer to the argument is passed instead of
7917 the argument itself. The pointer is passed in whatever way is
7918 appropriate for passing a pointer to that type. */
7920 static bool
7921 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7922 const_tree type, bool)
7924 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7926 /* Bounds are never passed by reference. */
7927 if ((type && POINTER_BOUNDS_TYPE_P (type))
7928 || POINTER_BOUNDS_MODE_P (mode))
7929 return false;
7931 /* See Windows x64 Software Convention. */
7932 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7934 int msize = (int) GET_MODE_SIZE (mode);
7935 if (type)
7937 /* Arrays are passed by reference. */
7938 if (TREE_CODE (type) == ARRAY_TYPE)
7939 return true;
7941 if (AGGREGATE_TYPE_P (type))
7943 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7944 are passed by reference. */
7945 msize = int_size_in_bytes (type);
7949 /* __m128 is passed by reference. */
7950 switch (msize) {
7951 case 1: case 2: case 4: case 8:
7952 break;
7953 default:
7954 return true;
7957 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7958 return 1;
7960 return 0;
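/* Worked example of the MS ABI rule above (added for illustration): under
   the Windows x64 convention a 16-byte __m128 argument, or a struct such as

       struct s { char buf[12]; };

   whose size is not 1, 2, 4 or 8 bytes, is passed by reference (the caller
   makes a copy and passes its address), while an 8-byte struct is passed by
   value in a general-purpose register.  */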
7963 /* Return true when TYPE should be 128bit aligned for 32bit argument
7964 passing ABI. XXX: This function is obsolete and is only used for
7965 checking psABI compatibility with previous versions of GCC. */
7967 static bool
7968 ix86_compat_aligned_value_p (const_tree type)
7970 machine_mode mode = TYPE_MODE (type);
7971 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7972 || mode == TDmode
7973 || mode == TFmode
7974 || mode == TCmode)
7975 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7976 return true;
7977 if (TYPE_ALIGN (type) < 128)
7978 return false;
7980 if (AGGREGATE_TYPE_P (type))
7982 /* Walk the aggregates recursively. */
7983 switch (TREE_CODE (type))
7985 case RECORD_TYPE:
7986 case UNION_TYPE:
7987 case QUAL_UNION_TYPE:
7989 tree field;
7991 /* Walk all the structure fields. */
7992 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7994 if (TREE_CODE (field) == FIELD_DECL
7995 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7996 return true;
7998 break;
8001 case ARRAY_TYPE:
8002 /* Just for use if some language passes arrays by value. */
8003 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8004 return true;
8005 break;
8007 default:
8008 gcc_unreachable ();
8011 return false;
8014 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8015 XXX: This function is obsolete and is only used for checking psABI
8016 compatibility with previous versions of GCC. */
8018 static unsigned int
8019 ix86_compat_function_arg_boundary (machine_mode mode,
8020 const_tree type, unsigned int align)
8022 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8023 natural boundaries. */
8024 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8026 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8027 make an exception for SSE modes since these require 128bit
8028 alignment.
8030 The handling here differs from field_alignment. ICC aligns MMX
8031 arguments to 4 byte boundaries, while structure fields are aligned
8032 to 8 byte boundaries. */
8033 if (!type)
8035 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8036 align = PARM_BOUNDARY;
8038 else
8040 if (!ix86_compat_aligned_value_p (type))
8041 align = PARM_BOUNDARY;
8044 if (align > BIGGEST_ALIGNMENT)
8045 align = BIGGEST_ALIGNMENT;
8046 return align;
8049 /* Return true when TYPE should be 128bit aligned for 32bit argument
8050 passing ABI. */
8052 static bool
8053 ix86_contains_aligned_value_p (const_tree type)
8055 machine_mode mode = TYPE_MODE (type);
8057 if (mode == XFmode || mode == XCmode)
8058 return false;
8060 if (TYPE_ALIGN (type) < 128)
8061 return false;
8063 if (AGGREGATE_TYPE_P (type))
8065 /* Walk the aggregates recursively. */
8066 switch (TREE_CODE (type))
8068 case RECORD_TYPE:
8069 case UNION_TYPE:
8070 case QUAL_UNION_TYPE:
8072 tree field;
8074 /* Walk all the structure fields. */
8075 for (field = TYPE_FIELDS (type);
8076 field;
8077 field = DECL_CHAIN (field))
8079 if (TREE_CODE (field) == FIELD_DECL
8080 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8081 return true;
8083 break;
8086 case ARRAY_TYPE:
8087 /* Just for use if some language passes arrays by value. */
8088 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8089 return true;
8090 break;
8092 default:
8093 gcc_unreachable ();
8096 else
8097 return TYPE_ALIGN (type) >= 128;
8099 return false;
8102 /* Gives the alignment boundary, in bits, of an argument with the
8103 specified mode and type. */
8105 static unsigned int
8106 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8108 unsigned int align;
8109 if (type)
8111 /* Since the main variant type is used for the call, convert TYPE
8112 to its main variant. */
8113 type = TYPE_MAIN_VARIANT (type);
8114 align = TYPE_ALIGN (type);
8116 else
8117 align = GET_MODE_ALIGNMENT (mode);
8118 if (align < PARM_BOUNDARY)
8119 align = PARM_BOUNDARY;
8120 else
8122 static bool warned;
8123 unsigned int saved_align = align;
8125 if (!TARGET_64BIT)
8127 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8128 if (!type)
8130 if (mode == XFmode || mode == XCmode)
8131 align = PARM_BOUNDARY;
8133 else if (!ix86_contains_aligned_value_p (type))
8134 align = PARM_BOUNDARY;
8136 if (align < 128)
8137 align = PARM_BOUNDARY;
8140 if (warn_psabi
8141 && !warned
8142 && align != ix86_compat_function_arg_boundary (mode, type,
8143 saved_align))
8145 warned = true;
8146 inform (input_location,
8147 "The ABI for passing parameters with %d-byte"
8148 " alignment has changed in GCC 4.6",
8149 align / BITS_PER_UNIT);
8153 return align;
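/* Illustrative example for the 32-bit boundary computation above (a sketch,
   not from the original sources): an argument such as

       struct s { __m128 v; };

   has TYPE_ALIGN of 128, so it is passed on a 16-byte stack boundary, while
   a plain struct of ints stays at the 4-byte PARM_BOUNDARY; when the result
   differs from the pre-GCC 4.6 computation, the -Wpsabi note above reports
   the change.  */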
8156 /* Return true if N is a possible register number of function value. */
8158 static bool
8159 ix86_function_value_regno_p (const unsigned int regno)
8161 switch (regno)
8163 case AX_REG:
8164 return true;
8165 case DX_REG:
8166 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8167 case DI_REG:
8168 case SI_REG:
8169 return TARGET_64BIT && ix86_abi != MS_ABI;
8171 case FIRST_BND_REG:
8172 return chkp_function_instrumented_p (current_function_decl);
8174 /* Complex values are returned in %st(0)/%st(1) pair. */
8175 case ST0_REG:
8176 case ST1_REG:
8177 /* TODO: The function should depend on current function ABI but
8178 builtins.c would need updating then. Therefore we use the
8179 default ABI. */
8180 if (TARGET_64BIT && ix86_abi == MS_ABI)
8181 return false;
8182 return TARGET_FLOAT_RETURNS_IN_80387;
8184 /* Complex values are returned in %xmm0/%xmm1 pair. */
8185 case XMM0_REG:
8186 case XMM1_REG:
8187 return TARGET_SSE;
8189 case MM0_REG:
8190 if (TARGET_MACHO || TARGET_64BIT)
8191 return false;
8192 return TARGET_MMX;
8195 return false;
8198 /* Define how to find the value returned by a function.
8199 VALTYPE is the data type of the value (as a tree).
8200 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8201 otherwise, FUNC is 0. */
8203 static rtx
8204 function_value_32 (machine_mode orig_mode, machine_mode mode,
8205 const_tree fntype, const_tree fn)
8207 unsigned int regno;
8209 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8210 we normally prevent this case when mmx is not available. However
8211 some ABIs may require the result to be returned like DImode. */
8212 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8213 regno = FIRST_MMX_REG;
8215 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8216 we prevent this case when sse is not available. However some ABIs
8217 may require the result to be returned like integer TImode. */
8218 else if (mode == TImode
8219 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8220 regno = FIRST_SSE_REG;
8222 /* 32-byte vector modes in %ymm0. */
8223 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8224 regno = FIRST_SSE_REG;
8226 /* 64-byte vector modes in %zmm0. */
8227 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8228 regno = FIRST_SSE_REG;
8230 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8231 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8232 regno = FIRST_FLOAT_REG;
8233 else
8234 /* Most things go in %eax. */
8235 regno = AX_REG;
8237 /* Override FP return register with %xmm0 for local functions when
8238 SSE math is enabled or for functions with sseregparm attribute. */
8239 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8241 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8242 if ((sse_level >= 1 && mode == SFmode)
8243 || (sse_level == 2 && mode == DFmode))
8244 regno = FIRST_SSE_REG;
8247 /* OImode shouldn't be used directly. */
8248 gcc_assert (mode != OImode);
8250 return gen_rtx_REG (orig_mode, regno);
8253 static rtx
8254 function_value_64 (machine_mode orig_mode, machine_mode mode,
8255 const_tree valtype)
8257 rtx ret;
8259 /* Handle libcalls, which don't provide a type node. */
8260 if (valtype == NULL)
8262 unsigned int regno;
8264 switch (mode)
8266 case SFmode:
8267 case SCmode:
8268 case DFmode:
8269 case DCmode:
8270 case TFmode:
8271 case SDmode:
8272 case DDmode:
8273 case TDmode:
8274 regno = FIRST_SSE_REG;
8275 break;
8276 case XFmode:
8277 case XCmode:
8278 regno = FIRST_FLOAT_REG;
8279 break;
8280 case TCmode:
8281 return NULL;
8282 default:
8283 regno = AX_REG;
8286 return gen_rtx_REG (mode, regno);
8288 else if (POINTER_TYPE_P (valtype)
8289 && !upc_shared_type_p (TREE_TYPE (valtype)))
8291 /* Pointers are always returned in word_mode. */
8292 mode = word_mode;
8295 ret = construct_container (mode, orig_mode, valtype, 1,
8296 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8297 x86_64_int_return_registers, 0);
8299 /* For zero-sized structures, construct_container returns NULL, but we
8300 need to keep the rest of the compiler happy by returning a meaningful value. */
8301 if (!ret)
8302 ret = gen_rtx_REG (orig_mode, AX_REG);
8304 return ret;
8307 static rtx
8308 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8309 const_tree valtype)
8311 unsigned int regno = AX_REG;
8313 if (TARGET_SSE)
8315 switch (GET_MODE_SIZE (mode))
8317 case 16:
8318 if (valtype != NULL_TREE
8319 && !VECTOR_INTEGER_TYPE_P (valtype)
8321 && !INTEGRAL_TYPE_P (valtype)
8322 && !VECTOR_FLOAT_TYPE_P (valtype))
8323 break;
8324 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8325 && !COMPLEX_MODE_P (mode))
8326 regno = FIRST_SSE_REG;
8327 break;
8328 case 8:
8329 case 4:
8330 if (mode == SFmode || mode == DFmode)
8331 regno = FIRST_SSE_REG;
8332 break;
8333 default:
8334 break;
8337 return gen_rtx_REG (orig_mode, regno);
8340 static rtx
8341 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8342 machine_mode orig_mode, machine_mode mode)
8344 const_tree fn, fntype;
8346 fn = NULL_TREE;
8347 if (fntype_or_decl && DECL_P (fntype_or_decl))
8348 fn = fntype_or_decl;
8349 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8351 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8352 || POINTER_BOUNDS_MODE_P (mode))
8353 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8354 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8355 return function_value_ms_64 (orig_mode, mode, valtype);
8356 else if (TARGET_64BIT)
8357 return function_value_64 (orig_mode, mode, valtype);
8358 else
8359 return function_value_32 (orig_mode, mode, fntype, fn);
8362 static rtx
8363 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8365 machine_mode mode, orig_mode;
8367 orig_mode = TYPE_MODE (valtype);
8368 mode = type_natural_mode (valtype, NULL, true);
8369 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8372 /* Return an RTX representing a place where a function returns
8373 or receives pointer bounds, or NULL if no bounds are returned.
8375 VALTYPE is a data type of a value returned by the function.
8377 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8378 or FUNCTION_TYPE of the function.
8380 If OUTGOING is false, return a place in which the caller will
8381 see the return value. Otherwise, return a place where a
8382 function returns a value. */
8384 static rtx
8385 ix86_function_value_bounds (const_tree valtype,
8386 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8387 bool outgoing ATTRIBUTE_UNUSED)
8389 rtx res = NULL_RTX;
8391 if (BOUNDED_TYPE_P (valtype))
8392 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8393 else if (chkp_type_has_pointer (valtype))
8395 bitmap slots;
8396 rtx bounds[2];
8397 bitmap_iterator bi;
8398 unsigned i, bnd_no = 0;
8400 bitmap_obstack_initialize (NULL);
8401 slots = BITMAP_ALLOC (NULL);
8402 chkp_find_bound_slots (valtype, slots);
8404 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8406 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8407 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8408 gcc_assert (bnd_no < 2);
8409 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8412 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8414 BITMAP_FREE (slots);
8415 bitmap_obstack_release (NULL);
8417 else
8418 res = NULL_RTX;
8420 return res;
8423 /* Pointer function arguments and return values are promoted to
8424 word_mode. */
8426 static machine_mode
8427 ix86_promote_function_mode (const_tree type, machine_mode mode,
8428 int *punsignedp, const_tree fntype,
8429 int for_return)
8431 if (type != NULL_TREE && POINTER_TYPE_P (type))
8433 if (upc_shared_type_p (TREE_TYPE (type)))
8435 *punsignedp = 1;
8436 return TYPE_MODE (upc_pts_rep_type_node);
8438 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8439 return word_mode;
8441 return default_promote_function_mode (type, mode, punsignedp, fntype,
8442 for_return);
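/* Illustrative note (added for exposition): with -mx32, Pmode is SImode but
   word_mode is DImode, so a pointer argument or return value such as

       void *id (void *p) { return p; }

   is zero-extended (POINTERS_EXTEND_UNSIGNED) and travels in a 64-bit
   register, while UPC pointers-to-shared keep the representation type given
   by upc_pts_rep_type_node above.  */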
8445 /* Return true if a structure, union or array with MODE containing FIELD
8446 should be accessed using BLKmode. */
8448 static bool
8449 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8451 /* Union with XFmode must be in BLKmode. */
8452 return (mode == XFmode
8453 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8454 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8458 ix86_libcall_value (machine_mode mode)
8460 return ix86_function_value_1 (NULL, NULL, mode, mode);
8463 /* Return true iff type is returned in memory. */
8465 static bool
8466 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8468 #ifdef SUBTARGET_RETURN_IN_MEMORY
8469 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8470 #else
8471 const machine_mode mode = type_natural_mode (type, NULL, true);
8472 HOST_WIDE_INT size;
8474 if (POINTER_BOUNDS_TYPE_P (type))
8475 return false;
8477 if (TARGET_64BIT)
8479 if (ix86_function_type_abi (fntype) == MS_ABI)
8481 size = int_size_in_bytes (type);
8483 /* __m128 is returned in xmm0. */
8484 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8485 || INTEGRAL_TYPE_P (type)
8486 || VECTOR_FLOAT_TYPE_P (type))
8487 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8488 && !COMPLEX_MODE_P (mode)
8489 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8490 return false;
8492 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8493 return size != 1 && size != 2 && size != 4 && size != 8;
8495 else
8497 int needed_intregs, needed_sseregs;
8499 return examine_argument (mode, type, 1,
8500 &needed_intregs, &needed_sseregs);
8503 else
8505 if (mode == BLKmode)
8506 return true;
8508 size = int_size_in_bytes (type);
8510 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8511 return false;
8513 if (VECTOR_MODE_P (mode) || mode == TImode)
8515 /* User-created vectors small enough to fit in EAX. */
8516 if (size < 8)
8517 return false;
8519 /* Unless the ABI prescribes otherwise,
8520 MMX/3dNow values are returned in MM0 if available. */
8522 if (size == 8)
8523 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8525 /* SSE values are returned in XMM0 if available. */
8526 if (size == 16)
8527 return !TARGET_SSE;
8529 /* AVX values are returned in YMM0 if available. */
8530 if (size == 32)
8531 return !TARGET_AVX;
8533 /* AVX512F values are returned in ZMM0 if available. */
8534 if (size == 64)
8535 return !TARGET_AVX512F;
8538 if (mode == XFmode)
8539 return false;
8541 if (size > 12)
8542 return true;
8544 /* OImode shouldn't be used directly. */
8545 gcc_assert (mode != OImode);
8547 return false;
8549 #endif
8553 /* Create the va_list data type. */
8555 /* Returns the calling convention specific va_list data type.
8556 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
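/* For reference, the record built below for the SysV AMD64 ABI matches
   the C-level declaration (a sketch, not code that is compiled here):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   For 32-bit targets and the MS ABI, va_list is just a char pointer.  */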
8558 static tree
8559 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8561 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8563 /* For i386 we use a plain pointer to the argument area. */
8564 if (!TARGET_64BIT || abi == MS_ABI)
8565 return build_pointer_type (char_type_node);
8567 record = lang_hooks.types.make_type (RECORD_TYPE);
8568 type_decl = build_decl (BUILTINS_LOCATION,
8569 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8571 f_gpr = build_decl (BUILTINS_LOCATION,
8572 FIELD_DECL, get_identifier ("gp_offset"),
8573 unsigned_type_node);
8574 f_fpr = build_decl (BUILTINS_LOCATION,
8575 FIELD_DECL, get_identifier ("fp_offset"),
8576 unsigned_type_node);
8577 f_ovf = build_decl (BUILTINS_LOCATION,
8578 FIELD_DECL, get_identifier ("overflow_arg_area"),
8579 ptr_type_node);
8580 f_sav = build_decl (BUILTINS_LOCATION,
8581 FIELD_DECL, get_identifier ("reg_save_area"),
8582 ptr_type_node);
8584 va_list_gpr_counter_field = f_gpr;
8585 va_list_fpr_counter_field = f_fpr;
8587 DECL_FIELD_CONTEXT (f_gpr) = record;
8588 DECL_FIELD_CONTEXT (f_fpr) = record;
8589 DECL_FIELD_CONTEXT (f_ovf) = record;
8590 DECL_FIELD_CONTEXT (f_sav) = record;
8592 TYPE_STUB_DECL (record) = type_decl;
8593 TYPE_NAME (record) = type_decl;
8594 TYPE_FIELDS (record) = f_gpr;
8595 DECL_CHAIN (f_gpr) = f_fpr;
8596 DECL_CHAIN (f_fpr) = f_ovf;
8597 DECL_CHAIN (f_ovf) = f_sav;
8599 layout_type (record);
8601 /* The correct type is an array type of one element. */
8602 return build_array_type (record, build_index_type (size_zero_node));
8605 /* Set up the builtin va_list data type and, for 64-bit, the additional
8606 calling convention specific va_list data types. */
8608 static tree
8609 ix86_build_builtin_va_list (void)
8611 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8613 /* Initialize abi specific va_list builtin types. */
8614 if (TARGET_64BIT)
8616 tree t;
8617 if (ix86_abi == MS_ABI)
8619 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8620 if (TREE_CODE (t) != RECORD_TYPE)
8621 t = build_variant_type_copy (t);
8622 sysv_va_list_type_node = t;
8624 else
8626 t = ret;
8627 if (TREE_CODE (t) != RECORD_TYPE)
8628 t = build_variant_type_copy (t);
8629 sysv_va_list_type_node = t;
8631 if (ix86_abi != MS_ABI)
8633 t = ix86_build_builtin_va_list_abi (MS_ABI);
8634 if (TREE_CODE (t) != RECORD_TYPE)
8635 t = build_variant_type_copy (t);
8636 ms_va_list_type_node = t;
8638 else
8640 t = ret;
8641 if (TREE_CODE (t) != RECORD_TYPE)
8642 t = build_variant_type_copy (t);
8643 ms_va_list_type_node = t;
8647 return ret;
8650 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8652 static void
8653 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8655 rtx save_area, mem;
8656 alias_set_type set;
8657 int i, max;
8659 /* GPR size of varargs save area. */
8660 if (cfun->va_list_gpr_size)
8661 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8662 else
8663 ix86_varargs_gpr_size = 0;
8665 /* FPR size of varargs save area. We don't need it if we don't pass
8666 anything in SSE registers. */
8667 if (TARGET_SSE && cfun->va_list_fpr_size)
8668 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8669 else
8670 ix86_varargs_fpr_size = 0;
8672 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8673 return;
8675 save_area = frame_pointer_rtx;
8676 set = get_varargs_alias_set ();
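  /* Layout of the register save area filled in below: first the integer
     argument registers, one word each (at most X86_64_REGPARM_MAX slots),
     then the SSE argument registers, 16 bytes each (at most
     X86_64_SSE_REGPARM_MAX slots), starting at ix86_varargs_gpr_size.  */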
8678 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8679 if (max > X86_64_REGPARM_MAX)
8680 max = X86_64_REGPARM_MAX;
8682 for (i = cum->regno; i < max; i++)
8684 mem = gen_rtx_MEM (word_mode,
8685 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8686 MEM_NOTRAP_P (mem) = 1;
8687 set_mem_alias_set (mem, set);
8688 emit_move_insn (mem,
8689 gen_rtx_REG (word_mode,
8690 x86_64_int_parameter_registers[i]));
8693 if (ix86_varargs_fpr_size)
8695 machine_mode smode;
8696 rtx_code_label *label;
8697 rtx test;
8699 /* Now emit code to save SSE registers. The AX parameter contains the number
8700 of SSE parameter registers used to call this function, though all we
8701 actually check here is the zero/non-zero status. */
8703 label = gen_label_rtx ();
8704 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8705 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8706 label));
8708 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8709 we used movdqa (i.e. TImode) instead? Perhaps even better would
8710 be if we could determine the real mode of the data, via a hook
8711 into pass_stdarg. Ignore all that for now. */
8712 smode = V4SFmode;
8713 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8714 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8716 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8717 if (max > X86_64_SSE_REGPARM_MAX)
8718 max = X86_64_SSE_REGPARM_MAX;
8720 for (i = cum->sse_regno; i < max; ++i)
8722 mem = plus_constant (Pmode, save_area,
8723 i * 16 + ix86_varargs_gpr_size);
8724 mem = gen_rtx_MEM (smode, mem);
8725 MEM_NOTRAP_P (mem) = 1;
8726 set_mem_alias_set (mem, set);
8727 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8729 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8732 emit_label (label);
8736 static void
8737 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8739 alias_set_type set = get_varargs_alias_set ();
8740 int i;
8742 /* Reset to zero, as there might be a SysV va_arg used
8743 before. */
8744 ix86_varargs_gpr_size = 0;
8745 ix86_varargs_fpr_size = 0;
8747 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8749 rtx reg, mem;
8751 mem = gen_rtx_MEM (Pmode,
8752 plus_constant (Pmode, virtual_incoming_args_rtx,
8753 i * UNITS_PER_WORD));
8754 MEM_NOTRAP_P (mem) = 1;
8755 set_mem_alias_set (mem, set);
8757 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8758 emit_move_insn (mem, reg);
8762 static void
8763 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8764 tree type, int *, int no_rtl)
8766 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8767 CUMULATIVE_ARGS next_cum;
8768 tree fntype;
8770 /* This argument doesn't appear to be used anymore, which is good,
8771 because the old code here didn't suppress rtl generation. */
8772 gcc_assert (!no_rtl);
8774 if (!TARGET_64BIT)
8775 return;
8777 fntype = TREE_TYPE (current_function_decl);
8779 /* For varargs, we do not want to skip the dummy va_dcl argument.
8780 For stdargs, we do want to skip the last named argument. */
8781 next_cum = *cum;
8782 if (stdarg_p (fntype))
8783 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8784 true);
8786 if (cum->call_abi == MS_ABI)
8787 setup_incoming_varargs_ms_64 (&next_cum);
8788 else
8789 setup_incoming_varargs_64 (&next_cum);
8792 static void
8793 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8794 enum machine_mode mode,
8795 tree type,
8796 int *pretend_size ATTRIBUTE_UNUSED,
8797 int no_rtl)
8799 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8800 CUMULATIVE_ARGS next_cum;
8801 tree fntype;
8802 rtx save_area;
8803 int bnd_reg, i, max;
8805 gcc_assert (!no_rtl);
8807 /* Do nothing if we use a plain pointer to the argument area. */
8808 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8809 return;
8811 fntype = TREE_TYPE (current_function_decl);
8813 /* For varargs, we do not want to skip the dummy va_dcl argument.
8814 For stdargs, we do want to skip the last named argument. */
8815 next_cum = *cum;
8816 if (stdarg_p (fntype))
8817 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8818 true);
8819 save_area = frame_pointer_rtx;
8821 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8822 if (max > X86_64_REGPARM_MAX)
8823 max = X86_64_REGPARM_MAX;
8825 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8826 if (chkp_function_instrumented_p (current_function_decl))
8827 for (i = cum->regno; i < max; i++)
8829 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8830 rtx reg = gen_rtx_REG (DImode,
8831 x86_64_int_parameter_registers[i]);
8832 rtx ptr = reg;
8833 rtx bounds;
8835 if (bnd_reg <= LAST_BND_REG)
8836 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8837 else
8839 rtx ldx_addr =
8840 plus_constant (Pmode, arg_pointer_rtx,
8841 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8842 bounds = gen_reg_rtx (BNDmode);
8843 emit_insn (BNDmode == BND64mode
8844 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8845 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8848 emit_insn (BNDmode == BND64mode
8849 ? gen_bnd64_stx (addr, ptr, bounds)
8850 : gen_bnd32_stx (addr, ptr, bounds));
8852 bnd_reg++;
8857 /* Check whether TYPE is a char * style va_list. */
8859 static bool
8860 is_va_list_char_pointer (tree type)
8862 tree canonic;
8864 /* For 32-bit it is always true. */
8865 if (!TARGET_64BIT)
8866 return true;
8867 canonic = ix86_canonical_va_list_type (type);
8868 return (canonic == ms_va_list_type_node
8869 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8872 /* Implement va_start. */
8874 static void
8875 ix86_va_start (tree valist, rtx nextarg)
8877 HOST_WIDE_INT words, n_gpr, n_fpr;
8878 tree f_gpr, f_fpr, f_ovf, f_sav;
8879 tree gpr, fpr, ovf, sav, t;
8880 tree type;
8881 rtx ovf_rtx;
8883 if (flag_split_stack
8884 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8886 unsigned int scratch_regno;
8888 /* When we are splitting the stack, we can't refer to the stack
8889 arguments using internal_arg_pointer, because they may be on
8890 the old stack. The split stack prologue will arrange to
8891 leave a pointer to the old stack arguments in a scratch
8892 register, which we here copy to a pseudo-register. The split
8893 stack prologue can't set the pseudo-register directly because
8894 it (the prologue) runs before any registers have been saved. */
8896 scratch_regno = split_stack_prologue_scratch_regno ();
8897 if (scratch_regno != INVALID_REGNUM)
8899 rtx reg;
8900 rtx_insn *seq;
8902 reg = gen_reg_rtx (Pmode);
8903 cfun->machine->split_stack_varargs_pointer = reg;
8905 start_sequence ();
8906 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8907 seq = get_insns ();
8908 end_sequence ();
8910 push_topmost_sequence ();
8911 emit_insn_after (seq, entry_of_function ());
8912 pop_topmost_sequence ();
8916 /* Only the 64-bit target needs something special. */
8917 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8919 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8920 std_expand_builtin_va_start (valist, nextarg);
8921 else
8923 rtx va_r, next;
8925 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8926 next = expand_binop (ptr_mode, add_optab,
8927 cfun->machine->split_stack_varargs_pointer,
8928 crtl->args.arg_offset_rtx,
8929 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8930 convert_move (va_r, next, 0);
8932 /* Store zero bounds for va_list. */
8933 if (chkp_function_instrumented_p (current_function_decl))
8934 chkp_expand_bounds_reset_for_mem (valist,
8935 make_tree (TREE_TYPE (valist),
8936 next));
8939 return;
8942 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8943 f_fpr = DECL_CHAIN (f_gpr);
8944 f_ovf = DECL_CHAIN (f_fpr);
8945 f_sav = DECL_CHAIN (f_ovf);
8947 valist = build_simple_mem_ref (valist);
8948 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8949 /* The following should be folded into the MEM_REF offset. */
8950 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8951 f_gpr, NULL_TREE);
8952 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8953 f_fpr, NULL_TREE);
8954 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8955 f_ovf, NULL_TREE);
8956 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8957 f_sav, NULL_TREE);
8959 /* Count number of gp and fp argument registers used. */
8960 words = crtl->args.info.words;
8961 n_gpr = crtl->args.info.regno;
8962 n_fpr = crtl->args.info.sse_regno;
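  /* The stores below follow the SysV AMD64 va_list convention: gp_offset
     counts the bytes already consumed by named integer arguments (8 per
     register), while fp_offset starts past all the GPR slots
     (8 * X86_64_REGPARM_MAX) plus 16 bytes per named SSE argument.  */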
8964 if (cfun->va_list_gpr_size)
8966 type = TREE_TYPE (gpr);
8967 t = build2 (MODIFY_EXPR, type,
8968 gpr, build_int_cst (type, n_gpr * 8));
8969 TREE_SIDE_EFFECTS (t) = 1;
8970 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8973 if (TARGET_SSE && cfun->va_list_fpr_size)
8975 type = TREE_TYPE (fpr);
8976 t = build2 (MODIFY_EXPR, type, fpr,
8977 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8978 TREE_SIDE_EFFECTS (t) = 1;
8979 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8982 /* Find the overflow area. */
8983 type = TREE_TYPE (ovf);
8984 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8985 ovf_rtx = crtl->args.internal_arg_pointer;
8986 else
8987 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8988 t = make_tree (type, ovf_rtx);
8989 if (words != 0)
8990 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8992 /* Store zero bounds for overflow area pointer. */
8993 if (chkp_function_instrumented_p (current_function_decl))
8994 chkp_expand_bounds_reset_for_mem (ovf, t);
8996 t = build2 (MODIFY_EXPR, type, ovf, t);
8997 TREE_SIDE_EFFECTS (t) = 1;
8998 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9000 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9002 /* Find the register save area.
9003 The function prologue saves it right above the stack frame. */
9004 type = TREE_TYPE (sav);
9005 t = make_tree (type, frame_pointer_rtx);
9006 if (!ix86_varargs_gpr_size)
9007 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9009 /* Store zero bounds for save area pointer. */
9010 if (chkp_function_instrumented_p (current_function_decl))
9011 chkp_expand_bounds_reset_for_mem (sav, t);
9013 t = build2 (MODIFY_EXPR, type, sav, t);
9014 TREE_SIDE_EFFECTS (t) = 1;
9015 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9019 /* Implement va_arg. */
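/* A rough sketch, in C-like pseudocode, of what the 64-bit code below
   expands a va_arg into for an argument that can live in integer
   registers (the SSE path uses fp_offset and 16-byte slots analogously):

     if (gp_offset >= (6 - needed_intregs + 1) * 8)
       goto overflow;
     addr = reg_save_area + gp_offset;
     gp_offset += needed_intregs * 8;
     goto done;
   overflow:
     addr = align (overflow_arg_area, arg_boundary);
     overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   done:
     result = *(type *) addr;  */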
9021 static tree
9022 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9023 gimple_seq *post_p)
9025 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9026 tree f_gpr, f_fpr, f_ovf, f_sav;
9027 tree gpr, fpr, ovf, sav, t;
9028 int size, rsize;
9029 tree lab_false, lab_over = NULL_TREE;
9030 tree addr, t2;
9031 rtx container;
9032 int indirect_p = 0;
9033 tree ptrtype;
9034 machine_mode nat_mode;
9035 unsigned int arg_boundary;
9037 /* Only the 64-bit target needs something special. */
9038 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9039 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9041 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9042 f_fpr = DECL_CHAIN (f_gpr);
9043 f_ovf = DECL_CHAIN (f_fpr);
9044 f_sav = DECL_CHAIN (f_ovf);
9046 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9047 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9048 valist = build_va_arg_indirect_ref (valist);
9049 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9050 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9051 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9053 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9054 if (indirect_p)
9055 type = build_pointer_type (type);
9056 size = int_size_in_bytes (type);
9057 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9059 nat_mode = type_natural_mode (type, NULL, false);
9060 switch (nat_mode)
9062 case V8SFmode:
9063 case V8SImode:
9064 case V32QImode:
9065 case V16HImode:
9066 case V4DFmode:
9067 case V4DImode:
9068 case V16SFmode:
9069 case V16SImode:
9070 case V64QImode:
9071 case V32HImode:
9072 case V8DFmode:
9073 case V8DImode:
9074 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
9075 if (!TARGET_64BIT_MS_ABI)
9077 container = NULL;
9078 break;
9081 default:
9082 container = construct_container (nat_mode, TYPE_MODE (type),
9083 type, 0, X86_64_REGPARM_MAX,
9084 X86_64_SSE_REGPARM_MAX, intreg,
9086 break;
9089 /* Pull the value out of the saved registers. */
9091 addr = create_tmp_var (ptr_type_node, "addr");
9093 if (container)
9095 int needed_intregs, needed_sseregs;
9096 bool need_temp;
9097 tree int_addr, sse_addr;
9099 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9100 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9102 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9104 need_temp = (!REG_P (container)
9105 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9106 || TYPE_ALIGN (type) > 128));
9108 /* In case we are passing a structure, verify that it is a consecutive block
9109 on the register save area. If not, we need to do moves. */
9110 if (!need_temp && !REG_P (container))
9112 /* Verify that all registers are strictly consecutive. */
9113 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9115 int i;
9117 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9119 rtx slot = XVECEXP (container, 0, i);
9120 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9121 || INTVAL (XEXP (slot, 1)) != i * 16)
9122 need_temp = true;
9125 else
9127 int i;
9129 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9131 rtx slot = XVECEXP (container, 0, i);
9132 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9133 || INTVAL (XEXP (slot, 1)) != i * 8)
9134 need_temp = true;
9138 if (!need_temp)
9140 int_addr = addr;
9141 sse_addr = addr;
9143 else
9145 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9146 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9149 /* First ensure that we fit completely in registers. */
9150 if (needed_intregs)
9152 t = build_int_cst (TREE_TYPE (gpr),
9153 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9154 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9155 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9156 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9157 gimplify_and_add (t, pre_p);
9159 if (needed_sseregs)
9161 t = build_int_cst (TREE_TYPE (fpr),
9162 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9163 + X86_64_REGPARM_MAX * 8);
9164 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9165 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9166 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9167 gimplify_and_add (t, pre_p);
9170 /* Compute index to start of area used for integer regs. */
9171 if (needed_intregs)
9173 /* int_addr = gpr + sav; */
9174 t = fold_build_pointer_plus (sav, gpr);
9175 gimplify_assign (int_addr, t, pre_p);
9177 if (needed_sseregs)
9179 /* sse_addr = fpr + sav; */
9180 t = fold_build_pointer_plus (sav, fpr);
9181 gimplify_assign (sse_addr, t, pre_p);
9183 if (need_temp)
9185 int i, prev_size = 0;
9186 tree temp = create_tmp_var (type, "va_arg_tmp");
9188 /* addr = &temp; */
9189 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9190 gimplify_assign (addr, t, pre_p);
9192 for (i = 0; i < XVECLEN (container, 0); i++)
9194 rtx slot = XVECEXP (container, 0, i);
9195 rtx reg = XEXP (slot, 0);
9196 machine_mode mode = GET_MODE (reg);
9197 tree piece_type;
9198 tree addr_type;
9199 tree daddr_type;
9200 tree src_addr, src;
9201 int src_offset;
9202 tree dest_addr, dest;
9203 int cur_size = GET_MODE_SIZE (mode);
9205 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9206 prev_size = INTVAL (XEXP (slot, 1));
9207 if (prev_size + cur_size > size)
9209 cur_size = size - prev_size;
9210 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9211 if (mode == BLKmode)
9212 mode = QImode;
9214 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9215 if (mode == GET_MODE (reg))
9216 addr_type = build_pointer_type (piece_type);
9217 else
9218 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9219 true);
9220 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9221 true);
9223 if (SSE_REGNO_P (REGNO (reg)))
9225 src_addr = sse_addr;
9226 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9228 else
9230 src_addr = int_addr;
9231 src_offset = REGNO (reg) * 8;
9233 src_addr = fold_convert (addr_type, src_addr);
9234 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9236 dest_addr = fold_convert (daddr_type, addr);
9237 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9238 if (cur_size == GET_MODE_SIZE (mode))
9240 src = build_va_arg_indirect_ref (src_addr);
9241 dest = build_va_arg_indirect_ref (dest_addr);
9243 gimplify_assign (dest, src, pre_p);
9245 else
9247 tree copy
9248 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9249 3, dest_addr, src_addr,
9250 size_int (cur_size));
9251 gimplify_and_add (copy, pre_p);
9253 prev_size += cur_size;
9257 if (needed_intregs)
9259 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9260 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9261 gimplify_assign (gpr, t, pre_p);
9264 if (needed_sseregs)
9266 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9267 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9268 gimplify_assign (fpr, t, pre_p);
9271 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9273 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9276 /* ... otherwise out of the overflow area. */
9278 /* When the caller aligns a parameter on the stack, a parameter whose
9279 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT is only
9280 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
9281 here with the caller. */
9282 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9283 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9284 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9286 /* Care for on-stack alignment if needed. */
9287 if (arg_boundary <= 64 || size == 0)
9288 t = ovf;
9289 else
9291 HOST_WIDE_INT align = arg_boundary / 8;
9292 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9293 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9294 build_int_cst (TREE_TYPE (t), -align));
9297 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9298 gimplify_assign (addr, t, pre_p);
9300 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9301 gimplify_assign (unshare_expr (ovf), t, pre_p);
9303 if (container)
9304 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9306 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9307 addr = fold_convert (ptrtype, addr);
9309 if (indirect_p)
9310 addr = build_va_arg_indirect_ref (addr);
9311 return build_va_arg_indirect_ref (addr);
9314 /* Return true if OPNUM's MEM should be matched
9315 in movabs* patterns. */
9317 bool
9318 ix86_check_movabs (rtx insn, int opnum)
9320 rtx set, mem;
9322 set = PATTERN (insn);
9323 if (GET_CODE (set) == PARALLEL)
9324 set = XVECEXP (set, 0, 0);
9325 gcc_assert (GET_CODE (set) == SET);
9326 mem = XEXP (set, opnum);
9327 while (GET_CODE (mem) == SUBREG)
9328 mem = SUBREG_REG (mem);
9329 gcc_assert (MEM_P (mem));
9330 return volatile_ok || !MEM_VOLATILE_P (mem);
9333 /* Initialize the table of extra 80387 mathematical constants. */
9335 static void
9336 init_ext_80387_constants (void)
9338 static const char * cst[5] =
9340 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9341 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9342 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9343 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9344 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9346 int i;
9348 for (i = 0; i < 5; i++)
9350 real_from_string (&ext_80387_constants_table[i], cst[i]);
9351 /* Ensure each constant is rounded to XFmode precision. */
9352 real_convert (&ext_80387_constants_table[i],
9353 XFmode, &ext_80387_constants_table[i]);
9356 ext_80387_constants_init = 1;
9359 /* Return non-zero if the constant is something that
9360 can be loaded with a special instruction. */
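/* Return values, also used by standard_80387_constant_opcode below:
   -1 not a recognized 80387 constant, 0 no special load available,
   1 fldz, 2 fld1, 3..7 the ext_80387_constants_table entries
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi), 8 -0.0 and 9 -1.0, the
   last two being split into fldz;fchs and fld1;fchs sequences.  */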
9363 standard_80387_constant_p (rtx x)
9365 machine_mode mode = GET_MODE (x);
9367 REAL_VALUE_TYPE r;
9369 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9370 return -1;
9372 if (x == CONST0_RTX (mode))
9373 return 1;
9374 if (x == CONST1_RTX (mode))
9375 return 2;
9377 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9379 /* For XFmode constants, try to find a special 80387 instruction when
9380 optimizing for size or on those CPUs that benefit from them. */
9381 if (mode == XFmode
9382 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9384 int i;
9386 if (! ext_80387_constants_init)
9387 init_ext_80387_constants ();
9389 for (i = 0; i < 5; i++)
9390 if (real_identical (&r, &ext_80387_constants_table[i]))
9391 return i + 3;
9394 /* Load of the constant -0.0 or -1.0 will be split into an
9395 fldz;fchs or fld1;fchs sequence. */
9396 if (real_isnegzero (&r))
9397 return 8;
9398 if (real_identical (&r, &dconstm1))
9399 return 9;
9401 return 0;
9404 /* Return the opcode of the special instruction to be used to load
9405 the constant X. */
9407 const char *
9408 standard_80387_constant_opcode (rtx x)
9410 switch (standard_80387_constant_p (x))
9412 case 1:
9413 return "fldz";
9414 case 2:
9415 return "fld1";
9416 case 3:
9417 return "fldlg2";
9418 case 4:
9419 return "fldln2";
9420 case 5:
9421 return "fldl2e";
9422 case 6:
9423 return "fldl2t";
9424 case 7:
9425 return "fldpi";
9426 case 8:
9427 case 9:
9428 return "#";
9429 default:
9430 gcc_unreachable ();
9434 /* Return the CONST_DOUBLE representing the 80387 constant that is
9435 loaded by the specified special instruction. The argument IDX
9436 matches the return value from standard_80387_constant_p. */
9439 standard_80387_constant_rtx (int idx)
9441 int i;
9443 if (! ext_80387_constants_init)
9444 init_ext_80387_constants ();
9446 switch (idx)
9448 case 3:
9449 case 4:
9450 case 5:
9451 case 6:
9452 case 7:
9453 i = idx - 3;
9454 break;
9456 default:
9457 gcc_unreachable ();
9460 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9461 XFmode);
9464 /* Return 1 if X is all 0s and 2 if X is all 1s
9465 in a supported SSE/AVX vector mode. */
9468 standard_sse_constant_p (rtx x)
9470 machine_mode mode = GET_MODE (x);
9472 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9473 return 1;
9474 if (vector_all_ones_operand (x, mode))
9475 switch (mode)
9477 case V16QImode:
9478 case V8HImode:
9479 case V4SImode:
9480 case V2DImode:
9481 if (TARGET_SSE2)
9482 return 2;
9483 case V32QImode:
9484 case V16HImode:
9485 case V8SImode:
9486 case V4DImode:
9487 if (TARGET_AVX2)
9488 return 2;
9489 case V64QImode:
9490 case V32HImode:
9491 case V16SImode:
9492 case V8DImode:
9493 if (TARGET_AVX512F)
9494 return 2;
9495 default:
9496 break;
9499 return 0;
9502 /* Return the opcode of the special instruction to be used to load
9503 the constant X. */
9505 const char *
9506 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9508 switch (standard_sse_constant_p (x))
9510 case 1:
9511 switch (get_attr_mode (insn))
9513 case MODE_XI:
9514 return "vpxord\t%g0, %g0, %g0";
9515 case MODE_V16SF:
9516 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9517 : "vpxord\t%g0, %g0, %g0";
9518 case MODE_V8DF:
9519 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9520 : "vpxorq\t%g0, %g0, %g0";
9521 case MODE_TI:
9522 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9523 : "%vpxor\t%0, %d0";
9524 case MODE_V2DF:
9525 return "%vxorpd\t%0, %d0";
9526 case MODE_V4SF:
9527 return "%vxorps\t%0, %d0";
9529 case MODE_OI:
9530 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9531 : "vpxor\t%x0, %x0, %x0";
9532 case MODE_V4DF:
9533 return "vxorpd\t%x0, %x0, %x0";
9534 case MODE_V8SF:
9535 return "vxorps\t%x0, %x0, %x0";
9537 default:
9538 break;
9541 case 2:
9542 if (TARGET_AVX512VL
9543 || get_attr_mode (insn) == MODE_XI
9544 || get_attr_mode (insn) == MODE_V8DF
9545 || get_attr_mode (insn) == MODE_V16SF)
9546 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9547 if (TARGET_AVX)
9548 return "vpcmpeqd\t%0, %0, %0";
9549 else
9550 return "pcmpeqd\t%0, %0";
9552 default:
9553 break;
9555 gcc_unreachable ();
9558 /* Returns true if OP contains a symbol reference. */
9560 bool
9561 symbolic_reference_mentioned_p (rtx op)
9563 const char *fmt;
9564 int i;
9566 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9567 return true;
9569 fmt = GET_RTX_FORMAT (GET_CODE (op));
9570 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9572 if (fmt[i] == 'E')
9574 int j;
9576 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9577 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9578 return true;
9581 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9582 return true;
9585 return false;
9588 /* Return true if it is appropriate to emit `ret' instructions in the
9589 body of a function. Do this only if the epilogue is simple, needing a
9590 couple of insns. Prior to reloading, we can't tell how many registers
9591 must be saved, so return false then. Return false if there is no frame
9592 marker to de-allocate. */
9594 bool
9595 ix86_can_use_return_insn_p (void)
9597 struct ix86_frame frame;
9599 if (! reload_completed || frame_pointer_needed)
9600 return 0;
9602 /* Don't allow more than 32k pop, since that's all we can do
9603 with one instruction. */
9604 if (crtl->args.pops_args && crtl->args.size >= 32768)
9605 return 0;
9607 ix86_compute_frame_layout (&frame);
9608 return (frame.stack_pointer_offset == UNITS_PER_WORD
9609 && (frame.nregs + frame.nsseregs) == 0);
9612 /* Value should be nonzero if functions must have frame pointers.
9613 Zero means the frame pointer need not be set up (and parms may
9614 be accessed via the stack pointer) in functions that seem suitable. */
9616 static bool
9617 ix86_frame_pointer_required (void)
9619 /* If we accessed previous frames, then the generated code expects
9620 to be able to access the saved ebp value in our frame. */
9621 if (cfun->machine->accesses_prev_frame)
9622 return true;
9624 /* Several x86 OSes need a frame pointer for other reasons,
9625 usually pertaining to setjmp. */
9626 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9627 return true;
9629 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9630 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9631 return true;
9633 /* With Win64 SEH, very large frames need a frame pointer, as the maximum stack
9634 allocation is 4GB. */
9635 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9636 return true;
9638 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9639 turns off the frame pointer by default. Turn it back on now if
9640 we've not got a leaf function. */
9641 if (TARGET_OMIT_LEAF_FRAME_POINTER
9642 && (!crtl->is_leaf
9643 || ix86_current_function_calls_tls_descriptor))
9644 return true;
9646 if (crtl->profile && !flag_fentry)
9647 return true;
9649 return false;
9652 /* Record that the current function accesses previous call frames. */
9654 void
9655 ix86_setup_frame_addresses (void)
9657 cfun->machine->accesses_prev_frame = 1;
9660 #ifndef USE_HIDDEN_LINKONCE
9661 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9662 # define USE_HIDDEN_LINKONCE 1
9663 # else
9664 # define USE_HIDDEN_LINKONCE 0
9665 # endif
9666 #endif
9668 static int pic_labels_used;
9670 /* Fills in the label name that should be used for a pc thunk for
9671 the given register. */
9673 static void
9674 get_pc_thunk_name (char name[32], unsigned int regno)
9676 gcc_assert (!TARGET_64BIT);
9678 if (USE_HIDDEN_LINKONCE)
9679 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9680 else
9681 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9685 /* This function generates code for -fpic that loads %ebx with
9686 the return address of the caller and then returns. */
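/* Each thunk emitted here has the form (AT&T syntax, %ebx variant):

     __x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   i.e. it copies its own return address, which is the address of the
   instruction following the call, into the requested register.  */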
9688 static void
9689 ix86_code_end (void)
9691 rtx xops[2];
9692 int regno;
9694 for (regno = AX_REG; regno <= SP_REG; regno++)
9696 char name[32];
9697 tree decl;
9699 if (!(pic_labels_used & (1 << regno)))
9700 continue;
9702 get_pc_thunk_name (name, regno);
9704 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9705 get_identifier (name),
9706 build_function_type_list (void_type_node, NULL_TREE));
9707 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9708 NULL_TREE, void_type_node);
9709 TREE_PUBLIC (decl) = 1;
9710 TREE_STATIC (decl) = 1;
9711 DECL_IGNORED_P (decl) = 1;
9713 #if TARGET_MACHO
9714 if (TARGET_MACHO)
9716 switch_to_section (darwin_sections[text_coal_section]);
9717 fputs ("\t.weak_definition\t", asm_out_file);
9718 assemble_name (asm_out_file, name);
9719 fputs ("\n\t.private_extern\t", asm_out_file);
9720 assemble_name (asm_out_file, name);
9721 putc ('\n', asm_out_file);
9722 ASM_OUTPUT_LABEL (asm_out_file, name);
9723 DECL_WEAK (decl) = 1;
9725 else
9726 #endif
9727 if (USE_HIDDEN_LINKONCE)
9729 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9731 targetm.asm_out.unique_section (decl, 0);
9732 switch_to_section (get_named_section (decl, NULL, 0));
9734 targetm.asm_out.globalize_label (asm_out_file, name);
9735 fputs ("\t.hidden\t", asm_out_file);
9736 assemble_name (asm_out_file, name);
9737 putc ('\n', asm_out_file);
9738 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9740 else
9742 switch_to_section (text_section);
9743 ASM_OUTPUT_LABEL (asm_out_file, name);
9746 DECL_INITIAL (decl) = make_node (BLOCK);
9747 current_function_decl = decl;
9748 init_function_start (decl);
9749 first_function_block_is_cold = false;
9750 /* Make sure unwind info is emitted for the thunk if needed. */
9751 final_start_function (emit_barrier (), asm_out_file, 1);
9753 /* Pad stack IP move with 4 instructions (two NOPs count
9754 as one instruction). */
9755 if (TARGET_PAD_SHORT_FUNCTION)
9757 int i = 8;
9759 while (i--)
9760 fputs ("\tnop\n", asm_out_file);
9763 xops[0] = gen_rtx_REG (Pmode, regno);
9764 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9765 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9766 output_asm_insn ("%!ret", NULL);
9767 final_end_function ();
9768 init_insn_lengths ();
9769 free_after_compilation (cfun);
9770 set_cfun (NULL);
9771 current_function_decl = NULL;
9774 if (flag_split_stack)
9775 file_end_indicate_split_stack ();
9778 /* Emit code for the SET_GOT patterns. */
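/* For the common -fpic case on ELF targets this emits, for %ebx as the
   destination, a sequence along the lines of:

     call	__x86.get_pc_thunk.bx
     addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   using the thunk emitted by ix86_code_end above.  The VxWorks RTP and
   non-PIC variants below load the GOT address differently.  */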
9780 const char *
9781 output_set_got (rtx dest, rtx label)
9783 rtx xops[3];
9785 xops[0] = dest;
9787 if (TARGET_VXWORKS_RTP && flag_pic)
9789 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9790 xops[2] = gen_rtx_MEM (Pmode,
9791 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9792 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9794 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9795 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9796 an unadorned address. */
9797 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9798 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9799 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9800 return "";
9803 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9805 if (!flag_pic)
9807 if (TARGET_MACHO)
9808 /* We don't need a pic base, we're not producing pic. */
9809 gcc_unreachable ();
9811 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9812 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9813 targetm.asm_out.internal_label (asm_out_file, "L",
9814 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9816 else
9818 char name[32];
9819 get_pc_thunk_name (name, REGNO (dest));
9820 pic_labels_used |= 1 << REGNO (dest);
9822 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9823 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9824 output_asm_insn ("%!call\t%X2", xops);
9826 #if TARGET_MACHO
9827 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9828 This is what will be referenced by the Mach-O PIC subsystem. */
9829 if (machopic_should_output_picbase_label () || !label)
9830 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9832 /* When we are restoring the pic base at the site of a nonlocal label,
9833 and we decided to emit the pic base above, we will still output a
9834 local label used for calculating the correction offset (even though
9835 the offset will be 0 in that case). */
9836 if (label)
9837 targetm.asm_out.internal_label (asm_out_file, "L",
9838 CODE_LABEL_NUMBER (label));
9839 #endif
9842 if (!TARGET_MACHO)
9843 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9845 return "";
9848 /* Generate a "push" pattern for input ARG. */
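/* The RTL produced below is a single set through a pre-decrement of the
   stack pointer, e.g. on a 64-bit target:

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))

   which matches the push insn patterns; the CFA and stack offsets
   tracked in cfun->machine->fs are updated here as a side effect.  */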
9850 static rtx
9851 gen_push (rtx arg)
9853 struct machine_function *m = cfun->machine;
9855 if (m->fs.cfa_reg == stack_pointer_rtx)
9856 m->fs.cfa_offset += UNITS_PER_WORD;
9857 m->fs.sp_offset += UNITS_PER_WORD;
9859 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9860 arg = gen_rtx_REG (word_mode, REGNO (arg));
9862 return gen_rtx_SET (VOIDmode,
9863 gen_rtx_MEM (word_mode,
9864 gen_rtx_PRE_DEC (Pmode,
9865 stack_pointer_rtx)),
9866 arg);
9869 /* Generate a "pop" pattern for input ARG. */
9871 static rtx
9872 gen_pop (rtx arg)
9874 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9875 arg = gen_rtx_REG (word_mode, REGNO (arg));
9877 return gen_rtx_SET (VOIDmode,
9878 arg,
9879 gen_rtx_MEM (word_mode,
9880 gen_rtx_POST_INC (Pmode,
9881 stack_pointer_rtx)));
9884 /* Return >= 0 if there is an unused call-clobbered register available
9885 for the entire function. */
9887 static unsigned int
9888 ix86_select_alt_pic_regnum (void)
9890 if (ix86_use_pseudo_pic_reg ())
9891 return INVALID_REGNUM;
9893 if (crtl->is_leaf
9894 && !crtl->profile
9895 && !ix86_current_function_calls_tls_descriptor)
9897 int i, drap;
9898 /* Can't use the same register for both PIC and DRAP. */
9899 if (crtl->drap_reg)
9900 drap = REGNO (crtl->drap_reg);
9901 else
9902 drap = -1;
9903 for (i = 2; i >= 0; --i)
9904 if (i != drap && !df_regs_ever_live_p (i))
9905 return i;
9908 return INVALID_REGNUM;
9911 /* Return TRUE if we need to save REGNO. */
9913 static bool
9914 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9916 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9917 && pic_offset_table_rtx)
9919 if (ix86_use_pseudo_pic_reg ())
9921 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
9922 _mcount in the prologue. */
9923 if (!TARGET_64BIT && flag_pic && crtl->profile)
9924 return true;
9926 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9927 || crtl->profile
9928 || crtl->calls_eh_return
9929 || crtl->uses_const_pool
9930 || cfun->has_nonlocal_label)
9931 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9934 if (crtl->calls_eh_return && maybe_eh_return)
9936 unsigned i;
9937 for (i = 0; ; i++)
9939 unsigned test = EH_RETURN_DATA_REGNO (i);
9940 if (test == INVALID_REGNUM)
9941 break;
9942 if (test == regno)
9943 return true;
9947 if (crtl->drap_reg
9948 && regno == REGNO (crtl->drap_reg)
9949 && !cfun->machine->no_drap_save_restore)
9950 return true;
9952 return (df_regs_ever_live_p (regno)
9953 && !call_used_regs[regno]
9954 && !fixed_regs[regno]
9955 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9958 /* Return the number of saved general purpose registers. */
9960 static int
9961 ix86_nsaved_regs (void)
9963 int nregs = 0;
9964 int regno;
9966 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9967 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9968 nregs ++;
9969 return nregs;
9972 /* Return the number of saved SSE registers. */
9974 static int
9975 ix86_nsaved_sseregs (void)
9977 int nregs = 0;
9978 int regno;
9980 if (!TARGET_64BIT_MS_ABI)
9981 return 0;
9982 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9983 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9984 nregs ++;
9985 return nregs;
9988 /* Given FROM and TO register numbers, say whether this elimination is
9989 allowed. If stack alignment is needed, we can only replace argument
9990 pointer with hard frame pointer, or replace frame pointer with stack
9991 pointer. Otherwise, frame pointer elimination is automatically
9992 handled and all other eliminations are valid. */
9994 static bool
9995 ix86_can_eliminate (const int from, const int to)
9997 if (stack_realign_fp)
9998 return ((from == ARG_POINTER_REGNUM
9999 && to == HARD_FRAME_POINTER_REGNUM)
10000 || (from == FRAME_POINTER_REGNUM
10001 && to == STACK_POINTER_REGNUM));
10002 else
10003 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10006 /* Return the offset between two registers, one to be eliminated, and the other
10007 its replacement, at the start of a routine. */
10009 HOST_WIDE_INT
10010 ix86_initial_elimination_offset (int from, int to)
10012 struct ix86_frame frame;
10013 ix86_compute_frame_layout (&frame);
10015 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10016 return frame.hard_frame_pointer_offset;
10017 else if (from == FRAME_POINTER_REGNUM
10018 && to == HARD_FRAME_POINTER_REGNUM)
10019 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10020 else
10022 gcc_assert (to == STACK_POINTER_REGNUM);
10024 if (from == ARG_POINTER_REGNUM)
10025 return frame.stack_pointer_offset;
10027 gcc_assert (from == FRAME_POINTER_REGNUM);
10028 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10032 /* In a dynamically-aligned function, we can't know the offset from
10033 stack pointer to frame pointer, so we must ensure that setjmp
10034 eliminates fp against the hard fp (%ebp) rather than trying to
10035 index from %esp up to the top of the frame across a gap that is
10036 of unknown (at compile-time) size. */
10037 static rtx
10038 ix86_builtin_setjmp_frame_value (void)
10040 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10043 /* When using -fsplit-stack, the allocation routines set a field in
10044 the TCB to the bottom of the stack plus this much space, measured
10045 in bytes. */
10047 #define SPLIT_STACK_AVAILABLE 256
10049 /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
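/* A summary of the layout computed below, going from the return address
   towards lower addresses:

     return address
     pushed static chain        (if ix86_static_chain_on_stack)
     saved frame pointer        (if frame_pointer_needed)
     GP register save area      (frame->nregs words)
     SSE register save area     (frame->nsseregs * 16 bytes, 16-byte aligned)
     va_arg register save area
     local variables            (get_frame_size ())
     outgoing arguments area    (if ACCUMULATE_OUTGOING_ARGS)

   The red zone, if usable, is subtracted from stack_pointer_offset at
   the end.  */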
10051 static void
10052 ix86_compute_frame_layout (struct ix86_frame *frame)
10054 unsigned HOST_WIDE_INT stack_alignment_needed;
10055 HOST_WIDE_INT offset;
10056 unsigned HOST_WIDE_INT preferred_alignment;
10057 HOST_WIDE_INT size = get_frame_size ();
10058 HOST_WIDE_INT to_allocate;
10060 frame->nregs = ix86_nsaved_regs ();
10061 frame->nsseregs = ix86_nsaved_sseregs ();
10063 /* The 64-bit MS ABI seems to require the stack alignment to always be 16, except
10064 for function prologues and leaf functions. */
10065 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10066 && (!crtl->is_leaf || cfun->calls_alloca != 0
10067 || ix86_current_function_calls_tls_descriptor))
10069 crtl->preferred_stack_boundary = 128;
10070 crtl->stack_alignment_needed = 128;
10072 /* preferred_stack_boundary is never updated for calls
10073 expanded from a TLS descriptor. Update it here. We don't update it in
10074 the expand stage because, according to the comments before
10075 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10076 away. */
10077 else if (ix86_current_function_calls_tls_descriptor
10078 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10080 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10081 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10082 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10085 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10086 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10088 gcc_assert (!size || stack_alignment_needed);
10089 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10090 gcc_assert (preferred_alignment <= stack_alignment_needed);
10092 /* For SEH we have to limit the amount of code movement into the prologue.
10093 At present we do this via a BLOCKAGE, at which point there's very little
10094 scheduling that can be done, which means that there's very little point
10095 in doing anything except PUSHs. */
10096 if (TARGET_SEH)
10097 cfun->machine->use_fast_prologue_epilogue = false;
10099 /* During the reload iteration the number of registers saved can change.
10100 Recompute the value as needed. Do not recompute when the number of registers
10101 didn't change, as reload makes multiple calls to the function and does not
10102 expect the decision to change within a single iteration. */
10103 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10104 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10106 int count = frame->nregs;
10107 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10109 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10111 /* The fast prologue uses move instead of push to save registers. This
10112 is significantly longer, but also executes faster as modern hardware
10113 can execute the moves in parallel, but can't do that for push/pop.
10115 Be careful about choosing what prologue to emit: when the function takes
10116 many instructions to execute, we may use the slow version, as well as when
10117 the function is known to be outside a hot spot (this is known only with
10118 feedback). Weight the size of the function by the number of registers
10119 to save, as it is cheap to use one or two push instructions but very
10120 slow to use many of them. */
10121 if (count)
10122 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10123 if (node->frequency < NODE_FREQUENCY_NORMAL
10124 || (flag_branch_probabilities
10125 && node->frequency < NODE_FREQUENCY_HOT))
10126 cfun->machine->use_fast_prologue_epilogue = false;
10127 else
10128 cfun->machine->use_fast_prologue_epilogue
10129 = !expensive_function_p (count);
10132 frame->save_regs_using_mov
10133 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10134 /* If static stack checking is enabled and done with probes,
10135 the registers need to be saved before allocating the frame. */
10136 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10138 /* Skip return address. */
10139 offset = UNITS_PER_WORD;
10141 /* Skip pushed static chain. */
10142 if (ix86_static_chain_on_stack)
10143 offset += UNITS_PER_WORD;
10145 /* Skip saved base pointer. */
10146 if (frame_pointer_needed)
10147 offset += UNITS_PER_WORD;
10148 frame->hfp_save_offset = offset;
10150 /* The traditional frame pointer location is at the top of the frame. */
10151 frame->hard_frame_pointer_offset = offset;
10153 /* Register save area */
10154 offset += frame->nregs * UNITS_PER_WORD;
10155 frame->reg_save_offset = offset;
10157 /* On SEH target, registers are pushed just before the frame pointer
10158 location. */
10159 if (TARGET_SEH)
10160 frame->hard_frame_pointer_offset = offset;
10162 /* Align and set SSE register save area. */
10163 if (frame->nsseregs)
10165 /* The only ABI that has saved SSE registers (Win64) also has a
10166 16-byte aligned default stack, and thus we don't need to be
10167 within the re-aligned local stack frame to save them. */
10168 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10169 offset = (offset + 16 - 1) & -16;
10170 offset += frame->nsseregs * 16;
10172 frame->sse_reg_save_offset = offset;
10174 /* The re-aligned stack starts here. Values before this point are not
10175 directly comparable with values below this point. In order to make
10176 sure that no value happens to be the same before and after, force
10177 the alignment computation below to add a non-zero value. */
10178 if (stack_realign_fp)
10179 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10181 /* Va-arg area */
10182 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10183 offset += frame->va_arg_size;
10185 /* Align start of frame for local function. */
10186 if (stack_realign_fp
10187 || offset != frame->sse_reg_save_offset
10188 || size != 0
10189 || !crtl->is_leaf
10190 || cfun->calls_alloca
10191 || ix86_current_function_calls_tls_descriptor)
10192 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10194 /* Frame pointer points here. */
10195 frame->frame_pointer_offset = offset;
10197 offset += size;
10199 /* Add the outgoing arguments area. It can be skipped if we eliminated
10200 all the function calls as dead code.
10201 Skipping is however impossible when the function calls alloca. The alloca
10202 expander assumes that the last crtl->outgoing_args_size bytes
10203 of the stack frame are unused. */
10204 if (ACCUMULATE_OUTGOING_ARGS
10205 && (!crtl->is_leaf || cfun->calls_alloca
10206 || ix86_current_function_calls_tls_descriptor))
10208 offset += crtl->outgoing_args_size;
10209 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10211 else
10212 frame->outgoing_arguments_size = 0;
10214 /* Align stack boundary. Only needed if we're calling another function
10215 or using alloca. */
10216 if (!crtl->is_leaf || cfun->calls_alloca
10217 || ix86_current_function_calls_tls_descriptor)
10218 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10220 /* We've reached end of stack frame. */
10221 frame->stack_pointer_offset = offset;
10223 /* Size prologue needs to allocate. */
10224 to_allocate = offset - frame->sse_reg_save_offset;
10226 if ((!to_allocate && frame->nregs <= 1)
10227 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10228 frame->save_regs_using_mov = false;
10230 if (ix86_using_red_zone ()
10231 && crtl->sp_is_unchanging
10232 && crtl->is_leaf
10233 && !ix86_current_function_calls_tls_descriptor)
10235 frame->red_zone_size = to_allocate;
10236 if (frame->save_regs_using_mov)
10237 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10238 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10239 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10241 else
10242 frame->red_zone_size = 0;
10243 frame->stack_pointer_offset -= frame->red_zone_size;
10245 /* The SEH frame pointer location is near the bottom of the frame.
10246 This is enforced by the fact that the difference between the
10247 stack pointer and the frame pointer is limited to 240 bytes in
10248 the unwind data structure. */
10249 if (TARGET_SEH)
10251 HOST_WIDE_INT diff;
10253 /* If we can leave the frame pointer where it is, do so. Also, returns
10254 the establisher frame for __builtin_frame_address (0). */
10255 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10256 if (diff <= SEH_MAX_FRAME_SIZE
10257 && (diff > 240 || (diff & 15) != 0)
10258 && !crtl->accesses_prior_frames)
10260 /* Ideally we'd determine what portion of the local stack frame
10261 (within the constraint of the lowest 240) is most heavily used.
10262 But without that complication, simply bias the frame pointer
10263 by 128 bytes so as to maximize the amount of the local stack
10264 frame that is addressable with 8-bit offsets. */
10265 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10270 /* This is semi-inlined memory_address_length, but simplified
10271 since we know that we're always dealing with reg+offset, and
10272 to avoid having to create and discard all that rtl. */
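/* The value returned is the number of extra address bytes (displacement
   plus SIB byte) needed to encode the operand, for example:

     (%rax)      -> 0
     (%rbp)      -> 1   rbp/r13 always need at least a disp8
     8(%rsp)     -> 2   disp8 plus the mandatory SIB byte for rsp/r12
     1024(%rax)  -> 4   disp32  */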
10274 static inline int
10275 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10277 int len = 4;
10279 if (offset == 0)
10281 /* EBP and R13 cannot be encoded without an offset. */
10282 len = (regno == BP_REG || regno == R13_REG);
10284 else if (IN_RANGE (offset, -128, 127))
10285 len = 1;
10287 /* ESP and R12 must be encoded with a SIB byte. */
10288 if (regno == SP_REG || regno == R12_REG)
10289 len++;
10291 return len;
10294 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10295 The valid base registers are taken from CFUN->MACHINE->FS. */
10297 static rtx
10298 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10300 const struct machine_function *m = cfun->machine;
10301 rtx base_reg = NULL;
10302 HOST_WIDE_INT base_offset = 0;
10304 if (m->use_fast_prologue_epilogue)
10306 /* Choose the base register most likely to allow the most scheduling
10307 opportunities. Generally FP is valid throughout the function,
10308 while DRAP must be reloaded within the epilogue. But choose either
10309 over the SP due to increased encoding size. */
10311 if (m->fs.fp_valid)
10313 base_reg = hard_frame_pointer_rtx;
10314 base_offset = m->fs.fp_offset - cfa_offset;
10316 else if (m->fs.drap_valid)
10318 base_reg = crtl->drap_reg;
10319 base_offset = 0 - cfa_offset;
10321 else if (m->fs.sp_valid)
10323 base_reg = stack_pointer_rtx;
10324 base_offset = m->fs.sp_offset - cfa_offset;
10327 else
10329 HOST_WIDE_INT toffset;
10330 int len = 16, tlen;
10332 /* Choose the base register with the smallest address encoding.
10333 With a tie, choose FP > DRAP > SP. */
10334 if (m->fs.sp_valid)
10336 base_reg = stack_pointer_rtx;
10337 base_offset = m->fs.sp_offset - cfa_offset;
10338 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10340 if (m->fs.drap_valid)
10342 toffset = 0 - cfa_offset;
10343 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10344 if (tlen <= len)
10346 base_reg = crtl->drap_reg;
10347 base_offset = toffset;
10348 len = tlen;
10351 if (m->fs.fp_valid)
10353 toffset = m->fs.fp_offset - cfa_offset;
10354 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10355 if (tlen <= len)
10357 base_reg = hard_frame_pointer_rtx;
10358 base_offset = toffset;
10359 len = tlen;
10363 gcc_assert (base_reg != NULL);
10365 return plus_constant (Pmode, base_reg, base_offset);
10368 /* Emit code to save registers in the prologue. */
10370 static void
10371 ix86_emit_save_regs (void)
10373 unsigned int regno;
10374 rtx insn;
10376 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10377 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10379 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10380 RTX_FRAME_RELATED_P (insn) = 1;
10384 /* Emit a single register save at CFA - CFA_OFFSET. */
10386 static void
10387 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10388 HOST_WIDE_INT cfa_offset)
10390 struct machine_function *m = cfun->machine;
10391 rtx reg = gen_rtx_REG (mode, regno);
10392 rtx mem, addr, base, insn;
10394 addr = choose_baseaddr (cfa_offset);
10395 mem = gen_frame_mem (mode, addr);
10397 /* For SSE saves, we need to indicate the 128-bit alignment. */
10398 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10400 insn = emit_move_insn (mem, reg);
10401 RTX_FRAME_RELATED_P (insn) = 1;
10403 base = addr;
10404 if (GET_CODE (base) == PLUS)
10405 base = XEXP (base, 0);
10406 gcc_checking_assert (REG_P (base));
10408 /* When saving registers into a re-aligned local stack frame, avoid
10409 any tricky guessing by dwarf2out. */
10410 if (m->fs.realigned)
10412 gcc_checking_assert (stack_realign_drap);
10414 if (regno == REGNO (crtl->drap_reg))
10416 /* A bit of a hack. We force the DRAP register to be saved in
10417 the re-aligned stack frame, which provides us with a copy
10418 of the CFA that will last past the prologue. Install it. */
10419 gcc_checking_assert (cfun->machine->fs.fp_valid);
10420 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10421 cfun->machine->fs.fp_offset - cfa_offset);
10422 mem = gen_rtx_MEM (mode, addr);
10423 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10425 else
10427 /* The frame pointer is a stable reference within the
10428 aligned frame. Use it. */
10429 gcc_checking_assert (cfun->machine->fs.fp_valid);
10430 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10431 cfun->machine->fs.fp_offset - cfa_offset);
10432 mem = gen_rtx_MEM (mode, addr);
10433 add_reg_note (insn, REG_CFA_EXPRESSION,
10434 gen_rtx_SET (VOIDmode, mem, reg));
10438 /* The memory may not be relative to the current CFA register,
10439 which means that we may need to generate a new pattern for
10440 use by the unwind info. */
10441 else if (base != m->fs.cfa_reg)
10443 addr = plus_constant (Pmode, m->fs.cfa_reg,
10444 m->fs.cfa_offset - cfa_offset);
10445 mem = gen_rtx_MEM (mode, addr);
10446 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10450 /* Emit code to save registers using MOV insns.
10451 First register is stored at CFA - CFA_OFFSET. */
10452 static void
10453 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10455 unsigned int regno;
10457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10458 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10460 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10461 cfa_offset -= UNITS_PER_WORD;
10465 /* Emit code to save SSE registers using MOV insns.
10466 First register is stored at CFA - CFA_OFFSET. */
10467 static void
10468 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10470 unsigned int regno;
10472 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10473 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10475 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10476 cfa_offset -= 16;
10480 static GTY(()) rtx queued_cfa_restores;
10482 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until the next stack
10483 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10484 Don't add the note if the previously saved value will be left untouched
10485 within the stack red zone till return, as unwinders can find the same value
10486 in the register and on the stack. */
10488 static void
10489 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10491 if (!crtl->shrink_wrapped
10492 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10493 return;
10495 if (insn)
10497 add_reg_note (insn, REG_CFA_RESTORE, reg);
10498 RTX_FRAME_RELATED_P (insn) = 1;
10500 else
10501 queued_cfa_restores
10502 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10505 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10507 static void
10508 ix86_add_queued_cfa_restore_notes (rtx insn)
10510 rtx last;
10511 if (!queued_cfa_restores)
10512 return;
10513 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10515 XEXP (last, 1) = REG_NOTES (insn);
10516 REG_NOTES (insn) = queued_cfa_restores;
10517 queued_cfa_restores = NULL_RTX;
10518 RTX_FRAME_RELATED_P (insn) = 1;
10521 /* Expand prologue or epilogue stack adjustment.
10522 The pattern exists to put a dependency on all ebp-based memory accesses.
10523 STYLE should be negative if instructions should be marked as frame related,
10524 zero if %r11 register is live and cannot be freely used and positive
10525 otherwise. */
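/* Usage sketch (illustrative only; it mirrors the call made later in
   ix86_expand_prologue to allocate the local frame):

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   i.e. a STYLE of -1 marks the insn as frame related, and SET_CFA is true
   only while the stack pointer is still the CFA register.  */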
10527 static void
10528 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10529 int style, bool set_cfa)
10531 struct machine_function *m = cfun->machine;
10532 rtx insn;
10533 bool add_frame_related_expr = false;
10535 if (Pmode == SImode)
10536 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10537 else if (x86_64_immediate_operand (offset, DImode))
10538 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10539 else
10541 rtx tmp;
10542 /* r11 is used by indirect sibcall return as well, set before the
10543 epilogue and used after the epilogue. */
10544 if (style)
10545 tmp = gen_rtx_REG (DImode, R11_REG);
10546 else
10548 gcc_assert (src != hard_frame_pointer_rtx
10549 && dest != hard_frame_pointer_rtx);
10550 tmp = hard_frame_pointer_rtx;
10552 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10553 if (style < 0)
10554 add_frame_related_expr = true;
10556 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10559 insn = emit_insn (insn);
10560 if (style >= 0)
10561 ix86_add_queued_cfa_restore_notes (insn);
10563 if (set_cfa)
10565 rtx r;
10567 gcc_assert (m->fs.cfa_reg == src);
10568 m->fs.cfa_offset += INTVAL (offset);
10569 m->fs.cfa_reg = dest;
10571 r = gen_rtx_PLUS (Pmode, src, offset);
10572 r = gen_rtx_SET (VOIDmode, dest, r);
10573 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10574 RTX_FRAME_RELATED_P (insn) = 1;
10576 else if (style < 0)
10578 RTX_FRAME_RELATED_P (insn) = 1;
10579 if (add_frame_related_expr)
10581 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10582 r = gen_rtx_SET (VOIDmode, dest, r);
10583 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10587 if (dest == stack_pointer_rtx)
10589 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10590 bool valid = m->fs.sp_valid;
10592 if (src == hard_frame_pointer_rtx)
10594 valid = m->fs.fp_valid;
10595 ooffset = m->fs.fp_offset;
10597 else if (src == crtl->drap_reg)
10599 valid = m->fs.drap_valid;
10600 ooffset = 0;
10602 else
10604 /* Else there are two possibilities: SP itself, which we set
10605 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10606 handled by hand along the eh_return path. */
10607 gcc_checking_assert (src == stack_pointer_rtx
10608 || offset == const0_rtx);
10611 m->fs.sp_offset = ooffset - INTVAL (offset);
10612 m->fs.sp_valid = valid;
10616 /* Find an available register to be used as the dynamic realign argument
10617 pointer register. Such a register will be written in the prologue and
10618 used at the beginning of the function body, so it must not be
10619 1. a parameter passing register.
10620 2. the GOT pointer.
10621 We reuse the static-chain register if it is available. Otherwise, we
10622 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10623 shorter encoding.
10625 Return: the regno of the chosen register. */
10627 static unsigned int
10628 find_drap_reg (void)
10630 tree decl = cfun->decl;
10632 if (TARGET_64BIT)
10634 /* Use R13 for a nested function or a function that needs a static chain.
10635 Since a function with tail calls may use any caller-saved
10636 register in the epilogue, DRAP must not use a caller-saved
10637 register in that case. */
10638 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10639 return R13_REG;
10641 return R10_REG;
10643 else
10645 /* Use DI for a nested function or a function that needs a static chain.
10646 Since a function with tail calls may use any caller-saved
10647 register in the epilogue, DRAP must not use a caller-saved
10648 register in that case. */
10649 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10650 return DI_REG;
10652 /* Reuse static chain register if it isn't used for parameter
10653 passing. */
10654 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10656 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10657 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10658 return CX_REG;
10660 return DI_REG;
10664 /* Return minimum incoming stack alignment. */
10666 static unsigned int
10667 ix86_minimum_incoming_stack_boundary (bool sibcall)
10669 unsigned int incoming_stack_boundary;
10671 /* Prefer the one specified at command line. */
10672 if (ix86_user_incoming_stack_boundary)
10673 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10674 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10675 when -mstackrealign is used, this is not a sibcall check, and the
10676 estimated stack alignment is 128 bits. */
10677 else if (!sibcall
10678 && !TARGET_64BIT
10679 && ix86_force_align_arg_pointer
10680 && crtl->stack_alignment_estimated == 128)
10681 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10682 else
10683 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10685 /* Incoming stack alignment can be changed on individual functions
10686 via force_align_arg_pointer attribute. We use the smallest
10687 incoming stack boundary. */
10688 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10689 && lookup_attribute (ix86_force_align_arg_pointer_string,
10690 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10691 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10693 /* The incoming stack frame has to be aligned at least at
10694 parm_stack_boundary. */
10695 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10696 incoming_stack_boundary = crtl->parm_stack_boundary;
10698 /* The stack at the entrance of main is aligned by the runtime. We use
10699 the smallest incoming stack boundary. */
10700 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10701 && DECL_NAME (current_function_decl)
10702 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10703 && DECL_FILE_SCOPE_P (current_function_decl))
10704 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10706 return incoming_stack_boundary;
10709 /* Update incoming stack boundary and estimated stack alignment. */
10711 static void
10712 ix86_update_stack_boundary (void)
10714 ix86_incoming_stack_boundary
10715 = ix86_minimum_incoming_stack_boundary (false);
10717 /* x86_64 varargs need 16-byte stack alignment for the register save
10718 area. */
10719 if (TARGET_64BIT
10720 && cfun->stdarg
10721 && crtl->stack_alignment_estimated < 128)
10722 crtl->stack_alignment_estimated = 128;
10725 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10726 needed or an rtx for DRAP otherwise. */
10728 static rtx
10729 ix86_get_drap_rtx (void)
10731 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10732 crtl->need_drap = true;
10734 if (stack_realign_drap)
10736 /* Assign DRAP to vDRAP and return vDRAP. */
10737 unsigned int regno = find_drap_reg ();
10738 rtx drap_vreg;
10739 rtx arg_ptr;
10740 rtx_insn *seq, *insn;
10742 arg_ptr = gen_rtx_REG (Pmode, regno);
10743 crtl->drap_reg = arg_ptr;
10745 start_sequence ();
10746 drap_vreg = copy_to_reg (arg_ptr);
10747 seq = get_insns ();
10748 end_sequence ();
10750 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10751 if (!optimize)
10753 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10754 RTX_FRAME_RELATED_P (insn) = 1;
10756 return drap_vreg;
10758 else
10759 return NULL;
10762 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10764 static rtx
10765 ix86_internal_arg_pointer (void)
10767 return virtual_incoming_args_rtx;
10770 struct scratch_reg {
10771 rtx reg;
10772 bool saved;
10775 /* Return a short-lived scratch register for use on function entry.
10776 In 32-bit mode, it is valid only after the registers are saved
10777 in the prologue. This register must be released by means of
10778 release_scratch_register_on_entry once it is dead. */
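/* Usage sketch (this is how ix86_adjust_stack_and_probe below uses it):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that may clobber sr.reg ...
     release_scratch_register_on_entry (&sr);

   When no suitable call-clobbered register is free, the pair pushes and
   later pops the chosen register, keeping cfun->machine->fs.sp_offset in
   sync.  */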
10780 static void
10781 get_scratch_register_on_entry (struct scratch_reg *sr)
10783 int regno;
10785 sr->saved = false;
10787 if (TARGET_64BIT)
10789 /* We always use R11 in 64-bit mode. */
10790 regno = R11_REG;
10792 else
10794 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10795 bool fastcall_p
10796 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10797 bool thiscall_p
10798 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10799 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10800 int regparm = ix86_function_regparm (fntype, decl);
10801 int drap_regno
10802 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10804 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10805 for the static chain register. */
10806 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10807 && drap_regno != AX_REG)
10808 regno = AX_REG;
10809 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10810 for the static chain register. */
10811 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10812 regno = AX_REG;
10813 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10814 regno = DX_REG;
10815 /* ecx is the static chain register. */
10816 else if (regparm < 3 && !fastcall_p && !thiscall_p
10817 && !static_chain_p
10818 && drap_regno != CX_REG)
10819 regno = CX_REG;
10820 else if (ix86_save_reg (BX_REG, true))
10821 regno = BX_REG;
10822 /* esi is the static chain register. */
10823 else if (!(regparm == 3 && static_chain_p)
10824 && ix86_save_reg (SI_REG, true))
10825 regno = SI_REG;
10826 else if (ix86_save_reg (DI_REG, true))
10827 regno = DI_REG;
10828 else
10830 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10831 sr->saved = true;
10835 sr->reg = gen_rtx_REG (Pmode, regno);
10836 if (sr->saved)
10838 rtx insn = emit_insn (gen_push (sr->reg));
10839 RTX_FRAME_RELATED_P (insn) = 1;
10843 /* Release a scratch register obtained from the preceding function. */
10845 static void
10846 release_scratch_register_on_entry (struct scratch_reg *sr)
10848 if (sr->saved)
10850 struct machine_function *m = cfun->machine;
10851 rtx x, insn = emit_insn (gen_pop (sr->reg));
10853 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10854 RTX_FRAME_RELATED_P (insn) = 1;
10855 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10856 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10857 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10858 m->fs.sp_offset -= UNITS_PER_WORD;
10862 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
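/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 (see
   defaults.h), PROBE_INTERVAL is 4096 bytes, i.e. one probe per page; a
   target that overrides the exponent changes this.  */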
10864 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10866 static void
10867 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10869 /* We skip the probe for the first interval + a small dope of 4 words and
10870 probe that many bytes past the specified size to maintain a protection
10871 area at the bottom of the stack. */
10872 const int dope = 4 * UNITS_PER_WORD;
10873 rtx size_rtx = GEN_INT (size), last;
10875 /* See if we have a constant small number of probes to generate. If so,
10876 that's the easy case. The run-time loop is made up of 11 insns in the
10877 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10878 for n # of intervals. */
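/* Worked example, assuming the common 4096-byte PROBE_INTERVAL: a 12 KiB
   allocation covers n = 3 intervals, so the unrolled form costs
   3 + 2*(3-1) = 7 insns versus the 11-insn run-time loop, which is why
   sizes up to 5 * PROBE_INTERVAL are handled inline below.  */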
10879 if (size <= 5 * PROBE_INTERVAL)
10881 HOST_WIDE_INT i, adjust;
10882 bool first_probe = true;
10884 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10885 values of N from 1 until it exceeds SIZE. If only one probe is
10886 needed, this will not generate any code. Then adjust and probe
10887 to PROBE_INTERVAL + SIZE. */
10888 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10890 if (first_probe)
10892 adjust = 2 * PROBE_INTERVAL + dope;
10893 first_probe = false;
10895 else
10896 adjust = PROBE_INTERVAL;
10898 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10899 plus_constant (Pmode, stack_pointer_rtx,
10900 -adjust)));
10901 emit_stack_probe (stack_pointer_rtx);
10904 if (first_probe)
10905 adjust = size + PROBE_INTERVAL + dope;
10906 else
10907 adjust = size + PROBE_INTERVAL - i;
10909 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10910 plus_constant (Pmode, stack_pointer_rtx,
10911 -adjust)));
10912 emit_stack_probe (stack_pointer_rtx);
10914 /* Adjust back to account for the additional first interval. */
10915 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10916 plus_constant (Pmode, stack_pointer_rtx,
10917 PROBE_INTERVAL + dope)));
10920 /* Otherwise, do the same as above, but in a loop. Note that we must be
10921 extra careful with variables wrapping around because we might be at
10922 the very top (or the very bottom) of the address space and we have
10923 to be able to handle this case properly; in particular, we use an
10924 equality test for the loop condition. */
10925 else
10927 HOST_WIDE_INT rounded_size;
10928 struct scratch_reg sr;
10930 get_scratch_register_on_entry (&sr);
10933 /* Step 1: round SIZE to the previous multiple of the interval. */
10935 rounded_size = size & -PROBE_INTERVAL;
10938 /* Step 2: compute initial and final value of the loop counter. */
10940 /* SP = SP_0 + PROBE_INTERVAL. */
10941 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10942 plus_constant (Pmode, stack_pointer_rtx,
10943 - (PROBE_INTERVAL + dope))));
10945 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10946 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10947 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10948 gen_rtx_PLUS (Pmode, sr.reg,
10949 stack_pointer_rtx)));
10952 /* Step 3: the loop
10954 while (SP != LAST_ADDR)
10956 SP = SP + PROBE_INTERVAL
10957 probe at SP
10960 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10961 values of N from 1 until it is equal to ROUNDED_SIZE. */
10963 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10966 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10967 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10969 if (size != rounded_size)
10971 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10972 plus_constant (Pmode, stack_pointer_rtx,
10973 rounded_size - size)));
10974 emit_stack_probe (stack_pointer_rtx);
10977 /* Adjust back to account for the additional first interval. */
10978 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10979 plus_constant (Pmode, stack_pointer_rtx,
10980 PROBE_INTERVAL + dope)));
10982 release_scratch_register_on_entry (&sr);
10985 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10987 /* Even if the stack pointer isn't the CFA register, we need to correctly
10988 describe the adjustments made to it, in particular differentiate the
10989 frame-related ones from the frame-unrelated ones. */
10990 if (size > 0)
10992 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10993 XVECEXP (expr, 0, 0)
10994 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10995 plus_constant (Pmode, stack_pointer_rtx, -size));
10996 XVECEXP (expr, 0, 1)
10997 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10998 plus_constant (Pmode, stack_pointer_rtx,
10999 PROBE_INTERVAL + dope + size));
11000 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11001 RTX_FRAME_RELATED_P (last) = 1;
11003 cfun->machine->fs.sp_offset += size;
11006 /* Make sure nothing is scheduled before we are done. */
11007 emit_insn (gen_blockage ());
11010 /* Adjust the stack pointer up to REG while probing it. */
11012 const char *
11013 output_adjust_stack_and_probe (rtx reg)
11015 static int labelno = 0;
11016 char loop_lab[32], end_lab[32];
11017 rtx xops[2];
11019 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11020 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11022 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11024 /* Jump to END_LAB if SP == LAST_ADDR. */
11025 xops[0] = stack_pointer_rtx;
11026 xops[1] = reg;
11027 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11028 fputs ("\tje\t", asm_out_file);
11029 assemble_name_raw (asm_out_file, end_lab);
11030 fputc ('\n', asm_out_file);
11032 /* SP = SP + PROBE_INTERVAL. */
11033 xops[1] = GEN_INT (PROBE_INTERVAL);
11034 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11036 /* Probe at SP. */
11037 xops[1] = const0_rtx;
11038 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11040 fprintf (asm_out_file, "\tjmp\t");
11041 assemble_name_raw (asm_out_file, loop_lab);
11042 fputc ('\n', asm_out_file);
11044 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11046 return "";
11049 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11050 inclusive. These are offsets from the current stack pointer. */
11052 static void
11053 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11055 /* See if we have a constant small number of probes to generate. If so,
11056 that's the easy case. The run-time loop is made up of 7 insns in the
11057 generic case while the compile-time loop is made up of n insns for n #
11058 of intervals. */
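/* For illustration, again assuming a 4096-byte interval: probing a 16 KiB
   range unrolls into four probes, at FIRST + 4096, FIRST + 8192,
   FIRST + 12288 and FIRST + 16384, versus the 7-insn run-time loop, hence
   the 7 * PROBE_INTERVAL cutoff below.  */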
11059 if (size <= 7 * PROBE_INTERVAL)
11061 HOST_WIDE_INT i;
11063 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11064 it exceeds SIZE. If only one probe is needed, this will not
11065 generate any code. Then probe at FIRST + SIZE. */
11066 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11067 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11068 -(first + i)));
11070 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11071 -(first + size)));
11074 /* Otherwise, do the same as above, but in a loop. Note that we must be
11075 extra careful with variables wrapping around because we might be at
11076 the very top (or the very bottom) of the address space and we have
11077 to be able to handle this case properly; in particular, we use an
11078 equality test for the loop condition. */
11079 else
11081 HOST_WIDE_INT rounded_size, last;
11082 struct scratch_reg sr;
11084 get_scratch_register_on_entry (&sr);
11087 /* Step 1: round SIZE to the previous multiple of the interval. */
11089 rounded_size = size & -PROBE_INTERVAL;
11092 /* Step 2: compute initial and final value of the loop counter. */
11094 /* TEST_OFFSET = FIRST. */
11095 emit_move_insn (sr.reg, GEN_INT (-first));
11097 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11098 last = first + rounded_size;
11101 /* Step 3: the loop
11103 while (TEST_ADDR != LAST_ADDR)
11105 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11106 probe at TEST_ADDR
11109 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11110 until it is equal to ROUNDED_SIZE. */
11112 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11115 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11116 that SIZE is equal to ROUNDED_SIZE. */
11118 if (size != rounded_size)
11119 emit_stack_probe (plus_constant (Pmode,
11120 gen_rtx_PLUS (Pmode,
11121 stack_pointer_rtx,
11122 sr.reg),
11123 rounded_size - size));
11125 release_scratch_register_on_entry (&sr);
11128 /* Make sure nothing is scheduled before we are done. */
11129 emit_insn (gen_blockage ());
11132 /* Probe a range of stack addresses from REG to END, inclusive. These are
11133 offsets from the current stack pointer. */
11135 const char *
11136 output_probe_stack_range (rtx reg, rtx end)
11138 static int labelno = 0;
11139 char loop_lab[32], end_lab[32];
11140 rtx xops[3];
11142 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11143 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11145 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11147 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11148 xops[0] = reg;
11149 xops[1] = end;
11150 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11151 fputs ("\tje\t", asm_out_file);
11152 assemble_name_raw (asm_out_file, end_lab);
11153 fputc ('\n', asm_out_file);
11155 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11156 xops[1] = GEN_INT (PROBE_INTERVAL);
11157 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11159 /* Probe at TEST_ADDR. */
11160 xops[0] = stack_pointer_rtx;
11161 xops[1] = reg;
11162 xops[2] = const0_rtx;
11163 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11165 fprintf (asm_out_file, "\tjmp\t");
11166 assemble_name_raw (asm_out_file, loop_lab);
11167 fputc ('\n', asm_out_file);
11169 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11171 return "";
11174 /* Finalize the stack_realign_needed flag, which guides the prologue and
11175 epilogue to be generated in the correct form. */
11176 static void
11177 ix86_finalize_stack_realign_flags (void)
11179 /* Check if stack realignment is really needed after reload, and
11180 store the result in cfun. */
11181 unsigned int incoming_stack_boundary
11182 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11183 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11184 unsigned int stack_realign = (incoming_stack_boundary
11185 < (crtl->is_leaf
11186 ? crtl->max_used_stack_slot_alignment
11187 : crtl->stack_alignment_needed));
11189 if (crtl->stack_realign_finalized)
11191 /* After stack_realign_needed is finalized, we can no longer
11192 change it. */
11193 gcc_assert (crtl->stack_realign_needed == stack_realign);
11194 return;
11197 /* If the only reason for frame_pointer_needed is that we conservatively
11198 assumed stack realignment might be needed, but in the end nothing that
11199 needed the stack alignment had been spilled, clear frame_pointer_needed
11200 and say we don't need stack realignment. */
11201 if (stack_realign
11202 && frame_pointer_needed
11203 && crtl->is_leaf
11204 && flag_omit_frame_pointer
11205 && crtl->sp_is_unchanging
11206 && !ix86_current_function_calls_tls_descriptor
11207 && !crtl->accesses_prior_frames
11208 && !cfun->calls_alloca
11209 && !crtl->calls_eh_return
11210 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11211 && !ix86_frame_pointer_required ()
11212 && get_frame_size () == 0
11213 && ix86_nsaved_sseregs () == 0
11214 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11216 HARD_REG_SET set_up_by_prologue, prologue_used;
11217 basic_block bb;
11219 CLEAR_HARD_REG_SET (prologue_used);
11220 CLEAR_HARD_REG_SET (set_up_by_prologue);
11221 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11222 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11223 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11224 HARD_FRAME_POINTER_REGNUM);
11225 FOR_EACH_BB_FN (bb, cfun)
11227 rtx_insn *insn;
11228 FOR_BB_INSNS (bb, insn)
11229 if (NONDEBUG_INSN_P (insn)
11230 && requires_stack_frame_p (insn, prologue_used,
11231 set_up_by_prologue))
11233 crtl->stack_realign_needed = stack_realign;
11234 crtl->stack_realign_finalized = true;
11235 return;
11239 /* If drap has been set, but it actually isn't live at the start
11240 of the function, there is no reason to set it up. */
11241 if (crtl->drap_reg)
11243 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11244 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11246 crtl->drap_reg = NULL_RTX;
11247 crtl->need_drap = false;
11250 else
11251 cfun->machine->no_drap_save_restore = true;
11253 frame_pointer_needed = false;
11254 stack_realign = false;
11255 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11256 crtl->stack_alignment_needed = incoming_stack_boundary;
11257 crtl->stack_alignment_estimated = incoming_stack_boundary;
11258 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11259 crtl->preferred_stack_boundary = incoming_stack_boundary;
11260 df_finish_pass (true);
11261 df_scan_alloc (NULL);
11262 df_scan_blocks ();
11263 df_compute_regs_ever_live (true);
11264 df_analyze ();
11267 crtl->stack_realign_needed = stack_realign;
11268 crtl->stack_realign_finalized = true;
11271 /* Delete the SET_GOT right after the entry block if it is allocated to REG. */
11273 static void
11274 ix86_elim_entry_set_got (rtx reg)
11276 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11277 rtx_insn *c_insn = BB_HEAD (bb);
11278 if (!NONDEBUG_INSN_P (c_insn))
11279 c_insn = next_nonnote_nondebug_insn (c_insn);
11280 if (c_insn && NONJUMP_INSN_P (c_insn))
11282 rtx pat = PATTERN (c_insn);
11283 if (GET_CODE (pat) == PARALLEL)
11285 rtx vec = XVECEXP (pat, 0, 0);
11286 if (GET_CODE (vec) == SET
11287 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11288 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11289 delete_insn (c_insn);
11294 /* Expand the prologue into a bunch of separate insns. */
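/* Rough sketch only (the exact sequence depends on the red zone, stack
   probing, DRAP and SEH handling below): a simple 64-bit frame with a
   frame pointer and one saved register typically expands to

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%rbx
	subq	$NN, %rsp

   where NN is the part of frame.stack_pointer_offset not already covered
   by the pushes.  */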
11296 void
11297 ix86_expand_prologue (void)
11299 struct machine_function *m = cfun->machine;
11300 rtx insn, t;
11301 struct ix86_frame frame;
11302 HOST_WIDE_INT allocate;
11303 bool int_registers_saved;
11304 bool sse_registers_saved;
11306 ix86_finalize_stack_realign_flags ();
11308 /* DRAP should not coexist with stack_realign_fp */
11309 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11311 memset (&m->fs, 0, sizeof (m->fs));
11313 /* Initialize CFA state for before the prologue. */
11314 m->fs.cfa_reg = stack_pointer_rtx;
11315 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11317 /* Track SP offset to the CFA. We continue tracking this after we've
11318 swapped the CFA register away from SP. In the case of re-alignment
11319 this is fudged; we're interested in offsets within the local frame. */
11320 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11321 m->fs.sp_valid = true;
11323 ix86_compute_frame_layout (&frame);
11325 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11327 /* We should have already generated an error for any use of
11328 ms_hook on a nested function. */
11329 gcc_checking_assert (!ix86_static_chain_on_stack);
11331 /* Check if profiling is active and we shall use the profiling-before-
11332 prologue variant. If so, sorry. */
11333 if (crtl->profile && flag_fentry != 0)
11334 sorry ("ms_hook_prologue attribute isn%'t compatible "
11335 "with -mfentry for 32-bit");
11337 /* In ix86_asm_output_function_label we emitted:
11338 8b ff movl.s %edi,%edi
11339 55 push %ebp
11340 8b ec movl.s %esp,%ebp
11342 This matches the hookable function prologue in Win32 API
11343 functions in Microsoft Windows XP Service Pack 2 and newer.
11344 Wine uses this to enable Windows apps to hook the Win32 API
11345 functions provided by Wine.
11347 What that means is that we've already set up the frame pointer. */
11349 if (frame_pointer_needed
11350 && !(crtl->drap_reg && crtl->stack_realign_needed))
11352 rtx push, mov;
11354 /* We've decided to use the frame pointer already set up.
11355 Describe this to the unwinder by pretending that both
11356 push and mov insns happen right here.
11358 Putting the unwind info here at the end of the ms_hook
11359 is done so that we can make absolutely certain we get
11360 the required byte sequence at the start of the function,
11361 rather than relying on an assembler that can produce
11362 the exact encoding required.
11364 However it does mean (in the unpatched case) that we have
11365 a 1 insn window where the asynchronous unwind info is
11366 incorrect. However, if we placed the unwind info at
11367 its correct location we would have incorrect unwind info
11368 in the patched case. Which is probably all moot since
11369 I don't expect Wine generates dwarf2 unwind info for the
11370 system libraries that use this feature. */
11372 insn = emit_insn (gen_blockage ());
11374 push = gen_push (hard_frame_pointer_rtx);
11375 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11376 stack_pointer_rtx);
11377 RTX_FRAME_RELATED_P (push) = 1;
11378 RTX_FRAME_RELATED_P (mov) = 1;
11380 RTX_FRAME_RELATED_P (insn) = 1;
11381 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11382 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11384 /* Note that gen_push incremented m->fs.cfa_offset, even
11385 though we didn't emit the push insn here. */
11386 m->fs.cfa_reg = hard_frame_pointer_rtx;
11387 m->fs.fp_offset = m->fs.cfa_offset;
11388 m->fs.fp_valid = true;
11390 else
11392 /* The frame pointer is not needed so pop %ebp again.
11393 This leaves us with a pristine state. */
11394 emit_insn (gen_pop (hard_frame_pointer_rtx));
11398 /* The first insn of a function that accepts its static chain on the
11399 stack is to push the register that would be filled in by a direct
11400 call. This insn will be skipped by the trampoline. */
11401 else if (ix86_static_chain_on_stack)
11403 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11404 emit_insn (gen_blockage ());
11406 /* We don't want to interpret this push insn as a register save,
11407 only as a stack adjustment. The real copy of the register as
11408 a save will be done later, if needed. */
11409 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11410 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11411 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11412 RTX_FRAME_RELATED_P (insn) = 1;
11415 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11416 DRAP is needed and stack realignment is really needed after reload. */
11417 if (stack_realign_drap)
11419 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11421 /* Only need to push the parameter pointer reg if it is not call-used. */
11422 if (!call_used_regs[REGNO (crtl->drap_reg)])
11424 /* Push arg pointer reg */
11425 insn = emit_insn (gen_push (crtl->drap_reg));
11426 RTX_FRAME_RELATED_P (insn) = 1;
11429 /* Grab the argument pointer. */
11430 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11431 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11432 RTX_FRAME_RELATED_P (insn) = 1;
11433 m->fs.cfa_reg = crtl->drap_reg;
11434 m->fs.cfa_offset = 0;
11436 /* Align the stack. */
11437 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11438 stack_pointer_rtx,
11439 GEN_INT (-align_bytes)));
11440 RTX_FRAME_RELATED_P (insn) = 1;
11442 /* Replicate the return address on the stack so that the return
11443 address can be reached via the (argp - 1) slot. This is needed
11444 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11445 expand_builtin_return_addr etc. */
11446 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11447 t = gen_frame_mem (word_mode, t);
11448 insn = emit_insn (gen_push (t));
11449 RTX_FRAME_RELATED_P (insn) = 1;
11451 /* For the purposes of frame and register save area addressing,
11452 we've started over with a new frame. */
11453 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11454 m->fs.realigned = true;
11457 int_registers_saved = (frame.nregs == 0);
11458 sse_registers_saved = (frame.nsseregs == 0);
11460 if (frame_pointer_needed && !m->fs.fp_valid)
11462 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11463 slower on all targets. Also sdb doesn't like it. */
11464 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11465 RTX_FRAME_RELATED_P (insn) = 1;
11467 /* Push registers now, before setting the frame pointer
11468 on SEH target. */
11469 if (!int_registers_saved
11470 && TARGET_SEH
11471 && !frame.save_regs_using_mov)
11473 ix86_emit_save_regs ();
11474 int_registers_saved = true;
11475 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11478 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11480 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11481 RTX_FRAME_RELATED_P (insn) = 1;
11483 if (m->fs.cfa_reg == stack_pointer_rtx)
11484 m->fs.cfa_reg = hard_frame_pointer_rtx;
11485 m->fs.fp_offset = m->fs.sp_offset;
11486 m->fs.fp_valid = true;
11490 if (!int_registers_saved)
11492 /* If saving registers via PUSH, do so now. */
11493 if (!frame.save_regs_using_mov)
11495 ix86_emit_save_regs ();
11496 int_registers_saved = true;
11497 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11500 /* When using the red zone we may start register saving before allocating
11501 the stack frame, saving one cycle of the prologue. However, avoid
11502 doing this if we have to probe the stack; at least on x86_64 the
11503 stack probe can turn into a call that clobbers a red zone location. */
11504 else if (ix86_using_red_zone ()
11505 && (! TARGET_STACK_PROBE
11506 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11508 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11509 int_registers_saved = true;
11513 if (stack_realign_fp)
11515 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11516 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11518 /* The computation of the size of the re-aligned stack frame means
11519 that we must allocate the size of the register save area before
11520 performing the actual alignment. Otherwise we cannot guarantee
11521 that there's enough storage above the realignment point. */
11522 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11523 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11524 GEN_INT (m->fs.sp_offset
11525 - frame.sse_reg_save_offset),
11526 -1, false);
11528 /* Align the stack. */
11529 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11530 stack_pointer_rtx,
11531 GEN_INT (-align_bytes)));
11533 /* For the purposes of register save area addressing, the stack
11534 pointer is no longer valid. As for the value of sp_offset,
11535 see ix86_compute_frame_layout, which we need to match in order
11536 to pass verification of stack_pointer_offset at the end. */
11537 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11538 m->fs.sp_valid = false;
11541 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11543 if (flag_stack_usage_info)
11545 /* We start to count from ARG_POINTER. */
11546 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11548 /* If it was realigned, take into account the fake frame. */
11549 if (stack_realign_drap)
11551 if (ix86_static_chain_on_stack)
11552 stack_size += UNITS_PER_WORD;
11554 if (!call_used_regs[REGNO (crtl->drap_reg)])
11555 stack_size += UNITS_PER_WORD;
11557 /* This over-estimates by 1 minimal-stack-alignment-unit but
11558 mitigates that by counting in the new return address slot. */
11559 current_function_dynamic_stack_size
11560 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11563 current_function_static_stack_size = stack_size;
11566 /* On SEH target with very large frame size, allocate an area to save
11567 SSE registers (as the very large allocation won't be described). */
11568 if (TARGET_SEH
11569 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11570 && !sse_registers_saved)
11572 HOST_WIDE_INT sse_size =
11573 frame.sse_reg_save_offset - frame.reg_save_offset;
11575 gcc_assert (int_registers_saved);
11577 /* No need to do stack checking as the area will be immediately
11578 written. */
11579 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11580 GEN_INT (-sse_size), -1,
11581 m->fs.cfa_reg == stack_pointer_rtx);
11582 allocate -= sse_size;
11583 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11584 sse_registers_saved = true;
11587 /* The stack has already been decremented by the instruction calling us
11588 so probe if the size is non-negative to preserve the protection area. */
11589 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11591 /* We expect the registers to be saved when probes are used. */
11592 gcc_assert (int_registers_saved);
11594 if (STACK_CHECK_MOVING_SP)
11596 if (!(crtl->is_leaf && !cfun->calls_alloca
11597 && allocate <= PROBE_INTERVAL))
11599 ix86_adjust_stack_and_probe (allocate);
11600 allocate = 0;
11603 else
11605 HOST_WIDE_INT size = allocate;
11607 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11608 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11610 if (TARGET_STACK_PROBE)
11612 if (crtl->is_leaf && !cfun->calls_alloca)
11614 if (size > PROBE_INTERVAL)
11615 ix86_emit_probe_stack_range (0, size);
11617 else
11618 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11620 else
11622 if (crtl->is_leaf && !cfun->calls_alloca)
11624 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11625 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11626 size - STACK_CHECK_PROTECT);
11628 else
11629 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11634 if (allocate == 0)
11636 else if (!ix86_target_stack_probe ()
11637 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11639 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11640 GEN_INT (-allocate), -1,
11641 m->fs.cfa_reg == stack_pointer_rtx);
11643 else
11645 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11646 rtx r10 = NULL;
11647 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11648 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11649 bool eax_live = ix86_eax_live_at_start_p ();
11650 bool r10_live = false;
11652 if (TARGET_64BIT)
11653 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11655 if (eax_live)
11657 insn = emit_insn (gen_push (eax));
11658 allocate -= UNITS_PER_WORD;
11659 /* Note that SEH directives need to continue tracking the stack
11660 pointer even after the frame pointer has been set up. */
11661 if (sp_is_cfa_reg || TARGET_SEH)
11663 if (sp_is_cfa_reg)
11664 m->fs.cfa_offset += UNITS_PER_WORD;
11665 RTX_FRAME_RELATED_P (insn) = 1;
11666 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11667 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11668 plus_constant (Pmode, stack_pointer_rtx,
11669 -UNITS_PER_WORD)));
11673 if (r10_live)
11675 r10 = gen_rtx_REG (Pmode, R10_REG);
11676 insn = emit_insn (gen_push (r10));
11677 allocate -= UNITS_PER_WORD;
11678 if (sp_is_cfa_reg || TARGET_SEH)
11680 if (sp_is_cfa_reg)
11681 m->fs.cfa_offset += UNITS_PER_WORD;
11682 RTX_FRAME_RELATED_P (insn) = 1;
11683 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11684 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11685 plus_constant (Pmode, stack_pointer_rtx,
11686 -UNITS_PER_WORD)));
11690 emit_move_insn (eax, GEN_INT (allocate));
11691 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11693 /* Use the fact that AX still contains ALLOCATE. */
11694 adjust_stack_insn = (Pmode == DImode
11695 ? gen_pro_epilogue_adjust_stack_di_sub
11696 : gen_pro_epilogue_adjust_stack_si_sub);
11698 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11699 stack_pointer_rtx, eax));
11701 if (sp_is_cfa_reg || TARGET_SEH)
11703 if (sp_is_cfa_reg)
11704 m->fs.cfa_offset += allocate;
11705 RTX_FRAME_RELATED_P (insn) = 1;
11706 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11707 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11708 plus_constant (Pmode, stack_pointer_rtx,
11709 -allocate)));
11711 m->fs.sp_offset += allocate;
11713 /* Use stack_pointer_rtx for relative addressing so that code
11714 works for realigned stack, too. */
11715 if (r10_live && eax_live)
11717 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11718 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11719 gen_frame_mem (word_mode, t));
11720 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11721 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11722 gen_frame_mem (word_mode, t));
11724 else if (eax_live || r10_live)
11726 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11727 emit_move_insn (gen_rtx_REG (word_mode,
11728 (eax_live ? AX_REG : R10_REG)),
11729 gen_frame_mem (word_mode, t));
11732 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11734 /* If we haven't already set up the frame pointer, do so now. */
11735 if (frame_pointer_needed && !m->fs.fp_valid)
11737 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11738 GEN_INT (frame.stack_pointer_offset
11739 - frame.hard_frame_pointer_offset));
11740 insn = emit_insn (insn);
11741 RTX_FRAME_RELATED_P (insn) = 1;
11742 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11744 if (m->fs.cfa_reg == stack_pointer_rtx)
11745 m->fs.cfa_reg = hard_frame_pointer_rtx;
11746 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11747 m->fs.fp_valid = true;
11750 if (!int_registers_saved)
11751 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11752 if (!sse_registers_saved)
11753 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11755 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11756 in the prologue. */
11757 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11759 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11760 insn = emit_insn (gen_set_got (pic));
11761 RTX_FRAME_RELATED_P (insn) = 1;
11762 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11763 emit_insn (gen_prologue_use (pic));
11764 /* Delete an already emitted SET_GOT if it exists and is allocated to
11765 REAL_PIC_OFFSET_TABLE_REGNUM. */
11766 ix86_elim_entry_set_got (pic);
11769 if (crtl->drap_reg && !crtl->stack_realign_needed)
11771 /* vDRAP is set up, but after reload it turns out stack realignment
11772 isn't necessary; here we emit prologue code to set up DRAP
11773 without the stack realignment adjustment. */
11774 t = choose_baseaddr (0);
11775 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11778 /* Prevent instructions from being scheduled into the register save push
11779 sequence when access to the red-zone area is done through the frame pointer.
11780 The offset between the frame pointer and the stack pointer is calculated
11781 relative to the value of the stack pointer at the end of the function
11782 prologue, and moving instructions that access the red-zone area via the
11783 frame pointer inside the push sequence violates this assumption. */
11784 if (frame_pointer_needed && frame.red_zone_size)
11785 emit_insn (gen_memory_blockage ());
11787 /* Emit cld instruction if stringops are used in the function. */
11788 if (TARGET_CLD && ix86_current_function_needs_cld)
11789 emit_insn (gen_cld ());
11791 /* SEH requires that the prologue end within 256 bytes of the start of
11792 the function. Prevent instruction schedules that would extend that.
11793 Further, prevent alloca modifications to the stack pointer from being
11794 combined with prologue modifications. */
11795 if (TARGET_SEH)
11796 emit_insn (gen_prologue_use (stack_pointer_rtx));
11799 /* Emit code to restore REG using a POP insn. */
11801 static void
11802 ix86_emit_restore_reg_using_pop (rtx reg)
11804 struct machine_function *m = cfun->machine;
11805 rtx insn = emit_insn (gen_pop (reg));
11807 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11808 m->fs.sp_offset -= UNITS_PER_WORD;
11810 if (m->fs.cfa_reg == crtl->drap_reg
11811 && REGNO (reg) == REGNO (crtl->drap_reg))
11813 /* Previously we'd represented the CFA as an expression
11814 like *(%ebp - 8). We've just popped that value from
11815 the stack, which means we need to reset the CFA to
11816 the drap register. This will remain until we restore
11817 the stack pointer. */
11818 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11819 RTX_FRAME_RELATED_P (insn) = 1;
11821 /* This means that the DRAP register is valid for addressing too. */
11822 m->fs.drap_valid = true;
11823 return;
11826 if (m->fs.cfa_reg == stack_pointer_rtx)
11828 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11829 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11830 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11831 RTX_FRAME_RELATED_P (insn) = 1;
11833 m->fs.cfa_offset -= UNITS_PER_WORD;
11836 /* When the frame pointer is the CFA, and we pop it, we are
11837 swapping back to the stack pointer as the CFA. This happens
11838 for stack frames that don't allocate other data, so we assume
11839 the stack pointer is now pointing at the return address, i.e.
11840 the function entry state, which makes the offset one word. */
11841 if (reg == hard_frame_pointer_rtx)
11843 m->fs.fp_valid = false;
11844 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11846 m->fs.cfa_reg = stack_pointer_rtx;
11847 m->fs.cfa_offset -= UNITS_PER_WORD;
11849 add_reg_note (insn, REG_CFA_DEF_CFA,
11850 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11851 GEN_INT (m->fs.cfa_offset)));
11852 RTX_FRAME_RELATED_P (insn) = 1;
11857 /* Emit code to restore saved registers using POP insns. */
11859 static void
11860 ix86_emit_restore_regs_using_pop (void)
11862 unsigned int regno;
11864 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11865 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11866 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11869 /* Emit code and notes for the LEAVE instruction. */
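/* Reminder: "leave" is architecturally equivalent to moving the frame
   pointer into the stack pointer and then popping the frame pointer
   (movl %ebp, %esp; popl %ebp, or the 64-bit forms), which is why the
   state below derives sp_offset from fp_offset and clears fp_valid.  */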
11871 static void
11872 ix86_emit_leave (void)
11874 struct machine_function *m = cfun->machine;
11875 rtx insn = emit_insn (ix86_gen_leave ());
11877 ix86_add_queued_cfa_restore_notes (insn);
11879 gcc_assert (m->fs.fp_valid);
11880 m->fs.sp_valid = true;
11881 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11882 m->fs.fp_valid = false;
11884 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11886 m->fs.cfa_reg = stack_pointer_rtx;
11887 m->fs.cfa_offset = m->fs.sp_offset;
11889 add_reg_note (insn, REG_CFA_DEF_CFA,
11890 plus_constant (Pmode, stack_pointer_rtx,
11891 m->fs.sp_offset));
11892 RTX_FRAME_RELATED_P (insn) = 1;
11894 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11895 m->fs.fp_offset);
11898 /* Emit code to restore saved registers using MOV insns.
11899 First register is restored from CFA - CFA_OFFSET. */
11900 static void
11901 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11902 bool maybe_eh_return)
11904 struct machine_function *m = cfun->machine;
11905 unsigned int regno;
11907 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11908 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11910 rtx reg = gen_rtx_REG (word_mode, regno);
11911 rtx insn, mem;
11913 mem = choose_baseaddr (cfa_offset);
11914 mem = gen_frame_mem (word_mode, mem);
11915 insn = emit_move_insn (reg, mem);
11917 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11919 /* Previously we'd represented the CFA as an expression
11920 like *(%ebp - 8). We've just popped that value from
11921 the stack, which means we need to reset the CFA to
11922 the drap register. This will remain until we restore
11923 the stack pointer. */
11924 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11925 RTX_FRAME_RELATED_P (insn) = 1;
11927 /* This means that the DRAP register is valid for addressing. */
11928 m->fs.drap_valid = true;
11930 else
11931 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11933 cfa_offset -= UNITS_PER_WORD;
11937 /* Emit code to restore saved SSE registers using MOV insns.
11938 First register is restored from CFA - CFA_OFFSET. */
11939 static void
11940 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11941 bool maybe_eh_return)
11943 unsigned int regno;
11945 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11946 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11948 rtx reg = gen_rtx_REG (V4SFmode, regno);
11949 rtx mem;
11951 mem = choose_baseaddr (cfa_offset);
11952 mem = gen_rtx_MEM (V4SFmode, mem);
11953 set_mem_align (mem, 128);
11954 emit_move_insn (reg, mem);
11956 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11958 cfa_offset -= 16;
11962 /* Restore function stack, frame, and registers. */
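/* Illustrative only: for a simple 64-bit frame with a frame pointer and
   one saved register, the matching epilogue typically expands to

	addq	$NN, %rsp
	popq	%rbx
	popq	%rbp	(or "leave", which also restores %rsp)
	ret

   STYLE is 0 for a sibcall epilogue (no return insn is emitted), 2 along
   the eh_return path, and nonzero (conventionally 1) otherwise.  */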
11964 void
11965 ix86_expand_epilogue (int style)
11967 struct machine_function *m = cfun->machine;
11968 struct machine_frame_state frame_state_save = m->fs;
11969 struct ix86_frame frame;
11970 bool restore_regs_via_mov;
11971 bool using_drap;
11973 ix86_finalize_stack_realign_flags ();
11974 ix86_compute_frame_layout (&frame);
11976 m->fs.sp_valid = (!frame_pointer_needed
11977 || (crtl->sp_is_unchanging
11978 && !stack_realign_fp));
11979 gcc_assert (!m->fs.sp_valid
11980 || m->fs.sp_offset == frame.stack_pointer_offset);
11982 /* The FP must be valid if the frame pointer is present. */
11983 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11984 gcc_assert (!m->fs.fp_valid
11985 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11987 /* We must have *some* valid pointer to the stack frame. */
11988 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11990 /* The DRAP is never valid at this point. */
11991 gcc_assert (!m->fs.drap_valid);
11993 /* See the comment about red zone and frame
11994 pointer usage in ix86_expand_prologue. */
11995 if (frame_pointer_needed && frame.red_zone_size)
11996 emit_insn (gen_memory_blockage ());
11998 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11999 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12001 /* Determine the CFA offset of the end of the red-zone. */
12002 m->fs.red_zone_offset = 0;
12003 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12005 /* The red-zone begins below the return address. */
12006 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
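/* For example, on x86-64 RED_ZONE_SIZE is 128 and UNITS_PER_WORD is 8,
   so red_zone_offset becomes 136: save slots whose CFA offset is at
   most 136 stay untouched in the red zone until the return, and
   ix86_add_cfa_restore_note skips REG_CFA_RESTORE notes for them.  */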
12008 /* When the register save area is in the aligned portion of
12009 the stack, determine the maximum runtime displacement that
12010 matches up with the aligned frame. */
12011 if (stack_realign_drap)
12012 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12013 + UNITS_PER_WORD);
12016 /* Special care must be taken for the normal return case of a function
12017 using eh_return: the eax and edx registers are marked as saved, but
12018 not restored along this path. Adjust the save location to match. */
12019 if (crtl->calls_eh_return && style != 2)
12020 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12022 /* EH_RETURN requires the use of moves to function properly. */
12023 if (crtl->calls_eh_return)
12024 restore_regs_via_mov = true;
12025 /* SEH requires the use of pops to identify the epilogue. */
12026 else if (TARGET_SEH)
12027 restore_regs_via_mov = false;
12028 /* If we're only restoring one register and sp is not valid then
12029 use a move instruction to restore the register, since it's
12030 less work than reloading sp and popping the register. */
12031 else if (!m->fs.sp_valid && frame.nregs <= 1)
12032 restore_regs_via_mov = true;
12033 else if (TARGET_EPILOGUE_USING_MOVE
12034 && cfun->machine->use_fast_prologue_epilogue
12035 && (frame.nregs > 1
12036 || m->fs.sp_offset != frame.reg_save_offset))
12037 restore_regs_via_mov = true;
12038 else if (frame_pointer_needed
12039 && !frame.nregs
12040 && m->fs.sp_offset != frame.reg_save_offset)
12041 restore_regs_via_mov = true;
12042 else if (frame_pointer_needed
12043 && TARGET_USE_LEAVE
12044 && cfun->machine->use_fast_prologue_epilogue
12045 && frame.nregs == 1)
12046 restore_regs_via_mov = true;
12047 else
12048 restore_regs_via_mov = false;
12050 if (restore_regs_via_mov || frame.nsseregs)
12052 /* Ensure that the entire register save area is addressable via
12053 the stack pointer, if we will restore via sp. */
12054 if (TARGET_64BIT
12055 && m->fs.sp_offset > 0x7fffffff
12056 && !(m->fs.fp_valid || m->fs.drap_valid)
12057 && (frame.nsseregs + frame.nregs) != 0)
12059 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12060 GEN_INT (m->fs.sp_offset
12061 - frame.sse_reg_save_offset),
12062 style,
12063 m->fs.cfa_reg == stack_pointer_rtx);
12067 /* If there are any SSE registers to restore, then we have to do it
12068 via moves, since there's obviously no pop for SSE regs. */
12069 if (frame.nsseregs)
12070 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12071 style == 2);
12073 if (restore_regs_via_mov)
12075 rtx t;
12077 if (frame.nregs)
12078 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12080 /* eh_return epilogues need %ecx added to the stack pointer. */
12081 if (style == 2)
12083 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12085 /* Stack align doesn't work with eh_return. */
12086 gcc_assert (!stack_realign_drap);
12087 /* Neither do regparm nested functions. */
12088 gcc_assert (!ix86_static_chain_on_stack);
12090 if (frame_pointer_needed)
12092 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12093 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12094 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12096 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12097 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12099 /* Note that we use SA as a temporary CFA, as the return
12100 address is at the proper place relative to it. We
12101 pretend this happens at the FP restore insn because
12102 prior to this insn the FP would be stored at the wrong
12103 offset relative to SA, and after this insn we have no
12104 other reasonable register to use for the CFA. We don't
12105 bother resetting the CFA to the SP for the duration of
12106 the return insn. */
12107 add_reg_note (insn, REG_CFA_DEF_CFA,
12108 plus_constant (Pmode, sa, UNITS_PER_WORD));
12109 ix86_add_queued_cfa_restore_notes (insn);
12110 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12111 RTX_FRAME_RELATED_P (insn) = 1;
12113 m->fs.cfa_reg = sa;
12114 m->fs.cfa_offset = UNITS_PER_WORD;
12115 m->fs.fp_valid = false;
12117 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12118 const0_rtx, style, false);
12120 else
12122 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12123 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12124 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12125 ix86_add_queued_cfa_restore_notes (insn);
12127 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12128 if (m->fs.cfa_offset != UNITS_PER_WORD)
12130 m->fs.cfa_offset = UNITS_PER_WORD;
12131 add_reg_note (insn, REG_CFA_DEF_CFA,
12132 plus_constant (Pmode, stack_pointer_rtx,
12133 UNITS_PER_WORD));
12134 RTX_FRAME_RELATED_P (insn) = 1;
12137 m->fs.sp_offset = UNITS_PER_WORD;
12138 m->fs.sp_valid = true;
12141 else
12143 /* SEH requires that the function end with (1) a stack adjustment
12144 if necessary, (2) a sequence of pops, and (3) a return or
12145 jump instruction. Prevent insns from the function body from
12146 being scheduled into this sequence. */
12147 if (TARGET_SEH)
12149 /* Prevent a catch region from being adjacent to the standard
12150 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12151 several other flags that would be interesting to test are
12152 set up yet. */
12153 if (flag_non_call_exceptions)
12154 emit_insn (gen_nops (const1_rtx));
12155 else
12156 emit_insn (gen_blockage ());
12159 /* The first step is to deallocate the stack frame so that we can
12160 pop the registers. Also do this on SEH targets for very large
12161 frames, as the emitted instructions aren't allowed by the ABI in
12162 epilogues. */
12163 if (!m->fs.sp_valid
12164 || (TARGET_SEH
12165 && (m->fs.sp_offset - frame.reg_save_offset
12166 >= SEH_MAX_FRAME_SIZE)))
12168 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12169 GEN_INT (m->fs.fp_offset
12170 - frame.reg_save_offset),
12171 style, false);
12173 else if (m->fs.sp_offset != frame.reg_save_offset)
12175 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12176 GEN_INT (m->fs.sp_offset
12177 - frame.reg_save_offset),
12178 style,
12179 m->fs.cfa_reg == stack_pointer_rtx);
12182 ix86_emit_restore_regs_using_pop ();
12185 /* If we used a frame pointer and haven't already got rid of it,
12186 then do so now. */
12187 if (m->fs.fp_valid)
12189 /* If the stack pointer is valid and pointing at the frame
12190 pointer store address, then we only need a pop. */
12191 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12192 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12193 /* Leave results in shorter dependency chains on CPUs that are
12194 able to grok it fast. */
12195 else if (TARGET_USE_LEAVE
12196 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12197 || !cfun->machine->use_fast_prologue_epilogue)
12198 ix86_emit_leave ();
12199 else
12201 pro_epilogue_adjust_stack (stack_pointer_rtx,
12202 hard_frame_pointer_rtx,
12203 const0_rtx, style, !using_drap);
12204 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12208 if (using_drap)
12210 int param_ptr_offset = UNITS_PER_WORD;
12211 rtx insn;
12213 gcc_assert (stack_realign_drap);
12215 if (ix86_static_chain_on_stack)
12216 param_ptr_offset += UNITS_PER_WORD;
12217 if (!call_used_regs[REGNO (crtl->drap_reg)])
12218 param_ptr_offset += UNITS_PER_WORD;
12220 insn = emit_insn (gen_rtx_SET
12221 (VOIDmode, stack_pointer_rtx,
12222 gen_rtx_PLUS (Pmode,
12223 crtl->drap_reg,
12224 GEN_INT (-param_ptr_offset))));
12225 m->fs.cfa_reg = stack_pointer_rtx;
12226 m->fs.cfa_offset = param_ptr_offset;
12227 m->fs.sp_offset = param_ptr_offset;
12228 m->fs.realigned = false;
12230 add_reg_note (insn, REG_CFA_DEF_CFA,
12231 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12232 GEN_INT (param_ptr_offset)));
12233 RTX_FRAME_RELATED_P (insn) = 1;
12235 if (!call_used_regs[REGNO (crtl->drap_reg)])
12236 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12239 /* At this point the stack pointer must be valid, and we must have
12240 restored all of the registers. We may not have deallocated the
12241 entire stack frame. We've delayed this until now because it may
12242 be possible to merge the local stack deallocation with the
12243 deallocation forced by ix86_static_chain_on_stack. */
12244 gcc_assert (m->fs.sp_valid);
12245 gcc_assert (!m->fs.fp_valid);
12246 gcc_assert (!m->fs.realigned);
12247 if (m->fs.sp_offset != UNITS_PER_WORD)
12249 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12250 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12251 style, true);
12253 else
12254 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12256 /* Sibcall epilogues don't want a return instruction. */
12257 if (style == 0)
12259 m->fs = frame_state_save;
12260 return;
12263 if (crtl->args.pops_args && crtl->args.size)
12265 rtx popc = GEN_INT (crtl->args.pops_args);
12267 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12268 address, do an explicit add, and jump indirectly to the caller. */
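      /* Illustrative sketch (not emitted verbatim) of the resulting
         sequence for a hypothetical 70000-byte pop on ia32:

             popl  %ecx             # return address -> %ecx
             addl  $70000, %esp     # pop the arguments explicitly
             jmp   *%ecx            # indirect return to the caller  */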
12270 if (crtl->args.pops_args >= 65536)
12272 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12273 rtx insn;
12275 /* There is no "pascal" calling convention in any 64bit ABI. */
12276 gcc_assert (!TARGET_64BIT);
12278 insn = emit_insn (gen_pop (ecx));
12279 m->fs.cfa_offset -= UNITS_PER_WORD;
12280 m->fs.sp_offset -= UNITS_PER_WORD;
12282 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12283 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12284 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12285 add_reg_note (insn, REG_CFA_REGISTER,
12286 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12287 RTX_FRAME_RELATED_P (insn) = 1;
12289 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12290 popc, -1, true);
12291 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12293 else
12294 emit_jump_insn (gen_simple_return_pop_internal (popc));
12296 else
12297 emit_jump_insn (gen_simple_return_internal ());
12299 /* Restore the state back to the state from the prologue,
12300 so that it's correct for the next epilogue. */
12301 m->fs = frame_state_save;
12304 /* Reset from the function's potential modifications. */
12306 static void
12307 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12309 if (pic_offset_table_rtx
12310 && !ix86_use_pseudo_pic_reg ())
12311 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12312 #if TARGET_MACHO
12313 /* Mach-O doesn't support labels at the end of objects, so if
12314 it looks like we might want one, insert a NOP. */
12316 rtx_insn *insn = get_last_insn ();
12317 rtx_insn *deleted_debug_label = NULL;
12318 while (insn
12319 && NOTE_P (insn)
12320 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12322 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12323 notes; instead set their CODE_LABEL_NUMBER to -1,
12324 otherwise there would be code generation differences
12325 between -g and -g0. */
12326 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12327 deleted_debug_label = insn;
12328 insn = PREV_INSN (insn);
12330 if (insn
12331 && (LABEL_P (insn)
12332 || (NOTE_P (insn)
12333 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12334 fputs ("\tnop\n", file);
12335 else if (deleted_debug_label)
12336 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12337 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12338 CODE_LABEL_NUMBER (insn) = -1;
12340 #endif
12344 /* Return a scratch register to use in the split stack prologue. The
12345 split stack prologue is used for -fsplit-stack. It consists of the
12346 first instructions in the function, even before the regular prologue.
12347 The scratch register can be any caller-saved register which is not
12348 used for parameters or for the static chain. */
12350 static unsigned int
12351 split_stack_prologue_scratch_regno (void)
12353 if (TARGET_64BIT)
12354 return R11_REG;
12355 else
12357 bool is_fastcall, is_thiscall;
12358 int regparm;
12360 is_fastcall = (lookup_attribute ("fastcall",
12361 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12362 != NULL);
12363 is_thiscall = (lookup_attribute ("thiscall",
12364 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12365 != NULL);
12366 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12368 if (is_fastcall)
12370 if (DECL_STATIC_CHAIN (cfun->decl))
12372 sorry ("-fsplit-stack does not support fastcall with "
12373 "nested function");
12374 return INVALID_REGNUM;
12376 return AX_REG;
12378 else if (is_thiscall)
12380 if (!DECL_STATIC_CHAIN (cfun->decl))
12381 return DX_REG;
12382 return AX_REG;
12384 else if (regparm < 3)
12386 if (!DECL_STATIC_CHAIN (cfun->decl))
12387 return CX_REG;
12388 else
12390 if (regparm >= 2)
12392 sorry ("-fsplit-stack does not support 2 register "
12393 "parameters for a nested function");
12394 return INVALID_REGNUM;
12396 return DX_REG;
12399 else
12401 /* FIXME: We could make this work by pushing a register
12402 around the addition and comparison. */
12403 sorry ("-fsplit-stack does not support 3 register parameters");
12404 return INVALID_REGNUM;
12409 /* A SYMBOL_REF for the function which allocates new stack space for
12410 -fsplit-stack. */
12412 static GTY(()) rtx split_stack_fn;
12414 /* A SYMBOL_REF for the more-stack function used with the large
12415 code model. */
12417 static GTY(()) rtx split_stack_fn_large;
12419 /* Handle -fsplit-stack. These are the first instructions in the
12420 function, even before the regular prologue. */
12422 void
12423 ix86_expand_split_stack_prologue (void)
12425 struct ix86_frame frame;
12426 HOST_WIDE_INT allocate;
12427 unsigned HOST_WIDE_INT args_size;
12428 rtx_code_label *label;
12429 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12430 rtx scratch_reg = NULL_RTX;
12431 rtx_code_label *varargs_label = NULL;
12432 rtx fn;
12434 gcc_assert (flag_split_stack && reload_completed);
12436 ix86_finalize_stack_realign_flags ();
12437 ix86_compute_frame_layout (&frame);
12438 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12440 /* This is the label we will branch to if we have enough stack
12441 space. We expect the basic block reordering pass to reverse this
12442 branch if optimizing, so that we branch in the unlikely case. */
12443 label = gen_label_rtx ();
12445 /* We need to compare the stack pointer minus the frame size with
12446 the stack boundary in the TCB. The stack boundary always gives
12447 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12448 can compare directly. Otherwise we need to do an addition. */
12450 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12451 UNSPEC_STACK_CHECK);
12452 limit = gen_rtx_CONST (Pmode, limit);
12453 limit = gen_rtx_MEM (Pmode, limit);
12454 if (allocate < SPLIT_STACK_AVAILABLE)
12455 current = stack_pointer_rtx;
12456 else
12458 unsigned int scratch_regno;
12459 rtx offset;
12461 /* We need a scratch register to hold the stack pointer minus
12462 the required frame size. Since this is the very start of the
12463 function, the scratch register can be any caller-saved
12464 register which is not used for parameters. */
12465 offset = GEN_INT (- allocate);
12466 scratch_regno = split_stack_prologue_scratch_regno ();
12467 if (scratch_regno == INVALID_REGNUM)
12468 return;
12469 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12470 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12472 /* We don't use ix86_gen_add3 in this case because it will
12473 want to split to lea, but when not optimizing the insn
12474 will not be split after this point. */
12475 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12476 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12477 offset)));
12479 else
12481 emit_move_insn (scratch_reg, offset);
12482 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12483 stack_pointer_rtx));
12485 current = scratch_reg;
12488 ix86_expand_branch (GEU, current, limit, label);
12489 jump_insn = get_last_insn ();
12490 JUMP_LABEL (jump_insn) = label;
12492 /* Mark the jump as very likely to be taken. */
12493 add_int_reg_note (jump_insn, REG_BR_PROB,
12494 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
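  /* A worked example of the note above, assuming the usual
     REG_BR_PROB_BASE of 10000: the recorded probability is
     10000 - 10000 / 100 = 9900, i.e. the branch to the label is
     predicted taken about 99% of the time, keeping the __morestack
     call on the cold path.  */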
12496 if (split_stack_fn == NULL_RTX)
12498 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12499 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12501 fn = split_stack_fn;
12503 /* Get more stack space. We pass in the desired stack space and the
12504 size of the arguments to copy to the new stack. In 32-bit mode
12505 we push the parameters; __morestack will return on a new stack
12506 anyhow. In 64-bit mode we pass the parameters in r10 and
12507 r11. */
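  /* A rough sketch of the two call shapes set up below; ALLOCATE and
     ARGS_SIZE stand for the constants computed in this function, and
     the exact instructions depend on the target:

         64-bit:   movq  $ALLOCATE, %r10
                   movq  $ARGS_SIZE, %r11
                   call  __morestack

         32-bit:   pushl $ARGS_SIZE
                   pushl $ALLOCATE
                   call  __morestack  */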
12508 allocate_rtx = GEN_INT (allocate);
12509 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12510 call_fusage = NULL_RTX;
12511 if (TARGET_64BIT)
12513 rtx reg10, reg11;
12515 reg10 = gen_rtx_REG (Pmode, R10_REG);
12516 reg11 = gen_rtx_REG (Pmode, R11_REG);
12518 /* If this function uses a static chain, it will be in %r10.
12519 Preserve it across the call to __morestack. */
12520 if (DECL_STATIC_CHAIN (cfun->decl))
12522 rtx rax;
12524 rax = gen_rtx_REG (word_mode, AX_REG);
12525 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12526 use_reg (&call_fusage, rax);
12529 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12530 && !TARGET_PECOFF)
12532 HOST_WIDE_INT argval;
12534 gcc_assert (Pmode == DImode);
12535 /* When using the large model we need to load the address
12536 into a register, and we've run out of registers. So we
12537 switch to a different calling convention, and we call a
12538 different function: __morestack_large_model. We pass the
12539 argument size in the upper 32 bits of r10 and pass the
12540 frame size in the lower 32 bits. */
12541 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12542 gcc_assert ((args_size & 0xffffffff) == args_size);
12544 if (split_stack_fn_large == NULL_RTX)
12546 split_stack_fn_large =
12547 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12548 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12550 if (ix86_cmodel == CM_LARGE_PIC)
12552 rtx_code_label *label;
12553 rtx x;
12555 label = gen_label_rtx ();
12556 emit_label (label);
12557 LABEL_PRESERVE_P (label) = 1;
12558 emit_insn (gen_set_rip_rex64 (reg10, label));
12559 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12560 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12561 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12562 UNSPEC_GOT);
12563 x = gen_rtx_CONST (Pmode, x);
12564 emit_move_insn (reg11, x);
12565 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12566 x = gen_const_mem (Pmode, x);
12567 emit_move_insn (reg11, x);
12569 else
12570 emit_move_insn (reg11, split_stack_fn_large);
12572 fn = reg11;
12574 argval = ((args_size << 16) << 16) + allocate;
12575 emit_move_insn (reg10, GEN_INT (argval));
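          /* Worked example of the packing above (hypothetical numbers):
             for a frame of 0x120 bytes with 8 bytes of stack arguments,
             argval = (8 << 32) + 0x120, so %r10 carries the argument
             size in its upper half and the frame size in its lower
             half, which is what __morestack_large_model expects.  */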
12577 else
12579 emit_move_insn (reg10, allocate_rtx);
12580 emit_move_insn (reg11, GEN_INT (args_size));
12581 use_reg (&call_fusage, reg11);
12584 use_reg (&call_fusage, reg10);
12586 else
12588 emit_insn (gen_push (GEN_INT (args_size)));
12589 emit_insn (gen_push (allocate_rtx));
12591 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12592 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12593 NULL_RTX, false);
12594 add_function_usage_to (call_insn, call_fusage);
12596 /* In order to make call/return prediction work right, we now need
12597 to execute a return instruction. See
12598 libgcc/config/i386/morestack.S for the details on how this works.
12600 For flow purposes gcc must not see this as a return
12601 instruction--we need control flow to continue at the subsequent
12602 label. Therefore, we use an unspec. */
12603 gcc_assert (crtl->args.pops_args < 65536);
12604 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12606 /* If we are in 64-bit mode and this function uses a static chain,
12607 we saved %r10 in %rax before calling __morestack. */
12608 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12609 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12610 gen_rtx_REG (word_mode, AX_REG));
12612 /* If this function calls va_start, we need to store a pointer to
12613 the arguments on the old stack, because they may not have been
12614 all copied to the new stack. At this point the old stack can be
12615 found at the frame pointer value used by __morestack, because
12616 __morestack has set that up before calling back to us. Here we
12617 store that pointer in a scratch register, and in
12618 ix86_expand_prologue we store the scratch register in a stack
12619 slot. */
12620 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12622 unsigned int scratch_regno;
12623 rtx frame_reg;
12624 int words;
12626 scratch_regno = split_stack_prologue_scratch_regno ();
12627 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12628 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12630 /* 64-bit:
12631 fp -> old fp value
12632 return address within this function
12633 return address of caller of this function
12634 stack arguments
12635 So we add three words to get to the stack arguments.
12637 32-bit:
12638 fp -> old fp value
12639 return address within this function
12640 first argument to __morestack
12641 second argument to __morestack
12642 return address of caller of this function
12643 stack arguments
12644 So we add five words to get to the stack arguments. */
12646 words = TARGET_64BIT ? 3 : 5;
12647 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12648 gen_rtx_PLUS (Pmode, frame_reg,
12649 GEN_INT (words * UNITS_PER_WORD))));
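      /* Concretely (a sketch using the usual UNITS_PER_WORD of 8 or 4):
         the scratch register ends up holding %rbp + 24 on 64-bit
         targets and %ebp + 20 on ia32, i.e. the address of the first
         stack argument in the layouts drawn above.  */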
12651 varargs_label = gen_label_rtx ();
12652 emit_jump_insn (gen_jump (varargs_label));
12653 JUMP_LABEL (get_last_insn ()) = varargs_label;
12655 emit_barrier ();
12658 emit_label (label);
12659 LABEL_NUSES (label) = 1;
12661 /* If this function calls va_start, we now have to set the scratch
12662 register for the case where we do not call __morestack. In this
12663 case we need to set it based on the stack pointer. */
12664 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12666 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12667 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12668 GEN_INT (UNITS_PER_WORD))));
12670 emit_label (varargs_label);
12671 LABEL_NUSES (varargs_label) = 1;
12675 /* We may have to tell the dataflow pass that the split stack prologue
12676 is initializing a scratch register. */
12678 static void
12679 ix86_live_on_entry (bitmap regs)
12681 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12683 gcc_assert (flag_split_stack);
12684 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12688 /* Extract the parts of an RTL expression that is a valid memory address
12689 for an instruction. Return 0 if the structure of the address is
12690 grossly off. Return -1 if the address contains ASHIFT, so it is not
12691 strictly valid, but still used for computing the length of the lea instruction. */
12693 int
12694 ix86_decompose_address (rtx addr, struct ix86_address *out)
12696 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12697 rtx base_reg, index_reg;
12698 HOST_WIDE_INT scale = 1;
12699 rtx scale_rtx = NULL_RTX;
12700 rtx tmp;
12701 int retval = 1;
12702 enum ix86_address_seg seg = SEG_DEFAULT;
12704 /* Allow zero-extended SImode addresses;
12705 they will be emitted with the addr32 prefix. */
12706 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12708 if (GET_CODE (addr) == ZERO_EXTEND
12709 && GET_MODE (XEXP (addr, 0)) == SImode)
12711 addr = XEXP (addr, 0);
12712 if (CONST_INT_P (addr))
12713 return 0;
12715 else if (GET_CODE (addr) == AND
12716 && const_32bit_mask (XEXP (addr, 1), DImode))
12718 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12719 if (addr == NULL_RTX)
12720 return 0;
12722 if (CONST_INT_P (addr))
12723 return 0;
12727 /* Allow SImode subregs of DImode addresses;
12728 they will be emitted with the addr32 prefix. */
12729 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12731 if (GET_CODE (addr) == SUBREG
12732 && GET_MODE (SUBREG_REG (addr)) == DImode)
12734 addr = SUBREG_REG (addr);
12735 if (CONST_INT_P (addr))
12736 return 0;
12740 if (REG_P (addr))
12741 base = addr;
12742 else if (GET_CODE (addr) == SUBREG)
12744 if (REG_P (SUBREG_REG (addr)))
12745 base = addr;
12746 else
12747 return 0;
12749 else if (GET_CODE (addr) == PLUS)
12751 rtx addends[4], op;
12752 int n = 0, i;
12754 op = addr;
12757 if (n >= 4)
12758 return 0;
12759 addends[n++] = XEXP (op, 1);
12760 op = XEXP (op, 0);
12762 while (GET_CODE (op) == PLUS);
12763 if (n >= 4)
12764 return 0;
12765 addends[n] = op;
12767 for (i = n; i >= 0; --i)
12769 op = addends[i];
12770 switch (GET_CODE (op))
12772 case MULT:
12773 if (index)
12774 return 0;
12775 index = XEXP (op, 0);
12776 scale_rtx = XEXP (op, 1);
12777 break;
12779 case ASHIFT:
12780 if (index)
12781 return 0;
12782 index = XEXP (op, 0);
12783 tmp = XEXP (op, 1);
12784 if (!CONST_INT_P (tmp))
12785 return 0;
12786 scale = INTVAL (tmp);
12787 if ((unsigned HOST_WIDE_INT) scale > 3)
12788 return 0;
12789 scale = 1 << scale;
12790 break;
12792 case ZERO_EXTEND:
12793 op = XEXP (op, 0);
12794 if (GET_CODE (op) != UNSPEC)
12795 return 0;
12796 /* FALLTHRU */
12798 case UNSPEC:
12799 if (XINT (op, 1) == UNSPEC_TP
12800 && TARGET_TLS_DIRECT_SEG_REFS
12801 && seg == SEG_DEFAULT)
12802 seg = DEFAULT_TLS_SEG_REG;
12803 else
12804 return 0;
12805 break;
12807 case SUBREG:
12808 if (!REG_P (SUBREG_REG (op)))
12809 return 0;
12810 /* FALLTHRU */
12812 case REG:
12813 if (!base)
12814 base = op;
12815 else if (!index)
12816 index = op;
12817 else
12818 return 0;
12819 break;
12821 case CONST:
12822 case CONST_INT:
12823 case SYMBOL_REF:
12824 case LABEL_REF:
12825 if (disp)
12826 return 0;
12827 disp = op;
12828 break;
12830 default:
12831 return 0;
12835 else if (GET_CODE (addr) == MULT)
12837 index = XEXP (addr, 0); /* index*scale */
12838 scale_rtx = XEXP (addr, 1);
12840 else if (GET_CODE (addr) == ASHIFT)
12842 /* We're called for lea too, which implements ashift on occasion. */
12843 index = XEXP (addr, 0);
12844 tmp = XEXP (addr, 1);
12845 if (!CONST_INT_P (tmp))
12846 return 0;
12847 scale = INTVAL (tmp);
12848 if ((unsigned HOST_WIDE_INT) scale > 3)
12849 return 0;
12850 scale = 1 << scale;
12851 retval = -1;
12853 else
12854 disp = addr; /* displacement */
12856 if (index)
12858 if (REG_P (index))
12860 else if (GET_CODE (index) == SUBREG
12861 && REG_P (SUBREG_REG (index)))
12863 else
12864 return 0;
12867 /* Extract the integral value of scale. */
12868 if (scale_rtx)
12870 if (!CONST_INT_P (scale_rtx))
12871 return 0;
12872 scale = INTVAL (scale_rtx);
12875 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12876 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12878 /* Avoid useless 0 displacement. */
12879 if (disp == const0_rtx && (base || index))
12880 disp = NULL_RTX;
12882 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12883 if (base_reg && index_reg && scale == 1
12884 && (index_reg == arg_pointer_rtx
12885 || index_reg == frame_pointer_rtx
12886 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12888 std::swap (base, index);
12889 std::swap (base_reg, index_reg);
12892 /* Special case: %ebp cannot be encoded as a base without a displacement.
12893 Similarly %r13. */
12894 if (!disp
12895 && base_reg
12896 && (base_reg == hard_frame_pointer_rtx
12897 || base_reg == frame_pointer_rtx
12898 || base_reg == arg_pointer_rtx
12899 || (REG_P (base_reg)
12900 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12901 || REGNO (base_reg) == R13_REG))))
12902 disp = const0_rtx;
12904 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12905 Avoid this by transforming to [%esi+0].
12906 Reload calls address legitimization without cfun defined, so we need
12907 to test cfun for being non-NULL. */
12908 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12909 && base_reg && !index_reg && !disp
12910 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12911 disp = const0_rtx;
12913 /* Special case: encode reg+reg instead of reg*2. */
12914 if (!base && index && scale == 2)
12915 base = index, base_reg = index_reg, scale = 1;
12917 /* Special case: scaling cannot be encoded without base or displacement. */
12918 if (!base && !disp && index && scale != 1)
12919 disp = const0_rtx;
12921 out->base = base;
12922 out->index = index;
12923 out->disp = disp;
12924 out->scale = scale;
12925 out->seg = seg;
12927 return retval;
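/* Illustrative examples of the decomposition above (a sketch; the RTL
   is abbreviated):

     (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
        -> base %ebx, index %ecx, scale 4, no displacement
     (plus (reg %esi) (const_int 8))
        -> base %esi, displacement 8
     (ashift (reg %edx) (const_int 2))
        -> index %edx, scale 4, and the function returns -1 because
           ASHIFT is only accepted when computing lea lengths.  */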
12930 /* Return the cost of the memory address X.
12931 For i386, it is better to use a complex address than let gcc copy
12932 the address into a reg and make a new pseudo. But not if the address
12933 requires two regs - that would mean more pseudos with longer
12934 lifetimes. */
12935 static int
12936 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12938 struct ix86_address parts;
12939 int cost = 1;
12940 int ok = ix86_decompose_address (x, &parts);
12942 gcc_assert (ok);
12944 if (parts.base && GET_CODE (parts.base) == SUBREG)
12945 parts.base = SUBREG_REG (parts.base);
12946 if (parts.index && GET_CODE (parts.index) == SUBREG)
12947 parts.index = SUBREG_REG (parts.index);
12949 /* Attempt to minimize the number of registers in the address by increasing
12950 the address cost for each register used. We don't increase the address cost
12951 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
12952 is not invariant itself, it most likely means that the base or index is not
12953 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
12954 which is not profitable for x86. */
12955 if (parts.base
12956 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12957 && (current_pass->type == GIMPLE_PASS
12958 || !pic_offset_table_rtx
12959 || !REG_P (parts.base)
12960 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12961 cost++;
12963 if (parts.index
12964 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12965 && (current_pass->type == GIMPLE_PASS
12966 || !pic_offset_table_rtx
12967 || !REG_P (parts.index)
12968 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12969 cost++;
12971 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12972 since its predecode logic can't detect the length of such instructions
12973 and they degenerate to vector decode. Increase the cost of such
12974 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12975 to split such addresses or even refuse them entirely.
12977 The following addressing modes are affected:
12978 [base+scale*index]
12979 [scale*index+disp]
12980 [base+index]
12982 The first and last case may be avoidable by explicitly coding a zero
12983 displacement in the memory address, but I don't have an AMD K6 machine
12984 handy to check this theory. */
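  /* For instance (an illustrative case, not an exhaustive list), an
     address such as 4(%eax,%ecx,2) escapes the penalty because it has a
     displacement, while (%eax,%ecx,2) matches the [base+scale*index]
     form above and receives the extra cost.  */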
12986 if (TARGET_K6
12987 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12988 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12989 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12990 cost += 10;
12992 return cost;
12995 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12996 this is used to form addresses to local data when -fPIC is in
12997 use. */
12999 static bool
13000 darwin_local_data_pic (rtx disp)
13002 return (GET_CODE (disp) == UNSPEC
13003 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13006 /* Determine if a given RTX is a valid constant. We already know this
13007 satisfies CONSTANT_P. */
13009 static bool
13010 ix86_legitimate_constant_p (machine_mode, rtx x)
13012 /* Pointer bounds constants are not valid. */
13013 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13014 return false;
13016 switch (GET_CODE (x))
13018 case CONST:
13019 x = XEXP (x, 0);
13021 if (GET_CODE (x) == PLUS)
13023 if (!CONST_INT_P (XEXP (x, 1)))
13024 return false;
13025 x = XEXP (x, 0);
13028 if (TARGET_MACHO && darwin_local_data_pic (x))
13029 return true;
13031 /* Only some unspecs are valid as "constants". */
13032 if (GET_CODE (x) == UNSPEC)
13033 switch (XINT (x, 1))
13035 case UNSPEC_GOT:
13036 case UNSPEC_GOTOFF:
13037 case UNSPEC_PLTOFF:
13038 return TARGET_64BIT;
13039 case UNSPEC_TPOFF:
13040 case UNSPEC_NTPOFF:
13041 x = XVECEXP (x, 0, 0);
13042 return (GET_CODE (x) == SYMBOL_REF
13043 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13044 case UNSPEC_DTPOFF:
13045 x = XVECEXP (x, 0, 0);
13046 return (GET_CODE (x) == SYMBOL_REF
13047 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13048 default:
13049 return false;
13052 /* We must have drilled down to a symbol. */
13053 if (GET_CODE (x) == LABEL_REF)
13054 return true;
13055 if (GET_CODE (x) != SYMBOL_REF)
13056 return false;
13057 /* FALLTHRU */
13059 case SYMBOL_REF:
13060 /* TLS symbols are never valid. */
13061 if (SYMBOL_REF_TLS_MODEL (x))
13062 return false;
13064 /* DLLIMPORT symbols are never valid. */
13065 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13066 && SYMBOL_REF_DLLIMPORT_P (x))
13067 return false;
13069 #if TARGET_MACHO
13070 /* mdynamic-no-pic */
13071 if (MACHO_DYNAMIC_NO_PIC_P)
13072 return machopic_symbol_defined_p (x);
13073 #endif
13074 break;
13076 case CONST_DOUBLE:
13077 if (GET_MODE (x) == TImode
13078 && x != CONST0_RTX (TImode)
13079 && !TARGET_64BIT)
13080 return false;
13081 break;
13083 case CONST_VECTOR:
13084 if (!standard_sse_constant_p (x))
13085 return false;
13087 default:
13088 break;
13091 /* Otherwise we handle everything else in the move patterns. */
13092 return true;
13095 /* Determine if it's legal to put X into the constant pool. This
13096 is not possible for the address of thread-local symbols, which
13097 is checked above. */
13099 static bool
13100 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13102 /* We can always put integral constants and vectors in memory. */
13103 switch (GET_CODE (x))
13105 case CONST_INT:
13106 case CONST_DOUBLE:
13107 case CONST_VECTOR:
13108 return false;
13110 default:
13111 break;
13113 return !ix86_legitimate_constant_p (mode, x);
13116 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
13117 otherwise zero. */
13119 static bool
13120 is_imported_p (rtx x)
13122 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13123 || GET_CODE (x) != SYMBOL_REF)
13124 return false;
13126 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13130 /* Nonzero if the constant value X is a legitimate general operand
13131 when generating PIC code. It is given that flag_pic is on and
13132 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13134 bool
13135 legitimate_pic_operand_p (rtx x)
13137 rtx inner;
13139 switch (GET_CODE (x))
13141 case CONST:
13142 inner = XEXP (x, 0);
13143 if (GET_CODE (inner) == PLUS
13144 && CONST_INT_P (XEXP (inner, 1)))
13145 inner = XEXP (inner, 0);
13147 /* Only some unspecs are valid as "constants". */
13148 if (GET_CODE (inner) == UNSPEC)
13149 switch (XINT (inner, 1))
13151 case UNSPEC_GOT:
13152 case UNSPEC_GOTOFF:
13153 case UNSPEC_PLTOFF:
13154 return TARGET_64BIT;
13155 case UNSPEC_TPOFF:
13156 x = XVECEXP (inner, 0, 0);
13157 return (GET_CODE (x) == SYMBOL_REF
13158 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13159 case UNSPEC_MACHOPIC_OFFSET:
13160 return legitimate_pic_address_disp_p (x);
13161 default:
13162 return false;
13164 /* FALLTHRU */
13166 case SYMBOL_REF:
13167 case LABEL_REF:
13168 return legitimate_pic_address_disp_p (x);
13170 default:
13171 return true;
13175 /* Determine if a given CONST RTX is a valid memory displacement
13176 in PIC mode. */
13178 bool
13179 legitimate_pic_address_disp_p (rtx disp)
13181 bool saw_plus;
13183 /* In 64bit mode we can allow direct addresses of symbols and labels
13184 when they are not dynamic symbols. */
13185 if (TARGET_64BIT)
13187 rtx op0 = disp, op1;
13189 switch (GET_CODE (disp))
13191 case LABEL_REF:
13192 return true;
13194 case CONST:
13195 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13196 break;
13197 op0 = XEXP (XEXP (disp, 0), 0);
13198 op1 = XEXP (XEXP (disp, 0), 1);
13199 if (!CONST_INT_P (op1)
13200 || INTVAL (op1) >= 16*1024*1024
13201 || INTVAL (op1) < -16*1024*1024)
13202 break;
13203 if (GET_CODE (op0) == LABEL_REF)
13204 return true;
13205 if (GET_CODE (op0) == CONST
13206 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13207 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13208 return true;
13209 if (GET_CODE (op0) == UNSPEC
13210 && XINT (op0, 1) == UNSPEC_PCREL)
13211 return true;
13212 if (GET_CODE (op0) != SYMBOL_REF)
13213 break;
13214 /* FALLTHRU */
13216 case SYMBOL_REF:
13217 /* TLS references should always be enclosed in UNSPEC.
13218 The dllimported symbol always needs to be resolved. */
13219 if (SYMBOL_REF_TLS_MODEL (op0)
13220 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13221 return false;
13223 if (TARGET_PECOFF)
13225 if (is_imported_p (op0))
13226 return true;
13228 if (SYMBOL_REF_FAR_ADDR_P (op0)
13229 || !SYMBOL_REF_LOCAL_P (op0))
13230 break;
13232 /* Function symbols need to be resolved only for
13233 the large model.
13234 For the small model we don't need to resolve anything
13235 here. */
13236 if ((ix86_cmodel != CM_LARGE_PIC
13237 && SYMBOL_REF_FUNCTION_P (op0))
13238 || ix86_cmodel == CM_SMALL_PIC)
13239 return true;
13240 /* Non-external symbols don't need to be resolved for
13241 the large and medium models. */
13242 if ((ix86_cmodel == CM_LARGE_PIC
13243 || ix86_cmodel == CM_MEDIUM_PIC)
13244 && !SYMBOL_REF_EXTERNAL_P (op0))
13245 return true;
13247 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13248 && (SYMBOL_REF_LOCAL_P (op0)
13249 || (HAVE_LD_PIE_COPYRELOC
13250 && flag_pie
13251 && !SYMBOL_REF_WEAK (op0)
13252 && !SYMBOL_REF_FUNCTION_P (op0)))
13253 && ix86_cmodel != CM_LARGE_PIC)
13254 return true;
13255 break;
13257 default:
13258 break;
13261 if (GET_CODE (disp) != CONST)
13262 return false;
13263 disp = XEXP (disp, 0);
13265 if (TARGET_64BIT)
13267 /* It is not safe to allow PLUS expressions here; that would limit the
13268 allowed distance from GOT tables. We should not need these anyway. */
13269 if (GET_CODE (disp) != UNSPEC
13270 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13271 && XINT (disp, 1) != UNSPEC_GOTOFF
13272 && XINT (disp, 1) != UNSPEC_PCREL
13273 && XINT (disp, 1) != UNSPEC_PLTOFF))
13274 return false;
13276 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13277 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13278 return false;
13279 return true;
13282 saw_plus = false;
13283 if (GET_CODE (disp) == PLUS)
13285 if (!CONST_INT_P (XEXP (disp, 1)))
13286 return false;
13287 disp = XEXP (disp, 0);
13288 saw_plus = true;
13291 if (TARGET_MACHO && darwin_local_data_pic (disp))
13292 return true;
13294 if (GET_CODE (disp) != UNSPEC)
13295 return false;
13297 switch (XINT (disp, 1))
13299 case UNSPEC_GOT:
13300 if (saw_plus)
13301 return false;
13302 /* We need to check for both symbols and labels because VxWorks loads
13303 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13304 details. */
13305 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13306 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13307 case UNSPEC_GOTOFF:
13308 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13309 While the ABI also specifies a 32bit relocation, we don't produce it in
13310 the small PIC model at all. */
13311 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13312 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13313 && !TARGET_64BIT)
13314 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13315 return false;
13316 case UNSPEC_GOTTPOFF:
13317 case UNSPEC_GOTNTPOFF:
13318 case UNSPEC_INDNTPOFF:
13319 if (saw_plus)
13320 return false;
13321 disp = XVECEXP (disp, 0, 0);
13322 return (GET_CODE (disp) == SYMBOL_REF
13323 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13324 case UNSPEC_NTPOFF:
13325 disp = XVECEXP (disp, 0, 0);
13326 return (GET_CODE (disp) == SYMBOL_REF
13327 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13328 case UNSPEC_DTPOFF:
13329 disp = XVECEXP (disp, 0, 0);
13330 return (GET_CODE (disp) == SYMBOL_REF
13331 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13334 return false;
13337 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13338 replace the input X, or the original X if no replacement is called for.
13339 The output parameter *WIN is 1 if the calling macro should goto WIN,
13340 0 if it should not. */
13342 bool
13343 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13344 int)
13346 /* Reload can generate:
13348 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13349 (reg:DI 97))
13350 (reg:DI 2 cx))
13352 This RTX is rejected by ix86_legitimate_address_p due to
13353 the non-strictness of base register 97. Following this rejection,
13354 reload pushes all three components into separate registers,
13355 creating an invalid memory address RTX.
13357 The following code reloads only the invalid part of the
13358 memory address RTX. */
13360 if (GET_CODE (x) == PLUS
13361 && REG_P (XEXP (x, 1))
13362 && GET_CODE (XEXP (x, 0)) == PLUS
13363 && REG_P (XEXP (XEXP (x, 0), 1)))
13365 rtx base, index;
13366 bool something_reloaded = false;
13368 base = XEXP (XEXP (x, 0), 1);
13369 if (!REG_OK_FOR_BASE_STRICT_P (base))
13371 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13372 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13373 opnum, (enum reload_type) type);
13374 something_reloaded = true;
13377 index = XEXP (x, 1);
13378 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13380 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13381 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13382 opnum, (enum reload_type) type);
13383 something_reloaded = true;
13386 gcc_assert (something_reloaded);
13387 return true;
13390 return false;
13393 /* Determine if OP is a suitable RTX for an address register.
13394 Return the naked register if a register or a register subreg is
13395 found, otherwise return NULL_RTX. */
13397 static rtx
13398 ix86_validate_address_register (rtx op)
13400 machine_mode mode = GET_MODE (op);
13402 /* Only SImode or DImode registers can form the address. */
13403 if (mode != SImode && mode != DImode)
13404 return NULL_RTX;
13406 if (REG_P (op))
13407 return op;
13408 else if (GET_CODE (op) == SUBREG)
13410 rtx reg = SUBREG_REG (op);
13412 if (!REG_P (reg))
13413 return NULL_RTX;
13415 mode = GET_MODE (reg);
13417 /* Don't allow SUBREGs that span more than a word. It can
13418 lead to spill failures when the register is one word out
13419 of a two word structure. */
13420 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13421 return NULL_RTX;
13423 /* Allow only SUBREGs of non-eliminable hard registers. */
13424 if (register_no_elim_operand (reg, mode))
13425 return reg;
13428 /* Op is not a register. */
13429 return NULL_RTX;
13432 /* Recognizes RTL expressions that are valid memory addresses for an
13433 instruction. The MODE argument is the machine mode for the MEM
13434 expression that wants to use this address.
13436 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13437 convert common non-canonical forms to canonical form so that they will
13438 be recognized. */
13440 static bool
13441 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13443 struct ix86_address parts;
13444 rtx base, index, disp;
13445 HOST_WIDE_INT scale;
13446 enum ix86_address_seg seg;
13448 if (ix86_decompose_address (addr, &parts) <= 0)
13449 /* Decomposition failed. */
13450 return false;
13452 base = parts.base;
13453 index = parts.index;
13454 disp = parts.disp;
13455 scale = parts.scale;
13456 seg = parts.seg;
13458 /* Validate base register. */
13459 if (base)
13461 rtx reg = ix86_validate_address_register (base);
13463 if (reg == NULL_RTX)
13464 return false;
13466 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13467 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13468 /* Base is not valid. */
13469 return false;
13472 /* Validate index register. */
13473 if (index)
13475 rtx reg = ix86_validate_address_register (index);
13477 if (reg == NULL_RTX)
13478 return false;
13480 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13481 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13482 /* Index is not valid. */
13483 return false;
13486 /* Index and base should have the same mode. */
13487 if (base && index
13488 && GET_MODE (base) != GET_MODE (index))
13489 return false;
13491 /* Address override works only on the (%reg) part of %fs:(%reg). */
13492 if (seg != SEG_DEFAULT
13493 && ((base && GET_MODE (base) != word_mode)
13494 || (index && GET_MODE (index) != word_mode)))
13495 return false;
13497 /* Validate scale factor. */
13498 if (scale != 1)
13500 if (!index)
13501 /* Scale without index. */
13502 return false;
13504 if (scale != 2 && scale != 4 && scale != 8)
13505 /* Scale is not a valid multiplier. */
13506 return false;
13509 /* Validate displacement. */
13510 if (disp)
13512 if (GET_CODE (disp) == CONST
13513 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13514 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13515 switch (XINT (XEXP (disp, 0), 1))
13517 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13518 used. While the ABI also specifies 32bit relocations, we don't produce
13519 them at all and use IP-relative addressing instead. */
13520 case UNSPEC_GOT:
13521 case UNSPEC_GOTOFF:
13522 gcc_assert (flag_pic);
13523 if (!TARGET_64BIT)
13524 goto is_legitimate_pic;
13526 /* 64bit address unspec. */
13527 return false;
13529 case UNSPEC_GOTPCREL:
13530 case UNSPEC_PCREL:
13531 gcc_assert (flag_pic);
13532 goto is_legitimate_pic;
13534 case UNSPEC_GOTTPOFF:
13535 case UNSPEC_GOTNTPOFF:
13536 case UNSPEC_INDNTPOFF:
13537 case UNSPEC_NTPOFF:
13538 case UNSPEC_DTPOFF:
13539 break;
13541 case UNSPEC_STACK_CHECK:
13542 gcc_assert (flag_split_stack);
13543 break;
13545 default:
13546 /* Invalid address unspec. */
13547 return false;
13550 else if (SYMBOLIC_CONST (disp)
13551 && (flag_pic
13552 || (TARGET_MACHO
13553 #if TARGET_MACHO
13554 && MACHOPIC_INDIRECT
13555 && !machopic_operand_p (disp)
13556 #endif
13560 is_legitimate_pic:
13561 if (TARGET_64BIT && (index || base))
13563 /* foo@dtpoff(%rX) is ok. */
13564 if (GET_CODE (disp) != CONST
13565 || GET_CODE (XEXP (disp, 0)) != PLUS
13566 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13567 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13568 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13569 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13570 /* Non-constant pic memory reference. */
13571 return false;
13573 else if ((!TARGET_MACHO || flag_pic)
13574 && ! legitimate_pic_address_disp_p (disp))
13575 /* Displacement is an invalid pic construct. */
13576 return false;
13577 #if TARGET_MACHO
13578 else if (MACHO_DYNAMIC_NO_PIC_P
13579 && !ix86_legitimate_constant_p (Pmode, disp))
13580 /* displacement must be referenced via non_lazy_pointer */
13581 return false;
13582 #endif
13584 /* This code used to verify that a symbolic pic displacement
13585 includes the pic_offset_table_rtx register.
13587 While this is a good idea, unfortunately these constructs may
13588 be created by "adds using lea" optimization for incorrect
13589 code like:
13591 int a;
13592 int foo(int i)
13594 return *(&a+i);
13597 This code is nonsensical, but results in addressing the
13598 GOT table with a pic_offset_table_rtx base. We can't
13599 just refuse it easily, since it gets matched by the
13600 "addsi3" pattern, which later gets split to lea in case
13601 the output register differs from the input. While this
13602 could be handled by a separate addsi pattern for this case
13603 that never results in lea, disabling this test seems to be
13604 the easier and correct fix for the crash. */
13606 else if (GET_CODE (disp) != LABEL_REF
13607 && !CONST_INT_P (disp)
13608 && (GET_CODE (disp) != CONST
13609 || !ix86_legitimate_constant_p (Pmode, disp))
13610 && (GET_CODE (disp) != SYMBOL_REF
13611 || !ix86_legitimate_constant_p (Pmode, disp)))
13612 /* Displacement is not constant. */
13613 return false;
13614 else if (TARGET_64BIT
13615 && !x86_64_immediate_operand (disp, VOIDmode))
13616 /* Displacement is out of range. */
13617 return false;
13618 /* In x32 mode, constant addresses are sign extended to 64bit, so
13619 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13620 else if (TARGET_X32 && !(index || base)
13621 && CONST_INT_P (disp)
13622 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13623 return false;
13626 /* Everything looks valid. */
13627 return true;
13630 /* Determine if a given RTX is a valid constant address. */
13632 bool
13633 constant_address_p (rtx x)
13635 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13638 /* Return a unique alias set for the GOT. */
13640 static alias_set_type
13641 ix86_GOT_alias_set (void)
13643 static alias_set_type set = -1;
13644 if (set == -1)
13645 set = new_alias_set ();
13646 return set;
13649 /* Set regs_ever_live for PIC base address register
13650 to true if required. */
13651 static void
13652 set_pic_reg_ever_live ()
13654 if (reload_in_progress)
13655 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13658 /* Return a legitimate reference for ORIG (an address) using the
13659 register REG. If REG is 0, a new pseudo is generated.
13661 There are two types of references that must be handled:
13663 1. Global data references must load the address from the GOT, via
13664 the PIC reg. An insn is emitted to do this load, and the reg is
13665 returned.
13667 2. Static data references, constant pool addresses, and code labels
13668 compute the address as an offset from the GOT, whose base is in
13669 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13670 differentiate them from global data objects. The returned
13671 address is the PIC reg + an unspec constant.
13673 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13674 reg also appears in the address. */
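/* A rough illustration of the two cases on ia32 with the small PIC
   model (the RTL is abbreviated and for exposition only):

     1. global data "foo":
          (set reg (mem (plus pic_reg (const (unspec [foo] UNSPEC_GOT)))))
     2. static data "bar":
          (plus pic_reg (const (unspec [bar] UNSPEC_GOTOFF)))  */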
13676 static rtx
13677 legitimize_pic_address (rtx orig, rtx reg)
13679 rtx addr = orig;
13680 rtx new_rtx = orig;
13682 #if TARGET_MACHO
13683 if (TARGET_MACHO && !TARGET_64BIT)
13685 if (reg == 0)
13686 reg = gen_reg_rtx (Pmode);
13687 /* Use the generic Mach-O PIC machinery. */
13688 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13690 #endif
13692 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13694 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13695 if (tmp)
13696 return tmp;
13699 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13700 new_rtx = addr;
13701 else if (TARGET_64BIT && !TARGET_PECOFF
13702 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13704 rtx tmpreg;
13705 /* This symbol may be referenced via a displacement from the PIC
13706 base address (@GOTOFF). */
13708 set_pic_reg_ever_live ();
13709 if (GET_CODE (addr) == CONST)
13710 addr = XEXP (addr, 0);
13711 if (GET_CODE (addr) == PLUS)
13713 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13714 UNSPEC_GOTOFF);
13715 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13717 else
13718 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13719 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13720 if (!reg)
13721 tmpreg = gen_reg_rtx (Pmode);
13722 else
13723 tmpreg = reg;
13724 emit_move_insn (tmpreg, new_rtx);
13726 if (reg != 0)
13728 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13729 tmpreg, 1, OPTAB_DIRECT);
13730 new_rtx = reg;
13732 else
13733 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13735 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13737 /* This symbol may be referenced via a displacement from the PIC
13738 base address (@GOTOFF). */
13740 set_pic_reg_ever_live ();
13741 if (GET_CODE (addr) == CONST)
13742 addr = XEXP (addr, 0);
13743 if (GET_CODE (addr) == PLUS)
13745 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13746 UNSPEC_GOTOFF);
13747 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13749 else
13750 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13751 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13752 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13754 if (reg != 0)
13756 emit_move_insn (reg, new_rtx);
13757 new_rtx = reg;
13760 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13761 /* We can't use @GOTOFF for text labels on VxWorks;
13762 see gotoff_operand. */
13763 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13765 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13766 if (tmp)
13767 return tmp;
13769 /* For x64 PE-COFF there is no GOT table, so we use the address
13770 directly. */
13771 if (TARGET_64BIT && TARGET_PECOFF)
13773 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13774 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13776 if (reg == 0)
13777 reg = gen_reg_rtx (Pmode);
13778 emit_move_insn (reg, new_rtx);
13779 new_rtx = reg;
13781 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13784 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13785 new_rtx = gen_const_mem (Pmode, new_rtx);
13786 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13788 if (reg == 0)
13789 reg = gen_reg_rtx (Pmode);
13790 /* Use gen_movsi directly, otherwise the address is loaded
13791 into a register for CSE. We don't want to CSE these addresses;
13792 instead we CSE addresses from the GOT table, so skip this. */
13793 emit_insn (gen_movsi (reg, new_rtx));
13794 new_rtx = reg;
13796 else
13798 /* This symbol must be referenced via a load from the
13799 Global Offset Table (@GOT). */
13801 set_pic_reg_ever_live ();
13802 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13803 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13804 if (TARGET_64BIT)
13805 new_rtx = force_reg (Pmode, new_rtx);
13806 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13807 new_rtx = gen_const_mem (Pmode, new_rtx);
13808 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13810 if (reg == 0)
13811 reg = gen_reg_rtx (Pmode);
13812 emit_move_insn (reg, new_rtx);
13813 new_rtx = reg;
13816 else
13818 if (CONST_INT_P (addr)
13819 && !x86_64_immediate_operand (addr, VOIDmode))
13821 if (reg)
13823 emit_move_insn (reg, addr);
13824 new_rtx = reg;
13826 else
13827 new_rtx = force_reg (Pmode, addr);
13829 else if (GET_CODE (addr) == CONST)
13831 addr = XEXP (addr, 0);
13833 /* We must match stuff we generate before. Assume the only
13834 unspecs that can get here are ours. Not that we could do
13835 anything with them anyway.... */
13836 if (GET_CODE (addr) == UNSPEC
13837 || (GET_CODE (addr) == PLUS
13838 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13839 return orig;
13840 gcc_assert (GET_CODE (addr) == PLUS);
13842 if (GET_CODE (addr) == PLUS)
13844 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13846 /* Check first to see if this is a constant offset from a @GOTOFF
13847 symbol reference. */
13848 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13849 && CONST_INT_P (op1))
13851 if (!TARGET_64BIT)
13853 set_pic_reg_ever_live ();
13854 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13855 UNSPEC_GOTOFF);
13856 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13857 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13858 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13860 if (reg != 0)
13862 emit_move_insn (reg, new_rtx);
13863 new_rtx = reg;
13866 else
13868 if (INTVAL (op1) < -16*1024*1024
13869 || INTVAL (op1) >= 16*1024*1024)
13871 if (!x86_64_immediate_operand (op1, Pmode))
13872 op1 = force_reg (Pmode, op1);
13873 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13877 else
13879 rtx base = legitimize_pic_address (op0, reg);
13880 machine_mode mode = GET_MODE (base);
13881 new_rtx
13882 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13884 if (CONST_INT_P (new_rtx))
13886 if (INTVAL (new_rtx) < -16*1024*1024
13887 || INTVAL (new_rtx) >= 16*1024*1024)
13889 if (!x86_64_immediate_operand (new_rtx, mode))
13890 new_rtx = force_reg (mode, new_rtx);
13891 new_rtx
13892 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13894 else
13895 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13897 else
13899 if (GET_CODE (new_rtx) == PLUS
13900 && CONSTANT_P (XEXP (new_rtx, 1)))
13902 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13903 new_rtx = XEXP (new_rtx, 1);
13905 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13910 return new_rtx;
13913 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13915 static rtx
13916 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13918 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13920 if (GET_MODE (tp) != tp_mode)
13922 gcc_assert (GET_MODE (tp) == SImode);
13923 gcc_assert (tp_mode == DImode);
13925 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13928 if (to_reg)
13929 tp = copy_to_mode_reg (tp_mode, tp);
13931 return tp;
13934 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13936 static GTY(()) rtx ix86_tls_symbol;
13938 static rtx
13939 ix86_tls_get_addr (void)
13941 if (!ix86_tls_symbol)
13943 const char *sym
13944 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13945 ? "___tls_get_addr" : "__tls_get_addr");
13947 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13950 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13952 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13953 UNSPEC_PLTOFF);
13954 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13955 gen_rtx_CONST (Pmode, unspec));
13958 return ix86_tls_symbol;
13961 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13963 static GTY(()) rtx ix86_tls_module_base_symbol;
13966 ix86_tls_module_base (void)
13968 if (!ix86_tls_module_base_symbol)
13970 ix86_tls_module_base_symbol
13971 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13973 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13974 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13977 return ix86_tls_module_base_symbol;
13980 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13981 false if we expect this to be used for a memory address and true if
13982 we expect to load the address into a register. */
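/* As a rough sketch of the simplest case handled below, a local-exec
   access to a TLS variable "foo" on a GNU/Linux target where
   TARGET_TLS_DIRECT_SEG_REFS holds comes out as

     (plus (unspec [(const_int 0)] UNSPEC_TP)
           (const (unspec [foo] UNSPEC_NTPOFF)))

   which is printed as the segment-relative access %fs:foo@tpoff
   (%gs on ia32).  */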
13984 static rtx
13985 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13987 rtx dest, base, off;
13988 rtx pic = NULL_RTX, tp = NULL_RTX;
13989 machine_mode tp_mode = Pmode;
13990 int type;
13992 /* Fall back to the global dynamic model if the toolchain cannot support
13993 local dynamic. */
13994 if (TARGET_SUN_TLS && !TARGET_64BIT
13995 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13996 && model == TLS_MODEL_LOCAL_DYNAMIC)
13997 model = TLS_MODEL_GLOBAL_DYNAMIC;
13999 switch (model)
14001 case TLS_MODEL_GLOBAL_DYNAMIC:
14002 dest = gen_reg_rtx (Pmode);
14004 if (!TARGET_64BIT)
14006 if (flag_pic && !TARGET_PECOFF)
14007 pic = pic_offset_table_rtx;
14008 else
14010 pic = gen_reg_rtx (Pmode);
14011 emit_insn (gen_set_got (pic));
14015 if (TARGET_GNU2_TLS)
14017 if (TARGET_64BIT)
14018 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14019 else
14020 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14022 tp = get_thread_pointer (Pmode, true);
14023 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14025 if (GET_MODE (x) != Pmode)
14026 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14028 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14030 else
14032 rtx caddr = ix86_tls_get_addr ();
14034 if (TARGET_64BIT)
14036 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14037 rtx_insn *insns;
14039 start_sequence ();
14040 emit_call_insn
14041 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14042 insns = get_insns ();
14043 end_sequence ();
14045 if (GET_MODE (x) != Pmode)
14046 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14048 RTL_CONST_CALL_P (insns) = 1;
14049 emit_libcall_block (insns, dest, rax, x);
14051 else
14052 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14054 break;
14056 case TLS_MODEL_LOCAL_DYNAMIC:
14057 base = gen_reg_rtx (Pmode);
14059 if (!TARGET_64BIT)
14061 if (flag_pic)
14062 pic = pic_offset_table_rtx;
14063 else
14065 pic = gen_reg_rtx (Pmode);
14066 emit_insn (gen_set_got (pic));
14070 if (TARGET_GNU2_TLS)
14072 rtx tmp = ix86_tls_module_base ();
14074 if (TARGET_64BIT)
14075 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14076 else
14077 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14079 tp = get_thread_pointer (Pmode, true);
14080 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14081 gen_rtx_MINUS (Pmode, tmp, tp));
14083 else
14085 rtx caddr = ix86_tls_get_addr ();
14087 if (TARGET_64BIT)
14089 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14090 rtx_insn *insns;
14091 rtx eqv;
14093 start_sequence ();
14094 emit_call_insn
14095 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14096 insns = get_insns ();
14097 end_sequence ();
14099 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14100 share the LD_BASE result with other LD model accesses. */
14101 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14102 UNSPEC_TLS_LD_BASE);
14104 RTL_CONST_CALL_P (insns) = 1;
14105 emit_libcall_block (insns, base, rax, eqv);
14107 else
14108 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14111 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14112 off = gen_rtx_CONST (Pmode, off);
14114 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14116 if (TARGET_GNU2_TLS)
14118 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14120 if (GET_MODE (x) != Pmode)
14121 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14123 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14125 break;
14127 case TLS_MODEL_INITIAL_EXEC:
14128 if (TARGET_64BIT)
14130 if (TARGET_SUN_TLS && !TARGET_X32)
14132 /* The Sun linker took the AMD64 TLS spec literally
14133 and can only handle %rax as the destination of the
14134 initial-exec code sequence. */
14136 dest = gen_reg_rtx (DImode);
14137 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14138 return dest;
14141 /* Generate DImode references to avoid %fs:(%reg32)
14142 problems and linker IE->LE relaxation bug. */
14143 tp_mode = DImode;
14144 pic = NULL;
14145 type = UNSPEC_GOTNTPOFF;
14147 else if (flag_pic)
14149 set_pic_reg_ever_live ();
14150 pic = pic_offset_table_rtx;
14151 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14153 else if (!TARGET_ANY_GNU_TLS)
14155 pic = gen_reg_rtx (Pmode);
14156 emit_insn (gen_set_got (pic));
14157 type = UNSPEC_GOTTPOFF;
14159 else
14161 pic = NULL;
14162 type = UNSPEC_INDNTPOFF;
14165 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14166 off = gen_rtx_CONST (tp_mode, off);
14167 if (pic)
14168 off = gen_rtx_PLUS (tp_mode, pic, off);
14169 off = gen_const_mem (tp_mode, off);
14170 set_mem_alias_set (off, ix86_GOT_alias_set ());
14172 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14174 base = get_thread_pointer (tp_mode,
14175 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14176 off = force_reg (tp_mode, off);
14177 return gen_rtx_PLUS (tp_mode, base, off);
14179 else
14181 base = get_thread_pointer (Pmode, true);
14182 dest = gen_reg_rtx (Pmode);
14183 emit_insn (ix86_gen_sub3 (dest, base, off));
14185 break;
14187 case TLS_MODEL_LOCAL_EXEC:
14188 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14189 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14190 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14191 off = gen_rtx_CONST (Pmode, off);
14193 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14195 base = get_thread_pointer (Pmode,
14196 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14197 return gen_rtx_PLUS (Pmode, base, off);
14199 else
14201 base = get_thread_pointer (Pmode, true);
14202 dest = gen_reg_rtx (Pmode);
14203 emit_insn (ix86_gen_sub3 (dest, base, off));
14205 break;
14207 default:
14208 gcc_unreachable ();
14211 return dest;
14214 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14215 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14216 unique refptr-DECL symbol corresponding to symbol DECL. */
14218 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14220 static inline hashval_t hash (tree_map *m) { return m->hash; }
14221 static inline bool
14222 equal (tree_map *a, tree_map *b)
14224 return a->base.from == b->base.from;
14227 static void
14228 handle_cache_entry (tree_map *&m)
14230 extern void gt_ggc_mx (tree_map *&);
14231 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14232 return;
14233 else if (ggc_marked_p (m->base.from))
14234 gt_ggc_mx (m);
14235 else
14236 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14240 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14242 static tree
14243 get_dllimport_decl (tree decl, bool beimport)
14245 struct tree_map *h, in;
14246 const char *name;
14247 const char *prefix;
14248 size_t namelen, prefixlen;
14249 char *imp_name;
14250 tree to;
14251 rtx rtl;
14253 if (!dllimport_map)
14254 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14256 in.hash = htab_hash_pointer (decl);
14257 in.base.from = decl;
14258 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14259 h = *loc;
14260 if (h)
14261 return h->to;
14263 *loc = h = ggc_alloc<tree_map> ();
14264 h->hash = in.hash;
14265 h->base.from = decl;
14266 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14267 VAR_DECL, NULL, ptr_type_node);
14268 DECL_ARTIFICIAL (to) = 1;
14269 DECL_IGNORED_P (to) = 1;
14270 DECL_EXTERNAL (to) = 1;
14271 TREE_READONLY (to) = 1;
14273 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14274 name = targetm.strip_name_encoding (name);
14275 if (beimport)
14276 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14277 ? "*__imp_" : "*__imp__";
14278 else
14279 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14280 namelen = strlen (name);
14281 prefixlen = strlen (prefix);
14282 imp_name = (char *) alloca (namelen + prefixlen + 1);
14283 memcpy (imp_name, prefix, prefixlen);
14284 memcpy (imp_name + prefixlen, name, namelen + 1);
14286 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14287 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14288 SET_SYMBOL_REF_DECL (rtl, to);
14289 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14290 if (!beimport)
14292 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14293 #ifdef SUB_TARGET_RECORD_STUB
14294 SUB_TARGET_RECORD_STUB (name);
14295 #endif
14298 rtl = gen_const_mem (Pmode, rtl);
14299 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14301 SET_DECL_RTL (to, rtl);
14302 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14304 return to;
14307 /* Expand SYMBOL into its corresponding far-address symbol.
14308 WANT_REG is true if we require the result be a register. */
14310 static rtx
14311 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14313 tree imp_decl;
14314 rtx x;
14316 gcc_assert (SYMBOL_REF_DECL (symbol));
14317 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14319 x = DECL_RTL (imp_decl);
14320 if (want_reg)
14321 x = force_reg (Pmode, x);
14322 return x;
14325 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14326 true if we require the result be a register. */
14328 static rtx
14329 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14331 tree imp_decl;
14332 rtx x;
14334 gcc_assert (SYMBOL_REF_DECL (symbol));
14335 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14337 x = DECL_RTL (imp_decl);
14338 if (want_reg)
14339 x = force_reg (Pmode, x);
14340 return x;
14343 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14344 is true if we require the result be a register. */
14346 static rtx
14347 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14349 if (!TARGET_PECOFF)
14350 return NULL_RTX;
14352 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14354 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14355 return legitimize_dllimport_symbol (addr, inreg);
14356 if (GET_CODE (addr) == CONST
14357 && GET_CODE (XEXP (addr, 0)) == PLUS
14358 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14359 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14361 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14362 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14366 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14367 return NULL_RTX;
14368 if (GET_CODE (addr) == SYMBOL_REF
14369 && !is_imported_p (addr)
14370 && SYMBOL_REF_EXTERNAL_P (addr)
14371 && SYMBOL_REF_DECL (addr))
14372 return legitimize_pe_coff_extern_decl (addr, inreg);
14374 if (GET_CODE (addr) == CONST
14375 && GET_CODE (XEXP (addr, 0)) == PLUS
14376 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14377 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14378 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14379 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14381 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14382 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14384 return NULL_RTX;
14387 /* Try machine-dependent ways of modifying an illegitimate address
14388 to be legitimate. If we find one, return the new, valid address.
14389 This macro is used in only one place: `memory_address' in explow.c.
14391 OLDX is the address as it was before break_out_memory_refs was called.
14392 In some cases it is useful to look at this to decide what needs to be done.
14394 It is always safe for this macro to do nothing. It exists to recognize
14395 opportunities to optimize the output.
14397 For the 80386, we handle X+REG by loading X into a register R and
14398 using R+REG. R will go in a general reg and indexing will be used.
14399 However, if REG is a broken-out memory address or multiplication,
14400 nothing needs to be done because REG can certainly go in a general reg.
14402 When -fpic is used, special handling is needed for symbolic references.
14403 See comments by legitimize_pic_address in i386.c for details. */
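/* One small worked case: the address
       (plus (ashift (reg) (const_int 2)) (reg))
   is canonicalized below into
       (plus (mult (reg) (const_int 4)) (reg))
   i.e. the base + index*scale form that ix86_legitimate_address_p accepts.  */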
14405 static rtx
14406 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14408 bool changed = false;
14409 unsigned log;
14411 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14412 if (log)
14413 return legitimize_tls_address (x, (enum tls_model) log, false);
14414 if (GET_CODE (x) == CONST
14415 && GET_CODE (XEXP (x, 0)) == PLUS
14416 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14417 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14419 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14420 (enum tls_model) log, false);
14421 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14424 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14426 rtx tmp = legitimize_pe_coff_symbol (x, true);
14427 if (tmp)
14428 return tmp;
14431 if (flag_pic && SYMBOLIC_CONST (x))
14432 return legitimize_pic_address (x, 0);
14434 #if TARGET_MACHO
14435 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14436 return machopic_indirect_data_reference (x, 0);
14437 #endif
14439 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14440 if (GET_CODE (x) == ASHIFT
14441 && CONST_INT_P (XEXP (x, 1))
14442 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14444 changed = true;
14445 log = INTVAL (XEXP (x, 1));
14446 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14447 GEN_INT (1 << log));
14450 if (GET_CODE (x) == PLUS)
14452 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14454 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14455 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14456 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14458 changed = true;
14459 log = INTVAL (XEXP (XEXP (x, 0), 1));
14460 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14461 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14462 GEN_INT (1 << log));
14465 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14466 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14467 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14469 changed = true;
14470 log = INTVAL (XEXP (XEXP (x, 1), 1));
14471 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14472 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14473 GEN_INT (1 << log));
14476 /* Put multiply first if it isn't already. */
14477 if (GET_CODE (XEXP (x, 1)) == MULT)
14479 std::swap (XEXP (x, 0), XEXP (x, 1));
14480 changed = true;
14483 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14484 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14485 created by virtual register instantiation, register elimination, and
14486 similar optimizations. */
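/* Concrete instance of the rewrite described above:
       (plus (mult (reg ecx) (const_int 4)) (plus (reg ebx) (const_int 8)))
   becomes
       (plus (plus (mult (reg ecx) (const_int 4)) (reg ebx)) (const_int 8))
   which matches the machine's base + index*scale + disp addressing form.  */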
14487 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14489 changed = true;
14490 x = gen_rtx_PLUS (Pmode,
14491 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14492 XEXP (XEXP (x, 1), 0)),
14493 XEXP (XEXP (x, 1), 1));
14496 /* Canonicalize
14497 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14498 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14499 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14500 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14501 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14502 && CONSTANT_P (XEXP (x, 1)))
14504 rtx constant;
14505 rtx other = NULL_RTX;
14507 if (CONST_INT_P (XEXP (x, 1)))
14509 constant = XEXP (x, 1);
14510 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14512 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14514 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14515 other = XEXP (x, 1);
14517 else
14518 constant = 0;
14520 if (constant)
14522 changed = true;
14523 x = gen_rtx_PLUS (Pmode,
14524 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14525 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14526 plus_constant (Pmode, other,
14527 INTVAL (constant)));
14531 if (changed && ix86_legitimate_address_p (mode, x, false))
14532 return x;
14534 if (GET_CODE (XEXP (x, 0)) == MULT)
14536 changed = true;
14537 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14540 if (GET_CODE (XEXP (x, 1)) == MULT)
14542 changed = true;
14543 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14546 if (changed
14547 && REG_P (XEXP (x, 1))
14548 && REG_P (XEXP (x, 0)))
14549 return x;
14551 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14553 changed = true;
14554 x = legitimize_pic_address (x, 0);
14557 if (changed && ix86_legitimate_address_p (mode, x, false))
14558 return x;
14560 if (REG_P (XEXP (x, 0)))
14562 rtx temp = gen_reg_rtx (Pmode);
14563 rtx val = force_operand (XEXP (x, 1), temp);
14564 if (val != temp)
14566 val = convert_to_mode (Pmode, val, 1);
14567 emit_move_insn (temp, val);
14570 XEXP (x, 1) = temp;
14571 return x;
14574 else if (REG_P (XEXP (x, 1)))
14576 rtx temp = gen_reg_rtx (Pmode);
14577 rtx val = force_operand (XEXP (x, 0), temp);
14578 if (val != temp)
14580 val = convert_to_mode (Pmode, val, 1);
14581 emit_move_insn (temp, val);
14584 XEXP (x, 0) = temp;
14585 return x;
14589 return x;
14592 /* Print an integer constant expression in assembler syntax. Addition
14593 and subtraction are the only arithmetic that may appear in these
14594 expressions. FILE is the stdio stream to write to, X is the rtx, and
14595 CODE is the operand print code from the output string. */
14597 static void
14598 output_pic_addr_const (FILE *file, rtx x, int code)
14600 char buf[256];
14602 switch (GET_CODE (x))
14604 case PC:
14605 gcc_assert (flag_pic);
14606 putc ('.', file);
14607 break;
14609 case SYMBOL_REF:
14610 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14611 output_addr_const (file, x);
14612 else
14614 const char *name = XSTR (x, 0);
14616 /* Mark the decl as referenced so that cgraph will
14617 output the function. */
14618 if (SYMBOL_REF_DECL (x))
14619 mark_decl_referenced (SYMBOL_REF_DECL (x));
14621 #if TARGET_MACHO
14622 if (MACHOPIC_INDIRECT
14623 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14624 name = machopic_indirection_name (x, /*stub_p=*/true);
14625 #endif
14626 assemble_name (file, name);
14628 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14629 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14630 fputs ("@PLT", file);
14631 break;
14633 case LABEL_REF:
14634 x = XEXP (x, 0);
14635 /* FALLTHRU */
14636 case CODE_LABEL:
14637 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14638 assemble_name (asm_out_file, buf);
14639 break;
14641 case CONST_INT:
14642 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14643 break;
14645 case CONST:
14646 /* This used to output parentheses around the expression,
14647 but that does not work on the 386 (either ATT or BSD assembler). */
14648 output_pic_addr_const (file, XEXP (x, 0), code);
14649 break;
14651 case CONST_DOUBLE:
14652 if (GET_MODE (x) == VOIDmode)
14654 /* We can use %d if the number is <32 bits and positive. */
14655 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14656 fprintf (file, "0x%lx%08lx",
14657 (unsigned long) CONST_DOUBLE_HIGH (x),
14658 (unsigned long) CONST_DOUBLE_LOW (x));
14659 else
14660 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14662 else
14663 /* We can't handle floating point constants;
14664 TARGET_PRINT_OPERAND must handle them. */
14665 output_operand_lossage ("floating constant misused");
14666 break;
14668 case PLUS:
14669 /* Some assemblers need integer constants to appear first. */
14670 if (CONST_INT_P (XEXP (x, 0)))
14672 output_pic_addr_const (file, XEXP (x, 0), code);
14673 putc ('+', file);
14674 output_pic_addr_const (file, XEXP (x, 1), code);
14676 else
14678 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14679 output_pic_addr_const (file, XEXP (x, 1), code);
14680 putc ('+', file);
14681 output_pic_addr_const (file, XEXP (x, 0), code);
14683 break;
14685 case MINUS:
14686 if (!TARGET_MACHO)
14687 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14688 output_pic_addr_const (file, XEXP (x, 0), code);
14689 putc ('-', file);
14690 output_pic_addr_const (file, XEXP (x, 1), code);
14691 if (!TARGET_MACHO)
14692 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14693 break;
14695 case UNSPEC:
14696 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14698 bool f = i386_asm_output_addr_const_extra (file, x);
14699 gcc_assert (f);
14700 break;
14703 gcc_assert (XVECLEN (x, 0) == 1);
14704 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14705 switch (XINT (x, 1))
14707 case UNSPEC_GOT:
14708 fputs ("@GOT", file);
14709 break;
14710 case UNSPEC_GOTOFF:
14711 fputs ("@GOTOFF", file);
14712 break;
14713 case UNSPEC_PLTOFF:
14714 fputs ("@PLTOFF", file);
14715 break;
14716 case UNSPEC_PCREL:
14717 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14718 "(%rip)" : "[rip]", file);
14719 break;
14720 case UNSPEC_GOTPCREL:
14721 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14722 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14723 break;
14724 case UNSPEC_GOTTPOFF:
14725 /* FIXME: This might be @TPOFF in Sun ld too. */
14726 fputs ("@gottpoff", file);
14727 break;
14728 case UNSPEC_TPOFF:
14729 fputs ("@tpoff", file);
14730 break;
14731 case UNSPEC_NTPOFF:
14732 if (TARGET_64BIT)
14733 fputs ("@tpoff", file);
14734 else
14735 fputs ("@ntpoff", file);
14736 break;
14737 case UNSPEC_DTPOFF:
14738 fputs ("@dtpoff", file);
14739 break;
14740 case UNSPEC_GOTNTPOFF:
14741 if (TARGET_64BIT)
14742 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14743 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14744 else
14745 fputs ("@gotntpoff", file);
14746 break;
14747 case UNSPEC_INDNTPOFF:
14748 fputs ("@indntpoff", file);
14749 break;
14750 #if TARGET_MACHO
14751 case UNSPEC_MACHOPIC_OFFSET:
14752 putc ('-', file);
14753 machopic_output_function_base_name (file);
14754 break;
14755 #endif
14756 default:
14757 output_operand_lossage ("invalid UNSPEC as operand");
14758 break;
14760 break;
14762 default:
14763 output_operand_lossage ("invalid expression as operand");
14767 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14768 We need to emit DTP-relative relocations. */
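/* For example, a size-8 request for symbol "foo" emits (modulo the exact
   ASM_LONG spelling) the directive
       .long   foo@dtpoff, 0
   i.e. the 32-bit DTP-relative offset padded with a zero upper word.  */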
14770 static void ATTRIBUTE_UNUSED
14771 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14773 fputs (ASM_LONG, file);
14774 output_addr_const (file, x);
14775 fputs ("@dtpoff", file);
14776 switch (size)
14778 case 4:
14779 break;
14780 case 8:
14781 fputs (", 0", file);
14782 break;
14783 default:
14784 gcc_unreachable ();
14788 /* Return true if X is a representation of the PIC register. This copes
14789 with calls from ix86_find_base_term, where the register might have
14790 been replaced by a cselib value. */
14792 static bool
14793 ix86_pic_register_p (rtx x)
14795 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14796 return (pic_offset_table_rtx
14797 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14798 else if (!REG_P (x))
14799 return false;
14800 else if (pic_offset_table_rtx)
14802 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14803 return true;
14804 if (HARD_REGISTER_P (x)
14805 && !HARD_REGISTER_P (pic_offset_table_rtx)
14806 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14807 return true;
14808 return false;
14810 else
14811 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14814 /* Helper function for ix86_delegitimize_address.
14815 Attempt to delegitimize TLS local-exec accesses. */
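/* Rough illustration: a local-exec access whose address decomposes as
       %fs segment + (const (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF))
   is mapped back to the bare SYMBOL_REF "foo", with any base, index*scale
   and constant offset re-attached around it.  */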
14817 static rtx
14818 ix86_delegitimize_tls_address (rtx orig_x)
14820 rtx x = orig_x, unspec;
14821 struct ix86_address addr;
14823 if (!TARGET_TLS_DIRECT_SEG_REFS)
14824 return orig_x;
14825 if (MEM_P (x))
14826 x = XEXP (x, 0);
14827 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14828 return orig_x;
14829 if (ix86_decompose_address (x, &addr) == 0
14830 || addr.seg != DEFAULT_TLS_SEG_REG
14831 || addr.disp == NULL_RTX
14832 || GET_CODE (addr.disp) != CONST)
14833 return orig_x;
14834 unspec = XEXP (addr.disp, 0);
14835 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14836 unspec = XEXP (unspec, 0);
14837 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14838 return orig_x;
14839 x = XVECEXP (unspec, 0, 0);
14840 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14841 if (unspec != XEXP (addr.disp, 0))
14842 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14843 if (addr.index)
14845 rtx idx = addr.index;
14846 if (addr.scale != 1)
14847 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14848 x = gen_rtx_PLUS (Pmode, idx, x);
14850 if (addr.base)
14851 x = gen_rtx_PLUS (Pmode, addr.base, x);
14852 if (MEM_P (orig_x))
14853 x = replace_equiv_address_nv (orig_x, x);
14854 return x;
14857 /* In the name of slightly smaller debug output, and to cater to
14858 general assembler lossage, recognize PIC+GOTOFF and turn it back
14859 into a direct symbol reference.
14861 On Darwin, this is necessary to avoid a crash, because Darwin
14862 has a different PIC label for each routine but the DWARF debugging
14863 information is not associated with any particular routine, so it's
14864 necessary to remove references to the PIC label from RTL stored by
14865 the DWARF output code. */
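/* Rough illustration: an address such as
       (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into the plain (symbol_ref "foo") for debug output.  */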
14867 static rtx
14868 ix86_delegitimize_address (rtx x)
14870 rtx orig_x = delegitimize_mem_from_attrs (x);
14871 /* addend is NULL or some rtx if x is something+GOTOFF where
14872 something doesn't include the PIC register. */
14873 rtx addend = NULL_RTX;
14874 /* reg_addend is NULL or a multiple of some register. */
14875 rtx reg_addend = NULL_RTX;
14876 /* const_addend is NULL or a const_int. */
14877 rtx const_addend = NULL_RTX;
14878 /* This is the result, or NULL. */
14879 rtx result = NULL_RTX;
14881 x = orig_x;
14883 if (MEM_P (x))
14884 x = XEXP (x, 0);
14886 if (TARGET_64BIT)
14888 if (GET_CODE (x) == CONST
14889 && GET_CODE (XEXP (x, 0)) == PLUS
14890 && GET_MODE (XEXP (x, 0)) == Pmode
14891 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14892 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14893 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14895 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14896 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14897 if (MEM_P (orig_x))
14898 x = replace_equiv_address_nv (orig_x, x);
14899 return x;
14902 if (GET_CODE (x) == CONST
14903 && GET_CODE (XEXP (x, 0)) == UNSPEC
14904 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14905 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14906 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14908 x = XVECEXP (XEXP (x, 0), 0, 0);
14909 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14911 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14912 GET_MODE (x), 0);
14913 if (x == NULL_RTX)
14914 return orig_x;
14916 return x;
14919 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14920 return ix86_delegitimize_tls_address (orig_x);
14922 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14923 and -mcmodel=medium -fpic. */
14926 if (GET_CODE (x) != PLUS
14927 || GET_CODE (XEXP (x, 1)) != CONST)
14928 return ix86_delegitimize_tls_address (orig_x);
14930 if (ix86_pic_register_p (XEXP (x, 0)))
14931 /* %ebx + GOT/GOTOFF */
14933 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14935 /* %ebx + %reg * scale + GOT/GOTOFF */
14936 reg_addend = XEXP (x, 0);
14937 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14938 reg_addend = XEXP (reg_addend, 1);
14939 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14940 reg_addend = XEXP (reg_addend, 0);
14941 else
14943 reg_addend = NULL_RTX;
14944 addend = XEXP (x, 0);
14947 else
14948 addend = XEXP (x, 0);
14950 x = XEXP (XEXP (x, 1), 0);
14951 if (GET_CODE (x) == PLUS
14952 && CONST_INT_P (XEXP (x, 1)))
14954 const_addend = XEXP (x, 1);
14955 x = XEXP (x, 0);
14958 if (GET_CODE (x) == UNSPEC
14959 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14960 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14961 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14962 && !MEM_P (orig_x) && !addend)))
14963 result = XVECEXP (x, 0, 0);
14965 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14966 && !MEM_P (orig_x))
14967 result = XVECEXP (x, 0, 0);
14969 if (! result)
14970 return ix86_delegitimize_tls_address (orig_x);
14972 if (const_addend)
14973 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14974 if (reg_addend)
14975 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14976 if (addend)
14978 /* If the rest of original X doesn't involve the PIC register, add
14979 addend and subtract pic_offset_table_rtx. This can happen e.g.
14980 for code like:
14981 leal (%ebx, %ecx, 4), %ecx
14983 movl foo@GOTOFF(%ecx), %edx
14984 in which case we return (%ecx - %ebx) + foo
14985 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14986 and reload has completed. */
14987 if (pic_offset_table_rtx
14988 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14989 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14990 pic_offset_table_rtx),
14991 result);
14992 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14994 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14995 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14996 result = gen_rtx_PLUS (Pmode, tmp, result);
14998 else
14999 return orig_x;
15001 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15003 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15004 if (result == NULL_RTX)
15005 return orig_x;
15007 return result;
15010 /* If X is a machine specific address (i.e. a symbol or label being
15011 referenced as a displacement from the GOT implemented using an
15012 UNSPEC), then return the base term. Otherwise return X. */
15015 ix86_find_base_term (rtx x)
15017 rtx term;
15019 if (TARGET_64BIT)
15021 if (GET_CODE (x) != CONST)
15022 return x;
15023 term = XEXP (x, 0);
15024 if (GET_CODE (term) == PLUS
15025 && (CONST_INT_P (XEXP (term, 1))
15026 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15027 term = XEXP (term, 0);
15028 if (GET_CODE (term) != UNSPEC
15029 || (XINT (term, 1) != UNSPEC_GOTPCREL
15030 && XINT (term, 1) != UNSPEC_PCREL))
15031 return x;
15033 return XVECEXP (term, 0, 0);
15036 return ix86_delegitimize_address (x);
15039 static void
15040 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15041 bool fp, FILE *file)
15043 const char *suffix;
15045 if (mode == CCFPmode || mode == CCFPUmode)
15047 code = ix86_fp_compare_code_to_integer (code);
15048 mode = CCmode;
15050 if (reverse)
15051 code = reverse_condition (code);
15053 switch (code)
15055 case EQ:
15056 switch (mode)
15058 case CCAmode:
15059 suffix = "a";
15060 break;
15062 case CCCmode:
15063 suffix = "c";
15064 break;
15066 case CCOmode:
15067 suffix = "o";
15068 break;
15070 case CCSmode:
15071 suffix = "s";
15072 break;
15074 default:
15075 suffix = "e";
15077 break;
15078 case NE:
15079 switch (mode)
15081 case CCAmode:
15082 suffix = "na";
15083 break;
15085 case CCCmode:
15086 suffix = "nc";
15087 break;
15089 case CCOmode:
15090 suffix = "no";
15091 break;
15093 case CCSmode:
15094 suffix = "ns";
15095 break;
15097 default:
15098 suffix = "ne";
15100 break;
15101 case GT:
15102 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15103 suffix = "g";
15104 break;
15105 case GTU:
15106 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15107 Those same assemblers have the same but opposite lossage on cmov. */
15108 if (mode == CCmode)
15109 suffix = fp ? "nbe" : "a";
15110 else
15111 gcc_unreachable ();
15112 break;
15113 case LT:
15114 switch (mode)
15116 case CCNOmode:
15117 case CCGOCmode:
15118 suffix = "s";
15119 break;
15121 case CCmode:
15122 case CCGCmode:
15123 suffix = "l";
15124 break;
15126 default:
15127 gcc_unreachable ();
15129 break;
15130 case LTU:
15131 if (mode == CCmode)
15132 suffix = "b";
15133 else if (mode == CCCmode)
15134 suffix = fp ? "b" : "c";
15135 else
15136 gcc_unreachable ();
15137 break;
15138 case GE:
15139 switch (mode)
15141 case CCNOmode:
15142 case CCGOCmode:
15143 suffix = "ns";
15144 break;
15146 case CCmode:
15147 case CCGCmode:
15148 suffix = "ge";
15149 break;
15151 default:
15152 gcc_unreachable ();
15154 break;
15155 case GEU:
15156 if (mode == CCmode)
15157 suffix = "nb";
15158 else if (mode == CCCmode)
15159 suffix = fp ? "nb" : "nc";
15160 else
15161 gcc_unreachable ();
15162 break;
15163 case LE:
15164 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15165 suffix = "le";
15166 break;
15167 case LEU:
15168 if (mode == CCmode)
15169 suffix = "be";
15170 else
15171 gcc_unreachable ();
15172 break;
15173 case UNORDERED:
15174 suffix = fp ? "u" : "p";
15175 break;
15176 case ORDERED:
15177 suffix = fp ? "nu" : "np";
15178 break;
15179 default:
15180 gcc_unreachable ();
15182 fputs (suffix, file);
15185 /* Print the name of register X to FILE based on its machine mode and number.
15186 If CODE is 'w', pretend the mode is HImode.
15187 If CODE is 'b', pretend the mode is QImode.
15188 If CODE is 'k', pretend the mode is SImode.
15189 If CODE is 'q', pretend the mode is DImode.
15190 If CODE is 'x', pretend the mode is V4SFmode.
15191 If CODE is 't', pretend the mode is V8SFmode.
15192 If CODE is 'g', pretend the mode is V16SFmode.
15193 If CODE is 'h', pretend the reg is the 'high' byte register.
15194 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15195 If CODE is 'd', duplicate the operand for AVX instruction.
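/* Examples: for operand (reg:SI ax), CODE 'b' prints "al", 'w' prints "ax",
   'k' prints "eax", 'q' prints "rax" and 'h' prints "ah"; with no CODE the
   name is picked from the operand's own mode, and AT&T syntax prepends '%'.  */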
15198 void
15199 print_reg (rtx x, int code, FILE *file)
15201 const char *reg;
15202 unsigned int regno;
15203 bool duplicated = code == 'd' && TARGET_AVX;
15205 if (ASSEMBLER_DIALECT == ASM_ATT)
15206 putc ('%', file);
15208 if (x == pc_rtx)
15210 gcc_assert (TARGET_64BIT);
15211 fputs ("rip", file);
15212 return;
15215 regno = true_regnum (x);
15216 gcc_assert (regno != ARG_POINTER_REGNUM
15217 && regno != FRAME_POINTER_REGNUM
15218 && regno != FLAGS_REG
15219 && regno != FPSR_REG
15220 && regno != FPCR_REG);
15222 if (code == 'w' || MMX_REG_P (x))
15223 code = 2;
15224 else if (code == 'b')
15225 code = 1;
15226 else if (code == 'k')
15227 code = 4;
15228 else if (code == 'q')
15229 code = 8;
15230 else if (code == 'y')
15231 code = 3;
15232 else if (code == 'h')
15233 code = 0;
15234 else if (code == 'x')
15235 code = 16;
15236 else if (code == 't')
15237 code = 32;
15238 else if (code == 'g')
15239 code = 64;
15240 else
15241 code = GET_MODE_SIZE (GET_MODE (x));
15243 /* Irritatingly, AMD extended registers use a different naming convention
15244 from the normal registers: "r%d[bwd]" */
15245 if (REX_INT_REGNO_P (regno))
15247 gcc_assert (TARGET_64BIT);
15248 putc ('r', file);
15249 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15250 switch (code)
15252 case 0:
15253 error ("extended registers have no high halves");
15254 break;
15255 case 1:
15256 putc ('b', file);
15257 break;
15258 case 2:
15259 putc ('w', file);
15260 break;
15261 case 4:
15262 putc ('d', file);
15263 break;
15264 case 8:
15265 /* no suffix */
15266 break;
15267 default:
15268 error ("unsupported operand size for extended register");
15269 break;
15271 return;
15274 reg = NULL;
15275 switch (code)
15277 case 3:
15278 if (STACK_TOP_P (x))
15280 reg = "st(0)";
15281 break;
15283 /* FALLTHRU */
15284 case 8:
15285 case 4:
15286 case 12:
15287 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15288 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15289 /* FALLTHRU */
15290 case 16:
15291 case 2:
15292 normal:
15293 reg = hi_reg_name[regno];
15294 break;
15295 case 1:
15296 if (regno >= ARRAY_SIZE (qi_reg_name))
15297 goto normal;
15298 reg = qi_reg_name[regno];
15299 break;
15300 case 0:
15301 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15302 goto normal;
15303 reg = qi_high_reg_name[regno];
15304 break;
15305 case 32:
15306 if (SSE_REG_P (x))
15308 gcc_assert (!duplicated);
15309 putc ('y', file);
15310 fputs (hi_reg_name[regno] + 1, file);
15311 return;
15313 case 64:
15314 if (SSE_REG_P (x))
15316 gcc_assert (!duplicated);
15317 putc ('z', file);
15318 fputs (hi_reg_name[REGNO (x)] + 1, file);
15319 return;
15321 break;
15322 default:
15323 gcc_unreachable ();
15326 fputs (reg, file);
15327 if (duplicated)
15329 if (ASSEMBLER_DIALECT == ASM_ATT)
15330 fprintf (file, ", %%%s", reg);
15331 else
15332 fprintf (file, ", %s", reg);
15336 /* Meaning of CODE:
15337 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15338 C -- print opcode suffix for set/cmov insn.
15339 c -- like C, but print reversed condition
15340 F,f -- likewise, but for floating-point.
15341 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15342 otherwise nothing
15343 R -- print embedded rounding and sae.
15344 r -- print only sae.
15345 z -- print the opcode suffix for the size of the current operand.
15346 Z -- likewise, with special suffixes for x87 instructions.
15347 * -- print a star (in certain assembler syntax)
15348 A -- print an absolute memory reference.
15349 E -- print address with DImode register names if TARGET_64BIT.
15350 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15351 s -- print a shift double count, followed by the assembler's argument
15352 delimiter.
15353 b -- print the QImode name of the register for the indicated operand.
15354 %b0 would print %al if operands[0] is reg 0.
15355 w -- likewise, print the HImode name of the register.
15356 k -- likewise, print the SImode name of the register.
15357 q -- likewise, print the DImode name of the register.
15358 x -- likewise, print the V4SFmode name of the register.
15359 t -- likewise, print the V8SFmode name of the register.
15360 g -- likewise, print the V16SFmode name of the register.
15361 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15362 y -- print "st(0)" instead of "st" as a register.
15363 d -- print duplicated register operand for AVX instruction.
15364 D -- print condition for SSE cmp instruction.
15365 P -- if PIC, print an @PLT suffix.
15366 p -- print raw symbol name.
15367 X -- don't print any sort of PIC '@' suffix for a symbol.
15368 & -- print some in-use local-dynamic symbol name.
15369 H -- print a memory address offset by 8; used for sse high-parts
15370 Y -- print condition for XOP pcom* instruction.
15371 + -- print a branch hint as 'cs' or 'ds' prefix
15372 ; -- print a semicolon (after prefixes due to bug in older gas).
15373 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15374 @ -- print a segment register of thread base pointer load
15375 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15376 ! -- print MPX prefix for jxx/call/ret instructions if required.
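/* Usage sketch (hypothetical template, for illustration only): in an output
   template like "add%z0\t{%1, %0|%0, %1}" the 'z' code expands to the size
   suffix of operand 0 ("b", "w", "l" or "q") and the {att|intel} alternatives
   select the operand order for the current assembler dialect.  */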
15379 void
15380 ix86_print_operand (FILE *file, rtx x, int code)
15382 if (code)
15384 switch (code)
15386 case 'A':
15387 switch (ASSEMBLER_DIALECT)
15389 case ASM_ATT:
15390 putc ('*', file);
15391 break;
15393 case ASM_INTEL:
15394 /* Intel syntax. For absolute addresses, registers should not
15395 be surrounded by brackets. */
15396 if (!REG_P (x))
15398 putc ('[', file);
15399 ix86_print_operand (file, x, 0);
15400 putc (']', file);
15401 return;
15403 break;
15405 default:
15406 gcc_unreachable ();
15409 ix86_print_operand (file, x, 0);
15410 return;
15412 case 'E':
15413 /* Wrap address in an UNSPEC to declare special handling. */
15414 if (TARGET_64BIT)
15415 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15417 output_address (x);
15418 return;
15420 case 'L':
15421 if (ASSEMBLER_DIALECT == ASM_ATT)
15422 putc ('l', file);
15423 return;
15425 case 'W':
15426 if (ASSEMBLER_DIALECT == ASM_ATT)
15427 putc ('w', file);
15428 return;
15430 case 'B':
15431 if (ASSEMBLER_DIALECT == ASM_ATT)
15432 putc ('b', file);
15433 return;
15435 case 'Q':
15436 if (ASSEMBLER_DIALECT == ASM_ATT)
15437 putc ('l', file);
15438 return;
15440 case 'S':
15441 if (ASSEMBLER_DIALECT == ASM_ATT)
15442 putc ('s', file);
15443 return;
15445 case 'T':
15446 if (ASSEMBLER_DIALECT == ASM_ATT)
15447 putc ('t', file);
15448 return;
15450 case 'O':
15451 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15452 if (ASSEMBLER_DIALECT != ASM_ATT)
15453 return;
15455 switch (GET_MODE_SIZE (GET_MODE (x)))
15457 case 2:
15458 putc ('w', file);
15459 break;
15461 case 4:
15462 putc ('l', file);
15463 break;
15465 case 8:
15466 putc ('q', file);
15467 break;
15469 default:
15470 output_operand_lossage
15471 ("invalid operand size for operand code 'O'");
15472 return;
15475 putc ('.', file);
15476 #endif
15477 return;
15479 case 'z':
15480 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15482 /* Opcodes don't get size suffixes if using Intel opcodes. */
15483 if (ASSEMBLER_DIALECT == ASM_INTEL)
15484 return;
15486 switch (GET_MODE_SIZE (GET_MODE (x)))
15488 case 1:
15489 putc ('b', file);
15490 return;
15492 case 2:
15493 putc ('w', file);
15494 return;
15496 case 4:
15497 putc ('l', file);
15498 return;
15500 case 8:
15501 putc ('q', file);
15502 return;
15504 default:
15505 output_operand_lossage
15506 ("invalid operand size for operand code 'z'");
15507 return;
15511 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15512 warning
15513 (0, "non-integer operand used with operand code 'z'");
15514 /* FALLTHRU */
15516 case 'Z':
15517 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15518 if (ASSEMBLER_DIALECT == ASM_INTEL)
15519 return;
15521 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15523 switch (GET_MODE_SIZE (GET_MODE (x)))
15525 case 2:
15526 #ifdef HAVE_AS_IX86_FILDS
15527 putc ('s', file);
15528 #endif
15529 return;
15531 case 4:
15532 putc ('l', file);
15533 return;
15535 case 8:
15536 #ifdef HAVE_AS_IX86_FILDQ
15537 putc ('q', file);
15538 #else
15539 fputs ("ll", file);
15540 #endif
15541 return;
15543 default:
15544 break;
15547 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15549 /* 387 opcodes don't get size suffixes
15550 if the operands are registers. */
15551 if (STACK_REG_P (x))
15552 return;
15554 switch (GET_MODE_SIZE (GET_MODE (x)))
15556 case 4:
15557 putc ('s', file);
15558 return;
15560 case 8:
15561 putc ('l', file);
15562 return;
15564 case 12:
15565 case 16:
15566 putc ('t', file);
15567 return;
15569 default:
15570 break;
15573 else
15575 output_operand_lossage
15576 ("invalid operand type used with operand code 'Z'");
15577 return;
15580 output_operand_lossage
15581 ("invalid operand size for operand code 'Z'");
15582 return;
15584 case 'd':
15585 case 'b':
15586 case 'w':
15587 case 'k':
15588 case 'q':
15589 case 'h':
15590 case 't':
15591 case 'g':
15592 case 'y':
15593 case 'x':
15594 case 'X':
15595 case 'P':
15596 case 'p':
15597 break;
15599 case 's':
15600 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15602 ix86_print_operand (file, x, 0);
15603 fputs (", ", file);
15605 return;
15607 case 'Y':
15608 switch (GET_CODE (x))
15610 case NE:
15611 fputs ("neq", file);
15612 break;
15613 case EQ:
15614 fputs ("eq", file);
15615 break;
15616 case GE:
15617 case GEU:
15618 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15619 break;
15620 case GT:
15621 case GTU:
15622 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15623 break;
15624 case LE:
15625 case LEU:
15626 fputs ("le", file);
15627 break;
15628 case LT:
15629 case LTU:
15630 fputs ("lt", file);
15631 break;
15632 case UNORDERED:
15633 fputs ("unord", file);
15634 break;
15635 case ORDERED:
15636 fputs ("ord", file);
15637 break;
15638 case UNEQ:
15639 fputs ("ueq", file);
15640 break;
15641 case UNGE:
15642 fputs ("nlt", file);
15643 break;
15644 case UNGT:
15645 fputs ("nle", file);
15646 break;
15647 case UNLE:
15648 fputs ("ule", file);
15649 break;
15650 case UNLT:
15651 fputs ("ult", file);
15652 break;
15653 case LTGT:
15654 fputs ("une", file);
15655 break;
15656 default:
15657 output_operand_lossage ("operand is not a condition code, "
15658 "invalid operand code 'Y'");
15659 return;
15661 return;
15663 case 'D':
15664 /* Little bit of braindamage here. The SSE compare instructions
15665 use completely different names for the comparisons than the
15666 fp conditional moves do. */
15667 switch (GET_CODE (x))
15669 case UNEQ:
15670 if (TARGET_AVX)
15672 fputs ("eq_us", file);
15673 break;
15675 case EQ:
15676 fputs ("eq", file);
15677 break;
15678 case UNLT:
15679 if (TARGET_AVX)
15681 fputs ("nge", file);
15682 break;
15684 case LT:
15685 fputs ("lt", file);
15686 break;
15687 case UNLE:
15688 if (TARGET_AVX)
15690 fputs ("ngt", file);
15691 break;
15693 case LE:
15694 fputs ("le", file);
15695 break;
15696 case UNORDERED:
15697 fputs ("unord", file);
15698 break;
15699 case LTGT:
15700 if (TARGET_AVX)
15702 fputs ("neq_oq", file);
15703 break;
15705 case NE:
15706 fputs ("neq", file);
15707 break;
15708 case GE:
15709 if (TARGET_AVX)
15711 fputs ("ge", file);
15712 break;
15714 case UNGE:
15715 fputs ("nlt", file);
15716 break;
15717 case GT:
15718 if (TARGET_AVX)
15720 fputs ("gt", file);
15721 break;
15723 case UNGT:
15724 fputs ("nle", file);
15725 break;
15726 case ORDERED:
15727 fputs ("ord", file);
15728 break;
15729 default:
15730 output_operand_lossage ("operand is not a condition code, "
15731 "invalid operand code 'D'");
15732 return;
15734 return;
15736 case 'F':
15737 case 'f':
15738 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15739 if (ASSEMBLER_DIALECT == ASM_ATT)
15740 putc ('.', file);
15741 #endif
15743 case 'C':
15744 case 'c':
15745 if (!COMPARISON_P (x))
15747 output_operand_lossage ("operand is not a condition code, "
15748 "invalid operand code '%c'", code);
15749 return;
15751 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15752 code == 'c' || code == 'f',
15753 code == 'F' || code == 'f',
15754 file);
15755 return;
15757 case 'H':
15758 if (!offsettable_memref_p (x))
15760 output_operand_lossage ("operand is not an offsettable memory "
15761 "reference, invalid operand code 'H'");
15762 return;
15764 /* It doesn't actually matter what mode we use here, as we're
15765 only going to use this for printing. */
15766 x = adjust_address_nv (x, DImode, 8);
15767 /* Output 'qword ptr' for intel assembler dialect. */
15768 if (ASSEMBLER_DIALECT == ASM_INTEL)
15769 code = 'q';
15770 break;
15772 case 'K':
15773 gcc_assert (CONST_INT_P (x));
15775 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15776 #ifdef HAVE_AS_IX86_HLE
15777 fputs ("xacquire ", file);
15778 #else
15779 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15780 #endif
15781 else if (INTVAL (x) & IX86_HLE_RELEASE)
15782 #ifdef HAVE_AS_IX86_HLE
15783 fputs ("xrelease ", file);
15784 #else
15785 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15786 #endif
15787 /* We do not want to print the value of the operand. */
15788 return;
15790 case 'N':
15791 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15792 fputs ("{z}", file);
15793 return;
15795 case 'r':
15796 gcc_assert (CONST_INT_P (x));
15797 gcc_assert (INTVAL (x) == ROUND_SAE);
15799 if (ASSEMBLER_DIALECT == ASM_INTEL)
15800 fputs (", ", file);
15802 fputs ("{sae}", file);
15804 if (ASSEMBLER_DIALECT == ASM_ATT)
15805 fputs (", ", file);
15807 return;
15809 case 'R':
15810 gcc_assert (CONST_INT_P (x));
15812 if (ASSEMBLER_DIALECT == ASM_INTEL)
15813 fputs (", ", file);
15815 switch (INTVAL (x))
15817 case ROUND_NEAREST_INT | ROUND_SAE:
15818 fputs ("{rn-sae}", file);
15819 break;
15820 case ROUND_NEG_INF | ROUND_SAE:
15821 fputs ("{rd-sae}", file);
15822 break;
15823 case ROUND_POS_INF | ROUND_SAE:
15824 fputs ("{ru-sae}", file);
15825 break;
15826 case ROUND_ZERO | ROUND_SAE:
15827 fputs ("{rz-sae}", file);
15828 break;
15829 default:
15830 gcc_unreachable ();
15833 if (ASSEMBLER_DIALECT == ASM_ATT)
15834 fputs (", ", file);
15836 return;
15838 case '*':
15839 if (ASSEMBLER_DIALECT == ASM_ATT)
15840 putc ('*', file);
15841 return;
15843 case '&':
15845 const char *name = get_some_local_dynamic_name ();
15846 if (name == NULL)
15847 output_operand_lossage ("'%%&' used without any "
15848 "local dynamic TLS references");
15849 else
15850 assemble_name (file, name);
15851 return;
15854 case '+':
15856 rtx x;
15858 if (!optimize
15859 || optimize_function_for_size_p (cfun)
15860 || !TARGET_BRANCH_PREDICTION_HINTS)
15861 return;
15863 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15864 if (x)
15866 int pred_val = XINT (x, 0);
15868 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15869 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15871 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15872 bool cputaken
15873 = final_forward_branch_p (current_output_insn) == 0;
15875 /* Emit hints only in the case where default branch prediction
15876 heuristics would fail. */
15877 if (taken != cputaken)
15879 /* We use 3e (DS) prefix for taken branches and
15880 2e (CS) prefix for not taken branches. */
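/* Illustration: a forward branch the compiler predicts as taken, which the
   default forward-not-taken heuristic would get wrong, is printed as
   "ds ; jne .L2", while the cs prefix marks the opposite case.  */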
15881 if (taken)
15882 fputs ("ds ; ", file);
15883 else
15884 fputs ("cs ; ", file);
15888 return;
15891 case ';':
15892 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15893 putc (';', file);
15894 #endif
15895 return;
15897 case '@':
15898 if (ASSEMBLER_DIALECT == ASM_ATT)
15899 putc ('%', file);
15901 /* The kernel uses a different segment register for performance
15902 reasons; a system call would not have to trash the userspace
15903 segment register, which would be expensive. */
15904 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15905 fputs ("fs", file);
15906 else
15907 fputs ("gs", file);
15908 return;
15910 case '~':
15911 putc (TARGET_AVX2 ? 'i' : 'f', file);
15912 return;
15914 case '^':
15915 if (TARGET_64BIT && Pmode != word_mode)
15916 fputs ("addr32 ", file);
15917 return;
15919 case '!':
15920 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15921 fputs ("bnd ", file);
15922 return;
15924 default:
15925 output_operand_lossage ("invalid operand code '%c'", code);
15929 if (REG_P (x))
15930 print_reg (x, code, file);
15932 else if (MEM_P (x))
15934 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15935 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15936 && GET_MODE (x) != BLKmode)
15938 const char * size;
15939 switch (GET_MODE_SIZE (GET_MODE (x)))
15941 case 1: size = "BYTE"; break;
15942 case 2: size = "WORD"; break;
15943 case 4: size = "DWORD"; break;
15944 case 8: size = "QWORD"; break;
15945 case 12: size = "TBYTE"; break;
15946 case 16:
15947 if (GET_MODE (x) == XFmode)
15948 size = "TBYTE";
15949 else
15950 size = "XMMWORD";
15951 break;
15952 case 32: size = "YMMWORD"; break;
15953 case 64: size = "ZMMWORD"; break;
15954 default:
15955 gcc_unreachable ();
15958 /* Check for explicit size override (codes 'b', 'w', 'k',
15959 'q' and 'x') */
15960 if (code == 'b')
15961 size = "BYTE";
15962 else if (code == 'w')
15963 size = "WORD";
15964 else if (code == 'k')
15965 size = "DWORD";
15966 else if (code == 'q')
15967 size = "QWORD";
15968 else if (code == 'x')
15969 size = "XMMWORD";
15971 fputs (size, file);
15972 fputs (" PTR ", file);
15975 x = XEXP (x, 0);
15976 /* Avoid (%rip) for call operands. */
15977 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15978 && !CONST_INT_P (x))
15979 output_addr_const (file, x);
15980 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15981 output_operand_lossage ("invalid constraints for operand");
15982 else
15983 output_address (x);
15986 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15988 REAL_VALUE_TYPE r;
15989 long l;
15991 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15992 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15994 if (ASSEMBLER_DIALECT == ASM_ATT)
15995 putc ('$', file);
15996 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15997 if (code == 'q')
15998 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15999 (unsigned long long) (int) l);
16000 else
16001 fprintf (file, "0x%08x", (unsigned int) l);
16004 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
16006 REAL_VALUE_TYPE r;
16007 long l[2];
16009 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16010 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16012 if (ASSEMBLER_DIALECT == ASM_ATT)
16013 putc ('$', file);
16014 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16017 /* These float cases don't actually occur as immediate operands. */
16018 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16020 char dstr[30];
16022 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16023 fputs (dstr, file);
16026 else
16028 /* We have patterns that allow zero sets of memory, for instance.
16029 In 64-bit mode, we should probably support all 8-byte vectors,
16030 since we can in fact encode that into an immediate. */
16031 if (GET_CODE (x) == CONST_VECTOR)
16033 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16034 x = const0_rtx;
16037 if (code != 'P' && code != 'p')
16039 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16041 if (ASSEMBLER_DIALECT == ASM_ATT)
16042 putc ('$', file);
16044 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16045 || GET_CODE (x) == LABEL_REF)
16047 if (ASSEMBLER_DIALECT == ASM_ATT)
16048 putc ('$', file);
16049 else
16050 fputs ("OFFSET FLAT:", file);
16053 if (CONST_INT_P (x))
16054 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16055 else if (flag_pic || MACHOPIC_INDIRECT)
16056 output_pic_addr_const (file, x, code);
16057 else
16058 output_addr_const (file, x);
16062 static bool
16063 ix86_print_operand_punct_valid_p (unsigned char code)
16065 return (code == '@' || code == '*' || code == '+' || code == '&'
16066 || code == ';' || code == '~' || code == '^' || code == '!');
16069 /* Print a memory operand whose address is ADDR. */
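/* Illustration: an address with base %rbp, index %rax, scale 4 and
   displacement -4 prints as "-4(%rbp,%rax,4)" in AT&T syntax and as
   "[rbp-4+rax*4]" in Intel syntax.  */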
16071 static void
16072 ix86_print_operand_address (FILE *file, rtx addr)
16074 struct ix86_address parts;
16075 rtx base, index, disp;
16076 int scale;
16077 int ok;
16078 bool vsib = false;
16079 int code = 0;
16081 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16083 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16084 gcc_assert (parts.index == NULL_RTX);
16085 parts.index = XVECEXP (addr, 0, 1);
16086 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16087 addr = XVECEXP (addr, 0, 0);
16088 vsib = true;
16090 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16092 gcc_assert (TARGET_64BIT);
16093 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16094 code = 'q';
16096 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16098 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16099 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16100 if (parts.base != NULL_RTX)
16102 parts.index = parts.base;
16103 parts.scale = 1;
16105 parts.base = XVECEXP (addr, 0, 0);
16106 addr = XVECEXP (addr, 0, 0);
16108 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16110 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16111 gcc_assert (parts.index == NULL_RTX);
16112 parts.index = XVECEXP (addr, 0, 1);
16113 addr = XVECEXP (addr, 0, 0);
16115 else
16116 ok = ix86_decompose_address (addr, &parts);
16118 gcc_assert (ok);
16120 base = parts.base;
16121 index = parts.index;
16122 disp = parts.disp;
16123 scale = parts.scale;
16125 switch (parts.seg)
16127 case SEG_DEFAULT:
16128 break;
16129 case SEG_FS:
16130 case SEG_GS:
16131 if (ASSEMBLER_DIALECT == ASM_ATT)
16132 putc ('%', file);
16133 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16134 break;
16135 default:
16136 gcc_unreachable ();
16139 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
16140 if (TARGET_64BIT && !base && !index)
16142 rtx symbol = disp;
16144 if (GET_CODE (disp) == CONST
16145 && GET_CODE (XEXP (disp, 0)) == PLUS
16146 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16147 symbol = XEXP (XEXP (disp, 0), 0);
16149 if (GET_CODE (symbol) == LABEL_REF
16150 || (GET_CODE (symbol) == SYMBOL_REF
16151 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16152 base = pc_rtx;
16154 if (!base && !index)
16156 /* A displacement-only address requires special attention. */
16158 if (CONST_INT_P (disp))
16160 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16161 fputs ("ds:", file);
16162 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16164 else if (flag_pic)
16165 output_pic_addr_const (file, disp, 0);
16166 else
16167 output_addr_const (file, disp);
16169 else
16171 /* Print SImode register names to force addr32 prefix. */
16172 if (SImode_address_operand (addr, VOIDmode))
16174 #ifdef ENABLE_CHECKING
16175 gcc_assert (TARGET_64BIT);
16176 switch (GET_CODE (addr))
16178 case SUBREG:
16179 gcc_assert (GET_MODE (addr) == SImode);
16180 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16181 break;
16182 case ZERO_EXTEND:
16183 case AND:
16184 gcc_assert (GET_MODE (addr) == DImode);
16185 break;
16186 default:
16187 gcc_unreachable ();
16189 #endif
16190 gcc_assert (!code);
16191 code = 'k';
16193 else if (code == 0
16194 && TARGET_X32
16195 && disp
16196 && CONST_INT_P (disp)
16197 && INTVAL (disp) < -16*1024*1024)
16199 /* X32 runs in 64-bit mode, where displacement, DISP, in
16200 address DISP(%r64), is encoded as 32-bit immediate sign-
16201 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16202 address is %r64 + 0xffffffffbffffd00. When %r64 <
16203 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16204 which is invalid for x32. The correct address is %r64
16205 - 0x40000300 == 0xf7ffdd64. To properly encode
16206 -0x40000300(%r64) for x32, we zero-extend negative
16207 displacement by forcing addr32 prefix which truncates
16208 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16209 zero-extend all negative displacements, including -1(%rsp).
16210 However, for small negative displacements, sign-extension
16211 won't cause overflow. We only zero-extend negative
16212 displacements if they are < -16*1024*1024, which is also used
16213 to check legitimate address displacements for PIC. */
16214 code = 'k';
16217 if (ASSEMBLER_DIALECT == ASM_ATT)
16219 if (disp)
16221 if (flag_pic)
16222 output_pic_addr_const (file, disp, 0);
16223 else if (GET_CODE (disp) == LABEL_REF)
16224 output_asm_label (disp);
16225 else
16226 output_addr_const (file, disp);
16229 putc ('(', file);
16230 if (base)
16231 print_reg (base, code, file);
16232 if (index)
16234 putc (',', file);
16235 print_reg (index, vsib ? 0 : code, file);
16236 if (scale != 1 || vsib)
16237 fprintf (file, ",%d", scale);
16239 putc (')', file);
16241 else
16243 rtx offset = NULL_RTX;
16245 if (disp)
16247 /* Pull out the offset of a symbol; print any symbol itself. */
16248 if (GET_CODE (disp) == CONST
16249 && GET_CODE (XEXP (disp, 0)) == PLUS
16250 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16252 offset = XEXP (XEXP (disp, 0), 1);
16253 disp = gen_rtx_CONST (VOIDmode,
16254 XEXP (XEXP (disp, 0), 0));
16257 if (flag_pic)
16258 output_pic_addr_const (file, disp, 0);
16259 else if (GET_CODE (disp) == LABEL_REF)
16260 output_asm_label (disp);
16261 else if (CONST_INT_P (disp))
16262 offset = disp;
16263 else
16264 output_addr_const (file, disp);
16267 putc ('[', file);
16268 if (base)
16270 print_reg (base, code, file);
16271 if (offset)
16273 if (INTVAL (offset) >= 0)
16274 putc ('+', file);
16275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16278 else if (offset)
16279 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16280 else
16281 putc ('0', file);
16283 if (index)
16285 putc ('+', file);
16286 print_reg (index, vsib ? 0 : code, file);
16287 if (scale != 1 || vsib)
16288 fprintf (file, "*%d", scale);
16290 putc (']', file);
16295 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16297 static bool
16298 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16300 rtx op;
16302 if (GET_CODE (x) != UNSPEC)
16303 return false;
16305 op = XVECEXP (x, 0, 0);
16306 switch (XINT (x, 1))
16308 case UNSPEC_GOTTPOFF:
16309 output_addr_const (file, op);
16310 /* FIXME: This might be @TPOFF in Sun ld. */
16311 fputs ("@gottpoff", file);
16312 break;
16313 case UNSPEC_TPOFF:
16314 output_addr_const (file, op);
16315 fputs ("@tpoff", file);
16316 break;
16317 case UNSPEC_NTPOFF:
16318 output_addr_const (file, op);
16319 if (TARGET_64BIT)
16320 fputs ("@tpoff", file);
16321 else
16322 fputs ("@ntpoff", file);
16323 break;
16324 case UNSPEC_DTPOFF:
16325 output_addr_const (file, op);
16326 fputs ("@dtpoff", file);
16327 break;
16328 case UNSPEC_GOTNTPOFF:
16329 output_addr_const (file, op);
16330 if (TARGET_64BIT)
16331 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16332 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16333 else
16334 fputs ("@gotntpoff", file);
16335 break;
16336 case UNSPEC_INDNTPOFF:
16337 output_addr_const (file, op);
16338 fputs ("@indntpoff", file);
16339 break;
16340 #if TARGET_MACHO
16341 case UNSPEC_MACHOPIC_OFFSET:
16342 output_addr_const (file, op);
16343 putc ('-', file);
16344 machopic_output_function_base_name (file);
16345 break;
16346 #endif
16348 case UNSPEC_STACK_CHECK:
16350 int offset;
16352 gcc_assert (flag_split_stack);
16354 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16355 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16356 #else
16357 gcc_unreachable ();
16358 #endif
16360 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16362 break;
16364 default:
16365 return false;
16368 return true;
16371 /* Split one or more double-mode RTL references into pairs of half-mode
16372 references. The RTL can be REG, offsettable MEM, integer constant, or
16373 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16374 split and "num" is its length. lo_half and hi_half are output arrays
16375 that parallel "operands". */
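/* Usage sketch (hypothetical local names): to split a single DImode operand,
       rtx lo[1], hi[1], ops[1] = { operands[0] };
       split_double_mode (DImode, ops, 1, lo, hi);
   leaves the low SImode word in lo[0] and the word at byte offset 4 in hi[0];
   offsettable MEMs are split with adjust_address rather than subregs.  */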
16377 void
16378 split_double_mode (machine_mode mode, rtx operands[],
16379 int num, rtx lo_half[], rtx hi_half[])
16381 machine_mode half_mode;
16382 unsigned int byte;
16384 switch (mode)
16386 case TImode:
16387 half_mode = DImode;
16388 break;
16389 case DImode:
16390 half_mode = SImode;
16391 break;
16392 default:
16393 gcc_unreachable ();
16396 byte = GET_MODE_SIZE (half_mode);
16398 while (num--)
16400 rtx op = operands[num];
16402 /* simplify_subreg refuses to split volatile memory addresses,
16403 but we still have to handle them. */
16404 if (MEM_P (op))
16406 lo_half[num] = adjust_address (op, half_mode, 0);
16407 hi_half[num] = adjust_address (op, half_mode, byte);
16409 else
16411 lo_half[num] = simplify_gen_subreg (half_mode, op,
16412 GET_MODE (op) == VOIDmode
16413 ? mode : GET_MODE (op), 0);
16414 hi_half[num] = simplify_gen_subreg (half_mode, op,
16415 GET_MODE (op) == VOIDmode
16416 ? mode : GET_MODE (op), byte);
16421 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16422 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16423 is the expression of the binary operation. The output may either be
16424 emitted here, or returned to the caller, like all output_* functions.
16426 There is no guarantee that the operands are the same mode, as they
16427 might be within FLOAT or FLOAT_EXTEND expressions. */
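/* For example, an SFmode PLUS on SSE registers under AVX yields the template
   "vaddss\t{%2, %1, %0|%0, %1, %2}", while the x87 case st(0) = st(0) + st(r2)
   yields "fadd\t{%y2, %0|%0, %y2}".  */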
16429 #ifndef SYSV386_COMPAT
16430 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16431 wants to fix the assemblers because that causes incompatibility
16432 with gcc. No-one wants to fix gcc because that causes
16433 incompatibility with assemblers... You can use the option of
16434 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16435 #define SYSV386_COMPAT 1
16436 #endif
16438 const char *
16439 output_387_binary_op (rtx insn, rtx *operands)
16441 static char buf[40];
16442 const char *p;
16443 const char *ssep;
16444 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16446 #ifdef ENABLE_CHECKING
16447	  /* Even if we do not want to check the inputs, this documents the input
16448	     constraints, which helps in understanding the following code.  */
16449 if (STACK_REG_P (operands[0])
16450 && ((REG_P (operands[1])
16451 && REGNO (operands[0]) == REGNO (operands[1])
16452 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16453 || (REG_P (operands[2])
16454 && REGNO (operands[0]) == REGNO (operands[2])
16455 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16456 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16457 ; /* ok */
16458 else
16459 gcc_assert (is_sse);
16460 #endif
16462 switch (GET_CODE (operands[3]))
16464 case PLUS:
16465 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16466 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16467 p = "fiadd";
16468 else
16469 p = "fadd";
16470 ssep = "vadd";
16471 break;
16473 case MINUS:
16474 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16475 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16476 p = "fisub";
16477 else
16478 p = "fsub";
16479 ssep = "vsub";
16480 break;
16482 case MULT:
16483 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16484 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16485 p = "fimul";
16486 else
16487 p = "fmul";
16488 ssep = "vmul";
16489 break;
16491 case DIV:
16492 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16493 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16494 p = "fidiv";
16495 else
16496 p = "fdiv";
16497 ssep = "vdiv";
16498 break;
16500 default:
16501 gcc_unreachable ();
16504 if (is_sse)
16506 if (TARGET_AVX)
16508 strcpy (buf, ssep);
16509 if (GET_MODE (operands[0]) == SFmode)
16510 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16511 else
16512 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16514 else
16516 strcpy (buf, ssep + 1);
16517 if (GET_MODE (operands[0]) == SFmode)
16518 strcat (buf, "ss\t{%2, %0|%0, %2}");
16519 else
16520 strcat (buf, "sd\t{%2, %0|%0, %2}");
16522 return buf;
16524 strcpy (buf, p);
16526 switch (GET_CODE (operands[3]))
16528 case MULT:
16529 case PLUS:
16530 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16531 std::swap (operands[1], operands[2]);
16533	      /* We now know operands[0] == operands[1].  */
16535 if (MEM_P (operands[2]))
16537 p = "%Z2\t%2";
16538 break;
16541 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16543 if (STACK_TOP_P (operands[0]))
16544 /* How is it that we are storing to a dead operand[2]?
16545 Well, presumably operands[1] is dead too. We can't
16546 store the result to st(0) as st(0) gets popped on this
16547 instruction. Instead store to operands[2] (which I
16548 think has to be st(1)). st(1) will be popped later.
16549 gcc <= 2.8.1 didn't have this check and generated
16550 assembly code that the Unixware assembler rejected. */
16551 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16552 else
16553 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16554 break;
16557 if (STACK_TOP_P (operands[0]))
16558 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16559 else
16560 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16561 break;
16563 case MINUS:
16564 case DIV:
16565 if (MEM_P (operands[1]))
16567 p = "r%Z1\t%1";
16568 break;
16571 if (MEM_P (operands[2]))
16573 p = "%Z2\t%2";
16574 break;
16577 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16579 #if SYSV386_COMPAT
16580 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16581 derived assemblers, confusingly reverse the direction of
16582 the operation for fsub{r} and fdiv{r} when the
16583 destination register is not st(0). The Intel assembler
16584 doesn't have this brain damage. Read !SYSV386_COMPAT to
16585 figure out what the hardware really does. */
16586 if (STACK_TOP_P (operands[0]))
16587 p = "{p\t%0, %2|rp\t%2, %0}";
16588 else
16589 p = "{rp\t%2, %0|p\t%0, %2}";
16590 #else
16591 if (STACK_TOP_P (operands[0]))
16592 /* As above for fmul/fadd, we can't store to st(0). */
16593 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16594 else
16595 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16596 #endif
16597 break;
16600 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16602 #if SYSV386_COMPAT
16603 if (STACK_TOP_P (operands[0]))
16604 p = "{rp\t%0, %1|p\t%1, %0}";
16605 else
16606 p = "{p\t%1, %0|rp\t%0, %1}";
16607 #else
16608 if (STACK_TOP_P (operands[0]))
16609 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16610 else
16611 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16612 #endif
16613 break;
16616 if (STACK_TOP_P (operands[0]))
16618 if (STACK_TOP_P (operands[1]))
16619 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16620 else
16621 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16622 break;
16624 else if (STACK_TOP_P (operands[1]))
16626 #if SYSV386_COMPAT
16627 p = "{\t%1, %0|r\t%0, %1}";
16628 #else
16629 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16630 #endif
16632 else
16634 #if SYSV386_COMPAT
16635 p = "{r\t%2, %0|\t%0, %2}";
16636 #else
16637 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16638 #endif
16640 break;
16642 default:
16643 gcc_unreachable ();
16646 strcat (buf, p);
16647 return buf;
16650 /* Check if a 256bit AVX register is referenced inside of EXP. */
16652 static bool
16653 ix86_check_avx256_register (const_rtx exp)
16655 if (GET_CODE (exp) == SUBREG)
16656 exp = SUBREG_REG (exp);
16658 return (REG_P (exp)
16659 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
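/* For the AVX_U128 entity: AVX_U128_CLEAN means the upper 128 bits of the
   ymm registers are known to be zeroed (e.g. after vzeroupper), while
   AVX_U128_DIRTY means some upper half may hold live data.  */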
16662 /* Return needed mode for entity in optimize_mode_switching pass. */
16664 static int
16665 ix86_avx_u128_mode_needed (rtx_insn *insn)
16667 if (CALL_P (insn))
16669 rtx link;
16671 /* Needed mode is set to AVX_U128_CLEAN if there are
16672 no 256bit modes used in function arguments. */
16673 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16674 link;
16675 link = XEXP (link, 1))
16677 if (GET_CODE (XEXP (link, 0)) == USE)
16679 rtx arg = XEXP (XEXP (link, 0), 0);
16681 if (ix86_check_avx256_register (arg))
16682 return AVX_U128_DIRTY;
16686 return AVX_U128_CLEAN;
16689	  /* Require DIRTY mode if a 256bit AVX register is referenced.  The hardware
16690	     changes state only when a 256bit register is written to, but we need
16691	     to prevent the compiler from moving the optimal insertion point above
16692	     an eventual read from a 256bit register.  */
16693 subrtx_iterator::array_type array;
16694 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16695 if (ix86_check_avx256_register (*iter))
16696 return AVX_U128_DIRTY;
16698 return AVX_U128_ANY;
16701 /* Return mode that i387 must be switched into
16702 prior to the execution of insn. */
16704 static int
16705 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16707 enum attr_i387_cw mode;
16709	  /* The mode UNINITIALIZED is used to store the control word after a
16710	     function call or ASM pattern.  The mode ANY specifies that the function
16711	     has no requirements on the control word and makes no changes in the
16712	     bits we are interested in.  */
16714 if (CALL_P (insn)
16715 || (NONJUMP_INSN_P (insn)
16716 && (asm_noperands (PATTERN (insn)) >= 0
16717 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16718 return I387_CW_UNINITIALIZED;
16720 if (recog_memoized (insn) < 0)
16721 return I387_CW_ANY;
16723 mode = get_attr_i387_cw (insn);
16725 switch (entity)
16727 case I387_TRUNC:
16728 if (mode == I387_CW_TRUNC)
16729 return mode;
16730 break;
16732 case I387_FLOOR:
16733 if (mode == I387_CW_FLOOR)
16734 return mode;
16735 break;
16737 case I387_CEIL:
16738 if (mode == I387_CW_CEIL)
16739 return mode;
16740 break;
16742 case I387_MASK_PM:
16743 if (mode == I387_CW_MASK_PM)
16744 return mode;
16745 break;
16747 default:
16748 gcc_unreachable ();
16751 return I387_CW_ANY;
16754 /* Return mode that entity must be switched into
16755 prior to the execution of insn. */
16757 static int
16758 ix86_mode_needed (int entity, rtx_insn *insn)
16760 switch (entity)
16762 case AVX_U128:
16763 return ix86_avx_u128_mode_needed (insn);
16764 case I387_TRUNC:
16765 case I387_FLOOR:
16766 case I387_CEIL:
16767 case I387_MASK_PM:
16768 return ix86_i387_mode_needed (entity, insn);
16769 default:
16770 gcc_unreachable ();
16772 return 0;
16775 /* Check if a 256bit AVX register is referenced in stores. */
16777 static void
16778 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16780 if (ix86_check_avx256_register (dest))
16782 bool *used = (bool *) data;
16783 *used = true;
16787 /* Calculate mode of upper 128bit AVX registers after the insn. */
16789 static int
16790 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16792 rtx pat = PATTERN (insn);
16794 if (vzeroupper_operation (pat, VOIDmode)
16795 || vzeroall_operation (pat, VOIDmode))
16796 return AVX_U128_CLEAN;
16798	  /* We know that the state is clean after a CALL insn if no 256bit
16799	     register is used in the function return register.  */
16800 if (CALL_P (insn))
16802 bool avx_reg256_found = false;
16803 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16805 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16808 /* Otherwise, return current mode. Remember that if insn
16809 references AVX 256bit registers, the mode was already changed
16810 to DIRTY from MODE_NEEDED. */
16811 return mode;
16814 /* Return the mode that an insn results in. */
16816 static int
16817 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16819 switch (entity)
16821 case AVX_U128:
16822 return ix86_avx_u128_mode_after (mode, insn);
16823 case I387_TRUNC:
16824 case I387_FLOOR:
16825 case I387_CEIL:
16826 case I387_MASK_PM:
16827 return mode;
16828 default:
16829 gcc_unreachable ();
16833 static int
16834 ix86_avx_u128_mode_entry (void)
16836 tree arg;
16838 /* Entry mode is set to AVX_U128_DIRTY if there are
16839 256bit modes used in function arguments. */
16840 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16841 arg = TREE_CHAIN (arg))
16843 rtx incoming = DECL_INCOMING_RTL (arg);
16845 if (incoming && ix86_check_avx256_register (incoming))
16846 return AVX_U128_DIRTY;
16849 return AVX_U128_CLEAN;
16852 /* Return a mode that ENTITY is assumed to be
16853 switched to at function entry. */
16855 static int
16856 ix86_mode_entry (int entity)
16858 switch (entity)
16860 case AVX_U128:
16861 return ix86_avx_u128_mode_entry ();
16862 case I387_TRUNC:
16863 case I387_FLOOR:
16864 case I387_CEIL:
16865 case I387_MASK_PM:
16866 return I387_CW_ANY;
16867 default:
16868 gcc_unreachable ();
16872 static int
16873 ix86_avx_u128_mode_exit (void)
16875 rtx reg = crtl->return_rtx;
16877 /* Exit mode is set to AVX_U128_DIRTY if there are
16878 256bit modes used in the function return register. */
16879 if (reg && ix86_check_avx256_register (reg))
16880 return AVX_U128_DIRTY;
16882 return AVX_U128_CLEAN;
16885 /* Return a mode that ENTITY is assumed to be
16886 switched to at function exit. */
16888 static int
16889 ix86_mode_exit (int entity)
16891 switch (entity)
16893 case AVX_U128:
16894 return ix86_avx_u128_mode_exit ();
16895 case I387_TRUNC:
16896 case I387_FLOOR:
16897 case I387_CEIL:
16898 case I387_MASK_PM:
16899 return I387_CW_ANY;
16900 default:
16901 gcc_unreachable ();
16905 static int
16906 ix86_mode_priority (int, int n)
16908 return n;
16911	/* Output code to initialize the control word copies used by the trunc?f?i
16912	   and rounding patterns.  MODE selects which control word variant to set up;
16913	   the current control word is saved and an adjusted copy is stored in the
	   corresponding stack slot.  */
16915 static void
16916 emit_i387_cw_initialization (int mode)
16918 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16919 rtx new_mode;
16921 enum ix86_stack_slot slot;
16923 rtx reg = gen_reg_rtx (HImode);
16925 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16926 emit_move_insn (reg, copy_rtx (stored_mode));
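/* The x87 rounding-control field is bits 10-11 of the control word (0x0c00):
   00 = to nearest, 01 = toward -inf, 10 = toward +inf, 11 = truncate;
   bit 5 (0x0020) masks the precision exception.  */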
16928 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16929 || optimize_insn_for_size_p ())
16931 switch (mode)
16933 case I387_CW_TRUNC:
16934 /* round toward zero (truncate) */
16935 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16936 slot = SLOT_CW_TRUNC;
16937 break;
16939 case I387_CW_FLOOR:
16940 /* round down toward -oo */
16941 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16942 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16943 slot = SLOT_CW_FLOOR;
16944 break;
16946 case I387_CW_CEIL:
16947 /* round up toward +oo */
16948 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16949 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16950 slot = SLOT_CW_CEIL;
16951 break;
16953 case I387_CW_MASK_PM:
16954 /* mask precision exception for nearbyint() */
16955 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16956 slot = SLOT_CW_MASK_PM;
16957 break;
16959 default:
16960 gcc_unreachable ();
16963 else
16965 switch (mode)
16967 case I387_CW_TRUNC:
16968 /* round toward zero (truncate) */
16969 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16970 slot = SLOT_CW_TRUNC;
16971 break;
16973 case I387_CW_FLOOR:
16974 /* round down toward -oo */
16975 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16976 slot = SLOT_CW_FLOOR;
16977 break;
16979 case I387_CW_CEIL:
16980 /* round up toward +oo */
16981 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16982 slot = SLOT_CW_CEIL;
16983 break;
16985 case I387_CW_MASK_PM:
16986 /* mask precision exception for nearbyint() */
16987 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16988 slot = SLOT_CW_MASK_PM;
16989 break;
16991 default:
16992 gcc_unreachable ();
16996 gcc_assert (slot < MAX_386_STACK_LOCALS);
16998 new_mode = assign_386_stack_local (HImode, slot);
16999 emit_move_insn (new_mode, reg);
17002 /* Emit vzeroupper. */
17004 void
17005 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17007 int i;
17009 /* Cancel automatic vzeroupper insertion if there are
17010 live call-saved SSE registers at the insertion point. */
17012 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17013 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17014 return;
17016 if (TARGET_64BIT)
17017 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17018 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17019 return;
17021 emit_insn (gen_avx_vzeroupper ());
17026	/* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
17027 is the set of hard registers live at the point where the insn(s)
17028 are to be inserted. */
17030 static void
17031 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17032 HARD_REG_SET regs_live)
17034 switch (entity)
17036 case AVX_U128:
17037 if (mode == AVX_U128_CLEAN)
17038 ix86_avx_emit_vzeroupper (regs_live);
17039 break;
17040 case I387_TRUNC:
17041 case I387_FLOOR:
17042 case I387_CEIL:
17043 case I387_MASK_PM:
17044 if (mode != I387_CW_ANY
17045 && mode != I387_CW_UNINITIALIZED)
17046 emit_i387_cw_initialization (mode);
17047 break;
17048 default:
17049 gcc_unreachable ();
17053 /* Output code for INSN to convert a float to a signed int. OPERANDS
17054 are the insn operands. The output may be [HSD]Imode and the input
17055 operand may be [SDX]Fmode. */
17057 const char *
17058 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17060 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17061 int dimode_p = GET_MODE (operands[0]) == DImode;
17062 int round_mode = get_attr_i387_cw (insn);
17064 /* Jump through a hoop or two for DImode, since the hardware has no
17065 non-popping instruction. We used to do this a different way, but
17066 that was somewhat fragile and broke with post-reload splitters. */
17067 if ((dimode_p || fisttp) && !stack_top_dies)
17068 output_asm_insn ("fld\t%y1", operands);
17070 gcc_assert (STACK_TOP_P (operands[1]));
17071 gcc_assert (MEM_P (operands[0]));
17072 gcc_assert (GET_MODE (operands[1]) != TFmode);
17074 if (fisttp)
17075 output_asm_insn ("fisttp%Z0\t%0", operands);
17076 else
17078 if (round_mode != I387_CW_ANY)
17079 output_asm_insn ("fldcw\t%3", operands);
17080 if (stack_top_dies || dimode_p)
17081 output_asm_insn ("fistp%Z0\t%0", operands);
17082 else
17083 output_asm_insn ("fist%Z0\t%0", operands);
17084 if (round_mode != I387_CW_ANY)
17085 output_asm_insn ("fldcw\t%2", operands);
17088 return "";
17091 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17092 have the values zero or one, indicates the ffreep insn's operand
17093 from the OPERANDS array. */
17095 static const char *
17096 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17098 if (TARGET_USE_FFREEP)
17099 #ifdef HAVE_AS_IX86_FFREEP
17100 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17101 #else
17103 static char retval[32];
17104 int regno = REGNO (operands[opno]);
17106 gcc_assert (STACK_REGNO_P (regno));
17108 regno -= FIRST_STACK_REG;
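/* ffreep %st(i) encodes as DF C0+i; emit it as raw bytes via ASM_SHORT,
   since this assembler does not recognize the mnemonic.  */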
17110 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17111 return retval;
17113 #endif
17115 return opno ? "fstp\t%y1" : "fstp\t%y0";
17119 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17120 should be used. UNORDERED_P is true when fucom should be used. */
17122 const char *
17123 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17125 int stack_top_dies;
17126 rtx cmp_op0, cmp_op1;
17127 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17129 if (eflags_p)
17131 cmp_op0 = operands[0];
17132 cmp_op1 = operands[1];
17134 else
17136 cmp_op0 = operands[1];
17137 cmp_op1 = operands[2];
17140 if (is_sse)
17142 if (GET_MODE (operands[0]) == SFmode)
17143 if (unordered_p)
17144 return "%vucomiss\t{%1, %0|%0, %1}";
17145 else
17146 return "%vcomiss\t{%1, %0|%0, %1}";
17147 else
17148 if (unordered_p)
17149 return "%vucomisd\t{%1, %0|%0, %1}";
17150 else
17151 return "%vcomisd\t{%1, %0|%0, %1}";
17154 gcc_assert (STACK_TOP_P (cmp_op0));
17156 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17158 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17160 if (stack_top_dies)
17162 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17163 return output_387_ffreep (operands, 1);
17165 else
17166 return "ftst\n\tfnstsw\t%0";
17169 if (STACK_REG_P (cmp_op1)
17170 && stack_top_dies
17171 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17172 && REGNO (cmp_op1) != FIRST_STACK_REG)
17174	      /* If the top of the 387 stack dies, and the other operand
17175		 is also a stack register that dies, then this must be a
17176		 `fcompp' float compare.  */
17178 if (eflags_p)
17180 /* There is no double popping fcomi variant. Fortunately,
17181 eflags is immune from the fstp's cc clobbering. */
17182 if (unordered_p)
17183 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17184 else
17185 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17186 return output_387_ffreep (operands, 0);
17188 else
17190 if (unordered_p)
17191 return "fucompp\n\tfnstsw\t%0";
17192 else
17193 return "fcompp\n\tfnstsw\t%0";
17196 else
17198 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17200 static const char * const alt[16] =
17202 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17203 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17204 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17205 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17207 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17208 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17209 NULL,
17210 NULL,
17212 "fcomi\t{%y1, %0|%0, %y1}",
17213 "fcomip\t{%y1, %0|%0, %y1}",
17214 "fucomi\t{%y1, %0|%0, %y1}",
17215 "fucomip\t{%y1, %0|%0, %y1}",
17217 NULL,
17218 NULL,
17219 NULL,
17220 NULL
17223 int mask;
17224 const char *ret;
17226 mask = eflags_p << 3;
17227 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17228 mask |= unordered_p << 1;
17229 mask |= stack_top_dies;
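/* For example, eflags_p = 1, an FP (non-integer) operand, unordered_p = 1
   and stack_top_dies = 1 give mask 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}".  */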
17231 gcc_assert (mask < 16);
17232 ret = alt[mask];
17233 gcc_assert (ret);
17235 return ret;
17239 void
17240 ix86_output_addr_vec_elt (FILE *file, int value)
17242 const char *directive = ASM_LONG;
17244 #ifdef ASM_QUAD
17245 if (TARGET_LP64)
17246 directive = ASM_QUAD;
17247 #else
17248 gcc_assert (!TARGET_64BIT);
17249 #endif
17251 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17254 void
17255 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17257 const char *directive = ASM_LONG;
17259 #ifdef ASM_QUAD
17260 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17261 directive = ASM_QUAD;
17262 #else
17263 gcc_assert (!TARGET_64BIT);
17264 #endif
17265 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17266 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17267 fprintf (file, "%s%s%d-%s%d\n",
17268 directive, LPREFIX, value, LPREFIX, rel);
17269 else if (HAVE_AS_GOTOFF_IN_DATA)
17270 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17271 #if TARGET_MACHO
17272 else if (TARGET_MACHO)
17274 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17275 machopic_output_function_base_name (file);
17276 putc ('\n', file);
17278 #endif
17279 else
17280 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17281 GOT_SYMBOL_NAME, LPREFIX, value);
17284 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17285 for the target. */
17287 void
17288 ix86_expand_clear (rtx dest)
17290 rtx tmp;
17292 /* We play register width games, which are only valid after reload. */
17293 gcc_assert (reload_completed);
17295 /* Avoid HImode and its attendant prefix byte. */
17296 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17297 dest = gen_rtx_REG (SImode, REGNO (dest));
17298 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
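/* The xor form clobbers the flags register, so the pattern below attaches
   an explicit clobber when xor is used instead of mov $0.  */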
17300 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17302 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17303 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17306 emit_insn (tmp);
17309 /* X is an unchanging MEM. If it is a constant pool reference, return
17310 the constant pool rtx, else NULL. */
17313 maybe_get_pool_constant (rtx x)
17315 x = ix86_delegitimize_address (XEXP (x, 0));
17317 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17318 return get_pool_constant (x);
17320 return NULL_RTX;
17323 void
17324 ix86_expand_move (machine_mode mode, rtx operands[])
17326 rtx op0, op1;
17327 enum tls_model model;
17329 op0 = operands[0];
17330 op1 = operands[1];
17332 if (GET_CODE (op1) == SYMBOL_REF)
17334 rtx tmp;
17336 model = SYMBOL_REF_TLS_MODEL (op1);
17337 if (model)
17339 op1 = legitimize_tls_address (op1, model, true);
17340 op1 = force_operand (op1, op0);
17341 if (op1 == op0)
17342 return;
17343 op1 = convert_to_mode (mode, op1, 1);
17345 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17346 op1 = tmp;
17348 else if (GET_CODE (op1) == CONST
17349 && GET_CODE (XEXP (op1, 0)) == PLUS
17350 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17352 rtx addend = XEXP (XEXP (op1, 0), 1);
17353 rtx symbol = XEXP (XEXP (op1, 0), 0);
17354 rtx tmp;
17356 model = SYMBOL_REF_TLS_MODEL (symbol);
17357 if (model)
17358 tmp = legitimize_tls_address (symbol, model, true);
17359 else
17360 tmp = legitimize_pe_coff_symbol (symbol, true);
17362 if (tmp)
17364 tmp = force_operand (tmp, NULL);
17365 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17366 op0, 1, OPTAB_DIRECT);
17367 if (tmp == op0)
17368 return;
17369 op1 = convert_to_mode (mode, tmp, 1);
17373 if ((flag_pic || MACHOPIC_INDIRECT)
17374 && symbolic_operand (op1, mode))
17376 if (TARGET_MACHO && !TARGET_64BIT)
17378 #if TARGET_MACHO
17379 /* dynamic-no-pic */
17380 if (MACHOPIC_INDIRECT)
17382 rtx temp = ((reload_in_progress
17383 || ((op0 && REG_P (op0))
17384 && mode == Pmode))
17385 ? op0 : gen_reg_rtx (Pmode));
17386 op1 = machopic_indirect_data_reference (op1, temp);
17387 if (MACHOPIC_PURE)
17388 op1 = machopic_legitimize_pic_address (op1, mode,
17389 temp == op1 ? 0 : temp);
17391 if (op0 != op1 && GET_CODE (op0) != MEM)
17393 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17394 emit_insn (insn);
17395 return;
17397 if (GET_CODE (op0) == MEM)
17398 op1 = force_reg (Pmode, op1);
17399 else
17401 rtx temp = op0;
17402 if (GET_CODE (temp) != REG)
17403 temp = gen_reg_rtx (Pmode);
17404 temp = legitimize_pic_address (op1, temp);
17405 if (temp == op0)
17406 return;
17407 op1 = temp;
17409 /* dynamic-no-pic */
17410 #endif
17412 else
17414 if (MEM_P (op0))
17415 op1 = force_reg (mode, op1);
17416 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17418 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17419 op1 = legitimize_pic_address (op1, reg);
17420 if (op0 == op1)
17421 return;
17422 op1 = convert_to_mode (mode, op1, 1);
17426 else
17428 if (MEM_P (op0)
17429 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17430 || !push_operand (op0, mode))
17431 && MEM_P (op1))
17432 op1 = force_reg (mode, op1);
17434 if (push_operand (op0, mode)
17435 && ! general_no_elim_operand (op1, mode))
17436 op1 = copy_to_mode_reg (mode, op1);
17438	      /* Force large constants in 64bit compilation into a register
17439		 to get them CSEed.  */
17440 if (can_create_pseudo_p ()
17441 && (mode == DImode) && TARGET_64BIT
17442 && immediate_operand (op1, mode)
17443 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17444 && !register_operand (op0, mode)
17445 && optimize)
17446 op1 = copy_to_mode_reg (mode, op1);
17448 if (can_create_pseudo_p ()
17449 && FLOAT_MODE_P (mode)
17450 && GET_CODE (op1) == CONST_DOUBLE)
17452	      /* If we are loading a floating point constant to a register,
17453		 force the value to memory now, since we'll get better code
17454		 out of the back end.  */
17456 op1 = validize_mem (force_const_mem (mode, op1));
17457 if (!register_operand (op0, mode))
17459 rtx temp = gen_reg_rtx (mode);
17460 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17461 emit_move_insn (op0, temp);
17462 return;
17467 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17470 void
17471 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17473 rtx op0 = operands[0], op1 = operands[1];
17474 unsigned int align = GET_MODE_ALIGNMENT (mode);
17476 if (push_operand (op0, VOIDmode))
17477 op0 = emit_move_resolve_push (mode, op0);
17479 /* Force constants other than zero into memory. We do not know how
17480 the instructions used to build constants modify the upper 64 bits
17481	     of the register; once we have that information we may be able
17482 to handle some of them more efficiently. */
17483 if (can_create_pseudo_p ()
17484 && register_operand (op0, mode)
17485 && (CONSTANT_P (op1)
17486 || (GET_CODE (op1) == SUBREG
17487 && CONSTANT_P (SUBREG_REG (op1))))
17488 && !standard_sse_constant_p (op1))
17489 op1 = validize_mem (force_const_mem (mode, op1));
17491 /* We need to check memory alignment for SSE mode since attribute
17492 can make operands unaligned. */
17493 if (can_create_pseudo_p ()
17494 && SSE_REG_MODE_P (mode)
17495 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17496 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17498 rtx tmp[2];
17500 /* ix86_expand_vector_move_misalign() does not like constants ... */
17501 if (CONSTANT_P (op1)
17502 || (GET_CODE (op1) == SUBREG
17503 && CONSTANT_P (SUBREG_REG (op1))))
17504 op1 = validize_mem (force_const_mem (mode, op1));
17506 /* ... nor both arguments in memory. */
17507 if (!register_operand (op0, mode)
17508 && !register_operand (op1, mode))
17509 op1 = force_reg (mode, op1);
17511 tmp[0] = op0; tmp[1] = op1;
17512 ix86_expand_vector_move_misalign (mode, tmp);
17513 return;
17516 /* Make operand1 a register if it isn't already. */
17517 if (can_create_pseudo_p ()
17518 && !register_operand (op0, mode)
17519 && !register_operand (op1, mode))
17521 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17522 return;
17525 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17528 /* Split 32-byte AVX unaligned load and store if needed. */
17530 static void
17531 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17533 rtx m;
17534 rtx (*extract) (rtx, rtx, rtx);
17535 rtx (*load_unaligned) (rtx, rtx);
17536 rtx (*store_unaligned) (rtx, rtx);
17537 machine_mode mode;
17539 switch (GET_MODE (op0))
17541 default:
17542 gcc_unreachable ();
17543 case V32QImode:
17544 extract = gen_avx_vextractf128v32qi;
17545 load_unaligned = gen_avx_loaddquv32qi;
17546 store_unaligned = gen_avx_storedquv32qi;
17547 mode = V16QImode;
17548 break;
17549 case V8SFmode:
17550 extract = gen_avx_vextractf128v8sf;
17551 load_unaligned = gen_avx_loadups256;
17552 store_unaligned = gen_avx_storeups256;
17553 mode = V4SFmode;
17554 break;
17555 case V4DFmode:
17556 extract = gen_avx_vextractf128v4df;
17557 load_unaligned = gen_avx_loadupd256;
17558 store_unaligned = gen_avx_storeupd256;
17559 mode = V2DFmode;
17560 break;
17563 if (MEM_P (op1))
17565 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17566 && optimize_insn_for_speed_p ())
17568 rtx r = gen_reg_rtx (mode);
17569 m = adjust_address (op1, mode, 0);
17570 emit_move_insn (r, m);
17571 m = adjust_address (op1, mode, 16);
17572 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17573 emit_move_insn (op0, r);
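/* The VEC_CONCAT of the two 16-byte halves is left to the vec_concat insn
   pattern, which is expected to emit a vinsertf128-style insert, so the
   32-byte load becomes two 16-byte memory operations (a sketch of the
   intent, not literal output).  */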
17575 /* Normal *mov<mode>_internal pattern will handle
17576 unaligned loads just fine if misaligned_operand
17577 is true, and without the UNSPEC it can be combined
17578 with arithmetic instructions. */
17579 else if (misaligned_operand (op1, GET_MODE (op1)))
17580 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17581 else
17582 emit_insn (load_unaligned (op0, op1));
17584 else if (MEM_P (op0))
17586 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17587 && optimize_insn_for_speed_p ())
17589 m = adjust_address (op0, mode, 0);
17590 emit_insn (extract (m, op1, const0_rtx));
17591 m = adjust_address (op0, mode, 16);
17592 emit_insn (extract (m, op1, const1_rtx));
17594 else
17595 emit_insn (store_unaligned (op0, op1));
17597 else
17598 gcc_unreachable ();
17601 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17602 straight to ix86_expand_vector_move. */
17603 /* Code generation for scalar reg-reg moves of single and double precision data:
17604 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17605 movaps reg, reg
17606 else
17607 movss reg, reg
17608 if (x86_sse_partial_reg_dependency == true)
17609 movapd reg, reg
17610 else
17611 movsd reg, reg
17613 Code generation for scalar loads of double precision data:
17614 if (x86_sse_split_regs == true)
17615 movlpd mem, reg (gas syntax)
17616 else
17617 movsd mem, reg
17619 Code generation for unaligned packed loads of single precision data
17620 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17621 if (x86_sse_unaligned_move_optimal)
17622 movups mem, reg
17624 if (x86_sse_partial_reg_dependency == true)
17626 xorps reg, reg
17627 movlps mem, reg
17628 movhps mem+8, reg
17630 else
17632 movlps mem, reg
17633 movhps mem+8, reg
17636 Code generation for unaligned packed loads of double precision data
17637 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17638 if (x86_sse_unaligned_move_optimal)
17639 movupd mem, reg
17641 if (x86_sse_split_regs == true)
17643 movlpd mem, reg
17644 movhpd mem+8, reg
17646 else
17648 movsd mem, reg
17649 movhpd mem+8, reg
17653 void
17654 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17656 rtx op0, op1, orig_op0 = NULL_RTX, m;
17657 rtx (*load_unaligned) (rtx, rtx);
17658 rtx (*store_unaligned) (rtx, rtx);
17660 op0 = operands[0];
17661 op1 = operands[1];
17663 if (GET_MODE_SIZE (mode) == 64)
17665 switch (GET_MODE_CLASS (mode))
17667 case MODE_VECTOR_INT:
17668 case MODE_INT:
17669 if (GET_MODE (op0) != V16SImode)
17671 if (!MEM_P (op0))
17673 orig_op0 = op0;
17674 op0 = gen_reg_rtx (V16SImode);
17676 else
17677 op0 = gen_lowpart (V16SImode, op0);
17679 op1 = gen_lowpart (V16SImode, op1);
17680 /* FALLTHRU */
17682 case MODE_VECTOR_FLOAT:
17683 switch (GET_MODE (op0))
17685 default:
17686 gcc_unreachable ();
17687 case V16SImode:
17688 load_unaligned = gen_avx512f_loaddquv16si;
17689 store_unaligned = gen_avx512f_storedquv16si;
17690 break;
17691 case V16SFmode:
17692 load_unaligned = gen_avx512f_loadups512;
17693 store_unaligned = gen_avx512f_storeups512;
17694 break;
17695 case V8DFmode:
17696 load_unaligned = gen_avx512f_loadupd512;
17697 store_unaligned = gen_avx512f_storeupd512;
17698 break;
17701 if (MEM_P (op1))
17702 emit_insn (load_unaligned (op0, op1));
17703 else if (MEM_P (op0))
17704 emit_insn (store_unaligned (op0, op1));
17705 else
17706 gcc_unreachable ();
17707 if (orig_op0)
17708 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17709 break;
17711 default:
17712 gcc_unreachable ();
17715 return;
17718 if (TARGET_AVX
17719 && GET_MODE_SIZE (mode) == 32)
17721 switch (GET_MODE_CLASS (mode))
17723 case MODE_VECTOR_INT:
17724 case MODE_INT:
17725 if (GET_MODE (op0) != V32QImode)
17727 if (!MEM_P (op0))
17729 orig_op0 = op0;
17730 op0 = gen_reg_rtx (V32QImode);
17732 else
17733 op0 = gen_lowpart (V32QImode, op0);
17735 op1 = gen_lowpart (V32QImode, op1);
17736 /* FALLTHRU */
17738 case MODE_VECTOR_FLOAT:
17739 ix86_avx256_split_vector_move_misalign (op0, op1);
17740 if (orig_op0)
17741 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17742 break;
17744 default:
17745 gcc_unreachable ();
17748 return;
17751 if (MEM_P (op1))
17753 /* Normal *mov<mode>_internal pattern will handle
17754 unaligned loads just fine if misaligned_operand
17755 is true, and without the UNSPEC it can be combined
17756 with arithmetic instructions. */
17757 if (TARGET_AVX
17758 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17759 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17760 && misaligned_operand (op1, GET_MODE (op1)))
17761 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17762 /* ??? If we have typed data, then it would appear that using
17763 movdqu is the only way to get unaligned data loaded with
17764 integer type. */
17765 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17767 if (GET_MODE (op0) != V16QImode)
17769 orig_op0 = op0;
17770 op0 = gen_reg_rtx (V16QImode);
17772 op1 = gen_lowpart (V16QImode, op1);
17773 /* We will eventually emit movups based on insn attributes. */
17774 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17775 if (orig_op0)
17776 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17778 else if (TARGET_SSE2 && mode == V2DFmode)
17780 rtx zero;
17782 if (TARGET_AVX
17783 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17784 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17785 || optimize_insn_for_size_p ())
17787 /* We will eventually emit movups based on insn attributes. */
17788 emit_insn (gen_sse2_loadupd (op0, op1));
17789 return;
17792 /* When SSE registers are split into halves, we can avoid
17793 writing to the top half twice. */
17794 if (TARGET_SSE_SPLIT_REGS)
17796 emit_clobber (op0);
17797 zero = op0;
17799 else
17801 /* ??? Not sure about the best option for the Intel chips.
17802 The following would seem to satisfy; the register is
17803 entirely cleared, breaking the dependency chain. We
17804 then store to the upper half, with a dependency depth
17805 of one. A rumor has it that Intel recommends two movsd
17806 followed by an unpacklpd, but this is unconfirmed. And
17807 given that the dependency depth of the unpacklpd would
17808 still be one, I'm not sure why this would be better. */
17809 zero = CONST0_RTX (V2DFmode);
17812 m = adjust_address (op1, DFmode, 0);
17813 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17814 m = adjust_address (op1, DFmode, 8);
17815 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17817 else
17819 rtx t;
17821 if (TARGET_AVX
17822 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17823 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17824 || optimize_insn_for_size_p ())
17826 if (GET_MODE (op0) != V4SFmode)
17828 orig_op0 = op0;
17829 op0 = gen_reg_rtx (V4SFmode);
17831 op1 = gen_lowpart (V4SFmode, op1);
17832 emit_insn (gen_sse_loadups (op0, op1));
17833 if (orig_op0)
17834 emit_move_insn (orig_op0,
17835 gen_lowpart (GET_MODE (orig_op0), op0));
17836 return;
17839 if (mode != V4SFmode)
17840 t = gen_reg_rtx (V4SFmode);
17841 else
17842 t = op0;
17844 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17845 emit_move_insn (t, CONST0_RTX (V4SFmode));
17846 else
17847 emit_clobber (t);
17849 m = adjust_address (op1, V2SFmode, 0);
17850 emit_insn (gen_sse_loadlps (t, t, m));
17851 m = adjust_address (op1, V2SFmode, 8);
17852 emit_insn (gen_sse_loadhps (t, t, m));
17853 if (mode != V4SFmode)
17854 emit_move_insn (op0, gen_lowpart (mode, t));
17857 else if (MEM_P (op0))
17859 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17861 op0 = gen_lowpart (V16QImode, op0);
17862 op1 = gen_lowpart (V16QImode, op1);
17863 /* We will eventually emit movups based on insn attributes. */
17864 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17866 else if (TARGET_SSE2 && mode == V2DFmode)
17868 if (TARGET_AVX
17869 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17870 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17871 || optimize_insn_for_size_p ())
17872 /* We will eventually emit movups based on insn attributes. */
17873 emit_insn (gen_sse2_storeupd (op0, op1));
17874 else
17876 m = adjust_address (op0, DFmode, 0);
17877 emit_insn (gen_sse2_storelpd (m, op1));
17878 m = adjust_address (op0, DFmode, 8);
17879 emit_insn (gen_sse2_storehpd (m, op1));
17882 else
17884 if (mode != V4SFmode)
17885 op1 = gen_lowpart (V4SFmode, op1);
17887 if (TARGET_AVX
17888 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17889 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17890 || optimize_insn_for_size_p ())
17892 op0 = gen_lowpart (V4SFmode, op0);
17893 emit_insn (gen_sse_storeups (op0, op1));
17895 else
17897 m = adjust_address (op0, V2SFmode, 0);
17898 emit_insn (gen_sse_storelps (m, op1));
17899 m = adjust_address (op0, V2SFmode, 8);
17900 emit_insn (gen_sse_storehps (m, op1));
17904 else
17905 gcc_unreachable ();
17908 /* Helper function of ix86_fixup_binary_operands to canonicalize
17909 operand order. Returns true if the operands should be swapped. */
17911 static bool
17912 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17913 rtx operands[])
17915 rtx dst = operands[0];
17916 rtx src1 = operands[1];
17917 rtx src2 = operands[2];
17919 /* If the operation is not commutative, we can't do anything. */
17920 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17921 return false;
17923 /* Highest priority is that src1 should match dst. */
17924 if (rtx_equal_p (dst, src1))
17925 return false;
17926 if (rtx_equal_p (dst, src2))
17927 return true;
17929 /* Next highest priority is that immediate constants come second. */
17930 if (immediate_operand (src2, mode))
17931 return false;
17932 if (immediate_operand (src1, mode))
17933 return true;
17935 /* Lowest priority is that memory references should come second. */
17936 if (MEM_P (src2))
17937 return false;
17938 if (MEM_P (src1))
17939 return true;
17941 return false;
17945 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17946 destination to use for the operation. If different from the true
17947 destination in operands[0], a copy operation will be required. */
17950 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17951 rtx operands[])
17953 rtx dst = operands[0];
17954 rtx src1 = operands[1];
17955 rtx src2 = operands[2];
17957 /* Canonicalize operand order. */
17958 if (ix86_swap_binary_operands_p (code, mode, operands))
17960 /* It is invalid to swap operands of different modes. */
17961 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17963 std::swap (src1, src2);
17966 /* Both source operands cannot be in memory. */
17967 if (MEM_P (src1) && MEM_P (src2))
17969 /* Optimization: Only read from memory once. */
17970 if (rtx_equal_p (src1, src2))
17972 src2 = force_reg (mode, src2);
17973 src1 = src2;
17975 else if (rtx_equal_p (dst, src1))
17976 src2 = force_reg (mode, src2);
17977 else
17978 src1 = force_reg (mode, src1);
17981 /* If the destination is memory, and we do not have matching source
17982 operands, do things in registers. */
17983 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17984 dst = gen_reg_rtx (mode);
17986 /* Source 1 cannot be a constant. */
17987 if (CONSTANT_P (src1))
17988 src1 = force_reg (mode, src1);
17990 /* Source 1 cannot be a non-matching memory. */
17991 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17992 src1 = force_reg (mode, src1);
17994 /* Improve address combine. */
17995 if (code == PLUS
17996 && GET_MODE_CLASS (mode) == MODE_INT
17997 && MEM_P (src2))
17998 src2 = force_reg (mode, src2);
18000 operands[1] = src1;
18001 operands[2] = src2;
18002 return dst;
18005 /* Similarly, but assume that the destination has already been
18006 set up properly. */
18008 void
18009 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18010 machine_mode mode, rtx operands[])
18012 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18013 gcc_assert (dst == operands[0]);
18016 /* Attempt to expand a binary operator. Make the expansion closer to the
18017	   actual machine than just general_operand, which would allow 3 separate
18018	   memory references (one output, two inputs) in a single insn.  */
18020 void
18021 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18022 rtx operands[])
18024 rtx src1, src2, dst, op, clob;
18026 dst = ix86_fixup_binary_operands (code, mode, operands);
18027 src1 = operands[1];
18028 src2 = operands[2];
18030 /* Emit the instruction. */
18032 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18033 if (reload_in_progress)
18035 /* Reload doesn't know about the flags register, and doesn't know that
18036 it doesn't want to clobber it. We can only do this with PLUS. */
18037 gcc_assert (code == PLUS);
18038 emit_insn (op);
18040 else if (reload_completed
18041 && code == PLUS
18042 && !rtx_equal_p (dst, src1))
18044 /* This is going to be an LEA; avoid splitting it later. */
18045 emit_insn (op);
18047 else
18049 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18050 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18053 /* Fix up the destination if needed. */
18054 if (dst != operands[0])
18055 emit_move_insn (operands[0], dst);
18058 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18059 the given OPERANDS. */
18061 void
18062 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18063 rtx operands[])
18065 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18066 if (GET_CODE (operands[1]) == SUBREG)
18068 op1 = operands[1];
18069 op2 = operands[2];
18071 else if (GET_CODE (operands[2]) == SUBREG)
18073 op1 = operands[2];
18074 op2 = operands[1];
18076 /* Optimize (__m128i) d | (__m128i) e and similar code
18077 when d and e are float vectors into float vector logical
18078 insn. In C/C++ without using intrinsics there is no other way
18079 to express vector logical operation on float vectors than
18080 to cast them temporarily to integer vectors. */
18081 if (op1
18082 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18083 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18084 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18085 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18086 && SUBREG_BYTE (op1) == 0
18087 && (GET_CODE (op2) == CONST_VECTOR
18088 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18089 && SUBREG_BYTE (op2) == 0))
18090 && can_create_pseudo_p ())
18092 rtx dst;
18093 switch (GET_MODE (SUBREG_REG (op1)))
18095 case V4SFmode:
18096 case V8SFmode:
18097 case V16SFmode:
18098 case V2DFmode:
18099 case V4DFmode:
18100 case V8DFmode:
18101 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18102 if (GET_CODE (op2) == CONST_VECTOR)
18104 op2 = gen_lowpart (GET_MODE (dst), op2);
18105 op2 = force_reg (GET_MODE (dst), op2);
18107 else
18109 op1 = operands[1];
18110 op2 = SUBREG_REG (operands[2]);
18111 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18112 op2 = force_reg (GET_MODE (dst), op2);
18114 op1 = SUBREG_REG (op1);
18115 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18116 op1 = force_reg (GET_MODE (dst), op1);
18117 emit_insn (gen_rtx_SET (VOIDmode, dst,
18118 gen_rtx_fmt_ee (code, GET_MODE (dst),
18119 op1, op2)));
18120 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18121 return;
18122 default:
18123 break;
18126 if (!nonimmediate_operand (operands[1], mode))
18127 operands[1] = force_reg (mode, operands[1]);
18128 if (!nonimmediate_operand (operands[2], mode))
18129 operands[2] = force_reg (mode, operands[2]);
18130 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18131 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18132 gen_rtx_fmt_ee (code, mode, operands[1],
18133 operands[2])));
18136 /* Return TRUE or FALSE depending on whether the binary operator meets the
18137 appropriate constraints. */
18139 bool
18140 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18141 rtx operands[3])
18143 rtx dst = operands[0];
18144 rtx src1 = operands[1];
18145 rtx src2 = operands[2];
18147 /* Both source operands cannot be in memory. */
18148 if (MEM_P (src1) && MEM_P (src2))
18149 return false;
18151 /* Canonicalize operand order for commutative operators. */
18152 if (ix86_swap_binary_operands_p (code, mode, operands))
18153 std::swap (src1, src2);
18155 /* If the destination is memory, we must have a matching source operand. */
18156 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18157 return false;
18159 /* Source 1 cannot be a constant. */
18160 if (CONSTANT_P (src1))
18161 return false;
18163 /* Source 1 cannot be a non-matching memory. */
18164 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18165 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18166 return (code == AND
18167 && (mode == HImode
18168 || mode == SImode
18169 || (TARGET_64BIT && mode == DImode))
18170 && satisfies_constraint_L (src2));
18172 return true;
18175 /* Attempt to expand a unary operator. Make the expansion closer to the
18176	   actual machine than just general_operand, which would allow 2 separate
18177 memory references (one output, one input) in a single insn. */
18179 void
18180 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18181 rtx operands[])
18183 bool matching_memory = false;
18184 rtx src, dst, op, clob;
18186 dst = operands[0];
18187 src = operands[1];
18189 /* If the destination is memory, and we do not have matching source
18190 operands, do things in registers. */
18191 if (MEM_P (dst))
18193 if (rtx_equal_p (dst, src))
18194 matching_memory = true;
18195 else
18196 dst = gen_reg_rtx (mode);
18199 /* When source operand is memory, destination must match. */
18200 if (MEM_P (src) && !matching_memory)
18201 src = force_reg (mode, src);
18203 /* Emit the instruction. */
18205 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18206 if (reload_in_progress || code == NOT)
18208 /* Reload doesn't know about the flags register, and doesn't know that
18209 it doesn't want to clobber it. */
18210 gcc_assert (code == NOT);
18211 emit_insn (op);
18213 else
18215 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18216 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18219 /* Fix up the destination if needed. */
18220 if (dst != operands[0])
18221 emit_move_insn (operands[0], dst);
18224 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18225 divisor are within the range [0-255]. */
18227 void
18228 ix86_split_idivmod (machine_mode mode, rtx operands[],
18229 bool signed_p)
18231 rtx_code_label *end_label, *qimode_label;
18232 rtx insn, div, mod;
18233 rtx scratch, tmp0, tmp1, tmp2;
18234 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18235 rtx (*gen_zero_extend) (rtx, rtx);
18236 rtx (*gen_test_ccno_1) (rtx, rtx);
18238 switch (mode)
18240 case SImode:
18241 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18242 gen_test_ccno_1 = gen_testsi_ccno_1;
18243 gen_zero_extend = gen_zero_extendqisi2;
18244 break;
18245 case DImode:
18246 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18247 gen_test_ccno_1 = gen_testdi_ccno_1;
18248 gen_zero_extend = gen_zero_extendqidi2;
18249 break;
18250 default:
18251 gcc_unreachable ();
18254 end_label = gen_label_rtx ();
18255 qimode_label = gen_label_rtx ();
18257 scratch = gen_reg_rtx (mode);
18259	  /* Use 8bit unsigned divmod if the dividend and divisor are within
18260	     the range [0-255].  */
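/* Roughly, the emitted sequence is (a sketch, not literal output):
       scratch = op2 | op3
       test $-0x100, scratch	; any bits above the low 8 set?
       je .Lqimode
       <full-width signed/unsigned divmod>
       jmp .Lend
     .Lqimode:
       <8-bit unsigned divmod via udivmodhiqi3>
     .Lend:  */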
18261 emit_move_insn (scratch, operands[2]);
18262 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18263 scratch, 1, OPTAB_DIRECT);
18264 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18265 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18266 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18267 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18268 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18269 pc_rtx);
18270 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18271 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18272 JUMP_LABEL (insn) = qimode_label;
18274	  /* Generate the original signed/unsigned divmod.  */
18275 div = gen_divmod4_1 (operands[0], operands[1],
18276 operands[2], operands[3]);
18277 emit_insn (div);
18279 /* Branch to the end. */
18280 emit_jump_insn (gen_jump (end_label));
18281 emit_barrier ();
18283 /* Generate 8bit unsigned divide. */
18284 emit_label (qimode_label);
18285 /* Don't use operands[0] for result of 8bit divide since not all
18286 registers support QImode ZERO_EXTRACT. */
18287 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18288 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18289 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18290 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18292 if (signed_p)
18294 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18295 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18297 else
18299 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18300 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18303 /* Extract remainder from AH. */
18304 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18305 if (REG_P (operands[1]))
18306 insn = emit_move_insn (operands[1], tmp1);
18307 else
18309	      /* Need a new scratch register since the old one holds the result
18310		 of the 8bit divide.  */
18311 scratch = gen_reg_rtx (mode);
18312 emit_move_insn (scratch, tmp1);
18313 insn = emit_move_insn (operands[1], scratch);
18315 set_unique_reg_note (insn, REG_EQUAL, mod);
18317 /* Zero extend quotient from AL. */
18318 tmp1 = gen_lowpart (QImode, tmp0);
18319 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18320 set_unique_reg_note (insn, REG_EQUAL, div);
18322 emit_label (end_label);
18325 #define LEA_MAX_STALL (3)
18326 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
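/* I.e. search within a window of 2 * LEA_MAX_STALL = 6 half-cycles.  */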
18328	/* Increase the given DISTANCE in half-cycles according to
18329	   dependencies between the PREV and NEXT instructions.
18330	   Add 1 half-cycle if there is no dependency and
18331	   go to the next cycle if there is some dependency.  */
18333 static unsigned int
18334 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18336 df_ref def, use;
18338 if (!prev || !next)
18339 return distance + (distance & 1) + 2;
18341 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18342 return distance + 1;
18344 FOR_EACH_INSN_USE (use, next)
18345 FOR_EACH_INSN_DEF (def, prev)
18346 if (!DF_REF_IS_ARTIFICIAL (def)
18347 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18348 return distance + (distance & 1) + 2;
18350 return distance + 1;
18353	/* Check whether instruction INSN defines register number
18354	   REGNO1 or REGNO2.  */
18356 static bool
18357 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18358 rtx insn)
18360 df_ref def;
18362 FOR_EACH_INSN_DEF (def, insn)
18363 if (DF_REF_REG_DEF_P (def)
18364 && !DF_REF_IS_ARTIFICIAL (def)
18365 && (regno1 == DF_REF_REGNO (def)
18366 || regno2 == DF_REF_REGNO (def)))
18367 return true;
18369 return false;
18372	/* Check whether instruction INSN uses register number
18373	   REGNO as part of an address expression.  */
18375 static bool
18376 insn_uses_reg_mem (unsigned int regno, rtx insn)
18378 df_ref use;
18380 FOR_EACH_INSN_USE (use, insn)
18381 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18382 return true;
18384 return false;
18387 /* Search backward for non-agu definition of register number REGNO1
18388 or register number REGNO2 in basic block starting from instruction
18389 START up to head of basic block or instruction INSN.
18391	   Put true into *FOUND if a definition was found
18392	   and false otherwise.
18394 Distance in half-cycles between START and found instruction or head
18395 of BB is added to DISTANCE and returned. */
18397 static int
18398 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18399 rtx_insn *insn, int distance,
18400 rtx_insn *start, bool *found)
18402 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18403 rtx_insn *prev = start;
18404 rtx_insn *next = NULL;
18406 *found = false;
18408 while (prev
18409 && prev != insn
18410 && distance < LEA_SEARCH_THRESHOLD)
18412 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18414 distance = increase_distance (prev, next, distance);
18415 if (insn_defines_reg (regno1, regno2, prev))
18417 if (recog_memoized (prev) < 0
18418 || get_attr_type (prev) != TYPE_LEA)
18420 *found = true;
18421 return distance;
18425 next = prev;
18427 if (prev == BB_HEAD (bb))
18428 break;
18430 prev = PREV_INSN (prev);
18433 return distance;
18436 /* Search backward for non-agu definition of register number REGNO1
18437 or register number REGNO2 in INSN's basic block until
18438 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18439 2. Reach neighbour BBs boundary, or
18440 3. Reach agu definition.
18441 Returns the distance between the non-agu definition point and INSN.
18442 If no definition point, returns -1. */
18444 static int
18445 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18446 rtx_insn *insn)
18448 basic_block bb = BLOCK_FOR_INSN (insn);
18449 int distance = 0;
18450 bool found = false;
18452 if (insn != BB_HEAD (bb))
18453 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18454 distance, PREV_INSN (insn),
18455 &found);
18457 if (!found && distance < LEA_SEARCH_THRESHOLD)
18459 edge e;
18460 edge_iterator ei;
18461 bool simple_loop = false;
18463 FOR_EACH_EDGE (e, ei, bb->preds)
18464 if (e->src == bb)
18466 simple_loop = true;
18467 break;
18470 if (simple_loop)
18471 distance = distance_non_agu_define_in_bb (regno1, regno2,
18472 insn, distance,
18473 BB_END (bb), &found);
18474 else
18476 int shortest_dist = -1;
18477 bool found_in_bb = false;
18479 FOR_EACH_EDGE (e, ei, bb->preds)
18481 int bb_dist
18482 = distance_non_agu_define_in_bb (regno1, regno2,
18483 insn, distance,
18484 BB_END (e->src),
18485 &found_in_bb);
18486 if (found_in_bb)
18488 if (shortest_dist < 0)
18489 shortest_dist = bb_dist;
18490 else if (bb_dist > 0)
18491 shortest_dist = MIN (bb_dist, shortest_dist);
18493 found = true;
18497 distance = shortest_dist;
18501 /* get_attr_type may modify recog data. We want to make sure
18502 that recog data is valid for instruction INSN, on which
18503 distance_non_agu_define is called. INSN is unchanged here. */
18504 extract_insn_cached (insn);
18506 if (!found)
18507 return -1;
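/* DISTANCE was accumulated in half-cycles; halve it for the final result.  */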
18509 return distance >> 1;
18512	/* Return the distance in half-cycles between INSN and the next
18513	   insn that uses register number REGNO in a memory address, added
18514	   to DISTANCE.  Return -1 if REGNO is redefined first.
18516	   Put true into *FOUND if a register usage was found and
18517	   false otherwise.
18518	   Put true into *REDEFINED if a register redefinition was
18519	   found and false otherwise.  */
18521 static int
18522 distance_agu_use_in_bb (unsigned int regno,
18523 rtx_insn *insn, int distance, rtx_insn *start,
18524 bool *found, bool *redefined)
18526 basic_block bb = NULL;
18527 rtx_insn *next = start;
18528 rtx_insn *prev = NULL;
18530 *found = false;
18531 *redefined = false;
18533 if (start != NULL_RTX)
18535 bb = BLOCK_FOR_INSN (start);
18536 if (start != BB_HEAD (bb))
18537 /* If insn and start belong to the same bb, set prev to insn,
18538 so the call to increase_distance will increase the distance
18539 between insns by 1. */
18540 prev = insn;
18543 while (next
18544 && next != insn
18545 && distance < LEA_SEARCH_THRESHOLD)
18547 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18549 distance = increase_distance(prev, next, distance);
18550 if (insn_uses_reg_mem (regno, next))
18552 /* Return DISTANCE if OP0 is used in memory
18553 address in NEXT. */
18554 *found = true;
18555 return distance;
18558 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18560 /* Return -1 if OP0 is set in NEXT. */
18561 *redefined = true;
18562 return -1;
18565 prev = next;
18568 if (next == BB_END (bb))
18569 break;
18571 next = NEXT_INSN (next);
18574 return distance;
18577 /* Return the distance between INSN and the next insn that uses
18578 register number REGNO0 in a memory address. Return -1 if no such
18579 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
18581 static int
18582 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18584 basic_block bb = BLOCK_FOR_INSN (insn);
18585 int distance = 0;
18586 bool found = false;
18587 bool redefined = false;
18589 if (insn != BB_END (bb))
18590 distance = distance_agu_use_in_bb (regno0, insn, distance,
18591 NEXT_INSN (insn),
18592 &found, &redefined);
18594 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18596 edge e;
18597 edge_iterator ei;
18598 bool simple_loop = false;
18600 FOR_EACH_EDGE (e, ei, bb->succs)
18601 if (e->dest == bb)
18603 simple_loop = true;
18604 break;
18607 if (simple_loop)
18608 distance = distance_agu_use_in_bb (regno0, insn,
18609 distance, BB_HEAD (bb),
18610 &found, &redefined);
18611 else
18613 int shortest_dist = -1;
18614 bool found_in_bb = false;
18615 bool redefined_in_bb = false;
18617 FOR_EACH_EDGE (e, ei, bb->succs)
18619 int bb_dist
18620 = distance_agu_use_in_bb (regno0, insn,
18621 distance, BB_HEAD (e->dest),
18622 &found_in_bb, &redefined_in_bb);
18623 if (found_in_bb)
18625 if (shortest_dist < 0)
18626 shortest_dist = bb_dist;
18627 else if (bb_dist > 0)
18628 shortest_dist = MIN (bb_dist, shortest_dist);
18630 found = true;
18634 distance = shortest_dist;
18638 if (!found || redefined)
18639 return -1;
18641 return distance >> 1;
18644 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18645 there is a dilemma of choosing between LEA and ADD.
18646 Negative value: ADD is preferred over LEA
18647 Zero: Neutral
18648 Positive value: LEA is preferred over ADD */
18649 #define IX86_LEA_PRIORITY 0
18651 /* Return true if using the LEA insn INSN has a performance advantage
18652 over a sequence of instructions. The instruction sequence has
18653 SPLIT_COST cycles higher latency than the LEA. */
18655 static bool
18656 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18657 unsigned int regno2, int split_cost, bool has_scale)
18659 int dist_define, dist_use;
18661 /* For Silvermont, if a 2-source or 3-source LEA is used for
18662 non-destructive destination purposes, or in order to gain the
18663 ability to use SCALE, the use of LEA is justified. */
18664 if (TARGET_SILVERMONT || TARGET_INTEL)
18666 if (has_scale)
18667 return true;
18668 if (split_cost < 1)
18669 return false;
18670 if (regno0 == regno1 || regno0 == regno2)
18671 return false;
18672 return true;
18675 dist_define = distance_non_agu_define (regno1, regno2, insn);
18676 dist_use = distance_agu_use (regno0, insn);
18678 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18680 /* If there is no non-AGU operand definition, no AGU
18681 operand use and the split cost is 0, then both the LEA
18682 and non-LEA variants have the same priority. Currently
18683 we prefer LEA for 64-bit code and non-LEA for 32-bit
18684 code. */
18685 if (dist_use < 0 && split_cost == 0)
18686 return TARGET_64BIT || IX86_LEA_PRIORITY;
18687 else
18688 return true;
18691 /* With a longer definition distance, LEA is preferable.
18692 Here we adjust it to take the splitting cost and
18693 LEA priority into account. */
18694 dist_define += split_cost + IX86_LEA_PRIORITY;
18696 /* If there is no use in a memory address then we just check
18697 that the split cost exceeds the AGU stall. */
18698 if (dist_use < 0)
18699 return dist_define > LEA_MAX_STALL;
18701 /* If this insn has both a backward non-AGU dependence and a forward
18702 AGU dependence, the one with the shorter distance takes effect. */
18703 return dist_define >= dist_use;
18706 /* Return true if it is legal to clobber flags by INSN and
18707 false otherwise. */
18709 static bool
18710 ix86_ok_to_clobber_flags (rtx_insn *insn)
18712 basic_block bb = BLOCK_FOR_INSN (insn);
18713 df_ref use;
18714 bitmap live;
18716 while (insn)
18718 if (NONDEBUG_INSN_P (insn))
18720 FOR_EACH_INSN_USE (use, insn)
18721 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18722 return false;
18724 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18725 return true;
18728 if (insn == BB_END (bb))
18729 break;
18731 insn = NEXT_INSN (insn);
18734 live = df_get_live_out(bb);
18735 return !REGNO_REG_SET_P (live, FLAGS_REG);
18738 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18739 move and add to avoid AGU stalls. */
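/* Illustrative sketch (not part of the original comment): for an insn
   matching op0 = op1 + op2 with op0 distinct from both inputs, e.g.
     lea (%rdi,%rsi), %rax
   the split produces something like
     mov %rdi, %rax
     add %rsi, %rax
   which runs on the ALU instead of the AGU; the split is taken only
   when ix86_lea_outperforms says the LEA would not pay off.  */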
18741 bool
18742 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18744 unsigned int regno0, regno1, regno2;
18746 /* Check if we need to optimize. */
18747 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18748 return false;
18750 /* Check it is correct to split here. */
18751 if (!ix86_ok_to_clobber_flags(insn))
18752 return false;
18754 regno0 = true_regnum (operands[0]);
18755 regno1 = true_regnum (operands[1]);
18756 regno2 = true_regnum (operands[2]);
18758 /* We need to split only adds with a non-destructive
18759 destination operand. */
18760 if (regno0 == regno1 || regno0 == regno2)
18761 return false;
18762 else
18763 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18766 /* Return true if we should emit lea instruction instead of mov
18767 instruction. */
18769 bool
18770 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18772 unsigned int regno0, regno1;
18774 /* Check if we need to optimize. */
18775 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18776 return false;
18778 /* Use lea for reg to reg moves only. */
18779 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18780 return false;
18782 regno0 = true_regnum (operands[0]);
18783 regno1 = true_regnum (operands[1]);
18785 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18788 /* Return true if we need to split lea into a sequence of
18789 instructions to avoid AGU stalls. */
18791 bool
18792 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18794 unsigned int regno0, regno1, regno2;
18795 int split_cost;
18796 struct ix86_address parts;
18797 int ok;
18799 /* Check we need to optimize. */
18800 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18801 return false;
18803 /* The "at least two components" test below might not catch simple
18804 move or zero extension insns if parts.base is non-NULL and parts.disp
18805 is const0_rtx as the only components in the address, e.g. if the
18806 register is %rbp or %r13. As this test is much cheaper and moves or
18807 zero extensions are the common case, do this check first. */
18808 if (REG_P (operands[1])
18809 || (SImode_address_operand (operands[1], VOIDmode)
18810 && REG_P (XEXP (operands[1], 0))))
18811 return false;
18813 /* Check if it is OK to split here. */
18814 if (!ix86_ok_to_clobber_flags (insn))
18815 return false;
18817 ok = ix86_decompose_address (operands[1], &parts);
18818 gcc_assert (ok);
18820 /* There should be at least two components in the address. */
18821 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18822 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18823 return false;
18825 /* We should not split into an add if a non-legitimate PIC
18826 operand is used as the displacement. */
18827 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18828 return false;
18830 regno0 = true_regnum (operands[0]);
18831 regno1 = INVALID_REGNUM;
18832 regno2 = INVALID_REGNUM;
18834 if (parts.base)
18835 regno1 = true_regnum (parts.base);
18836 if (parts.index)
18837 regno2 = true_regnum (parts.index);
18839 split_cost = 0;
18841 /* Compute how many cycles we will add to the execution time
18842 if we split the LEA into a sequence of instructions. */
18843 if (parts.base || parts.index)
18845 /* Have to use a mov instruction if the non-destructive
18846 destination form is used. */
18847 if (regno1 != regno0 && regno2 != regno0)
18848 split_cost += 1;
18850 /* Have to add index to base if both exist. */
18851 if (parts.base && parts.index)
18852 split_cost += 1;
18854 /* Have to use shift and adds if scale is 2 or greater. */
18855 if (parts.scale > 1)
18857 if (regno0 != regno1)
18858 split_cost += 1;
18859 else if (regno2 == regno0)
18860 split_cost += 4;
18861 else
18862 split_cost += parts.scale;
18865 /* Have to use an add instruction with an immediate if
18866 disp is nonzero. */
18867 if (parts.disp && parts.disp != const0_rtx)
18868 split_cost += 1;
18870 /* Subtract the price of lea. */
18871 split_cost -= 1;
18874 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18875 parts.scale > 1);
18878 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18879 matches destination. RTX includes clobber of FLAGS_REG. */
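/* For illustration (a sketch, not part of the original comment):
   ix86_emit_binop (PLUS, SImode, dst, src) emits roughly
     (parallel [(set dst (plus:SI dst src))
                (clobber (reg:CC FLAGS_REG))])
   i.e. the two-operand ALU form, so the flags clobber must be legal
   at the insertion point (see ix86_ok_to_clobber_flags above).  */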
18881 static void
18882 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18883 rtx dst, rtx src)
18885 rtx op, clob;
18887 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18888 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18890 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18893 /* Return true if regno1 def is nearest to the insn. */
18895 static bool
18896 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18898 rtx_insn *prev = insn;
18899 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18901 if (insn == start)
18902 return false;
18903 while (prev && prev != start)
18905 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18907 prev = PREV_INSN (prev);
18908 continue;
18910 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18911 return true;
18912 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18913 return false;
18914 prev = PREV_INSN (prev);
18917 /* None of the regs is defined in the bb. */
18918 return false;
18921 /* Split lea instructions into a sequence of instructions
18922 which are executed on ALU to avoid AGU stalls.
18923 It is assumed that it is allowed to clobber flags register
18924 at lea position. */
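/* Illustrative example (assumed operands, not from the sources):
     lea 0x8(%rbx,%rcx,4), %rax
   may be rewritten into something like
     mov %rcx, %rax   # copy the index
     shl $2, %rax     # apply the scale as a shift
     add %rbx, %rax   # add the base
     add $8, %rax     # add the displacement
   The exact sequence depends on which of base/index aliases the
   destination; the cases are handled below.  */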
18926 void
18927 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18929 unsigned int regno0, regno1, regno2;
18930 struct ix86_address parts;
18931 rtx target, tmp;
18932 int ok, adds;
18934 ok = ix86_decompose_address (operands[1], &parts);
18935 gcc_assert (ok);
18937 target = gen_lowpart (mode, operands[0]);
18939 regno0 = true_regnum (target);
18940 regno1 = INVALID_REGNUM;
18941 regno2 = INVALID_REGNUM;
18943 if (parts.base)
18945 parts.base = gen_lowpart (mode, parts.base);
18946 regno1 = true_regnum (parts.base);
18949 if (parts.index)
18951 parts.index = gen_lowpart (mode, parts.index);
18952 regno2 = true_regnum (parts.index);
18955 if (parts.disp)
18956 parts.disp = gen_lowpart (mode, parts.disp);
18958 if (parts.scale > 1)
18960 /* Case r1 = r1 + ... */
18961 if (regno1 == regno0)
18963 /* If we have the case r1 = r1 + C * r2 then we
18964 would have to use multiplication, which is very
18965 expensive. Assume the cost model is wrong if we
18966 end up with such a case here. */
18967 gcc_assert (regno2 != regno0);
18969 for (adds = parts.scale; adds > 0; adds--)
18970 ix86_emit_binop (PLUS, mode, target, parts.index);
18972 else
18974 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18975 if (regno0 != regno2)
18976 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18978 /* Use shift for scaling. */
18979 ix86_emit_binop (ASHIFT, mode, target,
18980 GEN_INT (exact_log2 (parts.scale)));
18982 if (parts.base)
18983 ix86_emit_binop (PLUS, mode, target, parts.base);
18985 if (parts.disp && parts.disp != const0_rtx)
18986 ix86_emit_binop (PLUS, mode, target, parts.disp);
18989 else if (!parts.base && !parts.index)
18991 gcc_assert(parts.disp);
18992 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18994 else
18996 if (!parts.base)
18998 if (regno0 != regno2)
18999 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19001 else if (!parts.index)
19003 if (regno0 != regno1)
19004 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
19006 else
19008 if (regno0 == regno1)
19009 tmp = parts.index;
19010 else if (regno0 == regno2)
19011 tmp = parts.base;
19012 else
19014 rtx tmp1;
19016 /* Find the better operand for the SET instruction, depending
19017 on which definition is farther from the insn. */
19018 if (find_nearest_reg_def (insn, regno1, regno2))
19019 tmp = parts.index, tmp1 = parts.base;
19020 else
19021 tmp = parts.base, tmp1 = parts.index;
19023 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19025 if (parts.disp && parts.disp != const0_rtx)
19026 ix86_emit_binop (PLUS, mode, target, parts.disp);
19028 ix86_emit_binop (PLUS, mode, target, tmp1);
19029 return;
19032 ix86_emit_binop (PLUS, mode, target, tmp);
19035 if (parts.disp && parts.disp != const0_rtx)
19036 ix86_emit_binop (PLUS, mode, target, parts.disp);
19040 /* Return true if it is OK to optimize an ADD operation to a LEA
19041 operation in order to avoid flag register consumption. For most
19042 processors, ADD is faster than LEA. For processors like BONNELL, if
19043 the destination register of the LEA holds an actual address that will
19044 be used soon, LEA is better; otherwise ADD is better. */
19046 bool
19047 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19049 unsigned int regno0 = true_regnum (operands[0]);
19050 unsigned int regno1 = true_regnum (operands[1]);
19051 unsigned int regno2 = true_regnum (operands[2]);
19053 /* If a = b + c (a != b && a != c), we must use the LEA form. */
19054 if (regno0 != regno1 && regno0 != regno2)
19055 return true;
19057 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19058 return false;
19060 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19063 /* Return true if destination reg of SET_BODY is shift count of
19064 USE_BODY. */
19066 static bool
19067 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19069 rtx set_dest;
19070 rtx shift_rtx;
19071 int i;
19073 /* Retrieve destination of SET_BODY. */
19074 switch (GET_CODE (set_body))
19076 case SET:
19077 set_dest = SET_DEST (set_body);
19078 if (!set_dest || !REG_P (set_dest))
19079 return false;
19080 break;
19081 case PARALLEL:
19082 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19083 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19084 use_body))
19085 return true;
19086 default:
19087 return false;
19088 break;
19091 /* Retrieve shift count of USE_BODY. */
19092 switch (GET_CODE (use_body))
19094 case SET:
19095 shift_rtx = XEXP (use_body, 1);
19096 break;
19097 case PARALLEL:
19098 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19099 if (ix86_dep_by_shift_count_body (set_body,
19100 XVECEXP (use_body, 0, i)))
19101 return true;
19102 default:
19103 return false;
19104 break;
19107 if (shift_rtx
19108 && (GET_CODE (shift_rtx) == ASHIFT
19109 || GET_CODE (shift_rtx) == LSHIFTRT
19110 || GET_CODE (shift_rtx) == ASHIFTRT
19111 || GET_CODE (shift_rtx) == ROTATE
19112 || GET_CODE (shift_rtx) == ROTATERT))
19114 rtx shift_count = XEXP (shift_rtx, 1);
19116 /* Return true if shift count is dest of SET_BODY. */
19117 if (REG_P (shift_count))
19119 /* Add this check since the function can be invoked before register
19120 allocation by the pre-reload scheduler. */
19121 if (reload_completed
19122 && true_regnum (set_dest) == true_regnum (shift_count))
19123 return true;
19124 else if (REGNO(set_dest) == REGNO(shift_count))
19125 return true;
19129 return false;
19132 /* Return true if destination reg of SET_INSN is shift count of
19133 USE_INSN. */
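/* For example (illustrative RTL, not from the sources): if SET_INSN
   sets %ecx and USE_INSN's body is
     (set (reg:SI 0) (ashift:SI (reg:SI 1) (reg:QI cx)))
   then the shift count of USE_INSN is defined by SET_INSN and we
   return true; logical/arithmetic right shifts and rotates are
   handled the same way.  */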
19135 bool
19136 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19138 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19139 PATTERN (use_insn));
19142 /* Return TRUE or FALSE depending on whether the unary operator meets the
19143 appropriate constraints. */
19145 bool
19146 ix86_unary_operator_ok (enum rtx_code,
19147 machine_mode,
19148 rtx operands[2])
19150 /* If one of operands is memory, source and destination must match. */
19151 if ((MEM_P (operands[0])
19152 || MEM_P (operands[1]))
19153 && ! rtx_equal_p (operands[0], operands[1]))
19154 return false;
19155 return true;
19158 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19159 are ok, keeping in mind the possible movddup alternative. */
19161 bool
19162 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19164 if (MEM_P (operands[0]))
19165 return rtx_equal_p (operands[0], operands[1 + high]);
19166 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19167 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19168 return true;
19171 /* Post-reload splitter for converting an SF or DFmode value in an
19172 SSE register into an unsigned SImode. */
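/* Outline of the split below (informal): values >= 0x1p31 cannot be
   handled by the signed truncating converts directly, so we compare
   the input against 0x1p31, conditionally subtract it, do the signed
   truncating conversion, and finally XOR the sign bit back in for the
   lanes that were large.  */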
19174 void
19175 ix86_split_convert_uns_si_sse (rtx operands[])
19177 machine_mode vecmode;
19178 rtx value, large, zero_or_two31, input, two31, x;
19180 large = operands[1];
19181 zero_or_two31 = operands[2];
19182 input = operands[3];
19183 two31 = operands[4];
19184 vecmode = GET_MODE (large);
19185 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19187 /* Load up the value into the low element. We must ensure that the other
19188 elements are valid floats -- zero is the easiest such value. */
19189 if (MEM_P (input))
19191 if (vecmode == V4SFmode)
19192 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19193 else
19194 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19196 else
19198 input = gen_rtx_REG (vecmode, REGNO (input));
19199 emit_move_insn (value, CONST0_RTX (vecmode));
19200 if (vecmode == V4SFmode)
19201 emit_insn (gen_sse_movss (value, value, input));
19202 else
19203 emit_insn (gen_sse2_movsd (value, value, input));
19206 emit_move_insn (large, two31);
19207 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19209 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19210 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19212 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19213 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19215 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19216 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19218 large = gen_rtx_REG (V4SImode, REGNO (large));
19219 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19221 x = gen_rtx_REG (V4SImode, REGNO (value));
19222 if (vecmode == V4SFmode)
19223 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19224 else
19225 emit_insn (gen_sse2_cvttpd2dq (x, value));
19226 value = x;
19228 emit_insn (gen_xorv4si3 (value, value, large));
19231 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19232 Expects the 64-bit DImode to be supplied in a pair of integral
19233 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19234 -mfpmath=sse, !optimize_size only. */
19236 void
19237 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19239 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19240 rtx int_xmm, fp_xmm;
19241 rtx biases, exponents;
19242 rtx x;
19244 int_xmm = gen_reg_rtx (V4SImode);
19245 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19246 emit_insn (gen_movdi_to_sse (int_xmm, input));
19247 else if (TARGET_SSE_SPLIT_REGS)
19249 emit_clobber (int_xmm);
19250 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19252 else
19254 x = gen_reg_rtx (V2DImode);
19255 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19256 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19259 x = gen_rtx_CONST_VECTOR (V4SImode,
19260 gen_rtvec (4, GEN_INT (0x43300000UL),
19261 GEN_INT (0x45300000UL),
19262 const0_rtx, const0_rtx));
19263 exponents = validize_mem (force_const_mem (V4SImode, x));
19265 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19266 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19268 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19269 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19270 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19271 (0x1.0p84 + double(fp_value_hi_xmm)).
19272 Note these exponents differ by 32. */
19274 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19276 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19277 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19278 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19279 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19280 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19281 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19282 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19283 biases = validize_mem (force_const_mem (V2DFmode, biases));
19284 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19286 /* Add the upper and lower DFmode values together. */
19287 if (TARGET_SSE3)
19288 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19289 else
19291 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19292 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19293 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19296 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19299 /* Not used, but eases macroization of patterns. */
19300 void
19301 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19303 gcc_unreachable ();
19306 /* Convert an unsigned SImode value into a DFmode. Only currently used
19307 for SSE, but applicable anywhere. */
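/* Sketch of the method used below: bias the unsigned value by
   subtracting 0x1p31 so it fits a signed SImode, convert that with
   the signed SImode->DFmode conversion, then add 0x1p31 back in
   DFmode, which is exact because DFmode has a 53-bit significand.  */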
19309 void
19310 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19312 REAL_VALUE_TYPE TWO31r;
19313 rtx x, fp;
19315 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19316 NULL, 1, OPTAB_DIRECT);
19318 fp = gen_reg_rtx (DFmode);
19319 emit_insn (gen_floatsidf2 (fp, x));
19321 real_ldexp (&TWO31r, &dconst1, 31);
19322 x = const_double_from_real_value (TWO31r, DFmode);
19324 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19325 if (x != target)
19326 emit_move_insn (target, x);
19329 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19330 32-bit mode; otherwise we have a direct convert instruction. */
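/* Sketch: the 64-bit value is rebuilt below as
     (double) high_word * 0x1p32 + (double) (unsigned) low_word
   using the unsigned SImode helper above for the low half.  */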
19332 void
19333 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19335 REAL_VALUE_TYPE TWO32r;
19336 rtx fp_lo, fp_hi, x;
19338 fp_lo = gen_reg_rtx (DFmode);
19339 fp_hi = gen_reg_rtx (DFmode);
19341 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19343 real_ldexp (&TWO32r, &dconst1, 32);
19344 x = const_double_from_real_value (TWO32r, DFmode);
19345 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19347 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19349 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19350 0, OPTAB_DIRECT);
19351 if (x != target)
19352 emit_move_insn (target, x);
19355 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19356 For x86_32, -mfpmath=sse, !optimize_size only. */
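/* Sketch of the method used below: split the 32-bit value into 16-bit
   halves, convert each half with the signed SImode->SFmode conversion
   (each half fits SFmode exactly), and recombine as
     (float) hi * 0x1p16 + (float) lo.  */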
19357 void
19358 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19360 REAL_VALUE_TYPE ONE16r;
19361 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19363 real_ldexp (&ONE16r, &dconst1, 16);
19364 x = const_double_from_real_value (ONE16r, SFmode);
19365 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19366 NULL, 0, OPTAB_DIRECT);
19367 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19368 NULL, 0, OPTAB_DIRECT);
19369 fp_hi = gen_reg_rtx (SFmode);
19370 fp_lo = gen_reg_rtx (SFmode);
19371 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19372 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19373 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19374 0, OPTAB_DIRECT);
19375 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19376 0, OPTAB_DIRECT);
19377 if (!rtx_equal_p (target, fp_hi))
19378 emit_move_insn (target, fp_hi);
19381 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19382 a vector of unsigned ints VAL to vector of floats TARGET. */
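/* Same 16-bit split trick as the scalar helper above (informal):
     result = (float) (val >> 16) * 0x1p16 + (float) (val & 0xffff)
   done lane-wise with the signed vector converts, which is safe
   because both halves are below 0x1p31.  */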
19384 void
19385 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19387 rtx tmp[8];
19388 REAL_VALUE_TYPE TWO16r;
19389 machine_mode intmode = GET_MODE (val);
19390 machine_mode fltmode = GET_MODE (target);
19391 rtx (*cvt) (rtx, rtx);
19393 if (intmode == V4SImode)
19394 cvt = gen_floatv4siv4sf2;
19395 else
19396 cvt = gen_floatv8siv8sf2;
19397 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19398 tmp[0] = force_reg (intmode, tmp[0]);
19399 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19400 OPTAB_DIRECT);
19401 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19402 NULL_RTX, 1, OPTAB_DIRECT);
19403 tmp[3] = gen_reg_rtx (fltmode);
19404 emit_insn (cvt (tmp[3], tmp[1]));
19405 tmp[4] = gen_reg_rtx (fltmode);
19406 emit_insn (cvt (tmp[4], tmp[2]));
19407 real_ldexp (&TWO16r, &dconst1, 16);
19408 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19409 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19410 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19411 OPTAB_DIRECT);
19412 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19413 OPTAB_DIRECT);
19414 if (tmp[7] != target)
19415 emit_move_insn (target, tmp[7]);
19418 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19419 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19420 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19421 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19424 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19426 REAL_VALUE_TYPE TWO31r;
19427 rtx two31r, tmp[4];
19428 machine_mode mode = GET_MODE (val);
19429 machine_mode scalarmode = GET_MODE_INNER (mode);
19430 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19431 rtx (*cmp) (rtx, rtx, rtx, rtx);
19432 int i;
19434 for (i = 0; i < 3; i++)
19435 tmp[i] = gen_reg_rtx (mode);
19436 real_ldexp (&TWO31r, &dconst1, 31);
19437 two31r = const_double_from_real_value (TWO31r, scalarmode);
19438 two31r = ix86_build_const_vector (mode, 1, two31r);
19439 two31r = force_reg (mode, two31r);
19440 switch (mode)
19442 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19443 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19444 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19445 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19446 default: gcc_unreachable ();
19448 tmp[3] = gen_rtx_LE (mode, two31r, val);
19449 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19450 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19451 0, OPTAB_DIRECT);
19452 if (intmode == V4SImode || TARGET_AVX2)
19453 *xorp = expand_simple_binop (intmode, ASHIFT,
19454 gen_lowpart (intmode, tmp[0]),
19455 GEN_INT (31), NULL_RTX, 0,
19456 OPTAB_DIRECT);
19457 else
19459 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19460 two31 = ix86_build_const_vector (intmode, 1, two31);
19461 *xorp = expand_simple_binop (intmode, AND,
19462 gen_lowpart (intmode, tmp[0]),
19463 two31, NULL_RTX, 0,
19464 OPTAB_DIRECT);
19466 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19467 0, OPTAB_DIRECT);
19470 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19471 then replicate the value for all elements of the vector
19472 register. */
19475 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19477 int i, n_elt;
19478 rtvec v;
19479 machine_mode scalar_mode;
19481 switch (mode)
19483 case V64QImode:
19484 case V32QImode:
19485 case V16QImode:
19486 case V32HImode:
19487 case V16HImode:
19488 case V8HImode:
19489 case V16SImode:
19490 case V8SImode:
19491 case V4SImode:
19492 case V8DImode:
19493 case V4DImode:
19494 case V2DImode:
19495 gcc_assert (vect);
19496 case V16SFmode:
19497 case V8SFmode:
19498 case V4SFmode:
19499 case V8DFmode:
19500 case V4DFmode:
19501 case V2DFmode:
19502 n_elt = GET_MODE_NUNITS (mode);
19503 v = rtvec_alloc (n_elt);
19504 scalar_mode = GET_MODE_INNER (mode);
19506 RTVEC_ELT (v, 0) = value;
19508 for (i = 1; i < n_elt; ++i)
19509 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19511 return gen_rtx_CONST_VECTOR (mode, v);
19513 default:
19514 gcc_unreachable ();
19518 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19519 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19520 for an SSE register. If VECT is true, then replicate the mask for
19521 all elements of the vector register. If INVERT is true, then create
19522 a mask excluding the sign bit. */
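/* For instance (illustrative values): for V4SFmode with VECT set the
   mask is { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, and with
   INVERT set it is { 0x7fffffff, ... }; the first form isolates the
   sign bit while the second clears it.  */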
19525 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19527 machine_mode vec_mode, imode;
19528 HOST_WIDE_INT hi, lo;
19529 int shift = 63;
19530 rtx v;
19531 rtx mask;
19533 /* Find the sign bit, sign extended to 2*HWI. */
19534 switch (mode)
19536 case V16SImode:
19537 case V16SFmode:
19538 case V8SImode:
19539 case V4SImode:
19540 case V8SFmode:
19541 case V4SFmode:
19542 vec_mode = mode;
19543 mode = GET_MODE_INNER (mode);
19544 imode = SImode;
19545 lo = 0x80000000, hi = lo < 0;
19546 break;
19548 case V8DImode:
19549 case V4DImode:
19550 case V2DImode:
19551 case V8DFmode:
19552 case V4DFmode:
19553 case V2DFmode:
19554 vec_mode = mode;
19555 mode = GET_MODE_INNER (mode);
19556 imode = DImode;
19557 if (HOST_BITS_PER_WIDE_INT >= 64)
19558 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19559 else
19560 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19561 break;
19563 case TImode:
19564 case TFmode:
19565 vec_mode = VOIDmode;
19566 if (HOST_BITS_PER_WIDE_INT >= 64)
19568 imode = TImode;
19569 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19571 else
19573 rtvec vec;
19575 imode = DImode;
19576 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19578 if (invert)
19580 lo = ~lo, hi = ~hi;
19581 v = constm1_rtx;
19583 else
19584 v = const0_rtx;
19586 mask = immed_double_const (lo, hi, imode);
19588 vec = gen_rtvec (2, v, mask);
19589 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19590 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19592 return v;
19594 break;
19596 default:
19597 gcc_unreachable ();
19600 if (invert)
19601 lo = ~lo, hi = ~hi;
19603 /* Force this value into the low part of a fp vector constant. */
19604 mask = immed_double_const (lo, hi, imode);
19605 mask = gen_lowpart (mode, mask);
19607 if (vec_mode == VOIDmode)
19608 return force_reg (mode, mask);
19610 v = ix86_build_const_vector (vec_mode, vect, mask);
19611 return force_reg (vec_mode, v);
19614 /* Generate code for floating point ABS or NEG. */
19616 void
19617 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19618 rtx operands[])
19620 rtx mask, set, dst, src;
19621 bool use_sse = false;
19622 bool vector_mode = VECTOR_MODE_P (mode);
19623 machine_mode vmode = mode;
19625 if (vector_mode)
19626 use_sse = true;
19627 else if (mode == TFmode)
19628 use_sse = true;
19629 else if (TARGET_SSE_MATH)
19631 use_sse = SSE_FLOAT_MODE_P (mode);
19632 if (mode == SFmode)
19633 vmode = V4SFmode;
19634 else if (mode == DFmode)
19635 vmode = V2DFmode;
19638 /* NEG and ABS performed with SSE use bitwise mask operations.
19639 Create the appropriate mask now. */
19640 if (use_sse)
19641 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19642 else
19643 mask = NULL_RTX;
19645 dst = operands[0];
19646 src = operands[1];
19648 set = gen_rtx_fmt_e (code, mode, src);
19649 set = gen_rtx_SET (VOIDmode, dst, set);
19651 if (mask)
19653 rtx use, clob;
19654 rtvec par;
19656 use = gen_rtx_USE (VOIDmode, mask);
19657 if (vector_mode)
19658 par = gen_rtvec (2, set, use);
19659 else
19661 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19662 par = gen_rtvec (3, set, use, clob);
19664 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19666 else
19667 emit_insn (set);
19670 /* Expand a copysign operation. Special case operand 0 being a constant. */
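/* Conceptually (scalar SFmode view, for illustration only):
     copysign (x, y) = (x & 0x7fffffff) | (y & 0x80000000)
   i.e. keep the magnitude of X and take the sign bit of Y; the code
   below builds the corresponding vector masks with
   ix86_build_signbit_mask.  */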
19672 void
19673 ix86_expand_copysign (rtx operands[])
19675 machine_mode mode, vmode;
19676 rtx dest, op0, op1, mask, nmask;
19678 dest = operands[0];
19679 op0 = operands[1];
19680 op1 = operands[2];
19682 mode = GET_MODE (dest);
19684 if (mode == SFmode)
19685 vmode = V4SFmode;
19686 else if (mode == DFmode)
19687 vmode = V2DFmode;
19688 else
19689 vmode = mode;
19691 if (GET_CODE (op0) == CONST_DOUBLE)
19693 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19695 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19696 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19698 if (mode == SFmode || mode == DFmode)
19700 if (op0 == CONST0_RTX (mode))
19701 op0 = CONST0_RTX (vmode);
19702 else
19704 rtx v = ix86_build_const_vector (vmode, false, op0);
19706 op0 = force_reg (vmode, v);
19709 else if (op0 != CONST0_RTX (mode))
19710 op0 = force_reg (mode, op0);
19712 mask = ix86_build_signbit_mask (vmode, 0, 0);
19714 if (mode == SFmode)
19715 copysign_insn = gen_copysignsf3_const;
19716 else if (mode == DFmode)
19717 copysign_insn = gen_copysigndf3_const;
19718 else
19719 copysign_insn = gen_copysigntf3_const;
19721 emit_insn (copysign_insn (dest, op0, op1, mask));
19723 else
19725 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19727 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19728 mask = ix86_build_signbit_mask (vmode, 0, 0);
19730 if (mode == SFmode)
19731 copysign_insn = gen_copysignsf3_var;
19732 else if (mode == DFmode)
19733 copysign_insn = gen_copysigndf3_var;
19734 else
19735 copysign_insn = gen_copysigntf3_var;
19737 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19741 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19742 be a constant, and so has already been expanded into a vector constant. */
19744 void
19745 ix86_split_copysign_const (rtx operands[])
19747 machine_mode mode, vmode;
19748 rtx dest, op0, mask, x;
19750 dest = operands[0];
19751 op0 = operands[1];
19752 mask = operands[3];
19754 mode = GET_MODE (dest);
19755 vmode = GET_MODE (mask);
19757 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19758 x = gen_rtx_AND (vmode, dest, mask);
19759 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19761 if (op0 != CONST0_RTX (vmode))
19763 x = gen_rtx_IOR (vmode, dest, op0);
19764 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19768 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19769 so we have to do two masks. */
19771 void
19772 ix86_split_copysign_var (rtx operands[])
19774 machine_mode mode, vmode;
19775 rtx dest, scratch, op0, op1, mask, nmask, x;
19777 dest = operands[0];
19778 scratch = operands[1];
19779 op0 = operands[2];
19780 op1 = operands[3];
19781 nmask = operands[4];
19782 mask = operands[5];
19784 mode = GET_MODE (dest);
19785 vmode = GET_MODE (mask);
19787 if (rtx_equal_p (op0, op1))
19789 /* Shouldn't happen often (it's useless, obviously), but when it does
19790 we'd generate incorrect code if we continue below. */
19791 emit_move_insn (dest, op0);
19792 return;
19795 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19797 gcc_assert (REGNO (op1) == REGNO (scratch));
19799 x = gen_rtx_AND (vmode, scratch, mask);
19800 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19802 dest = mask;
19803 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19804 x = gen_rtx_NOT (vmode, dest);
19805 x = gen_rtx_AND (vmode, x, op0);
19806 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19808 else
19810 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19812 x = gen_rtx_AND (vmode, scratch, mask);
19814 else /* alternative 2,4 */
19816 gcc_assert (REGNO (mask) == REGNO (scratch));
19817 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19818 x = gen_rtx_AND (vmode, scratch, op1);
19820 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19822 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19824 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19825 x = gen_rtx_AND (vmode, dest, nmask);
19827 else /* alternative 3,4 */
19829 gcc_assert (REGNO (nmask) == REGNO (dest));
19830 dest = nmask;
19831 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19832 x = gen_rtx_AND (vmode, dest, op0);
19834 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19837 x = gen_rtx_IOR (vmode, dest, scratch);
19838 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19841 /* Return TRUE or FALSE depending on whether the first SET in INSN
19842 has source and destination with matching CC modes, and that the
19843 CC mode is at least as constrained as REQ_MODE. */
19845 bool
19846 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19848 rtx set;
19849 machine_mode set_mode;
19851 set = PATTERN (insn);
19852 if (GET_CODE (set) == PARALLEL)
19853 set = XVECEXP (set, 0, 0);
19854 gcc_assert (GET_CODE (set) == SET);
19855 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19857 set_mode = GET_MODE (SET_DEST (set));
19858 switch (set_mode)
19860 case CCNOmode:
19861 if (req_mode != CCNOmode
19862 && (req_mode != CCmode
19863 || XEXP (SET_SRC (set), 1) != const0_rtx))
19864 return false;
19865 break;
19866 case CCmode:
19867 if (req_mode == CCGCmode)
19868 return false;
19869 /* FALLTHRU */
19870 case CCGCmode:
19871 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19872 return false;
19873 /* FALLTHRU */
19874 case CCGOCmode:
19875 if (req_mode == CCZmode)
19876 return false;
19877 /* FALLTHRU */
19878 case CCZmode:
19879 break;
19881 case CCAmode:
19882 case CCCmode:
19883 case CCOmode:
19884 case CCSmode:
19885 if (set_mode != req_mode)
19886 return false;
19887 break;
19889 default:
19890 gcc_unreachable ();
19893 return GET_MODE (SET_SRC (set)) == set_mode;
19896 /* Generate insn patterns to do an integer compare of OPERANDS. */
19898 static rtx
19899 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19901 machine_mode cmpmode;
19902 rtx tmp, flags;
19904 cmpmode = SELECT_CC_MODE (code, op0, op1);
19905 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19907 /* This is very simple, but making the interface the same as in the
19908 FP case makes the rest of the code easier. */
19909 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19910 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19912 /* Return the test that should be put into the flags user, i.e.
19913 the bcc, scc, or cmov instruction. */
19914 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19917 /* Figure out whether to use ordered or unordered fp comparisons.
19918 Return the appropriate mode to use. */
19920 machine_mode
19921 ix86_fp_compare_mode (enum rtx_code)
19923 /* ??? In order to make all comparisons reversible, we do all comparisons
19924 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19925 all forms of trapping and nontrapping comparisons, we can make inequality
19926 comparisons trapping again, since that results in better code when using
19927 FCOM based compares. */
19928 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19931 machine_mode
19932 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19934 machine_mode mode = GET_MODE (op0);
19936 if (SCALAR_FLOAT_MODE_P (mode))
19938 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19939 return ix86_fp_compare_mode (code);
19942 switch (code)
19944 /* Only zero flag is needed. */
19945 case EQ: /* ZF=0 */
19946 case NE: /* ZF!=0 */
19947 return CCZmode;
19948 /* Codes needing carry flag. */
19949 case GEU: /* CF=0 */
19950 case LTU: /* CF=1 */
19951 /* Detect overflow checks. They need just the carry flag. */
19952 if (GET_CODE (op0) == PLUS
19953 && rtx_equal_p (op1, XEXP (op0, 0)))
19954 return CCCmode;
19955 else
19956 return CCmode;
19957 case GTU: /* CF=0 & ZF=0 */
19958 case LEU: /* CF=1 | ZF=1 */
19959 return CCmode;
19960 /* Codes possibly doable only with sign flag when
19961 comparing against zero. */
19962 case GE: /* SF=OF or SF=0 */
19963 case LT: /* SF<>OF or SF=1 */
19964 if (op1 == const0_rtx)
19965 return CCGOCmode;
19966 else
19967 /* For other cases the carry flag is not required. */
19968 return CCGCmode;
19969 /* Codes doable only with the sign flag when comparing
19970 against zero, but we miss the jump instruction for it,
19971 so we need to use relational tests against overflow,
19972 which thus needs to be zero. */
19973 case GT: /* ZF=0 & SF=OF */
19974 case LE: /* ZF=1 | SF<>OF */
19975 if (op1 == const0_rtx)
19976 return CCNOmode;
19977 else
19978 return CCGCmode;
19979 /* The strcmp pattern does (use flags), and combine may ask us for the
19980 proper mode. */
19981 case USE:
19982 return CCmode;
19983 default:
19984 gcc_unreachable ();
19988 /* Return the fixed registers used for condition codes. */
19990 static bool
19991 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19993 *p1 = FLAGS_REG;
19994 *p2 = FPSR_REG;
19995 return true;
19998 /* If two condition code modes are compatible, return a condition code
19999 mode which is compatible with both. Otherwise, return
20000 VOIDmode. */
20002 static machine_mode
20003 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20005 if (m1 == m2)
20006 return m1;
20008 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20009 return VOIDmode;
20011 if ((m1 == CCGCmode && m2 == CCGOCmode)
20012 || (m1 == CCGOCmode && m2 == CCGCmode))
20013 return CCGCmode;
20015 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20016 return m2;
20017 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20018 return m1;
20020 switch (m1)
20022 default:
20023 gcc_unreachable ();
20025 case CCmode:
20026 case CCGCmode:
20027 case CCGOCmode:
20028 case CCNOmode:
20029 case CCAmode:
20030 case CCCmode:
20031 case CCOmode:
20032 case CCSmode:
20033 case CCZmode:
20034 switch (m2)
20036 default:
20037 return VOIDmode;
20039 case CCmode:
20040 case CCGCmode:
20041 case CCGOCmode:
20042 case CCNOmode:
20043 case CCAmode:
20044 case CCCmode:
20045 case CCOmode:
20046 case CCSmode:
20047 case CCZmode:
20048 return CCmode;
20051 case CCFPmode:
20052 case CCFPUmode:
20053 /* These are only compatible with themselves, which we already
20054 checked above. */
20055 return VOIDmode;
20060 /* Return a comparison we can do that is equivalent to
20061 swap_condition (code), apart possibly from orderedness.
20062 But never change orderedness if TARGET_IEEE_FP, returning
20063 UNKNOWN in that case if necessary. */
20065 static enum rtx_code
20066 ix86_fp_swap_condition (enum rtx_code code)
20068 switch (code)
20070 case GT: /* GTU - CF=0 & ZF=0 */
20071 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20072 case GE: /* GEU - CF=0 */
20073 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20074 case UNLT: /* LTU - CF=1 */
20075 return TARGET_IEEE_FP ? UNKNOWN : GT;
20076 case UNLE: /* LEU - CF=1 | ZF=1 */
20077 return TARGET_IEEE_FP ? UNKNOWN : GE;
20078 default:
20079 return swap_condition (code);
20083 /* Return the cost of comparison CODE using the best strategy for performance.
20084 All the following functions use the number of instructions as the cost metric.
20085 In the future this should be tweaked to compute bytes for optimize_size and
20086 to take into account the performance of various instructions on various CPUs. */
20088 static int
20089 ix86_fp_comparison_cost (enum rtx_code code)
20091 int arith_cost;
20093 /* The cost of code using bit-twiddling on %ah. */
20094 switch (code)
20096 case UNLE:
20097 case UNLT:
20098 case LTGT:
20099 case GT:
20100 case GE:
20101 case UNORDERED:
20102 case ORDERED:
20103 case UNEQ:
20104 arith_cost = 4;
20105 break;
20106 case LT:
20107 case NE:
20108 case EQ:
20109 case UNGE:
20110 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20111 break;
20112 case LE:
20113 case UNGT:
20114 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20115 break;
20116 default:
20117 gcc_unreachable ();
20120 switch (ix86_fp_comparison_strategy (code))
20122 case IX86_FPCMP_COMI:
20123 return arith_cost > 4 ? 3 : 2;
20124 case IX86_FPCMP_SAHF:
20125 return arith_cost > 4 ? 4 : 3;
20126 default:
20127 return arith_cost;
20131 /* Return the strategy to use for floating-point comparisons. We assume that
20132 fcomi is always preferable where available, since that is also true when
20133 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20135 enum ix86_fpcmp_strategy
20136 ix86_fp_comparison_strategy (enum rtx_code)
20138 /* Do fcomi/sahf based test when profitable. */
20140 if (TARGET_CMOVE)
20141 return IX86_FPCMP_COMI;
20143 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20144 return IX86_FPCMP_SAHF;
20146 return IX86_FPCMP_ARITH;
20149 /* Swap, force into registers, or otherwise massage the two operands
20150 to a fp comparison. The operands are updated in place; the new
20151 comparison code is returned. */
20153 static enum rtx_code
20154 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20156 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20157 rtx op0 = *pop0, op1 = *pop1;
20158 machine_mode op_mode = GET_MODE (op0);
20159 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20161 /* All of the unordered compare instructions only work on registers.
20162 The same is true of the fcomi compare instructions. The XFmode
20163 compare instructions require registers except when comparing
20164 against zero or when converting operand 1 from fixed point to
20165 floating point. */
20167 if (!is_sse
20168 && (fpcmp_mode == CCFPUmode
20169 || (op_mode == XFmode
20170 && ! (standard_80387_constant_p (op0) == 1
20171 || standard_80387_constant_p (op1) == 1)
20172 && GET_CODE (op1) != FLOAT)
20173 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20175 op0 = force_reg (op_mode, op0);
20176 op1 = force_reg (op_mode, op1);
20178 else
20180 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20181 things around if they appear profitable, otherwise force op0
20182 into a register. */
20184 if (standard_80387_constant_p (op0) == 0
20185 || (MEM_P (op0)
20186 && ! (standard_80387_constant_p (op1) == 0
20187 || MEM_P (op1))))
20189 enum rtx_code new_code = ix86_fp_swap_condition (code);
20190 if (new_code != UNKNOWN)
20192 std::swap (op0, op1);
20193 code = new_code;
20197 if (!REG_P (op0))
20198 op0 = force_reg (op_mode, op0);
20200 if (CONSTANT_P (op1))
20202 int tmp = standard_80387_constant_p (op1);
20203 if (tmp == 0)
20204 op1 = validize_mem (force_const_mem (op_mode, op1));
20205 else if (tmp == 1)
20207 if (TARGET_CMOVE)
20208 op1 = force_reg (op_mode, op1);
20210 else
20211 op1 = force_reg (op_mode, op1);
20215 /* Try to rearrange the comparison to make it cheaper. */
20216 if (ix86_fp_comparison_cost (code)
20217 > ix86_fp_comparison_cost (swap_condition (code))
20218 && (REG_P (op1) || can_create_pseudo_p ()))
20220 std::swap (op0, op1);
20221 code = swap_condition (code);
20222 if (!REG_P (op0))
20223 op0 = force_reg (op_mode, op0);
20226 *pop0 = op0;
20227 *pop1 = op1;
20228 return code;
20231 /* Convert the comparison codes we use to represent FP comparisons to the
20232 integer code that will result in the proper branch. Return UNKNOWN if no
20233 such code is available. */
20235 enum rtx_code
20236 ix86_fp_compare_code_to_integer (enum rtx_code code)
20238 switch (code)
20240 case GT:
20241 return GTU;
20242 case GE:
20243 return GEU;
20244 case ORDERED:
20245 case UNORDERED:
20246 return code;
20247 break;
20248 case UNEQ:
20249 return EQ;
20250 break;
20251 case UNLT:
20252 return LTU;
20253 break;
20254 case UNLE:
20255 return LEU;
20256 break;
20257 case LTGT:
20258 return NE;
20259 break;
20260 default:
20261 return UNKNOWN;
20265 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20267 static rtx
20268 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20270 machine_mode fpcmp_mode, intcmp_mode;
20271 rtx tmp, tmp2;
20273 fpcmp_mode = ix86_fp_compare_mode (code);
20274 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20276 /* Do fcomi/sahf based test when profitable. */
20277 switch (ix86_fp_comparison_strategy (code))
20279 case IX86_FPCMP_COMI:
20280 intcmp_mode = fpcmp_mode;
20281 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20282 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20283 tmp);
20284 emit_insn (tmp);
20285 break;
20287 case IX86_FPCMP_SAHF:
20288 intcmp_mode = fpcmp_mode;
20289 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20290 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20291 tmp);
20293 if (!scratch)
20294 scratch = gen_reg_rtx (HImode);
20295 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20296 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20297 break;
20299 case IX86_FPCMP_ARITH:
20300 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20301 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20302 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20303 if (!scratch)
20304 scratch = gen_reg_rtx (HImode);
20305 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20307 /* In the unordered case, we have to check C2 for NaN's, which
20308 doesn't happen to work out to anything nice combination-wise.
20309 So do some bit twiddling on the value we've got in AH to come
20310 up with an appropriate set of condition codes. */
20312 intcmp_mode = CCNOmode;
20313 switch (code)
20315 case GT:
20316 case UNGT:
20317 if (code == GT || !TARGET_IEEE_FP)
20319 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20320 code = EQ;
20322 else
20324 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20325 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20326 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20327 intcmp_mode = CCmode;
20328 code = GEU;
20330 break;
20331 case LT:
20332 case UNLT:
20333 if (code == LT && TARGET_IEEE_FP)
20335 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20336 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20337 intcmp_mode = CCmode;
20338 code = EQ;
20340 else
20342 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20343 code = NE;
20345 break;
20346 case GE:
20347 case UNGE:
20348 if (code == GE || !TARGET_IEEE_FP)
20350 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20351 code = EQ;
20353 else
20355 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20356 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20357 code = NE;
20359 break;
20360 case LE:
20361 case UNLE:
20362 if (code == LE && TARGET_IEEE_FP)
20364 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20365 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20366 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20367 intcmp_mode = CCmode;
20368 code = LTU;
20370 else
20372 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20373 code = NE;
20375 break;
20376 case EQ:
20377 case UNEQ:
20378 if (code == EQ && TARGET_IEEE_FP)
20380 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20381 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20382 intcmp_mode = CCmode;
20383 code = EQ;
20385 else
20387 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20388 code = NE;
20390 break;
20391 case NE:
20392 case LTGT:
20393 if (code == NE && TARGET_IEEE_FP)
20395 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20396 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20397 GEN_INT (0x40)));
20398 code = NE;
20400 else
20402 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20403 code = EQ;
20405 break;
20407 case UNORDERED:
20408 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20409 code = NE;
20410 break;
20411 case ORDERED:
20412 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20413 code = EQ;
20414 break;
20416 default:
20417 gcc_unreachable ();
20419 break;
20421 default:
20422 gcc_unreachable();
20425 /* Return the test that should be put into the flags user, i.e.
20426 the bcc, scc, or cmov instruction. */
20427 return gen_rtx_fmt_ee (code, VOIDmode,
20428 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20429 const0_rtx);
20432 static rtx
20433 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20435 rtx ret;
20437 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20438 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20440 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20442 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20443 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20445 else
20446 ret = ix86_expand_int_compare (code, op0, op1);
20448 return ret;
20451 void
20452 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20454 machine_mode mode = GET_MODE (op0);
20455 rtx tmp;
20457 switch (mode)
20459 case SFmode:
20460 case DFmode:
20461 case XFmode:
20462 case QImode:
20463 case HImode:
20464 case SImode:
20465 simple:
20466 tmp = ix86_expand_compare (code, op0, op1);
20467 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20468 gen_rtx_LABEL_REF (VOIDmode, label),
20469 pc_rtx);
20470 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20471 return;
20473 case DImode:
20474 if (TARGET_64BIT)
20475 goto simple;
20476 case TImode:
20477 /* Expand DImode branch into multiple compare+branch. */
20479 rtx lo[2], hi[2];
20480 rtx_code_label *label2;
20481 enum rtx_code code1, code2, code3;
20482 machine_mode submode;
20484 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20486 std::swap (op0, op1);
20487 code = swap_condition (code);
20490 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20491 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20493 submode = mode == DImode ? SImode : DImode;
20495 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20496 avoid two branches. This costs one extra insn, so disable when
20497 optimizing for size. */
20499 if ((code == EQ || code == NE)
20500 && (!optimize_insn_for_size_p ()
20501 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20503 rtx xor0, xor1;
20505 xor1 = hi[0];
20506 if (hi[1] != const0_rtx)
20507 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20508 NULL_RTX, 0, OPTAB_WIDEN);
20510 xor0 = lo[0];
20511 if (lo[1] != const0_rtx)
20512 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20513 NULL_RTX, 0, OPTAB_WIDEN);
20515 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20516 NULL_RTX, 0, OPTAB_WIDEN);
20518 ix86_expand_branch (code, tmp, const0_rtx, label);
20519 return;
20522 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20523 op1 is a constant and the low word is zero, then we can just
20524 examine the high word. Similarly for low word -1 and
20525 less-or-equal-than or greater-than. */
20527 if (CONST_INT_P (hi[1]))
20528 switch (code)
20530 case LT: case LTU: case GE: case GEU:
20531 if (lo[1] == const0_rtx)
20533 ix86_expand_branch (code, hi[0], hi[1], label);
20534 return;
20536 break;
20537 case LE: case LEU: case GT: case GTU:
20538 if (lo[1] == constm1_rtx)
20540 ix86_expand_branch (code, hi[0], hi[1], label);
20541 return;
20543 break;
20544 default:
20545 break;
20548 /* Otherwise, we need two or three jumps. */
20550 label2 = gen_label_rtx ();
20552 code1 = code;
20553 code2 = swap_condition (code);
20554 code3 = unsigned_condition (code);
20556 switch (code)
20558 case LT: case GT: case LTU: case GTU:
20559 break;
20561 case LE: code1 = LT; code2 = GT; break;
20562 case GE: code1 = GT; code2 = LT; break;
20563 case LEU: code1 = LTU; code2 = GTU; break;
20564 case GEU: code1 = GTU; code2 = LTU; break;
20566 case EQ: code1 = UNKNOWN; code2 = NE; break;
20567 case NE: code2 = UNKNOWN; break;
20569 default:
20570 gcc_unreachable ();
20574 * a < b =>
20575 * if (hi(a) < hi(b)) goto true;
20576 * if (hi(a) > hi(b)) goto false;
20577 * if (lo(a) < lo(b)) goto true;
20578 * false:
20581 if (code1 != UNKNOWN)
20582 ix86_expand_branch (code1, hi[0], hi[1], label);
20583 if (code2 != UNKNOWN)
20584 ix86_expand_branch (code2, hi[0], hi[1], label2);
20586 ix86_expand_branch (code3, lo[0], lo[1], label);
20588 if (code2 != UNKNOWN)
20589 emit_label (label2);
20590 return;
20593 default:
20594 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20595 goto simple;
20599 /* Split branch based on floating point condition. */
20600 void
20601 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20602 rtx target1, rtx target2, rtx tmp)
20604 rtx condition;
20605 rtx i;
20607 if (target2 != pc_rtx)
20609 std::swap (target1, target2);
20610 code = reverse_condition_maybe_unordered (code);
20613 condition = ix86_expand_fp_compare (code, op1, op2,
20614 tmp);
20616 i = emit_jump_insn (gen_rtx_SET
20617 (VOIDmode, pc_rtx,
20618 gen_rtx_IF_THEN_ELSE (VOIDmode,
20619 condition, target1, target2)));
20620 if (split_branch_probability >= 0)
20621 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20624 void
20625 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20627 rtx ret;
20629 gcc_assert (GET_MODE (dest) == QImode);
20631 ret = ix86_expand_compare (code, op0, op1);
20632 PUT_MODE (ret, QImode);
20633 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20636 /* Expand a comparison setting or clearing the carry flag. Return true
20637 when successful, and set *POP to the comparison operation. */
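/* Roughly speaking, once a comparison has been rewritten so that only the
   carry flag matters (LTU/GEU), the callers can materialize the result
   without a branch, e.g. "sbb reg,reg" yields 0 or -1 depending on the
   carry; the cases below rewrite other comparison codes into that form
   where possible.  */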
20638 static bool
20639 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20641 machine_mode mode =
20642 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20644 /* Do not handle double-mode compares that go through special path. */
20645 if (mode == (TARGET_64BIT ? TImode : DImode))
20646 return false;
20648 if (SCALAR_FLOAT_MODE_P (mode))
20650 rtx compare_op;
20651 rtx_insn *compare_seq;
20653 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20655 /* Shortcut: the following common codes never translate
20656 into carry flag compares. */
20657 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20658 || code == ORDERED || code == UNORDERED)
20659 return false;
20661 /* These comparisons require the zero flag; swap operands so they don't. */
20662 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20663 && !TARGET_IEEE_FP)
20665 std::swap (op0, op1);
20666 code = swap_condition (code);
20669 /* Try to expand the comparison and verify that we end up with
20670 carry flag based comparison. This fails to be true only when
20671 we decide to expand the comparison using arithmetic, which is not
20672 a common scenario. */
20673 start_sequence ();
20674 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20675 compare_seq = get_insns ();
20676 end_sequence ();
20678 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20679 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20680 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20681 else
20682 code = GET_CODE (compare_op);
20684 if (code != LTU && code != GEU)
20685 return false;
20687 emit_insn (compare_seq);
20688 *pop = compare_op;
20689 return true;
20692 if (!INTEGRAL_MODE_P (mode))
20693 return false;
20695 switch (code)
20697 case LTU:
20698 case GEU:
20699 break;
20701 /* Convert a==0 into (unsigned)a<1. */
20702 case EQ:
20703 case NE:
20704 if (op1 != const0_rtx)
20705 return false;
20706 op1 = const1_rtx;
20707 code = (code == EQ ? LTU : GEU);
20708 break;
20710 /* Convert a>b into b<a or a>=b+1. */
20711 case GTU:
20712 case LEU:
20713 if (CONST_INT_P (op1))
20715 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20716 /* Bail out on overflow. We could still swap the operands, but that
20717 would force loading the constant into a register. */
20718 if (op1 == const0_rtx
20719 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20720 return false;
20721 code = (code == GTU ? GEU : LTU);
20723 else
20725 std::swap (op0, op1);
20726 code = (code == GTU ? LTU : GEU);
20728 break;
20730 /* Convert a>=0 into (unsigned)a<0x80000000. */
20731 case LT:
20732 case GE:
20733 if (mode == DImode || op1 != const0_rtx)
20734 return false;
20735 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20736 code = (code == LT ? GEU : LTU);
20737 break;
20738 case LE:
20739 case GT:
20740 if (mode == DImode || op1 != constm1_rtx)
20741 return false;
20742 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20743 code = (code == LE ? GEU : LTU);
20744 break;
20746 default:
20747 return false;
20749 /* Swapping operands may cause constant to appear as first operand. */
20750 if (!nonimmediate_operand (op0, VOIDmode))
20752 if (!can_create_pseudo_p ())
20753 return false;
20754 op0 = force_reg (mode, op0);
20756 *pop = ix86_expand_compare (code, op0, op1);
20757 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20758 return true;
20761 bool
20762 ix86_expand_int_movcc (rtx operands[])
20764 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20765 rtx_insn *compare_seq;
20766 rtx compare_op;
20767 machine_mode mode = GET_MODE (operands[0]);
20768 bool sign_bit_compare_p = false;
20769 rtx op0 = XEXP (operands[1], 0);
20770 rtx op1 = XEXP (operands[1], 1);
20772 if (GET_MODE (op0) == TImode
20773 || (GET_MODE (op0) == DImode
20774 && !TARGET_64BIT))
20775 return false;
20777 start_sequence ();
20778 compare_op = ix86_expand_compare (code, op0, op1);
20779 compare_seq = get_insns ();
20780 end_sequence ();
20782 compare_code = GET_CODE (compare_op);
20784 if ((op1 == const0_rtx && (code == GE || code == LT))
20785 || (op1 == constm1_rtx && (code == GT || code == LE)))
20786 sign_bit_compare_p = true;
20788 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20789 HImode insns, we'd be swallowed in word prefix ops. */
20791 if ((mode != HImode || TARGET_FAST_PREFIX)
20792 && (mode != (TARGET_64BIT ? TImode : DImode))
20793 && CONST_INT_P (operands[2])
20794 && CONST_INT_P (operands[3]))
20796 rtx out = operands[0];
20797 HOST_WIDE_INT ct = INTVAL (operands[2]);
20798 HOST_WIDE_INT cf = INTVAL (operands[3]);
20799 HOST_WIDE_INT diff;
20801 diff = ct - cf;
20802 /* Sign bit compares are better done using shifts than by using
20803 sbb. */
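/* As an illustration of the sbb idiom used below: an unsigned
   (a < b) ? 0 : 1 can be computed branchlessly as, roughly,
       cmpl  b, a
       sbbl  %eax, %eax      # %eax = -1 if a < b, else 0
       addl  $1, %eax        # %eax =  0 if a < b, else 1
   and the constant cases below generalize this with not/and/or.  */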
20804 if (sign_bit_compare_p
20805 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20807 /* Detect overlap between destination and compare sources. */
20808 rtx tmp = out;
20810 if (!sign_bit_compare_p)
20812 rtx flags;
20813 bool fpcmp = false;
20815 compare_code = GET_CODE (compare_op);
20817 flags = XEXP (compare_op, 0);
20819 if (GET_MODE (flags) == CCFPmode
20820 || GET_MODE (flags) == CCFPUmode)
20822 fpcmp = true;
20823 compare_code
20824 = ix86_fp_compare_code_to_integer (compare_code);
20827 /* To simplify rest of code, restrict to the GEU case. */
20828 if (compare_code == LTU)
20830 std::swap (ct, cf);
20831 compare_code = reverse_condition (compare_code);
20832 code = reverse_condition (code);
20834 else
20836 if (fpcmp)
20837 PUT_CODE (compare_op,
20838 reverse_condition_maybe_unordered
20839 (GET_CODE (compare_op)));
20840 else
20841 PUT_CODE (compare_op,
20842 reverse_condition (GET_CODE (compare_op)));
20844 diff = ct - cf;
20846 if (reg_overlap_mentioned_p (out, op0)
20847 || reg_overlap_mentioned_p (out, op1))
20848 tmp = gen_reg_rtx (mode);
20850 if (mode == DImode)
20851 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20852 else
20853 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20854 flags, compare_op));
20856 else
20858 if (code == GT || code == GE)
20859 code = reverse_condition (code);
20860 else
20862 std::swap (ct, cf);
20863 diff = ct - cf;
20865 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20868 if (diff == 1)
20871 * cmpl op0,op1
20872 * sbbl dest,dest
20873 * [addl dest, ct]
20875 * Size 5 - 8.
20877 if (ct)
20878 tmp = expand_simple_binop (mode, PLUS,
20879 tmp, GEN_INT (ct),
20880 copy_rtx (tmp), 1, OPTAB_DIRECT);
20882 else if (cf == -1)
20885 * cmpl op0,op1
20886 * sbbl dest,dest
20887 * orl $ct, dest
20889 * Size 8.
20891 tmp = expand_simple_binop (mode, IOR,
20892 tmp, GEN_INT (ct),
20893 copy_rtx (tmp), 1, OPTAB_DIRECT);
20895 else if (diff == -1 && ct)
20898 * cmpl op0,op1
20899 * sbbl dest,dest
20900 * notl dest
20901 * [addl dest, cf]
20903 * Size 8 - 11.
20905 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20906 if (cf)
20907 tmp = expand_simple_binop (mode, PLUS,
20908 copy_rtx (tmp), GEN_INT (cf),
20909 copy_rtx (tmp), 1, OPTAB_DIRECT);
20911 else
20914 * cmpl op0,op1
20915 * sbbl dest,dest
20916 * [notl dest]
20917 * andl cf - ct, dest
20918 * [addl dest, ct]
20920 * Size 8 - 11.
20923 if (cf == 0)
20925 cf = ct;
20926 ct = 0;
20927 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20930 tmp = expand_simple_binop (mode, AND,
20931 copy_rtx (tmp),
20932 gen_int_mode (cf - ct, mode),
20933 copy_rtx (tmp), 1, OPTAB_DIRECT);
20934 if (ct)
20935 tmp = expand_simple_binop (mode, PLUS,
20936 copy_rtx (tmp), GEN_INT (ct),
20937 copy_rtx (tmp), 1, OPTAB_DIRECT);
20940 if (!rtx_equal_p (tmp, out))
20941 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20943 return true;
20946 if (diff < 0)
20948 machine_mode cmp_mode = GET_MODE (op0);
20949 enum rtx_code new_code;
20951 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20953 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20955 /* We may be reversing an unordered compare to a normal compare, which
20956 is not valid in general (we may convert a non-trapping condition
20957 into a trapping one); however, on i386 we currently emit all
20958 comparisons unordered. */
20959 new_code = reverse_condition_maybe_unordered (code);
20961 else
20962 new_code = ix86_reverse_condition (code, cmp_mode);
20963 if (new_code != UNKNOWN)
20965 std::swap (ct, cf);
20966 diff = -diff;
20967 code = new_code;
20971 compare_code = UNKNOWN;
20972 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20973 && CONST_INT_P (op1))
20975 if (op1 == const0_rtx
20976 && (code == LT || code == GE))
20977 compare_code = code;
20978 else if (op1 == constm1_rtx)
20980 if (code == LE)
20981 compare_code = LT;
20982 else if (code == GT)
20983 compare_code = GE;
20987 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20988 if (compare_code != UNKNOWN
20989 && GET_MODE (op0) == GET_MODE (out)
20990 && (cf == -1 || ct == -1))
20992 /* If lea code below could be used, only optimize
20993 if it results in a 2 insn sequence. */
20995 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20996 || diff == 3 || diff == 5 || diff == 9)
20997 || (compare_code == LT && ct == -1)
20998 || (compare_code == GE && cf == -1))
21001 * notl op1 (if necessary)
21002 * sarl $31, op1
21003 * orl cf, op1
21005 if (ct != -1)
21007 cf = ct;
21008 ct = -1;
21009 code = reverse_condition (code);
21012 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21014 out = expand_simple_binop (mode, IOR,
21015 out, GEN_INT (cf),
21016 out, 1, OPTAB_DIRECT);
21017 if (out != operands[0])
21018 emit_move_insn (operands[0], out);
21020 return true;
21025 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21026 || diff == 3 || diff == 5 || diff == 9)
21027 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21028 && (mode != DImode
21029 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21032 * xorl dest,dest
21033 * cmpl op1,op2
21034 * setcc dest
21035 * lea cf(dest*(ct-cf)),dest
21037 * Size 14.
21039 * This also catches the degenerate setcc-only case.
21042 rtx tmp;
21043 int nops;
21045 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21047 nops = 0;
21048 /* On x86_64 the lea instruction operates on Pmode, so we need
21049 to get the arithmetic done in the proper mode to match. */
21050 if (diff == 1)
21051 tmp = copy_rtx (out);
21052 else
21054 rtx out1;
21055 out1 = copy_rtx (out);
21056 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21057 nops++;
21058 if (diff & 1)
21060 tmp = gen_rtx_PLUS (mode, tmp, out1);
21061 nops++;
21064 if (cf != 0)
21066 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21067 nops++;
21069 if (!rtx_equal_p (tmp, out))
21071 if (nops == 1)
21072 out = force_operand (tmp, copy_rtx (out));
21073 else
21074 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21076 if (!rtx_equal_p (out, operands[0]))
21077 emit_move_insn (operands[0], copy_rtx (out));
21079 return true;
21083 * General case: Jumpful:
21084 * xorl dest,dest cmpl op1, op2
21085 * cmpl op1, op2 movl ct, dest
21086 * setcc dest jcc 1f
21087 * decl dest movl cf, dest
21088 * andl (cf-ct),dest 1:
21089 * addl ct,dest
21091 * Size 20. Size 14.
21093 * This is reasonably steep, but branch mispredict costs are
21094 * high on modern cpus, so consider failing only if optimizing
21095 * for space.
21098 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21099 && BRANCH_COST (optimize_insn_for_speed_p (),
21100 false) >= 2)
21102 if (cf == 0)
21104 machine_mode cmp_mode = GET_MODE (op0);
21105 enum rtx_code new_code;
21107 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21109 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21111 /* We may be reversing an unordered compare to a normal compare,
21112 which is not valid in general (we may convert a non-trapping
21113 condition into a trapping one); however, on i386 we currently
21114 emit all comparisons unordered. */
21115 new_code = reverse_condition_maybe_unordered (code);
21117 else
21119 new_code = ix86_reverse_condition (code, cmp_mode);
21120 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21121 compare_code = reverse_condition (compare_code);
21124 if (new_code != UNKNOWN)
21126 cf = ct;
21127 ct = 0;
21128 code = new_code;
21132 if (compare_code != UNKNOWN)
21134 /* notl op1 (if needed)
21135 sarl $31, op1
21136 andl (cf-ct), op1
21137 addl ct, op1
21139 For x < 0 (resp. x <= -1) there will be no notl,
21140 so if possible swap the constants to get rid of the
21141 complement.
21142 True/false will be -1/0 while code below (store flag
21143 followed by decrement) is 0/-1, so the constants need
21144 to be exchanged once more. */
21146 if (compare_code == GE || !cf)
21148 code = reverse_condition (code);
21149 compare_code = LT;
21151 else
21152 std::swap (ct, cf);
21154 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21156 else
21158 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21160 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21161 constm1_rtx,
21162 copy_rtx (out), 1, OPTAB_DIRECT);
21165 out = expand_simple_binop (mode, AND, copy_rtx (out),
21166 gen_int_mode (cf - ct, mode),
21167 copy_rtx (out), 1, OPTAB_DIRECT);
21168 if (ct)
21169 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21170 copy_rtx (out), 1, OPTAB_DIRECT);
21171 if (!rtx_equal_p (out, operands[0]))
21172 emit_move_insn (operands[0], copy_rtx (out));
21174 return true;
21178 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21180 /* Try a few things more with specific constants and a variable. */
21182 optab op;
21183 rtx var, orig_out, out, tmp;
21185 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21186 return false;
21188 /* If one of the two operands is an interesting constant, load a
21189 constant with the above and mask it in with a logical operation. */
21191 if (CONST_INT_P (operands[2]))
21193 var = operands[3];
21194 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21195 operands[3] = constm1_rtx, op = and_optab;
21196 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21197 operands[3] = const0_rtx, op = ior_optab;
21198 else
21199 return false;
21201 else if (CONST_INT_P (operands[3]))
21203 var = operands[2];
21204 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21205 operands[2] = constm1_rtx, op = and_optab;
21206 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21207 operands[2] = const0_rtx, op = ior_optab;
21208 else
21209 return false;
21211 else
21212 return false;
21214 orig_out = operands[0];
21215 tmp = gen_reg_rtx (mode);
21216 operands[0] = tmp;
21218 /* Recurse to get the constant loaded. */
21219 if (ix86_expand_int_movcc (operands) == 0)
21220 return false;
21222 /* Mask in the interesting variable. */
21223 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21224 OPTAB_WIDEN);
21225 if (!rtx_equal_p (out, orig_out))
21226 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21228 return true;
21232 * For comparison with above,
21234 * movl cf,dest
21235 * movl ct,tmp
21236 * cmpl op1,op2
21237 * cmovcc tmp,dest
21239 * Size 15.
21242 if (! nonimmediate_operand (operands[2], mode))
21243 operands[2] = force_reg (mode, operands[2]);
21244 if (! nonimmediate_operand (operands[3], mode))
21245 operands[3] = force_reg (mode, operands[3]);
21247 if (! register_operand (operands[2], VOIDmode)
21248 && (mode == QImode
21249 || ! register_operand (operands[3], VOIDmode)))
21250 operands[2] = force_reg (mode, operands[2]);
21252 if (mode == QImode
21253 && ! register_operand (operands[3], VOIDmode))
21254 operands[3] = force_reg (mode, operands[3]);
21256 emit_insn (compare_seq);
21257 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21258 gen_rtx_IF_THEN_ELSE (mode,
21259 compare_op, operands[2],
21260 operands[3])));
21261 return true;
21264 /* Swap, force into registers, or otherwise massage the two operands
21265 to an sse comparison with a mask result. Thus we differ a bit from
21266 ix86_prepare_fp_compare_args which expects to produce a flags result.
21268 The DEST operand exists to help determine whether to commute commutative
21269 operators. The POP0/POP1 operands are updated in place. The new
21270 comparison code is returned, or UNKNOWN if not implementable. */
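/* For example, the non-AVX SSE compares only provide predicates such as
   LT/LE (and their unordered counterparts), not GT/GE, so a GT, GE, UNLE
   or UNLT comparison is handled by swapping the operands and the
   condition: a > b becomes b < a.  */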
21272 static enum rtx_code
21273 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21274 rtx *pop0, rtx *pop1)
21276 switch (code)
21278 case LTGT:
21279 case UNEQ:
21280 /* AVX supports all the needed comparisons. */
21281 if (TARGET_AVX)
21282 break;
21283 /* We have no LTGT as an operator. We could implement it with
21284 NE & ORDERED, but this requires an extra temporary. It's
21285 not clear that it's worth it. */
21286 return UNKNOWN;
21288 case LT:
21289 case LE:
21290 case UNGT:
21291 case UNGE:
21292 /* These are supported directly. */
21293 break;
21295 case EQ:
21296 case NE:
21297 case UNORDERED:
21298 case ORDERED:
21299 /* AVX has 3 operand comparisons, no need to swap anything. */
21300 if (TARGET_AVX)
21301 break;
21302 /* For commutative operators, try to canonicalize the destination
21303 operand to be first in the comparison - this helps reload to
21304 avoid extra moves. */
21305 if (!dest || !rtx_equal_p (dest, *pop1))
21306 break;
21307 /* FALLTHRU */
21309 case GE:
21310 case GT:
21311 case UNLE:
21312 case UNLT:
21313 /* These are not supported directly before AVX, and furthermore
21314 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21315 comparison operands to transform into something that is
21316 supported. */
21317 std::swap (*pop0, *pop1);
21318 code = swap_condition (code);
21319 break;
21321 default:
21322 gcc_unreachable ();
21325 return code;
21328 /* Detect conditional moves that exactly match min/max operational
21329 semantics. Note that this is IEEE safe, as long as we don't
21330 interchange the operands.
21332 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21333 and TRUE if the operation is successful and instructions are emitted. */
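/* For example, "a < b ? a : b" maps to minss/minps with the operands in
   that order.  The SSE min/max instructions return the second source
   operand when the operands are unordered (NaN) or both zero, so keeping
   the original operand order preserves the semantics of the conditional
   move.  */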
21335 static bool
21336 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21337 rtx cmp_op1, rtx if_true, rtx if_false)
21339 machine_mode mode;
21340 bool is_min;
21341 rtx tmp;
21343 if (code == LT)
21345 else if (code == UNGE)
21346 std::swap (if_true, if_false);
21347 else
21348 return false;
21350 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21351 is_min = true;
21352 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21353 is_min = false;
21354 else
21355 return false;
21357 mode = GET_MODE (dest);
21359 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21360 but MODE may be a vector mode and thus not appropriate. */
21361 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21363 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21364 rtvec v;
21366 if_true = force_reg (mode, if_true);
21367 v = gen_rtvec (2, if_true, if_false);
21368 tmp = gen_rtx_UNSPEC (mode, v, u);
21370 else
21372 code = is_min ? SMIN : SMAX;
21373 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21376 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21377 return true;
21380 /* Expand an sse vector comparison. Return the register with the result. */
21382 static rtx
21383 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21384 rtx op_true, rtx op_false)
21386 machine_mode mode = GET_MODE (dest);
21387 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21389 /* In the general case the result of the comparison can differ from the operands' type. */
21390 machine_mode cmp_mode;
21392 /* In AVX512F the result of comparison is an integer mask. */
21393 bool maskcmp = false;
21394 rtx x;
21396 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21398 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21399 gcc_assert (cmp_mode != BLKmode);
21401 maskcmp = true;
21403 else
21404 cmp_mode = cmp_ops_mode;
21407 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21408 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21409 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21411 if (optimize
21412 || reg_overlap_mentioned_p (dest, op_true)
21413 || reg_overlap_mentioned_p (dest, op_false))
21414 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21416 /* Compare patterns for int modes are unspec in AVX512F only. */
21417 if (maskcmp && (code == GT || code == EQ))
21419 rtx (*gen)(rtx, rtx, rtx);
21421 switch (cmp_ops_mode)
21423 case V64QImode:
21424 gcc_assert (TARGET_AVX512BW);
21425 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21426 break;
21427 case V32HImode:
21428 gcc_assert (TARGET_AVX512BW);
21429 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21430 break;
21431 case V16SImode:
21432 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21433 break;
21434 case V8DImode:
21435 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21436 break;
21437 default:
21438 gen = NULL;
21441 if (gen)
21443 emit_insn (gen (dest, cmp_op0, cmp_op1));
21444 return dest;
21447 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21449 if (cmp_mode != mode && !maskcmp)
21451 x = force_reg (cmp_ops_mode, x);
21452 convert_move (dest, x, false);
21454 else
21455 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21457 return dest;
21460 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21461 operations. This is used for both scalar and vector conditional moves. */
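/* Where no blend-style instruction applies, the fallback at the end
   computes, per element,
       dest = (cmp & op_true) | (~cmp & op_false)
   relying on CMP being an all-ones / all-zeros mask; the earlier cases
   are simplifications of this when one arm is a suitable constant.  */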
21463 static void
21464 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21466 machine_mode mode = GET_MODE (dest);
21467 machine_mode cmpmode = GET_MODE (cmp);
21469 /* In AVX512F the result of comparison is an integer mask. */
21470 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21472 rtx t2, t3, x;
21474 if (vector_all_ones_operand (op_true, mode)
21475 && rtx_equal_p (op_false, CONST0_RTX (mode))
21476 && !maskcmp)
21478 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21480 else if (op_false == CONST0_RTX (mode)
21481 && !maskcmp)
21483 op_true = force_reg (mode, op_true);
21484 x = gen_rtx_AND (mode, cmp, op_true);
21485 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21487 else if (op_true == CONST0_RTX (mode)
21488 && !maskcmp)
21490 op_false = force_reg (mode, op_false);
21491 x = gen_rtx_NOT (mode, cmp);
21492 x = gen_rtx_AND (mode, x, op_false);
21493 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21495 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21496 && !maskcmp)
21498 op_false = force_reg (mode, op_false);
21499 x = gen_rtx_IOR (mode, cmp, op_false);
21500 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21502 else if (TARGET_XOP
21503 && !maskcmp)
21505 op_true = force_reg (mode, op_true);
21507 if (!nonimmediate_operand (op_false, mode))
21508 op_false = force_reg (mode, op_false);
21510 emit_insn (gen_rtx_SET (mode, dest,
21511 gen_rtx_IF_THEN_ELSE (mode, cmp,
21512 op_true,
21513 op_false)));
21515 else
21517 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21518 rtx d = dest;
21520 if (!nonimmediate_operand (op_true, mode))
21521 op_true = force_reg (mode, op_true);
21523 op_false = force_reg (mode, op_false);
21525 switch (mode)
21527 case V4SFmode:
21528 if (TARGET_SSE4_1)
21529 gen = gen_sse4_1_blendvps;
21530 break;
21531 case V2DFmode:
21532 if (TARGET_SSE4_1)
21533 gen = gen_sse4_1_blendvpd;
21534 break;
21535 case V16QImode:
21536 case V8HImode:
21537 case V4SImode:
21538 case V2DImode:
21539 if (TARGET_SSE4_1)
21541 gen = gen_sse4_1_pblendvb;
21542 if (mode != V16QImode)
21543 d = gen_reg_rtx (V16QImode);
21544 op_false = gen_lowpart (V16QImode, op_false);
21545 op_true = gen_lowpart (V16QImode, op_true);
21546 cmp = gen_lowpart (V16QImode, cmp);
21548 break;
21549 case V8SFmode:
21550 if (TARGET_AVX)
21551 gen = gen_avx_blendvps256;
21552 break;
21553 case V4DFmode:
21554 if (TARGET_AVX)
21555 gen = gen_avx_blendvpd256;
21556 break;
21557 case V32QImode:
21558 case V16HImode:
21559 case V8SImode:
21560 case V4DImode:
21561 if (TARGET_AVX2)
21563 gen = gen_avx2_pblendvb;
21564 if (mode != V32QImode)
21565 d = gen_reg_rtx (V32QImode);
21566 op_false = gen_lowpart (V32QImode, op_false);
21567 op_true = gen_lowpart (V32QImode, op_true);
21568 cmp = gen_lowpart (V32QImode, cmp);
21570 break;
21572 case V64QImode:
21573 gen = gen_avx512bw_blendmv64qi;
21574 break;
21575 case V32HImode:
21576 gen = gen_avx512bw_blendmv32hi;
21577 break;
21578 case V16SImode:
21579 gen = gen_avx512f_blendmv16si;
21580 break;
21581 case V8DImode:
21582 gen = gen_avx512f_blendmv8di;
21583 break;
21584 case V8DFmode:
21585 gen = gen_avx512f_blendmv8df;
21586 break;
21587 case V16SFmode:
21588 gen = gen_avx512f_blendmv16sf;
21589 break;
21591 default:
21592 break;
21595 if (gen != NULL)
21597 emit_insn (gen (d, op_false, op_true, cmp));
21598 if (d != dest)
21599 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21601 else
21603 op_true = force_reg (mode, op_true);
21605 t2 = gen_reg_rtx (mode);
21606 if (optimize)
21607 t3 = gen_reg_rtx (mode);
21608 else
21609 t3 = dest;
21611 x = gen_rtx_AND (mode, op_true, cmp);
21612 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21614 x = gen_rtx_NOT (mode, cmp);
21615 x = gen_rtx_AND (mode, x, op_false);
21616 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21618 x = gen_rtx_IOR (mode, t3, t2);
21619 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21624 /* Expand a floating-point conditional move. Return true if successful. */
21626 bool
21627 ix86_expand_fp_movcc (rtx operands[])
21629 machine_mode mode = GET_MODE (operands[0]);
21630 enum rtx_code code = GET_CODE (operands[1]);
21631 rtx tmp, compare_op;
21632 rtx op0 = XEXP (operands[1], 0);
21633 rtx op1 = XEXP (operands[1], 1);
21635 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21637 machine_mode cmode;
21639 /* Since we've no cmove for sse registers, don't force bad register
21640 allocation just to gain access to it. Deny movcc when the
21641 comparison mode doesn't match the move mode. */
21642 cmode = GET_MODE (op0);
21643 if (cmode == VOIDmode)
21644 cmode = GET_MODE (op1);
21645 if (cmode != mode)
21646 return false;
21648 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21649 if (code == UNKNOWN)
21650 return false;
21652 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21653 operands[2], operands[3]))
21654 return true;
21656 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21657 operands[2], operands[3]);
21658 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21659 return true;
21662 if (GET_MODE (op0) == TImode
21663 || (GET_MODE (op0) == DImode
21664 && !TARGET_64BIT))
21665 return false;
21667 /* The floating point conditional move instructions don't directly
21668 support conditions resulting from a signed integer comparison. */
21670 compare_op = ix86_expand_compare (code, op0, op1);
21671 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21673 tmp = gen_reg_rtx (QImode);
21674 ix86_expand_setcc (tmp, code, op0, op1);
21676 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21679 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21680 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21681 operands[2], operands[3])));
21683 return true;
21686 /* Expand a floating-point vector conditional move; a vcond operation
21687 rather than a movcc operation. */
21689 bool
21690 ix86_expand_fp_vcond (rtx operands[])
21692 enum rtx_code code = GET_CODE (operands[3]);
21693 rtx cmp;
21695 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21696 &operands[4], &operands[5]);
21697 if (code == UNKNOWN)
21699 rtx temp;
21700 switch (GET_CODE (operands[3]))
21702 case LTGT:
21703 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21704 operands[5], operands[0], operands[0]);
21705 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21706 operands[5], operands[1], operands[2]);
21707 code = AND;
21708 break;
21709 case UNEQ:
21710 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21711 operands[5], operands[0], operands[0]);
21712 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21713 operands[5], operands[1], operands[2]);
21714 code = IOR;
21715 break;
21716 default:
21717 gcc_unreachable ();
21719 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21720 OPTAB_DIRECT);
21721 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21722 return true;
21725 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21726 operands[5], operands[1], operands[2]))
21727 return true;
21729 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21730 operands[1], operands[2]);
21731 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21732 return true;
21735 /* Expand a signed/unsigned integral vector conditional move. */
21737 bool
21738 ix86_expand_int_vcond (rtx operands[])
21740 machine_mode data_mode = GET_MODE (operands[0]);
21741 machine_mode mode = GET_MODE (operands[4]);
21742 enum rtx_code code = GET_CODE (operands[3]);
21743 bool negate = false;
21744 rtx x, cop0, cop1;
21746 cop0 = operands[4];
21747 cop1 = operands[5];
21749 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21750 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
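/* E.g. in V4SImode "x < 0 ? -1 : 0" is just an arithmetic right shift of
   each element by 31 (psrad), and "x < 0 ? 1 : 0" a logical right shift
   by 31 (psrld), so no compare instruction is needed at all.  */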
21751 if ((code == LT || code == GE)
21752 && data_mode == mode
21753 && cop1 == CONST0_RTX (mode)
21754 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21755 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21756 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21757 && (GET_MODE_SIZE (data_mode) == 16
21758 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21760 rtx negop = operands[2 - (code == LT)];
21761 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21762 if (negop == CONST1_RTX (data_mode))
21764 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21765 operands[0], 1, OPTAB_DIRECT);
21766 if (res != operands[0])
21767 emit_move_insn (operands[0], res);
21768 return true;
21770 else if (GET_MODE_INNER (data_mode) != DImode
21771 && vector_all_ones_operand (negop, data_mode))
21773 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21774 operands[0], 0, OPTAB_DIRECT);
21775 if (res != operands[0])
21776 emit_move_insn (operands[0], res);
21777 return true;
21781 if (!nonimmediate_operand (cop1, mode))
21782 cop1 = force_reg (mode, cop1);
21783 if (!general_operand (operands[1], data_mode))
21784 operands[1] = force_reg (data_mode, operands[1]);
21785 if (!general_operand (operands[2], data_mode))
21786 operands[2] = force_reg (data_mode, operands[2]);
21788 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21789 if (TARGET_XOP
21790 && (mode == V16QImode || mode == V8HImode
21791 || mode == V4SImode || mode == V2DImode))
21793 else
21795 /* Canonicalize the comparison to EQ, GT, GTU. */
21796 switch (code)
21798 case EQ:
21799 case GT:
21800 case GTU:
21801 break;
21803 case NE:
21804 case LE:
21805 case LEU:
21806 code = reverse_condition (code);
21807 negate = true;
21808 break;
21810 case GE:
21811 case GEU:
21812 code = reverse_condition (code);
21813 negate = true;
21814 /* FALLTHRU */
21816 case LT:
21817 case LTU:
21818 std::swap (cop0, cop1);
21819 code = swap_condition (code);
21820 break;
21822 default:
21823 gcc_unreachable ();
21826 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21827 if (mode == V2DImode)
21829 switch (code)
21831 case EQ:
21832 /* SSE4.1 supports EQ. */
21833 if (!TARGET_SSE4_1)
21834 return false;
21835 break;
21837 case GT:
21838 case GTU:
21839 /* SSE4.2 supports GT/GTU. */
21840 if (!TARGET_SSE4_2)
21841 return false;
21842 break;
21844 default:
21845 gcc_unreachable ();
21849 /* Unsigned parallel compare is not supported by the hardware.
21850 Play some tricks to turn this into a signed comparison
21851 against 0. */
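/* Roughly: for the wider element modes below, a >u b is rewritten as
   (a - 0x80...0) >s (b - 0x80...0) by subtracting the sign-bit mask from
   both operands; for the narrow element modes, the unsigned saturating
   subtraction gives (a -us b) == 0 exactly when a <=u b, so GTU becomes
   an EQ against zero with the result negated.  */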
21852 if (code == GTU)
21854 cop0 = force_reg (mode, cop0);
21856 switch (mode)
21858 case V16SImode:
21859 case V8DImode:
21860 case V8SImode:
21861 case V4DImode:
21862 case V4SImode:
21863 case V2DImode:
21865 rtx t1, t2, mask;
21866 rtx (*gen_sub3) (rtx, rtx, rtx);
21868 switch (mode)
21870 case V16SImode: gen_sub3 = gen_subv16si3; break;
21871 case V8DImode: gen_sub3 = gen_subv8di3; break;
21872 case V8SImode: gen_sub3 = gen_subv8si3; break;
21873 case V4DImode: gen_sub3 = gen_subv4di3; break;
21874 case V4SImode: gen_sub3 = gen_subv4si3; break;
21875 case V2DImode: gen_sub3 = gen_subv2di3; break;
21876 default:
21877 gcc_unreachable ();
21879 /* Subtract (-(INT MAX) - 1) from both operands to make
21880 them signed. */
21881 mask = ix86_build_signbit_mask (mode, true, false);
21882 t1 = gen_reg_rtx (mode);
21883 emit_insn (gen_sub3 (t1, cop0, mask));
21885 t2 = gen_reg_rtx (mode);
21886 emit_insn (gen_sub3 (t2, cop1, mask));
21888 cop0 = t1;
21889 cop1 = t2;
21890 code = GT;
21892 break;
21894 case V64QImode:
21895 case V32HImode:
21896 case V32QImode:
21897 case V16HImode:
21898 case V16QImode:
21899 case V8HImode:
21900 /* Perform a parallel unsigned saturating subtraction. */
21901 x = gen_reg_rtx (mode);
21902 emit_insn (gen_rtx_SET (VOIDmode, x,
21903 gen_rtx_US_MINUS (mode, cop0, cop1)));
21905 cop0 = x;
21906 cop1 = CONST0_RTX (mode);
21907 code = EQ;
21908 negate = !negate;
21909 break;
21911 default:
21912 gcc_unreachable ();
21917 /* Allow the comparison to be done in one mode, but the movcc to
21918 happen in another mode. */
21919 if (data_mode == mode)
21921 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21922 operands[1+negate], operands[2-negate]);
21924 else
21926 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21927 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21928 operands[1+negate], operands[2-negate]);
21929 if (GET_MODE (x) == mode)
21930 x = gen_lowpart (data_mode, x);
21933 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21934 operands[2-negate]);
21935 return true;
21938 /* AVX512F does support 64-byte integer vector operations,
21939 thus the longest vector we are faced with is V64QImode. */
21940 #define MAX_VECT_LEN 64
21942 struct expand_vec_perm_d
21944 rtx target, op0, op1;
21945 unsigned char perm[MAX_VECT_LEN];
21946 machine_mode vmode;
21947 unsigned char nelt;
21948 bool one_operand_p;
21949 bool testing_p;
21952 static bool
21953 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21954 struct expand_vec_perm_d *d)
21956 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21957 expanders, so the args are either in d, or in op0, op1, etc. */
21958 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21959 machine_mode maskmode = mode;
21960 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21962 switch (mode)
21964 case V8HImode:
21965 if (TARGET_AVX512VL && TARGET_AVX512BW)
21966 gen = gen_avx512vl_vpermi2varv8hi3;
21967 break;
21968 case V16HImode:
21969 if (TARGET_AVX512VL && TARGET_AVX512BW)
21970 gen = gen_avx512vl_vpermi2varv16hi3;
21971 break;
21972 case V64QImode:
21973 if (TARGET_AVX512VBMI)
21974 gen = gen_avx512bw_vpermi2varv64qi3;
21975 break;
21976 case V32HImode:
21977 if (TARGET_AVX512BW)
21978 gen = gen_avx512bw_vpermi2varv32hi3;
21979 break;
21980 case V4SImode:
21981 if (TARGET_AVX512VL)
21982 gen = gen_avx512vl_vpermi2varv4si3;
21983 break;
21984 case V8SImode:
21985 if (TARGET_AVX512VL)
21986 gen = gen_avx512vl_vpermi2varv8si3;
21987 break;
21988 case V16SImode:
21989 if (TARGET_AVX512F)
21990 gen = gen_avx512f_vpermi2varv16si3;
21991 break;
21992 case V4SFmode:
21993 if (TARGET_AVX512VL)
21995 gen = gen_avx512vl_vpermi2varv4sf3;
21996 maskmode = V4SImode;
21998 break;
21999 case V8SFmode:
22000 if (TARGET_AVX512VL)
22002 gen = gen_avx512vl_vpermi2varv8sf3;
22003 maskmode = V8SImode;
22005 break;
22006 case V16SFmode:
22007 if (TARGET_AVX512F)
22009 gen = gen_avx512f_vpermi2varv16sf3;
22010 maskmode = V16SImode;
22012 break;
22013 case V2DImode:
22014 if (TARGET_AVX512VL)
22015 gen = gen_avx512vl_vpermi2varv2di3;
22016 break;
22017 case V4DImode:
22018 if (TARGET_AVX512VL)
22019 gen = gen_avx512vl_vpermi2varv4di3;
22020 break;
22021 case V8DImode:
22022 if (TARGET_AVX512F)
22023 gen = gen_avx512f_vpermi2varv8di3;
22024 break;
22025 case V2DFmode:
22026 if (TARGET_AVX512VL)
22028 gen = gen_avx512vl_vpermi2varv2df3;
22029 maskmode = V2DImode;
22031 break;
22032 case V4DFmode:
22033 if (TARGET_AVX512VL)
22035 gen = gen_avx512vl_vpermi2varv4df3;
22036 maskmode = V4DImode;
22038 break;
22039 case V8DFmode:
22040 if (TARGET_AVX512F)
22042 gen = gen_avx512f_vpermi2varv8df3;
22043 maskmode = V8DImode;
22045 break;
22046 default:
22047 break;
22050 if (gen == NULL)
22051 return false;
22053 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
22054 expanders, so the args are either in d, or in op0, op1, etc. */
22055 if (d)
22057 rtx vec[64];
22058 target = d->target;
22059 op0 = d->op0;
22060 op1 = d->op1;
22061 for (int i = 0; i < d->nelt; ++i)
22062 vec[i] = GEN_INT (d->perm[i]);
22063 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22066 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22067 return true;
22070 /* Expand a variable vector permutation. */
22072 void
22073 ix86_expand_vec_perm (rtx operands[])
22075 rtx target = operands[0];
22076 rtx op0 = operands[1];
22077 rtx op1 = operands[2];
22078 rtx mask = operands[3];
22079 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22080 machine_mode mode = GET_MODE (op0);
22081 machine_mode maskmode = GET_MODE (mask);
22082 int w, e, i;
22083 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22085 /* Number of elements in the vector. */
22086 w = GET_MODE_NUNITS (mode);
22087 e = GET_MODE_UNIT_SIZE (mode);
22088 gcc_assert (w <= 64);
22090 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22091 return;
22093 if (TARGET_AVX2)
22095 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22097 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22098 a constant shuffle operand. With a tiny bit of effort we can
22099 use VPERMD instead. A re-interpretation stall for V4DFmode is
22100 unfortunate but there's no avoiding it.
22101 Similarly for V16HImode we don't have instructions for variable
22102 shuffling, while for V32QImode we can, after preparing suitable
22103 masks, use vpshufb; vpshufb; vpermq; vpor. */
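/* For example, a V4DImode control { 1, 3, 0, 2 } is widened below to the
   V8SImode control { 2, 3, 6, 7, 0, 1, 4, 5 }: each 64-bit index A
   becomes the pair 2*A, 2*A+1, which selects the same 64-bit elements
   via VPERMD on the V8SImode view of the data.  */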
22105 if (mode == V16HImode)
22107 maskmode = mode = V32QImode;
22108 w = 32;
22109 e = 1;
22111 else
22113 maskmode = mode = V8SImode;
22114 w = 8;
22115 e = 4;
22117 t1 = gen_reg_rtx (maskmode);
22119 /* Replicate the low bits of the V4DImode mask into V8SImode:
22120 mask = { A B C D }
22121 t1 = { A A B B C C D D }. */
22122 for (i = 0; i < w / 2; ++i)
22123 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22124 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22125 vt = force_reg (maskmode, vt);
22126 mask = gen_lowpart (maskmode, mask);
22127 if (maskmode == V8SImode)
22128 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22129 else
22130 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22132 /* Multiply the shuffle indices by two. */
22133 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22134 OPTAB_DIRECT);
22136 /* Add one to the odd shuffle indices:
22137 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22138 for (i = 0; i < w / 2; ++i)
22140 vec[i * 2] = const0_rtx;
22141 vec[i * 2 + 1] = const1_rtx;
22143 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22144 vt = validize_mem (force_const_mem (maskmode, vt));
22145 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22146 OPTAB_DIRECT);
22148 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22149 operands[3] = mask = t1;
22150 target = gen_reg_rtx (mode);
22151 op0 = gen_lowpart (mode, op0);
22152 op1 = gen_lowpart (mode, op1);
22155 switch (mode)
22157 case V8SImode:
22158 /* The VPERMD and VPERMPS instructions already properly ignore
22159 the high bits of the shuffle elements. No need for us to
22160 perform an AND ourselves. */
22161 if (one_operand_shuffle)
22163 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22164 if (target != operands[0])
22165 emit_move_insn (operands[0],
22166 gen_lowpart (GET_MODE (operands[0]), target));
22168 else
22170 t1 = gen_reg_rtx (V8SImode);
22171 t2 = gen_reg_rtx (V8SImode);
22172 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22173 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22174 goto merge_two;
22176 return;
22178 case V8SFmode:
22179 mask = gen_lowpart (V8SImode, mask);
22180 if (one_operand_shuffle)
22181 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22182 else
22184 t1 = gen_reg_rtx (V8SFmode);
22185 t2 = gen_reg_rtx (V8SFmode);
22186 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22187 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22188 goto merge_two;
22190 return;
22192 case V4SImode:
22193 /* By combining the two 128-bit input vectors into one 256-bit
22194 input vector, we can use VPERMD and VPERMPS for the full
22195 two-operand shuffle. */
22196 t1 = gen_reg_rtx (V8SImode);
22197 t2 = gen_reg_rtx (V8SImode);
22198 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22199 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22200 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22201 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22202 return;
22204 case V4SFmode:
22205 t1 = gen_reg_rtx (V8SFmode);
22206 t2 = gen_reg_rtx (V8SImode);
22207 mask = gen_lowpart (V4SImode, mask);
22208 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22209 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22210 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22211 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22212 return;
22214 case V32QImode:
22215 t1 = gen_reg_rtx (V32QImode);
22216 t2 = gen_reg_rtx (V32QImode);
22217 t3 = gen_reg_rtx (V32QImode);
22218 vt2 = GEN_INT (-128);
22219 for (i = 0; i < 32; i++)
22220 vec[i] = vt2;
22221 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22222 vt = force_reg (V32QImode, vt);
22223 for (i = 0; i < 32; i++)
22224 vec[i] = i < 16 ? vt2 : const0_rtx;
22225 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22226 vt2 = force_reg (V32QImode, vt2);
22227 /* From mask create two adjusted masks, which contain the same
22228 bits as mask in the low 7 bits of each vector element.
22229 The first mask will have the most significant bit clear
22230 if it requests element from the same 128-bit lane
22231 and MSB set if it requests element from the other 128-bit lane.
22232 The second mask will have the opposite values of the MSB,
22233 and additionally will have its 128-bit lanes swapped.
22234 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22235 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22236 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22237 stands for other 12 bytes. */
22238 /* The bit whether element is from the same lane or the other
22239 lane is bit 4, so shift it up by 3 to the MSB position. */
22240 t5 = gen_reg_rtx (V4DImode);
22241 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22242 GEN_INT (3)));
22243 /* Clear MSB bits from the mask just in case it had them set. */
22244 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22245 /* After this t1 will have MSB set for elements from other lane. */
22246 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22247 /* Clear bits other than MSB. */
22248 emit_insn (gen_andv32qi3 (t1, t1, vt));
22249 /* Or in the lower bits from mask into t3. */
22250 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22251 /* And invert MSB bits in t1, so MSB is set for elements from the same
22252 lane. */
22253 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22254 /* Swap 128-bit lanes in t3. */
22255 t6 = gen_reg_rtx (V4DImode);
22256 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22257 const2_rtx, GEN_INT (3),
22258 const0_rtx, const1_rtx));
22259 /* And or in the lower bits from mask into t1. */
22260 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22261 if (one_operand_shuffle)
22263 /* Each of these shuffles will put 0s in places where
22264 element from the other 128-bit lane is needed, otherwise
22265 will shuffle in the requested value. */
22266 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22267 gen_lowpart (V32QImode, t6)));
22268 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22269 /* For t3 the 128-bit lanes are swapped again. */
22270 t7 = gen_reg_rtx (V4DImode);
22271 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22272 const2_rtx, GEN_INT (3),
22273 const0_rtx, const1_rtx));
22274 /* And oring both together leads to the result. */
22275 emit_insn (gen_iorv32qi3 (target, t1,
22276 gen_lowpart (V32QImode, t7)));
22277 if (target != operands[0])
22278 emit_move_insn (operands[0],
22279 gen_lowpart (GET_MODE (operands[0]), target));
22280 return;
22283 t4 = gen_reg_rtx (V32QImode);
22284 /* Similar to the one_operand_shuffle code above, just repeated
22285 twice, once for each operand. The merge_two: code below will
22286 merge the two results together. */
22287 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22288 gen_lowpart (V32QImode, t6)));
22289 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22290 gen_lowpart (V32QImode, t6)));
22291 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22292 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22293 t7 = gen_reg_rtx (V4DImode);
22294 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22295 const2_rtx, GEN_INT (3),
22296 const0_rtx, const1_rtx));
22297 t8 = gen_reg_rtx (V4DImode);
22298 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22299 const2_rtx, GEN_INT (3),
22300 const0_rtx, const1_rtx));
22301 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22302 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22303 t1 = t4;
22304 t2 = t3;
22305 goto merge_two;
22307 default:
22308 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22309 break;
22313 if (TARGET_XOP)
22315 /* The XOP VPPERM insn supports three inputs. By ignoring the
22316 one_operand_shuffle special case, we avoid creating another
22317 set of constant vectors in memory. */
22318 one_operand_shuffle = false;
22320 /* mask = mask & {2*w-1, ...} */
22321 vt = GEN_INT (2*w - 1);
22323 else
22325 /* mask = mask & {w-1, ...} */
22326 vt = GEN_INT (w - 1);
22329 for (i = 0; i < w; i++)
22330 vec[i] = vt;
22331 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22332 mask = expand_simple_binop (maskmode, AND, mask, vt,
22333 NULL_RTX, 0, OPTAB_DIRECT);
22335 /* For non-QImode operations, convert the word permutation control
22336 into a byte permutation control. */
22337 if (mode != V16QImode)
22339 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22340 GEN_INT (exact_log2 (e)),
22341 NULL_RTX, 0, OPTAB_DIRECT);
22343 /* Convert mask to vector of chars. */
22344 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22346 /* Replicate each of the input bytes into byte positions:
22347 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22348 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22349 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
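/* For example, in V4SImode a word index of 2 becomes byte offset 8 after
   the shift above, is replicated to { 8, 8, 8, 8 } by the pshufb below,
   and the final addition of { 0, 1, 2, 3 } yields { 8, 9, 10, 11 },
   the byte positions of word 2.  */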
22350 for (i = 0; i < 16; ++i)
22351 vec[i] = GEN_INT (i/e * e);
22352 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22353 vt = validize_mem (force_const_mem (V16QImode, vt));
22354 if (TARGET_XOP)
22355 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22356 else
22357 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22359 /* Convert it into the byte positions by doing
22360 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22361 for (i = 0; i < 16; ++i)
22362 vec[i] = GEN_INT (i % e);
22363 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22364 vt = validize_mem (force_const_mem (V16QImode, vt));
22365 emit_insn (gen_addv16qi3 (mask, mask, vt));
22368 /* The actual shuffle operations all operate on V16QImode. */
22369 op0 = gen_lowpart (V16QImode, op0);
22370 op1 = gen_lowpart (V16QImode, op1);
22372 if (TARGET_XOP)
22374 if (GET_MODE (target) != V16QImode)
22375 target = gen_reg_rtx (V16QImode);
22376 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22377 if (target != operands[0])
22378 emit_move_insn (operands[0],
22379 gen_lowpart (GET_MODE (operands[0]), target));
22381 else if (one_operand_shuffle)
22383 if (GET_MODE (target) != V16QImode)
22384 target = gen_reg_rtx (V16QImode);
22385 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22386 if (target != operands[0])
22387 emit_move_insn (operands[0],
22388 gen_lowpart (GET_MODE (operands[0]), target));
22390 else
22392 rtx xops[6];
22393 bool ok;
22395 /* Shuffle the two input vectors independently. */
22396 t1 = gen_reg_rtx (V16QImode);
22397 t2 = gen_reg_rtx (V16QImode);
22398 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22399 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22401 merge_two:
22402 /* Then merge them together. The key is whether any given control
22403 element contained a bit set that indicates the second word. */
22404 mask = operands[3];
22405 vt = GEN_INT (w);
22406 if (maskmode == V2DImode && !TARGET_SSE4_1)
22408 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22409 more shuffle to convert the V2DI input mask into a V4SI
22410 input mask, at which point the masking that expand_int_vcond
22411 performs will work as desired. */
22412 rtx t3 = gen_reg_rtx (V4SImode);
22413 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22414 const0_rtx, const0_rtx,
22415 const2_rtx, const2_rtx));
22416 mask = t3;
22417 maskmode = V4SImode;
22418 e = w = 4;
22421 for (i = 0; i < w; i++)
22422 vec[i] = vt;
22423 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22424 vt = force_reg (maskmode, vt);
22425 mask = expand_simple_binop (maskmode, AND, mask, vt,
22426 NULL_RTX, 0, OPTAB_DIRECT);
22428 if (GET_MODE (target) != mode)
22429 target = gen_reg_rtx (mode);
22430 xops[0] = target;
22431 xops[1] = gen_lowpart (mode, t2);
22432 xops[2] = gen_lowpart (mode, t1);
22433 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22434 xops[4] = mask;
22435 xops[5] = vt;
22436 ok = ix86_expand_int_vcond (xops);
22437 gcc_assert (ok);
22438 if (target != operands[0])
22439 emit_move_insn (operands[0],
22440 gen_lowpart (GET_MODE (operands[0]), target));
22444 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P is
22445 true if we should do zero extension, else sign extension. HIGH_P is
22446 true if we want the N/2 high elements, else the low elements. */
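/* For example, with SRC in V8HImode, !HIGH_P and UNSIGNED_P the result is
   a V4SImode vector holding the four low halfwords of SRC zero-extended;
   with HIGH_P the four high halfwords are extracted (or shifted down to
   the low half) first and then widened.  */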
22448 void
22449 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22451 machine_mode imode = GET_MODE (src);
22452 rtx tmp;
22454 if (TARGET_SSE4_1)
22456 rtx (*unpack)(rtx, rtx);
22457 rtx (*extract)(rtx, rtx) = NULL;
22458 machine_mode halfmode = BLKmode;
22460 switch (imode)
22462 case V64QImode:
22463 if (unsigned_p)
22464 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22465 else
22466 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22467 halfmode = V32QImode;
22468 extract
22469 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22470 break;
22471 case V32QImode:
22472 if (unsigned_p)
22473 unpack = gen_avx2_zero_extendv16qiv16hi2;
22474 else
22475 unpack = gen_avx2_sign_extendv16qiv16hi2;
22476 halfmode = V16QImode;
22477 extract
22478 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22479 break;
22480 case V32HImode:
22481 if (unsigned_p)
22482 unpack = gen_avx512f_zero_extendv16hiv16si2;
22483 else
22484 unpack = gen_avx512f_sign_extendv16hiv16si2;
22485 halfmode = V16HImode;
22486 extract
22487 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22488 break;
22489 case V16HImode:
22490 if (unsigned_p)
22491 unpack = gen_avx2_zero_extendv8hiv8si2;
22492 else
22493 unpack = gen_avx2_sign_extendv8hiv8si2;
22494 halfmode = V8HImode;
22495 extract
22496 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22497 break;
22498 case V16SImode:
22499 if (unsigned_p)
22500 unpack = gen_avx512f_zero_extendv8siv8di2;
22501 else
22502 unpack = gen_avx512f_sign_extendv8siv8di2;
22503 halfmode = V8SImode;
22504 extract
22505 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22506 break;
22507 case V8SImode:
22508 if (unsigned_p)
22509 unpack = gen_avx2_zero_extendv4siv4di2;
22510 else
22511 unpack = gen_avx2_sign_extendv4siv4di2;
22512 halfmode = V4SImode;
22513 extract
22514 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22515 break;
22516 case V16QImode:
22517 if (unsigned_p)
22518 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22519 else
22520 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22521 break;
22522 case V8HImode:
22523 if (unsigned_p)
22524 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22525 else
22526 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22527 break;
22528 case V4SImode:
22529 if (unsigned_p)
22530 unpack = gen_sse4_1_zero_extendv2siv2di2;
22531 else
22532 unpack = gen_sse4_1_sign_extendv2siv2di2;
22533 break;
22534 default:
22535 gcc_unreachable ();
22538 if (GET_MODE_SIZE (imode) >= 32)
22540 tmp = gen_reg_rtx (halfmode);
22541 emit_insn (extract (tmp, src));
22543 else if (high_p)
22545 /* Shift higher 8 bytes to lower 8 bytes. */
22546 tmp = gen_reg_rtx (V1TImode);
22547 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22548 GEN_INT (64)));
22549 tmp = gen_lowpart (imode, tmp);
22551 else
22552 tmp = src;
22554 emit_insn (unpack (dest, tmp));
22556 else
22558 rtx (*unpack)(rtx, rtx, rtx);
22560 switch (imode)
22562 case V16QImode:
22563 if (high_p)
22564 unpack = gen_vec_interleave_highv16qi;
22565 else
22566 unpack = gen_vec_interleave_lowv16qi;
22567 break;
22568 case V8HImode:
22569 if (high_p)
22570 unpack = gen_vec_interleave_highv8hi;
22571 else
22572 unpack = gen_vec_interleave_lowv8hi;
22573 break;
22574 case V4SImode:
22575 if (high_p)
22576 unpack = gen_vec_interleave_highv4si;
22577 else
22578 unpack = gen_vec_interleave_lowv4si;
22579 break;
22580 default:
22581 gcc_unreachable ();
22584 if (unsigned_p)
22585 tmp = force_reg (imode, CONST0_RTX (imode));
22586 else
22587 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22588 src, pc_rtx, pc_rtx);
22590 rtx tmp2 = gen_reg_rtx (imode);
22591 emit_insn (unpack (tmp2, src, tmp));
22592 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22596 /* Expand conditional increment or decrement using adc/sbb instructions.
22597 The default case using setcc followed by the conditional move can be
22598 done by generic code. */
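/* For example, "x = y + (a < b)" with unsigned operands becomes, roughly,
       cmpl  b, a
       adcl  $0, y
   where the carry produced by the compare feeds straight into the
   addition, avoiding a setcc or conditional move.  */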
22599 bool
22600 ix86_expand_int_addcc (rtx operands[])
22602 enum rtx_code code = GET_CODE (operands[1]);
22603 rtx flags;
22604 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22605 rtx compare_op;
22606 rtx val = const0_rtx;
22607 bool fpcmp = false;
22608 machine_mode mode;
22609 rtx op0 = XEXP (operands[1], 0);
22610 rtx op1 = XEXP (operands[1], 1);
22612 if (operands[3] != const1_rtx
22613 && operands[3] != constm1_rtx)
22614 return false;
22615 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22616 return false;
22617 code = GET_CODE (compare_op);
22619 flags = XEXP (compare_op, 0);
22621 if (GET_MODE (flags) == CCFPmode
22622 || GET_MODE (flags) == CCFPUmode)
22624 fpcmp = true;
22625 code = ix86_fp_compare_code_to_integer (code);
22628 if (code != LTU)
22630 val = constm1_rtx;
22631 if (fpcmp)
22632 PUT_CODE (compare_op,
22633 reverse_condition_maybe_unordered
22634 (GET_CODE (compare_op)));
22635 else
22636 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22639 mode = GET_MODE (operands[0]);
22641 /* Construct either adc or sbb insn. */
22642 if ((code == LTU) == (operands[3] == constm1_rtx))
22644 switch (mode)
22646 case QImode:
22647 insn = gen_subqi3_carry;
22648 break;
22649 case HImode:
22650 insn = gen_subhi3_carry;
22651 break;
22652 case SImode:
22653 insn = gen_subsi3_carry;
22654 break;
22655 case DImode:
22656 insn = gen_subdi3_carry;
22657 break;
22658 default:
22659 gcc_unreachable ();
22662 else
22664 switch (mode)
22666 case QImode:
22667 insn = gen_addqi3_carry;
22668 break;
22669 case HImode:
22670 insn = gen_addhi3_carry;
22671 break;
22672 case SImode:
22673 insn = gen_addsi3_carry;
22674 break;
22675 case DImode:
22676 insn = gen_adddi3_carry;
22677 break;
22678 default:
22679 gcc_unreachable ();
22682 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22684 return true;
22688 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22689 but works for floating point parameters and non-offsettable memories.
22690 For pushes, it returns just stack offsets; the values will be saved
22691 in the right order. Maximally three parts are generated. */
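/* For example, on ia32 an XFmode (80-bit long double) operand is returned
   as three SImode parts and a DFmode operand as two, while for 64-bit
   targets a TFmode operand splits into two DImode parts.  */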
22693 static int
22694 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22696 int size;
22698 if (!TARGET_64BIT)
22699 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22700 else
22701 size = (GET_MODE_SIZE (mode) + 4) / 8;
22703 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22704 gcc_assert (size >= 2 && size <= 4);
22706 /* Optimize constant pool reference to immediates. This is used by fp
22707 moves, which force all constants to memory to allow combining. */
22708 if (MEM_P (operand) && MEM_READONLY_P (operand))
22710 rtx tmp = maybe_get_pool_constant (operand);
22711 if (tmp)
22712 operand = tmp;
22715 if (MEM_P (operand) && !offsettable_memref_p (operand))
22717 /* The only non-offsettable memories we handle are pushes. */
22718 int ok = push_operand (operand, VOIDmode);
22720 gcc_assert (ok);
22722 operand = copy_rtx (operand);
22723 PUT_MODE (operand, word_mode);
22724 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22725 return size;
22728 if (GET_CODE (operand) == CONST_VECTOR)
22730 machine_mode imode = int_mode_for_mode (mode);
22731 /* Caution: if we looked through a constant pool memory above,
22732 the operand may actually have a different mode now. That's
22733 ok, since we want to pun this all the way back to an integer. */
22734 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22735 gcc_assert (operand != NULL);
22736 mode = imode;
22739 if (!TARGET_64BIT)
22741 if (mode == DImode)
22742 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22743 else
22745 int i;
22747 if (REG_P (operand))
22749 gcc_assert (reload_completed);
22750 for (i = 0; i < size; i++)
22751 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22753 else if (offsettable_memref_p (operand))
22755 operand = adjust_address (operand, SImode, 0);
22756 parts[0] = operand;
22757 for (i = 1; i < size; i++)
22758 parts[i] = adjust_address (operand, SImode, 4 * i);
22760 else if (GET_CODE (operand) == CONST_DOUBLE)
22762 REAL_VALUE_TYPE r;
22763 long l[4];
22765 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22766 switch (mode)
22768 case TFmode:
22769 real_to_target (l, &r, mode);
22770 parts[3] = gen_int_mode (l[3], SImode);
22771 parts[2] = gen_int_mode (l[2], SImode);
22772 break;
22773 case XFmode:
22774 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22775 long double may not be 80-bit. */
22776 real_to_target (l, &r, mode);
22777 parts[2] = gen_int_mode (l[2], SImode);
22778 break;
22779 case DFmode:
22780 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22781 break;
22782 default:
22783 gcc_unreachable ();
22785 parts[1] = gen_int_mode (l[1], SImode);
22786 parts[0] = gen_int_mode (l[0], SImode);
22788 else
22789 gcc_unreachable ();
22792 else
22794 if (mode == TImode)
22795 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22796 if (mode == XFmode || mode == TFmode)
22798 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22799 if (REG_P (operand))
22801 gcc_assert (reload_completed);
22802 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22803 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22805 else if (offsettable_memref_p (operand))
22807 operand = adjust_address (operand, DImode, 0);
22808 parts[0] = operand;
22809 parts[1] = adjust_address (operand, upper_mode, 8);
22811 else if (GET_CODE (operand) == CONST_DOUBLE)
22813 REAL_VALUE_TYPE r;
22814 long l[4];
22816 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22817 real_to_target (l, &r, mode);
22819 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22820 if (HOST_BITS_PER_WIDE_INT >= 64)
22821 parts[0]
22822 = gen_int_mode
22823 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22824 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22825 DImode);
22826 else
22827 parts[0] = immed_double_const (l[0], l[1], DImode);
22829 if (upper_mode == SImode)
22830 parts[1] = gen_int_mode (l[2], SImode);
22831 else if (HOST_BITS_PER_WIDE_INT >= 64)
22832 parts[1]
22833 = gen_int_mode
22834 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22835 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22836 DImode);
22837 else
22838 parts[1] = immed_double_const (l[2], l[3], DImode);
22840 else
22841 gcc_unreachable ();
22845 return size;
22848 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22849 All required insns are emitted here. Operands 2 and up receive the
22850 destination parts and operands 6 and up the source parts, in the
22851 correct order. */
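/* For example, when a DImode value is loaded on a 32-bit target from a
   memory location whose address register is also the low half of the
   destination, copying the low word first would clobber the address, so
   the words are emitted in reverse order; when several parts collide,
   the address is instead loaded into the last destination part with an
   lea and only one colliding move remains.  */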
22853 void
22854 ix86_split_long_move (rtx operands[])
22856 rtx part[2][4];
22857 int nparts, i, j;
22858 int push = 0;
22859 int collisions = 0;
22860 machine_mode mode = GET_MODE (operands[0]);
22861 bool collisionparts[4];
22863 /* The DFmode expanders may ask us to move a double.
22864 For a 64-bit target this is a single move. By hiding the fact
22865 here we simplify i386.md splitters. */
22866 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22868 /* Optimize constant pool reference to immediates. This is used by
22869 fp moves, which force all constants to memory to allow combining. */
22871 if (MEM_P (operands[1])
22872 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22873 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22874 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22875 if (push_operand (operands[0], VOIDmode))
22877 operands[0] = copy_rtx (operands[0]);
22878 PUT_MODE (operands[0], word_mode);
22880 else
22881 operands[0] = gen_lowpart (DImode, operands[0]);
22882 operands[1] = gen_lowpart (DImode, operands[1]);
22883 emit_move_insn (operands[0], operands[1]);
22884 return;
22887 /* The only non-offsettable memory we handle is push. */
22888 if (push_operand (operands[0], VOIDmode))
22889 push = 1;
22890 else
22891 gcc_assert (!MEM_P (operands[0])
22892 || offsettable_memref_p (operands[0]));
22894 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22895 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22897 /* When emitting push, take care for source operands on the stack. */
22898 if (push && MEM_P (operands[1])
22899 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22901 rtx src_base = XEXP (part[1][nparts - 1], 0);
22903 /* Compensate for the stack decrement by 4. */
22904 if (!TARGET_64BIT && nparts == 3
22905 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22906 src_base = plus_constant (Pmode, src_base, 4);
22908 /* src_base refers to the stack pointer and is
22909 automatically decreased by emitted push. */
22910 for (i = 0; i < nparts; i++)
22911 part[1][i] = change_address (part[1][i],
22912 GET_MODE (part[1][i]), src_base);
22915 /* We need to do the copy in the right order in case an address register
22916 of the source overlaps the destination. */
22917 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22919 rtx tmp;
22921 for (i = 0; i < nparts; i++)
22923 collisionparts[i]
22924 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22925 if (collisionparts[i])
22926 collisions++;
22929 /* Collision in the middle part can be handled by reordering. */
22930 if (collisions == 1 && nparts == 3 && collisionparts [1])
22932 std::swap (part[0][1], part[0][2]);
22933 std::swap (part[1][1], part[1][2]);
22935 else if (collisions == 1
22936 && nparts == 4
22937 && (collisionparts [1] || collisionparts [2]))
22939 if (collisionparts [1])
22941 std::swap (part[0][1], part[0][2]);
22942 std::swap (part[1][1], part[1][2]);
22944 else
22946 std::swap (part[0][2], part[0][3]);
22947 std::swap (part[1][2], part[1][3]);
22951 /* If there are more collisions, we can't handle it by reordering.
22952 Do an lea to the last part and use only one colliding move. */
22953 else if (collisions > 1)
22955 rtx base;
22957 collisions = 1;
22959 base = part[0][nparts - 1];
22961 /* Handle the case when the last part isn't valid for lea.
22962 Happens in 64-bit mode storing the 12-byte XFmode. */
22963 if (GET_MODE (base) != Pmode)
22964 base = gen_rtx_REG (Pmode, REGNO (base));
22966 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22967 part[1][0] = replace_equiv_address (part[1][0], base);
22968 for (i = 1; i < nparts; i++)
22970 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22971 part[1][i] = replace_equiv_address (part[1][i], tmp);
22976 if (push)
22978 if (!TARGET_64BIT)
22980 if (nparts == 3)
22982 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22983 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22984 stack_pointer_rtx, GEN_INT (-4)));
22985 emit_move_insn (part[0][2], part[1][2]);
22987 else if (nparts == 4)
22989 emit_move_insn (part[0][3], part[1][3]);
22990 emit_move_insn (part[0][2], part[1][2]);
22993 else
22995 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22996 a register, it is OK - we will just use the larger counterpart. We also
22997 retype memory - this comes from an attempt to avoid the REX prefix on
22998 moving the second half of a TFmode value. */
22999 if (GET_MODE (part[1][1]) == SImode)
23001 switch (GET_CODE (part[1][1]))
23003 case MEM:
23004 part[1][1] = adjust_address (part[1][1], DImode, 0);
23005 break;
23007 case REG:
23008 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23009 break;
23011 default:
23012 gcc_unreachable ();
23015 if (GET_MODE (part[1][0]) == SImode)
23016 part[1][0] = part[1][1];
23019 emit_move_insn (part[0][1], part[1][1]);
23020 emit_move_insn (part[0][0], part[1][0]);
23021 return;
23024 /* Choose correct order to not overwrite the source before it is copied. */
23025 if ((REG_P (part[0][0])
23026 && REG_P (part[1][1])
23027 && (REGNO (part[0][0]) == REGNO (part[1][1])
23028 || (nparts == 3
23029 && REGNO (part[0][0]) == REGNO (part[1][2]))
23030 || (nparts == 4
23031 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23032 || (collisions > 0
23033 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23035 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23037 operands[2 + i] = part[0][j];
23038 operands[6 + i] = part[1][j];
23041 else
23043 for (i = 0; i < nparts; i++)
23045 operands[2 + i] = part[0][i];
23046 operands[6 + i] = part[1][i];
23050 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23051 if (optimize_insn_for_size_p ())
23053 for (j = 0; j < nparts - 1; j++)
23054 if (CONST_INT_P (operands[6 + j])
23055 && operands[6 + j] != const0_rtx
23056 && REG_P (operands[2 + j]))
23057 for (i = j; i < nparts - 1; i++)
23058 if (CONST_INT_P (operands[7 + i])
23059 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23060 operands[7 + i] = operands[2 + j];
23063 for (i = 0; i < nparts; i++)
23064 emit_move_insn (operands[2 + i], operands[6 + i]);
23066 return;
23069 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23070 left shift by a constant, either using a single shift or
23071 a sequence of add instructions. */
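/* For example, a left shift of one half by 2 may be emitted as

       add  reg, reg
       add  reg, reg

   when two additions are no more costly than a shift by a constant and
   we are not optimizing for size; otherwise a single shl by 2 is used.  */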
23073 static void
23074 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23076 rtx (*insn)(rtx, rtx, rtx);
23078 if (count == 1
23079 || (count * ix86_cost->add <= ix86_cost->shift_const
23080 && !optimize_insn_for_size_p ()))
23082 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23083 while (count-- > 0)
23084 emit_insn (insn (operand, operand, operand));
23086 else
23088 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23089 emit_insn (insn (operand, operand, GEN_INT (count)));
23093 void
23094 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23096 rtx (*gen_ashl3)(rtx, rtx, rtx);
23097 rtx (*gen_shld)(rtx, rtx, rtx);
23098 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23100 rtx low[2], high[2];
23101 int count;
23103 if (CONST_INT_P (operands[2]))
23105 split_double_mode (mode, operands, 2, low, high);
23106 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23108 if (count >= half_width)
23110 emit_move_insn (high[0], low[1]);
23111 emit_move_insn (low[0], const0_rtx);
23113 if (count > half_width)
23114 ix86_expand_ashl_const (high[0], count - half_width, mode);
23116 else
23118 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23120 if (!rtx_equal_p (operands[0], operands[1]))
23121 emit_move_insn (operands[0], operands[1]);
23123 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23124 ix86_expand_ashl_const (low[0], count, mode);
23126 return;
23129 split_double_mode (mode, operands, 1, low, high);
23131 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23133 if (operands[1] == const1_rtx)
23135 /* Assuming we've chosen QImode-capable registers, then 1 << N
23136 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23137 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23139 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23141 ix86_expand_clear (low[0]);
23142 ix86_expand_clear (high[0]);
23143 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23145 d = gen_lowpart (QImode, low[0]);
23146 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23147 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23148 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23150 d = gen_lowpart (QImode, high[0]);
23151 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23152 s = gen_rtx_NE (QImode, flags, const0_rtx);
23153 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23156 /* Otherwise, we can get the same results by manually performing
23157 a bit extract operation on bit 5/6, and then performing the two
23158 shifts. The two methods of getting 0/1 into low/high are exactly
23159 the same size. Avoiding the shift in the bit extract case helps
23160 pentium4 a bit; no one else seems to care much either way. */
23161 else
23163 machine_mode half_mode;
23164 rtx (*gen_lshr3)(rtx, rtx, rtx);
23165 rtx (*gen_and3)(rtx, rtx, rtx);
23166 rtx (*gen_xor3)(rtx, rtx, rtx);
23167 HOST_WIDE_INT bits;
23168 rtx x;
23170 if (mode == DImode)
23172 half_mode = SImode;
23173 gen_lshr3 = gen_lshrsi3;
23174 gen_and3 = gen_andsi3;
23175 gen_xor3 = gen_xorsi3;
23176 bits = 5;
23178 else
23180 half_mode = DImode;
23181 gen_lshr3 = gen_lshrdi3;
23182 gen_and3 = gen_anddi3;
23183 gen_xor3 = gen_xordi3;
23184 bits = 6;
23187 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23188 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23189 else
23190 x = gen_lowpart (half_mode, operands[2]);
23191 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23193 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23194 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23195 emit_move_insn (low[0], high[0]);
23196 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23199 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23200 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23201 return;
23204 if (operands[1] == constm1_rtx)
23206 /* For -1 << N, we can avoid the shld instruction, because we
23207 know that we're shifting 0...31/63 ones into a -1. */
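/* E.g. for a 64-bit -1 << 5 the result is high = 0xffffffff and
   low = 0xffffffe0, so only the low half needs a real shift below.  */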
23208 emit_move_insn (low[0], constm1_rtx);
23209 if (optimize_insn_for_size_p ())
23210 emit_move_insn (high[0], low[0]);
23211 else
23212 emit_move_insn (high[0], constm1_rtx);
23214 else
23216 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23218 if (!rtx_equal_p (operands[0], operands[1]))
23219 emit_move_insn (operands[0], operands[1]);
23221 split_double_mode (mode, operands, 1, low, high);
23222 emit_insn (gen_shld (high[0], low[0], operands[2]));
23225 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23227 if (TARGET_CMOVE && scratch)
23229 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23230 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23232 ix86_expand_clear (scratch);
23233 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23235 else
23237 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23238 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23240 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23244 void
23245 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23247 rtx (*gen_ashr3)(rtx, rtx, rtx)
23248 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23249 rtx (*gen_shrd)(rtx, rtx, rtx);
23250 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23252 rtx low[2], high[2];
23253 int count;
23255 if (CONST_INT_P (operands[2]))
23257 split_double_mode (mode, operands, 2, low, high);
23258 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23260 if (count == GET_MODE_BITSIZE (mode) - 1)
23262 emit_move_insn (high[0], high[1]);
23263 emit_insn (gen_ashr3 (high[0], high[0],
23264 GEN_INT (half_width - 1)));
23265 emit_move_insn (low[0], high[0]);
23268 else if (count >= half_width)
23270 emit_move_insn (low[0], high[1]);
23271 emit_move_insn (high[0], low[0]);
23272 emit_insn (gen_ashr3 (high[0], high[0],
23273 GEN_INT (half_width - 1)));
23275 if (count > half_width)
23276 emit_insn (gen_ashr3 (low[0], low[0],
23277 GEN_INT (count - half_width)));
23279 else
23281 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23283 if (!rtx_equal_p (operands[0], operands[1]))
23284 emit_move_insn (operands[0], operands[1]);
23286 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23287 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23290 else
23292 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23294 if (!rtx_equal_p (operands[0], operands[1]))
23295 emit_move_insn (operands[0], operands[1]);
23297 split_double_mode (mode, operands, 1, low, high);
23299 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23300 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23302 if (TARGET_CMOVE && scratch)
23304 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23305 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23307 emit_move_insn (scratch, high[0]);
23308 emit_insn (gen_ashr3 (scratch, scratch,
23309 GEN_INT (half_width - 1)));
23310 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23311 scratch));
23313 else
23315 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23316 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23318 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23323 void
23324 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23326 rtx (*gen_lshr3)(rtx, rtx, rtx)
23327 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23328 rtx (*gen_shrd)(rtx, rtx, rtx);
23329 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23331 rtx low[2], high[2];
23332 int count;
23334 if (CONST_INT_P (operands[2]))
23336 split_double_mode (mode, operands, 2, low, high);
23337 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23339 if (count >= half_width)
23341 emit_move_insn (low[0], high[1]);
23342 ix86_expand_clear (high[0]);
23344 if (count > half_width)
23345 emit_insn (gen_lshr3 (low[0], low[0],
23346 GEN_INT (count - half_width)));
23348 else
23350 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23352 if (!rtx_equal_p (operands[0], operands[1]))
23353 emit_move_insn (operands[0], operands[1]);
23355 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23356 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23359 else
23361 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23363 if (!rtx_equal_p (operands[0], operands[1]))
23364 emit_move_insn (operands[0], operands[1]);
23366 split_double_mode (mode, operands, 1, low, high);
23368 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23369 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23371 if (TARGET_CMOVE && scratch)
23373 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23374 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23376 ix86_expand_clear (scratch);
23377 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23378 scratch));
23380 else
23382 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23383 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23385 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23390 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23391 static void
23392 predict_jump (int prob)
23394 rtx insn = get_last_insn ();
23395 gcc_assert (JUMP_P (insn));
23396 add_int_reg_note (insn, REG_BR_PROB, prob);
23399 /* Helper function for the string operations below. Test whether VARIABLE
23400 is aligned to VALUE bytes. If true, jump to the label. */
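/* A typical use is

       rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
       ... emit the 4-byte copy done only when bit 2 of COUNT is set ...
       emit_label (label);

   where the AND/compare pair normally combines into a single test
   followed by a conditional jump around the copy.  */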
23401 static rtx_code_label *
23402 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23404 rtx_code_label *label = gen_label_rtx ();
23405 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23406 if (GET_MODE (variable) == DImode)
23407 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23408 else
23409 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23410 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23411 1, label);
23412 if (epilogue)
23413 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23414 else
23415 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23416 return label;
23419 /* Adjust COUNTREG by VALUE. */
23420 static void
23421 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23423 rtx (*gen_add)(rtx, rtx, rtx)
23424 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23426 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23429 /* Zero extend possibly SImode EXP to Pmode register. */
23431 ix86_zero_extend_to_Pmode (rtx exp)
23433 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23436 /* Divide COUNTREG by SCALE. */
23437 static rtx
23438 scale_counter (rtx countreg, int scale)
23440 rtx sc;
23442 if (scale == 1)
23443 return countreg;
23444 if (CONST_INT_P (countreg))
23445 return GEN_INT (INTVAL (countreg) / scale);
23446 gcc_assert (REG_P (countreg));
23448 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23449 GEN_INT (exact_log2 (scale)),
23450 NULL, 1, OPTAB_DIRECT);
23451 return sc;
23454 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23455 DImode for constant loop counts. */
23457 static machine_mode
23458 counter_mode (rtx count_exp)
23460 if (GET_MODE (count_exp) != VOIDmode)
23461 return GET_MODE (count_exp);
23462 if (!CONST_INT_P (count_exp))
23463 return Pmode;
23464 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23465 return DImode;
23466 return SImode;
23469 /* Copy the address to a Pmode register. This is used for x32 to
23470 truncate a DImode TLS address to an SImode register. */
23472 static rtx
23473 ix86_copy_addr_to_reg (rtx addr)
23475 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23476 return copy_addr_to_reg (addr);
23477 else
23479 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23480 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23484 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
23485 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
23486 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
23487 loop to set memory by VALUE (supposed to be in MODE).
23489 The size is rounded down to a whole number of chunks moved at once.
23490 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
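/* In pseudocode the generated sequence is roughly:

       size = count & -(MODE_SIZE * UNROLL);
       if (size == 0) goto out;
       iter = 0;
     top:
       copy (or set) UNROLL chunks of MODE at dest + iter (and src + iter);
       iter += MODE_SIZE * UNROLL;
       if (iter < size) goto top;
       destptr += iter;  srcptr += iter;   (srcptr only when copying)
     out:  */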
23493 static void
23494 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23495 rtx destptr, rtx srcptr, rtx value,
23496 rtx count, machine_mode mode, int unroll,
23497 int expected_size, bool issetmem)
23499 rtx_code_label *out_label, *top_label;
23500 rtx iter, tmp;
23501 machine_mode iter_mode = counter_mode (count);
23502 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23503 rtx piece_size = GEN_INT (piece_size_n);
23504 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23505 rtx size;
23506 int i;
23508 top_label = gen_label_rtx ();
23509 out_label = gen_label_rtx ();
23510 iter = gen_reg_rtx (iter_mode);
23512 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23513 NULL, 1, OPTAB_DIRECT);
23514 /* Those two should combine. */
23515 if (piece_size == const1_rtx)
23517 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23518 true, out_label);
23519 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23521 emit_move_insn (iter, const0_rtx);
23523 emit_label (top_label);
23525 tmp = convert_modes (Pmode, iter_mode, iter, true);
23527 /* This assert could be relaxed - in this case we'll need to compute
23528 the smallest power of two containing PIECE_SIZE_N and pass it to
23529 offset_address. */
23530 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23531 destmem = offset_address (destmem, tmp, piece_size_n);
23532 destmem = adjust_address (destmem, mode, 0);
23534 if (!issetmem)
23536 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23537 srcmem = adjust_address (srcmem, mode, 0);
23539 /* When unrolling for chips that reorder memory reads and writes,
23540 we can save registers by using a single temporary.
23541 Also using 4 temporaries is overkill in 32-bit mode. */
23542 if (!TARGET_64BIT && 0)
23544 for (i = 0; i < unroll; i++)
23546 if (i)
23548 destmem =
23549 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23550 srcmem =
23551 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23553 emit_move_insn (destmem, srcmem);
23556 else
23558 rtx tmpreg[4];
23559 gcc_assert (unroll <= 4);
23560 for (i = 0; i < unroll; i++)
23562 tmpreg[i] = gen_reg_rtx (mode);
23563 if (i)
23565 srcmem =
23566 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23568 emit_move_insn (tmpreg[i], srcmem);
23570 for (i = 0; i < unroll; i++)
23572 if (i)
23574 destmem =
23575 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23577 emit_move_insn (destmem, tmpreg[i]);
23581 else
23582 for (i = 0; i < unroll; i++)
23584 if (i)
23585 destmem =
23586 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23587 emit_move_insn (destmem, value);
23590 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23591 true, OPTAB_LIB_WIDEN);
23592 if (tmp != iter)
23593 emit_move_insn (iter, tmp);
23595 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23596 true, top_label);
23597 if (expected_size != -1)
23599 expected_size /= GET_MODE_SIZE (mode) * unroll;
23600 if (expected_size == 0)
23601 predict_jump (0);
23602 else if (expected_size > REG_BR_PROB_BASE)
23603 predict_jump (REG_BR_PROB_BASE - 1);
23604 else
23605 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23607 else
23608 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23609 iter = ix86_zero_extend_to_Pmode (iter);
23610 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23611 true, OPTAB_LIB_WIDEN);
23612 if (tmp != destptr)
23613 emit_move_insn (destptr, tmp);
23614 if (!issetmem)
23616 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23617 true, OPTAB_LIB_WIDEN);
23618 if (tmp != srcptr)
23619 emit_move_insn (srcptr, tmp);
23621 emit_label (out_label);
23624 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM
23625 argument. When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23626 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23627 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23628 ORIG_VALUE is the original value passed to memset to fill the memory with.
23629 Other arguments have the same meaning as for the previous function. */
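/* For example, clearing a block whose COUNT is a known multiple of 4
   promotes the operation to SImode, scales the count register by 4 and
   emits the equivalent of

       mov  ecx, count/4
       rep stosd

   while a plain byte-wise copy stays in QImode and becomes rep movsb.  */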
23631 static void
23632 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23633 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23634 rtx count,
23635 machine_mode mode, bool issetmem)
23637 rtx destexp;
23638 rtx srcexp;
23639 rtx countreg;
23640 HOST_WIDE_INT rounded_count;
23642 /* If possible, it is shorter to use rep movs.
23643 TODO: Maybe it is better to move this logic to decide_alg. */
23644 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23645 && (!issetmem || orig_value == const0_rtx))
23646 mode = SImode;
23648 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23649 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23651 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23652 GET_MODE_SIZE (mode)));
23653 if (mode != QImode)
23655 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23656 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23657 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23659 else
23660 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23661 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23663 rounded_count = (INTVAL (count)
23664 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23665 destmem = shallow_copy_rtx (destmem);
23666 set_mem_size (destmem, rounded_count);
23668 else if (MEM_SIZE_KNOWN_P (destmem))
23669 clear_mem_size (destmem);
23671 if (issetmem)
23673 value = force_reg (mode, gen_lowpart (mode, value));
23674 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23676 else
23678 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23679 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23680 if (mode != QImode)
23682 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23683 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23684 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23686 else
23687 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23688 if (CONST_INT_P (count))
23690 rounded_count = (INTVAL (count)
23691 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23692 srcmem = shallow_copy_rtx (srcmem);
23693 set_mem_size (srcmem, rounded_count);
23695 else
23697 if (MEM_SIZE_KNOWN_P (srcmem))
23698 clear_mem_size (srcmem);
23700 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23701 destexp, srcexp));
23705 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23706 DESTMEM.
23707 SRCMEM is passed by pointer to be updated on return.
23708 The return value is the updated DESTMEM. */
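/* For example, with SIZE_TO_MOVE == 16 and SSE enabled the 16-byte
   integer mode is mapped to the matching vector mode (V2DImode on a
   64-bit target, V4SImode on a 32-bit one) and the copy goes through an
   SSE register; if no 16-byte move is available, word-sized or smaller
   pieces are used instead.  */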
23709 static rtx
23710 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23711 HOST_WIDE_INT size_to_move)
23713 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23714 enum insn_code code;
23715 machine_mode move_mode;
23716 int piece_size, i;
23718 /* Find the widest mode in which we could perform moves.
23719 Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
23720 it until a move of such size is supported. */
23721 piece_size = 1 << floor_log2 (size_to_move);
23722 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23723 code = optab_handler (mov_optab, move_mode);
23724 while (code == CODE_FOR_nothing && piece_size > 1)
23726 piece_size >>= 1;
23727 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23728 code = optab_handler (mov_optab, move_mode);
23731 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23732 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23733 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23735 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23736 move_mode = mode_for_vector (word_mode, nunits);
23737 code = optab_handler (mov_optab, move_mode);
23738 if (code == CODE_FOR_nothing)
23740 move_mode = word_mode;
23741 piece_size = GET_MODE_SIZE (move_mode);
23742 code = optab_handler (mov_optab, move_mode);
23745 gcc_assert (code != CODE_FOR_nothing);
23747 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23748 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23750 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23751 gcc_assert (size_to_move % piece_size == 0);
23752 adjust = GEN_INT (piece_size);
23753 for (i = 0; i < size_to_move; i += piece_size)
23755 /* We move from memory to memory, so we'll need to do it via
23756 a temporary register. */
23757 tempreg = gen_reg_rtx (move_mode);
23758 emit_insn (GEN_FCN (code) (tempreg, src));
23759 emit_insn (GEN_FCN (code) (dst, tempreg));
23761 emit_move_insn (destptr,
23762 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23763 emit_move_insn (srcptr,
23764 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23766 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23767 piece_size);
23768 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23769 piece_size);
23772 /* Update DST and SRC rtx. */
23773 *srcmem = src;
23774 return dst;
23777 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
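/* For example, with a constant COUNT of 23 and MAX_SIZE of 16 the
   epilogue size is 23 % 16 == 7, which is handled as one 4-byte, one
   2-byte and one 1-byte move.  */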
23778 static void
23779 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23780 rtx destptr, rtx srcptr, rtx count, int max_size)
23782 rtx src, dest;
23783 if (CONST_INT_P (count))
23785 HOST_WIDE_INT countval = INTVAL (count);
23786 HOST_WIDE_INT epilogue_size = countval % max_size;
23787 int i;
23789 /* For now MAX_SIZE should be a power of 2. This assert could be
23790 relaxed, but it'll require a bit more complicated epilogue
23791 expansion. */
23792 gcc_assert ((max_size & (max_size - 1)) == 0);
23793 for (i = max_size; i >= 1; i >>= 1)
23795 if (epilogue_size & i)
23796 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23798 return;
23800 if (max_size > 8)
23802 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23803 count, 1, OPTAB_DIRECT);
23804 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23805 count, QImode, 1, 4, false);
23806 return;
23809 /* When there are stringops, we can cheaply increase dest and src pointers.
23810 Otherwise we save code size by maintaining an offset (zero is readily
23811 available from the preceding rep operation) and using x86 addressing modes.
23813 if (TARGET_SINGLE_STRINGOP)
23815 if (max_size > 4)
23817 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23818 src = change_address (srcmem, SImode, srcptr);
23819 dest = change_address (destmem, SImode, destptr);
23820 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23821 emit_label (label);
23822 LABEL_NUSES (label) = 1;
23824 if (max_size > 2)
23826 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23827 src = change_address (srcmem, HImode, srcptr);
23828 dest = change_address (destmem, HImode, destptr);
23829 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23830 emit_label (label);
23831 LABEL_NUSES (label) = 1;
23833 if (max_size > 1)
23835 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23836 src = change_address (srcmem, QImode, srcptr);
23837 dest = change_address (destmem, QImode, destptr);
23838 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23839 emit_label (label);
23840 LABEL_NUSES (label) = 1;
23843 else
23845 rtx offset = force_reg (Pmode, const0_rtx);
23846 rtx tmp;
23848 if (max_size > 4)
23850 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23851 src = change_address (srcmem, SImode, srcptr);
23852 dest = change_address (destmem, SImode, destptr);
23853 emit_move_insn (dest, src);
23854 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23855 true, OPTAB_LIB_WIDEN);
23856 if (tmp != offset)
23857 emit_move_insn (offset, tmp);
23858 emit_label (label);
23859 LABEL_NUSES (label) = 1;
23861 if (max_size > 2)
23863 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23864 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23865 src = change_address (srcmem, HImode, tmp);
23866 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23867 dest = change_address (destmem, HImode, tmp);
23868 emit_move_insn (dest, src);
23869 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23870 true, OPTAB_LIB_WIDEN);
23871 if (tmp != offset)
23872 emit_move_insn (offset, tmp);
23873 emit_label (label);
23874 LABEL_NUSES (label) = 1;
23876 if (max_size > 1)
23878 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23879 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23880 src = change_address (srcmem, QImode, tmp);
23881 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23882 dest = change_address (destmem, QImode, tmp);
23883 emit_move_insn (dest, src);
23884 emit_label (label);
23885 LABEL_NUSES (label) = 1;
23890 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23891 with value PROMOTED_VAL.
23893 The return value is the updated DESTMEM. */
23894 static rtx
23895 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23896 HOST_WIDE_INT size_to_move)
23898 rtx dst = destmem, adjust;
23899 enum insn_code code;
23900 machine_mode move_mode;
23901 int piece_size, i;
23903 /* Find the widest mode in which we could perform moves.
23904 Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
23905 it until a move of such size is supported. */
23906 move_mode = GET_MODE (promoted_val);
23907 if (move_mode == VOIDmode)
23908 move_mode = QImode;
23909 if (size_to_move < GET_MODE_SIZE (move_mode))
23911 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23912 promoted_val = gen_lowpart (move_mode, promoted_val);
23914 piece_size = GET_MODE_SIZE (move_mode);
23915 code = optab_handler (mov_optab, move_mode);
23916 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23918 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23920 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23921 gcc_assert (size_to_move % piece_size == 0);
23922 adjust = GEN_INT (piece_size);
23923 for (i = 0; i < size_to_move; i += piece_size)
23925 if (piece_size <= GET_MODE_SIZE (word_mode))
23927 emit_insn (gen_strset (destptr, dst, promoted_val));
23928 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23929 piece_size);
23930 continue;
23933 emit_insn (GEN_FCN (code) (dst, promoted_val));
23935 emit_move_insn (destptr,
23936 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23938 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23939 piece_size);
23942 /* Update DST rtx. */
23943 return dst;
23945 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23946 static void
23947 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23948 rtx count, int max_size)
23950 count =
23951 expand_simple_binop (counter_mode (count), AND, count,
23952 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23953 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23954 gen_lowpart (QImode, value), count, QImode,
23955 1, max_size / 2, true);
23958 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23959 static void
23960 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23961 rtx count, int max_size)
23963 rtx dest;
23965 if (CONST_INT_P (count))
23967 HOST_WIDE_INT countval = INTVAL (count);
23968 HOST_WIDE_INT epilogue_size = countval % max_size;
23969 int i;
23971 /* For now MAX_SIZE should be a power of 2. This assert could be
23972 relaxed, but it'll require a bit more complicated epilogue
23973 expansion. */
23974 gcc_assert ((max_size & (max_size - 1)) == 0);
23975 for (i = max_size; i >= 1; i >>= 1)
23977 if (epilogue_size & i)
23979 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23980 destmem = emit_memset (destmem, destptr, vec_value, i);
23981 else
23982 destmem = emit_memset (destmem, destptr, value, i);
23985 return;
23987 if (max_size > 32)
23989 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23990 return;
23992 if (max_size > 16)
23994 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23995 if (TARGET_64BIT)
23997 dest = change_address (destmem, DImode, destptr);
23998 emit_insn (gen_strset (destptr, dest, value));
23999 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24000 emit_insn (gen_strset (destptr, dest, value));
24002 else
24004 dest = change_address (destmem, SImode, destptr);
24005 emit_insn (gen_strset (destptr, dest, value));
24006 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24007 emit_insn (gen_strset (destptr, dest, value));
24008 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24009 emit_insn (gen_strset (destptr, dest, value));
24010 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24011 emit_insn (gen_strset (destptr, dest, value));
24013 emit_label (label);
24014 LABEL_NUSES (label) = 1;
24016 if (max_size > 8)
24018 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24019 if (TARGET_64BIT)
24021 dest = change_address (destmem, DImode, destptr);
24022 emit_insn (gen_strset (destptr, dest, value));
24024 else
24026 dest = change_address (destmem, SImode, destptr);
24027 emit_insn (gen_strset (destptr, dest, value));
24028 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24029 emit_insn (gen_strset (destptr, dest, value));
24031 emit_label (label);
24032 LABEL_NUSES (label) = 1;
24034 if (max_size > 4)
24036 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24037 dest = change_address (destmem, SImode, destptr);
24038 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24039 emit_label (label);
24040 LABEL_NUSES (label) = 1;
24042 if (max_size > 2)
24044 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24045 dest = change_address (destmem, HImode, destptr);
24046 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24047 emit_label (label);
24048 LABEL_NUSES (label) = 1;
24050 if (max_size > 1)
24052 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24053 dest = change_address (destmem, QImode, destptr);
24054 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24055 emit_label (label);
24056 LABEL_NUSES (label) = 1;
24060 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough of
24061 DESTMEM to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
24062 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24063 ignored.
24064 The return value is the updated DESTMEM. */
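/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 16 this emits four
   conditional blocks that test bits 0, 1, 2 and 3 of DESTPTR and copy (or
   set) 1, 2, 4 and 8 bytes respectively, so that the main loop that follows
   starts on a 16-byte boundary.  */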
24065 static rtx
24066 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24067 rtx destptr, rtx srcptr, rtx value,
24068 rtx vec_value, rtx count, int align,
24069 int desired_alignment, bool issetmem)
24071 int i;
24072 for (i = 1; i < desired_alignment; i <<= 1)
24074 if (align <= i)
24076 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24077 if (issetmem)
24079 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24080 destmem = emit_memset (destmem, destptr, vec_value, i);
24081 else
24082 destmem = emit_memset (destmem, destptr, value, i);
24084 else
24085 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24086 ix86_adjust_counter (count, i);
24087 emit_label (label);
24088 LABEL_NUSES (label) = 1;
24089 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24092 return destmem;
24095 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24096 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24097 and jump to DONE_LABEL. */
24098 static void
24099 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24100 rtx destptr, rtx srcptr,
24101 rtx value, rtx vec_value,
24102 rtx count, int size,
24103 rtx done_label, bool issetmem)
24105 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24106 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24107 rtx modesize;
24108 int n;
24110 /* If we do not have a vector value to copy, we must reduce the size. */
24111 if (issetmem)
24113 if (!vec_value)
24115 if (GET_MODE (value) == VOIDmode && size > 8)
24116 mode = Pmode;
24117 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24118 mode = GET_MODE (value);
24120 else
24121 mode = GET_MODE (vec_value), value = vec_value;
24123 else
24125 /* Choose appropriate vector mode. */
24126 if (size >= 32)
24127 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24128 else if (size >= 16)
24129 mode = TARGET_SSE ? V16QImode : DImode;
24130 srcmem = change_address (srcmem, mode, srcptr);
24132 destmem = change_address (destmem, mode, destptr);
24133 modesize = GEN_INT (GET_MODE_SIZE (mode));
24134 gcc_assert (GET_MODE_SIZE (mode) <= size);
24135 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24137 if (issetmem)
24138 emit_move_insn (destmem, gen_lowpart (mode, value));
24139 else
24141 emit_move_insn (destmem, srcmem);
24142 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24144 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24147 destmem = offset_address (destmem, count, 1);
24148 destmem = offset_address (destmem, GEN_INT (-2 * size),
24149 GET_MODE_SIZE (mode));
24150 if (!issetmem)
24152 srcmem = offset_address (srcmem, count, 1);
24153 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24154 GET_MODE_SIZE (mode));
24156 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24158 if (issetmem)
24159 emit_move_insn (destmem, gen_lowpart (mode, value));
24160 else
24162 emit_move_insn (destmem, srcmem);
24163 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24165 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24167 emit_jump_insn (gen_jump (done_label));
24168 emit_barrier ();
24170 emit_label (label);
24171 LABEL_NUSES (label) = 1;
24174 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24175 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24176 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24177 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24178 DONE_LABEL is a label after the whole copying sequence. The label is created
24179 on demand if *DONE_LABEL is NULL.
24180 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for
24181 new bounds after the initial copies.
24183 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24184 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24185 we will dispatch to a library call for large blocks.
24187 In pseudocode we do:
24189 if (COUNT < SIZE)
24191 Assume that SIZE is 4. Bigger sizes are handled analogously
24192 if (COUNT & 4)
24194 copy 4 bytes from SRCPTR to DESTPTR
24195 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24196 goto done_label
24198 if (!COUNT)
24199 goto done_label;
24200 copy 1 byte from SRCPTR to DESTPTR
24201 if (COUNT & 2)
24203 copy 2 bytes from SRCPTR to DESTPTR
24204 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24207 else
24209 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24210 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24212 OLD_DESTPTR = DESTPTR;
24213 Align DESTPTR up to DESIRED_ALIGN
24214 SRCPTR += DESTPTR - OLD_DESTPTR
24215 COUNT -= DESTPTR - OLD_DESTPTR
24216 if (DYNAMIC_CHECK)
24217 Round COUNT down to multiple of SIZE
24218 << optional caller supplied zero size guard is here >>
24219 << optional caller supplied dynamic check is here >>
24220 << caller supplied main copy loop is here >>
24222 done_label:
24224 static void
24225 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24226 rtx *destptr, rtx *srcptr,
24227 machine_mode mode,
24228 rtx value, rtx vec_value,
24229 rtx *count,
24230 rtx_code_label **done_label,
24231 int size,
24232 int desired_align,
24233 int align,
24234 unsigned HOST_WIDE_INT *min_size,
24235 bool dynamic_check,
24236 bool issetmem)
24238 rtx_code_label *loop_label = NULL, *label;
24239 int n;
24240 rtx modesize;
24241 int prolog_size = 0;
24242 rtx mode_value;
24244 /* Choose the proper value to copy. */
24245 if (issetmem && VECTOR_MODE_P (mode))
24246 mode_value = vec_value;
24247 else
24248 mode_value = value;
24249 gcc_assert (GET_MODE_SIZE (mode) <= size);
24251 /* See if block is big or small, handle small blocks. */
24252 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24254 int size2 = size;
24255 loop_label = gen_label_rtx ();
24257 if (!*done_label)
24258 *done_label = gen_label_rtx ();
24260 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24261 1, loop_label);
24262 size2 >>= 1;
24264 /* Handle sizes > 3. */
24265 for (;size2 > 2; size2 >>= 1)
24266 expand_small_movmem_or_setmem (destmem, srcmem,
24267 *destptr, *srcptr,
24268 value, vec_value,
24269 *count,
24270 size2, *done_label, issetmem);
24271 /* Nothing to copy? Jump to DONE_LABEL if so */
24272 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24273 1, *done_label);
24275 /* Do a byte copy. */
24276 destmem = change_address (destmem, QImode, *destptr);
24277 if (issetmem)
24278 emit_move_insn (destmem, gen_lowpart (QImode, value));
24279 else
24281 srcmem = change_address (srcmem, QImode, *srcptr);
24282 emit_move_insn (destmem, srcmem);
24285 /* Handle sizes 2 and 3. */
24286 label = ix86_expand_aligntest (*count, 2, false);
24287 destmem = change_address (destmem, HImode, *destptr);
24288 destmem = offset_address (destmem, *count, 1);
24289 destmem = offset_address (destmem, GEN_INT (-2), 2);
24290 if (issetmem)
24291 emit_move_insn (destmem, gen_lowpart (HImode, value));
24292 else
24294 srcmem = change_address (srcmem, HImode, *srcptr);
24295 srcmem = offset_address (srcmem, *count, 1);
24296 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24297 emit_move_insn (destmem, srcmem);
24300 emit_label (label);
24301 LABEL_NUSES (label) = 1;
24302 emit_jump_insn (gen_jump (*done_label));
24303 emit_barrier ();
24305 else
24306 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24307 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24309 /* Start memcpy for COUNT >= SIZE. */
24310 if (loop_label)
24312 emit_label (loop_label);
24313 LABEL_NUSES (loop_label) = 1;
24316 /* Copy first desired_align bytes. */
24317 if (!issetmem)
24318 srcmem = change_address (srcmem, mode, *srcptr);
24319 destmem = change_address (destmem, mode, *destptr);
24320 modesize = GEN_INT (GET_MODE_SIZE (mode));
24321 for (n = 0; prolog_size < desired_align - align; n++)
24323 if (issetmem)
24324 emit_move_insn (destmem, mode_value);
24325 else
24327 emit_move_insn (destmem, srcmem);
24328 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24330 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24331 prolog_size += GET_MODE_SIZE (mode);
24335 /* Copy last SIZE bytes. */
24336 destmem = offset_address (destmem, *count, 1);
24337 destmem = offset_address (destmem,
24338 GEN_INT (-size - prolog_size),
24340 if (issetmem)
24341 emit_move_insn (destmem, mode_value);
24342 else
24344 srcmem = offset_address (srcmem, *count, 1);
24345 srcmem = offset_address (srcmem,
24346 GEN_INT (-size - prolog_size),
24348 emit_move_insn (destmem, srcmem);
24350 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24352 destmem = offset_address (destmem, modesize, 1);
24353 if (issetmem)
24354 emit_move_insn (destmem, mode_value);
24355 else
24357 srcmem = offset_address (srcmem, modesize, 1);
24358 emit_move_insn (destmem, srcmem);
24362 /* Align destination. */
24363 if (desired_align > 1 && desired_align > align)
24365 rtx saveddest = *destptr;
24367 gcc_assert (desired_align <= size);
24368 /* Align destptr up, place it to new register. */
24369 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24370 GEN_INT (prolog_size),
24371 NULL_RTX, 1, OPTAB_DIRECT);
24372 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24373 GEN_INT (-desired_align),
24374 *destptr, 1, OPTAB_DIRECT);
24375 /* See how many bytes we skipped. */
24376 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24377 *destptr,
24378 saveddest, 1, OPTAB_DIRECT);
24379 /* Adjust srcptr and count. */
24380 if (!issetmem)
24381 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24382 *srcptr, 1, OPTAB_DIRECT);
24383 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24384 saveddest, *count, 1, OPTAB_DIRECT);
24385 /* We copied at most size + prolog_size. */
24386 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24387 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24388 else
24389 *min_size = 0;
24391 /* Our loops always round down the block size, but for dispatch to a library
24392 call we need the precise value. */
24393 if (dynamic_check)
24394 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24395 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24397 else
24399 gcc_assert (prolog_size == 0);
24400 /* Decrease count, so we won't end up copying last word twice. */
24401 if (!CONST_INT_P (*count))
24402 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24403 constm1_rtx, *count, 1, OPTAB_DIRECT);
24404 else
24405 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24406 if (*min_size)
24407 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24412 /* This function is like the previous one, except here we know how many bytes
24413 need to be copied. That allows us to update alignment not only of DST, which
24414 is returned, but also of SRC, which is passed as a pointer for that
24415 reason. */
24416 static rtx
24417 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24418 rtx srcreg, rtx value, rtx vec_value,
24419 int desired_align, int align_bytes,
24420 bool issetmem)
24422 rtx src = NULL;
24423 rtx orig_dst = dst;
24424 rtx orig_src = NULL;
24425 int piece_size = 1;
24426 int copied_bytes = 0;
24428 if (!issetmem)
24430 gcc_assert (srcp != NULL);
24431 src = *srcp;
24432 orig_src = src;
24435 for (piece_size = 1;
24436 piece_size <= desired_align && copied_bytes < align_bytes;
24437 piece_size <<= 1)
24439 if (align_bytes & piece_size)
24441 if (issetmem)
24443 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24444 dst = emit_memset (dst, destreg, vec_value, piece_size);
24445 else
24446 dst = emit_memset (dst, destreg, value, piece_size);
24448 else
24449 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24450 copied_bytes += piece_size;
24453 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24454 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24455 if (MEM_SIZE_KNOWN_P (orig_dst))
24456 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24458 if (!issetmem)
24460 int src_align_bytes = get_mem_align_offset (src, desired_align
24461 * BITS_PER_UNIT);
24462 if (src_align_bytes >= 0)
24463 src_align_bytes = desired_align - src_align_bytes;
24464 if (src_align_bytes >= 0)
24466 unsigned int src_align;
24467 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24469 if ((src_align_bytes & (src_align - 1))
24470 == (align_bytes & (src_align - 1)))
24471 break;
24473 if (src_align > (unsigned int) desired_align)
24474 src_align = desired_align;
24475 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24476 set_mem_align (src, src_align * BITS_PER_UNIT);
24478 if (MEM_SIZE_KNOWN_P (orig_src))
24479 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24480 *srcp = src;
24483 return dst;
24486 /* Return true if ALG can be used in the current context.
24487 Assume we expand memset if MEMSET is true. */
24488 static bool
24489 alg_usable_p (enum stringop_alg alg, bool memset)
24491 if (alg == no_stringop)
24492 return false;
24493 if (alg == vector_loop)
24494 return TARGET_SSE || TARGET_AVX;
24495 /* Algorithms using the rep prefix want at least edi and ecx;
24496 additionally, memset wants eax and memcpy wants esi. Don't
24497 consider such algorithms if the user has appropriated those
24498 registers for their own purposes. */
24499 if (alg == rep_prefix_1_byte
24500 || alg == rep_prefix_4_byte
24501 || alg == rep_prefix_8_byte)
24502 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24503 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24504 return true;
24507 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24508 static enum stringop_alg
24509 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24510 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24511 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24513 const struct stringop_algs * algs;
24514 bool optimize_for_speed;
24515 int max = 0;
24516 const struct processor_costs *cost;
24517 int i;
24518 bool any_alg_usable_p = false;
24520 *noalign = false;
24521 *dynamic_check = -1;
24523 /* Even if the string operation call is cold, we still might spend a lot
24524 of time processing large blocks. */
24525 if (optimize_function_for_size_p (cfun)
24526 || (optimize_insn_for_size_p ()
24527 && (max_size < 256
24528 || (expected_size != -1 && expected_size < 256))))
24529 optimize_for_speed = false;
24530 else
24531 optimize_for_speed = true;
24533 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24534 if (memset)
24535 algs = &cost->memset[TARGET_64BIT != 0];
24536 else
24537 algs = &cost->memcpy[TARGET_64BIT != 0];
24539 /* See maximal size for user defined algorithm. */
24540 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24542 enum stringop_alg candidate = algs->size[i].alg;
24543 bool usable = alg_usable_p (candidate, memset);
24544 any_alg_usable_p |= usable;
24546 if (candidate != libcall && candidate && usable)
24547 max = algs->size[i].max;
24550 /* If the expected size is not known but the max size is small enough
24551 that the inline version is a win, set the expected size into
24552 the range. */
24553 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24554 && expected_size == -1)
24555 expected_size = min_size / 2 + max_size / 2;
24557 /* If the user specified the algorithm, honor it if possible. */
24558 if (ix86_stringop_alg != no_stringop
24559 && alg_usable_p (ix86_stringop_alg, memset))
24560 return ix86_stringop_alg;
24561 /* rep; movq or rep; movl is the smallest variant. */
24562 else if (!optimize_for_speed)
24564 *noalign = true;
24565 if (!count || (count & 3) || (memset && !zero_memset))
24566 return alg_usable_p (rep_prefix_1_byte, memset)
24567 ? rep_prefix_1_byte : loop_1_byte;
24568 else
24569 return alg_usable_p (rep_prefix_4_byte, memset)
24570 ? rep_prefix_4_byte : loop;
24572 /* Very tiny blocks are best handled via the loop; REP is expensive to
24573 set up. */
24574 else if (expected_size != -1 && expected_size < 4)
24575 return loop_1_byte;
24576 else if (expected_size != -1)
24578 enum stringop_alg alg = libcall;
24579 bool alg_noalign = false;
24580 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24582 /* We get here if the algorithms that were not libcall-based
24583 were rep-prefix based and we are unable to use rep prefixes
24584 based on global register usage. Break out of the loop and
24585 use the heuristic below. */
24586 if (algs->size[i].max == 0)
24587 break;
24588 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24590 enum stringop_alg candidate = algs->size[i].alg;
24592 if (candidate != libcall && alg_usable_p (candidate, memset))
24594 alg = candidate;
24595 alg_noalign = algs->size[i].noalign;
24597 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24598 last non-libcall inline algorithm. */
24599 if (TARGET_INLINE_ALL_STRINGOPS)
24601 /* When the current size is best copied by a libcall, but we
24602 are still forced to inline, run the heuristic below
24603 that picks code for medium-sized blocks. */
24604 if (alg != libcall)
24606 *noalign = alg_noalign;
24607 return alg;
24609 else if (!any_alg_usable_p)
24610 break;
24612 else if (alg_usable_p (candidate, memset))
24614 *noalign = algs->size[i].noalign;
24615 return candidate;
24620 /* When asked to inline the call anyway, try to pick a meaningful choice.
24621 We look for the maximal size of block that is faster to copy by hand
24622 and take blocks of at most that size, guessing that the average size
24623 will be roughly half of the block.
24625 If this turns out to be bad, we might simply specify the preferred
24626 choice in ix86_costs. */
24627 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24628 && (algs->unknown_size == libcall
24629 || !alg_usable_p (algs->unknown_size, memset)))
24631 enum stringop_alg alg;
24633 /* If there aren't any usable algorithms, then recursing on
24634 smaller sizes isn't going to find anything. Just return the
24635 simple byte-at-a-time copy loop. */
24636 if (!any_alg_usable_p)
24638 /* Pick something reasonable. */
24639 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24640 *dynamic_check = 128;
24641 return loop_1_byte;
24643 if (max <= 0)
24644 max = 4096;
24645 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24646 zero_memset, dynamic_check, noalign);
24647 gcc_assert (*dynamic_check == -1);
24648 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24649 *dynamic_check = max;
24650 else
24651 gcc_assert (alg != libcall);
24652 return alg;
24654 return (alg_usable_p (algs->unknown_size, memset)
24655 ? algs->unknown_size : libcall);
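/* Illustrative sketch of the cost-table shape consulted above (not an
   actual tuning entry): a stringop_algs initializer such as
     {libcall, {{32, loop, false},
		{8192, rep_prefix_4_byte, false},
		{-1, libcall, false}}}
   tells decide_alg to use a simple loop for blocks up to 32 bytes,
   rep movsl up to 8192 bytes, and a library call for anything larger
   or of unknown size.  */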
24658 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24659 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24660 static int
24661 decide_alignment (int align,
24662 enum stringop_alg alg,
24663 int expected_size,
24664 machine_mode move_mode)
24666 int desired_align = 0;
24668 gcc_assert (alg != no_stringop);
24670 if (alg == libcall)
24671 return 0;
24672 if (move_mode == VOIDmode)
24673 return 0;
24675 desired_align = GET_MODE_SIZE (move_mode);
24676 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24677 copying a whole cache line at once. */
24678 if (TARGET_PENTIUMPRO
24679 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24680 desired_align = 8;
24682 if (optimize_size)
24683 desired_align = 1;
24684 if (desired_align < align)
24685 desired_align = align;
24686 if (expected_size != -1 && expected_size < 4)
24687 desired_align = align;
24689 return desired_align;
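/* For instance, with ALG == rep_prefix_4_byte MOVE_MODE is SImode, so
   DESIRED_ALIGN starts at 4 and is bumped to 8 on PentiumPro by the
   special case above (unless optimizing for size).  */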
24693 /* Helper function for memset. For QImode value 0xXY produce
24694 0xXYXYXYXY of the width specified by MODE. This is essentially
24695 a * 0x01010101, but we can do slightly better than
24696 synth_mult by unwinding the sequence by hand on CPUs with
24697 slow multiply. */
24698 static rtx
24699 promote_duplicated_reg (machine_mode mode, rtx val)
24701 machine_mode valmode = GET_MODE (val);
24702 rtx tmp;
24703 int nops = mode == DImode ? 3 : 2;
24705 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24706 if (val == const0_rtx)
24707 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24708 if (CONST_INT_P (val))
24710 HOST_WIDE_INT v = INTVAL (val) & 255;
24712 v |= v << 8;
24713 v |= v << 16;
24714 if (mode == DImode)
24715 v |= (v << 16) << 16;
24716 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24719 if (valmode == VOIDmode)
24720 valmode = QImode;
24721 if (valmode != QImode)
24722 val = gen_lowpart (QImode, val);
24723 if (mode == QImode)
24724 return val;
24725 if (!TARGET_PARTIAL_REG_STALL)
24726 nops--;
24727 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24728 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24729 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24730 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24732 rtx reg = convert_modes (mode, QImode, val, true);
24733 tmp = promote_duplicated_reg (mode, const1_rtx);
24734 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24735 OPTAB_DIRECT);
24737 else
24739 rtx reg = convert_modes (mode, QImode, val, true);
24741 if (!TARGET_PARTIAL_REG_STALL)
24742 if (mode == SImode)
24743 emit_insn (gen_movsi_insv_1 (reg, reg));
24744 else
24745 emit_insn (gen_movdi_insv_1 (reg, reg));
24746 else
24748 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24749 NULL, 1, OPTAB_DIRECT);
24750 reg =
24751 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24753 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24754 NULL, 1, OPTAB_DIRECT);
24755 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24756 if (mode == SImode)
24757 return reg;
24758 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24759 NULL, 1, OPTAB_DIRECT);
24760 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24761 return reg;
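/* Worked example: a constant VAL of 0xAB in SImode goes through the
   CONST_INT path above as 0xAB -> 0xABAB -> 0xABABABAB.  A non-constant
   VAL is either multiplied by the promoted 0x01010101 constant or built
   with the insv/shift-or sequence, whichever the cost model prefers.  */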
24765 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24766 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24767 alignment from ALIGN to DESIRED_ALIGN. */
24768 static rtx
24769 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24770 int align)
24772 rtx promoted_val;
24774 if (TARGET_64BIT
24775 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24776 promoted_val = promote_duplicated_reg (DImode, val);
24777 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24778 promoted_val = promote_duplicated_reg (SImode, val);
24779 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24780 promoted_val = promote_duplicated_reg (HImode, val);
24781 else
24782 promoted_val = val;
24784 return promoted_val;
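/* For example, a memset value feeding an 8-byte main loop on x86-64 is
   promoted to a DImode broadcast (0xAB -> 0xABABABABABABABAB), while a
   2-byte epilogue only needs the HImode form (0xABAB).  */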
24787 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
24788 operations when profitable. The code depends upon architecture, block size
24789 and alignment, but always has one of the following overall structures:
24791 Aligned move sequence:
24793 1) Prologue guard: Conditional that jumps up to epilogues for small
24794 blocks that can be handled by epilogue alone. This is faster
24795 but also needed for correctness, since the prologue assumes the block
24796 is larger than the desired alignment.
24798 Optional dynamic check for size and libcall for large
24799 blocks is emitted here too, with -minline-stringops-dynamically.
24801 2) Prologue: copy first few bytes in order to get destination
24802 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24803 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24804 copied. We emit either a jump tree on power of two sized
24805 blocks, or a byte loop.
24807 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24808 with specified algorithm.
24810 4) Epilogue: code copying tail of the block that is too small to be
24811 handled by main body (or up to size guarded by prologue guard).
24813 Misaligned move sequence
24815 1) Misaligned move prologue/epilogue containing:
24816 a) Prologue handling small memory blocks and jumping to done_label
24817 (skipped if blocks are known to be large enough)
24818 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24819 needed by single possibly misaligned move
24820 (skipped if alignment is not needed)
24821 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24823 2) Zero size guard dispatching to done_label, if needed
24825 3) dispatch to library call, if needed,
24827 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24828 with specified algorithm. */
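/* As an illustration only (the real output depends on the chosen
   algorithm and target), an aligned memset of unknown size might be
   shaped like:

     cmp  $SIZE_NEEDED, %count      # 1) prologue guard
     jb   .Lepilogue
     ...store a few bytes...        # 2) prologue aligning the destination
   .Lmain:                          # 3) main body
     ...SIZE_NEEDED-byte stores...
     jae  .Lmain
   .Lepilogue:                      # 4) epilogue for the remaining tail
     ...stores for count & (SIZE_NEEDED - 1) bytes...  */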
24829 bool
24830 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24831 rtx align_exp, rtx expected_align_exp,
24832 rtx expected_size_exp, rtx min_size_exp,
24833 rtx max_size_exp, rtx probable_max_size_exp,
24834 bool issetmem)
24836 rtx destreg;
24837 rtx srcreg = NULL;
24838 rtx_code_label *label = NULL;
24839 rtx tmp;
24840 rtx_code_label *jump_around_label = NULL;
24841 HOST_WIDE_INT align = 1;
24842 unsigned HOST_WIDE_INT count = 0;
24843 HOST_WIDE_INT expected_size = -1;
24844 int size_needed = 0, epilogue_size_needed;
24845 int desired_align = 0, align_bytes = 0;
24846 enum stringop_alg alg;
24847 rtx promoted_val = NULL;
24848 rtx vec_promoted_val = NULL;
24849 bool force_loopy_epilogue = false;
24850 int dynamic_check;
24851 bool need_zero_guard = false;
24852 bool noalign;
24853 machine_mode move_mode = VOIDmode;
24854 int unroll_factor = 1;
24855 /* TODO: Once value ranges are available, fill in proper data. */
24856 unsigned HOST_WIDE_INT min_size = 0;
24857 unsigned HOST_WIDE_INT max_size = -1;
24858 unsigned HOST_WIDE_INT probable_max_size = -1;
24859 bool misaligned_prologue_used = false;
24861 if (CONST_INT_P (align_exp))
24862 align = INTVAL (align_exp);
24863 /* i386 can do misaligned access at a reasonably increased cost. */
24864 if (CONST_INT_P (expected_align_exp)
24865 && INTVAL (expected_align_exp) > align)
24866 align = INTVAL (expected_align_exp);
24867 /* ALIGN is the minimum of destination and source alignment, but we care here
24868 just about destination alignment. */
24869 else if (!issetmem
24870 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24871 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24873 if (CONST_INT_P (count_exp))
24875 min_size = max_size = probable_max_size = count = expected_size
24876 = INTVAL (count_exp);
24877 /* When COUNT is 0, there is nothing to do. */
24878 if (!count)
24879 return true;
24881 else
24883 if (min_size_exp)
24884 min_size = INTVAL (min_size_exp);
24885 if (max_size_exp)
24886 max_size = INTVAL (max_size_exp);
24887 if (probable_max_size_exp)
24888 probable_max_size = INTVAL (probable_max_size_exp);
24889 if (CONST_INT_P (expected_size_exp))
24890 expected_size = INTVAL (expected_size_exp);
24893 /* Make sure we don't need to care about overflow later on. */
24894 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24895 return false;
24897 /* Step 0: Decide on preferred algorithm, desired alignment and
24898 size of chunks to be copied by main loop. */
24899 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24900 issetmem,
24901 issetmem && val_exp == const0_rtx,
24902 &dynamic_check, &noalign);
24903 if (alg == libcall)
24904 return false;
24905 gcc_assert (alg != no_stringop);
24907 /* For now the vector version of memset is generated only for memory zeroing, as
24908 creating the promoted vector value is very cheap in this case. */
24909 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24910 alg = unrolled_loop;
24912 if (!count)
24913 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24914 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24915 if (!issetmem)
24916 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24918 unroll_factor = 1;
24919 move_mode = word_mode;
24920 switch (alg)
24922 case libcall:
24923 case no_stringop:
24924 case last_alg:
24925 gcc_unreachable ();
24926 case loop_1_byte:
24927 need_zero_guard = true;
24928 move_mode = QImode;
24929 break;
24930 case loop:
24931 need_zero_guard = true;
24932 break;
24933 case unrolled_loop:
24934 need_zero_guard = true;
24935 unroll_factor = (TARGET_64BIT ? 4 : 2);
24936 break;
24937 case vector_loop:
24938 need_zero_guard = true;
24939 unroll_factor = 4;
24940 /* Find the widest supported mode. */
24941 move_mode = word_mode;
24942 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24943 != CODE_FOR_nothing)
24944 move_mode = GET_MODE_WIDER_MODE (move_mode);
24946 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24947 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24948 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24950 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24951 move_mode = mode_for_vector (word_mode, nunits);
24952 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24953 move_mode = word_mode;
24955 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24956 break;
24957 case rep_prefix_8_byte:
24958 move_mode = DImode;
24959 break;
24960 case rep_prefix_4_byte:
24961 move_mode = SImode;
24962 break;
24963 case rep_prefix_1_byte:
24964 move_mode = QImode;
24965 break;
24967 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24968 epilogue_size_needed = size_needed;
24970 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24971 if (!TARGET_ALIGN_STRINGOPS || noalign)
24972 align = desired_align;
24974 /* Step 1: Prologue guard. */
24976 /* Alignment code needs count to be in register. */
24977 if (CONST_INT_P (count_exp) && desired_align > align)
24979 if (INTVAL (count_exp) > desired_align
24980 && INTVAL (count_exp) > size_needed)
24982 align_bytes
24983 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24984 if (align_bytes <= 0)
24985 align_bytes = 0;
24986 else
24987 align_bytes = desired_align - align_bytes;
24989 if (align_bytes == 0)
24990 count_exp = force_reg (counter_mode (count_exp), count_exp);
24992 gcc_assert (desired_align >= 1 && align >= 1);
24994 /* Misaligned move sequences handle both prologue and epilogue at once.
24995 Default code generation results in smaller code for large alignments
24996 and also avoids redundant work when sizes are known precisely. */
24997 misaligned_prologue_used
24998 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24999 && MAX (desired_align, epilogue_size_needed) <= 32
25000 && desired_align <= epilogue_size_needed
25001 && ((desired_align > align && !align_bytes)
25002 || (!count && epilogue_size_needed > 1)));
25004 /* Do the cheap promotion to allow better CSE across the
25005 main loop and epilogue (i.e. one load of the big constant in
25006 front of all the code).
25007 For now the misaligned move sequences do not have a fast path
25008 without broadcasting. */
25009 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25011 if (alg == vector_loop)
25013 gcc_assert (val_exp == const0_rtx);
25014 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25015 promoted_val = promote_duplicated_reg_to_size (val_exp,
25016 GET_MODE_SIZE (word_mode),
25017 desired_align, align);
25019 else
25021 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25022 desired_align, align);
25025 /* Misaligned move sequences handle both prologues and epilogues at once.
25026 Default code generation results in smaller code for large alignments and
25027 also avoids redundant work when sizes are known precisely. */
25028 if (misaligned_prologue_used)
25030 /* The misaligned move prologue handles small blocks by itself. */
25031 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25032 (dst, src, &destreg, &srcreg,
25033 move_mode, promoted_val, vec_promoted_val,
25034 &count_exp,
25035 &jump_around_label,
25036 desired_align < align
25037 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25038 desired_align, align, &min_size, dynamic_check, issetmem);
25039 if (!issetmem)
25040 src = change_address (src, BLKmode, srcreg);
25041 dst = change_address (dst, BLKmode, destreg);
25042 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25043 epilogue_size_needed = 0;
25044 if (need_zero_guard && !min_size)
25046 /* It is possible that we copied enough so the main loop will not
25047 execute. */
25048 gcc_assert (size_needed > 1);
25049 if (jump_around_label == NULL_RTX)
25050 jump_around_label = gen_label_rtx ();
25051 emit_cmp_and_jump_insns (count_exp,
25052 GEN_INT (size_needed),
25053 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25054 if (expected_size == -1
25055 || expected_size < (desired_align - align) / 2 + size_needed)
25056 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25057 else
25058 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25061 /* Ensure that alignment prologue won't copy past end of block. */
25062 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25064 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25065 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25066 Make sure it is power of 2. */
25067 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25069 /* To improve performance of small blocks, we jump around the VAL
25070 promotion code. This means that if the promoted VAL is not constant,
25071 we might not use it in the epilogue and have to use the byte
25072 loop variant. */
25073 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25074 force_loopy_epilogue = true;
25075 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25076 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25078 /* If main algorithm works on QImode, no epilogue is needed.
25079 For small sizes just don't align anything. */
25080 if (size_needed == 1)
25081 desired_align = align;
25082 else
25083 goto epilogue;
25085 else if (!count
25086 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25088 label = gen_label_rtx ();
25089 emit_cmp_and_jump_insns (count_exp,
25090 GEN_INT (epilogue_size_needed),
25091 LTU, 0, counter_mode (count_exp), 1, label);
25092 if (expected_size == -1 || expected_size < epilogue_size_needed)
25093 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25094 else
25095 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25099 /* Emit code to decide on runtime whether library call or inline should be
25100 used. */
25101 if (dynamic_check != -1)
25103 if (!issetmem && CONST_INT_P (count_exp))
25105 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25107 emit_block_move_via_libcall (dst, src, count_exp, false);
25108 count_exp = const0_rtx;
25109 goto epilogue;
25112 else
25114 rtx_code_label *hot_label = gen_label_rtx ();
25115 if (jump_around_label == NULL_RTX)
25116 jump_around_label = gen_label_rtx ();
25117 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25118 LEU, 0, counter_mode (count_exp),
25119 1, hot_label);
25120 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25121 if (issetmem)
25122 set_storage_via_libcall (dst, count_exp, val_exp, false);
25123 else
25124 emit_block_move_via_libcall (dst, src, count_exp, false);
25125 emit_jump (jump_around_label);
25126 emit_label (hot_label);
25130 /* Step 2: Alignment prologue. */
25131 /* Do the expensive promotion once we branched off the small blocks. */
25132 if (issetmem && !promoted_val)
25133 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25134 desired_align, align);
25136 if (desired_align > align && !misaligned_prologue_used)
25138 if (align_bytes == 0)
25140 /* Except for the first move in the prologue, we no longer know
25141 the constant offset in the aliasing info. It doesn't seem worth
25142 the pain to maintain it for the first move, so throw away
25143 the info early. */
25144 dst = change_address (dst, BLKmode, destreg);
25145 if (!issetmem)
25146 src = change_address (src, BLKmode, srcreg);
25147 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25148 promoted_val, vec_promoted_val,
25149 count_exp, align, desired_align,
25150 issetmem);
25151 /* At most desired_align - align bytes are copied. */
25152 if (min_size < (unsigned)(desired_align - align))
25153 min_size = 0;
25154 else
25155 min_size -= desired_align - align;
25157 else
25159 /* If we know how many bytes need to be stored before dst is
25160 sufficiently aligned, maintain aliasing info accurately. */
25161 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25162 srcreg,
25163 promoted_val,
25164 vec_promoted_val,
25165 desired_align,
25166 align_bytes,
25167 issetmem);
25169 count_exp = plus_constant (counter_mode (count_exp),
25170 count_exp, -align_bytes);
25171 count -= align_bytes;
25172 min_size -= align_bytes;
25173 max_size -= align_bytes;
25175 if (need_zero_guard
25176 && !min_size
25177 && (count < (unsigned HOST_WIDE_INT) size_needed
25178 || (align_bytes == 0
25179 && count < ((unsigned HOST_WIDE_INT) size_needed
25180 + desired_align - align))))
25182 /* It is possible that we copied enough so the main loop will not
25183 execute. */
25184 gcc_assert (size_needed > 1);
25185 if (label == NULL_RTX)
25186 label = gen_label_rtx ();
25187 emit_cmp_and_jump_insns (count_exp,
25188 GEN_INT (size_needed),
25189 LTU, 0, counter_mode (count_exp), 1, label);
25190 if (expected_size == -1
25191 || expected_size < (desired_align - align) / 2 + size_needed)
25192 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25193 else
25194 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25197 if (label && size_needed == 1)
25199 emit_label (label);
25200 LABEL_NUSES (label) = 1;
25201 label = NULL;
25202 epilogue_size_needed = 1;
25203 if (issetmem)
25204 promoted_val = val_exp;
25206 else if (label == NULL_RTX && !misaligned_prologue_used)
25207 epilogue_size_needed = size_needed;
25209 /* Step 3: Main loop. */
25211 switch (alg)
25213 case libcall:
25214 case no_stringop:
25215 case last_alg:
25216 gcc_unreachable ();
25217 case loop_1_byte:
25218 case loop:
25219 case unrolled_loop:
25220 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25221 count_exp, move_mode, unroll_factor,
25222 expected_size, issetmem);
25223 break;
25224 case vector_loop:
25225 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25226 vec_promoted_val, count_exp, move_mode,
25227 unroll_factor, expected_size, issetmem);
25228 break;
25229 case rep_prefix_8_byte:
25230 case rep_prefix_4_byte:
25231 case rep_prefix_1_byte:
25232 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25233 val_exp, count_exp, move_mode, issetmem);
25234 break;
25236 /* Adjust properly the offset of src and dest memory for aliasing. */
25237 if (CONST_INT_P (count_exp))
25239 if (!issetmem)
25240 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25241 (count / size_needed) * size_needed);
25242 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25243 (count / size_needed) * size_needed);
25245 else
25247 if (!issetmem)
25248 src = change_address (src, BLKmode, srcreg);
25249 dst = change_address (dst, BLKmode, destreg);
25252 /* Step 4: Epilogue to copy the remaining bytes. */
25253 epilogue:
25254 if (label)
25256 /* When the main loop is done, COUNT_EXP might hold original count,
25257 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25258 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25259 bytes. Compensate if needed. */
25261 if (size_needed < epilogue_size_needed)
25263 tmp =
25264 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25265 GEN_INT (size_needed - 1), count_exp, 1,
25266 OPTAB_DIRECT);
25267 if (tmp != count_exp)
25268 emit_move_insn (count_exp, tmp);
25270 emit_label (label);
25271 LABEL_NUSES (label) = 1;
25274 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25276 if (force_loopy_epilogue)
25277 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25278 epilogue_size_needed);
25279 else
25281 if (issetmem)
25282 expand_setmem_epilogue (dst, destreg, promoted_val,
25283 vec_promoted_val, count_exp,
25284 epilogue_size_needed);
25285 else
25286 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25287 epilogue_size_needed);
25290 if (jump_around_label)
25291 emit_label (jump_around_label);
25292 return true;
25296 /* Expand the appropriate insns for doing strlen if not just doing
25297 repnz; scasb
25299 out = result, initialized with the start address
25300 align_rtx = alignment of the address.
25301 scratch = scratch register, initialized with the start address when
25302 not aligned, otherwise undefined
25304 This is just the body. It needs the initializations mentioned above and
25305 some address computing at the end. These things are done in i386.md. */
25307 static void
25308 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25310 int align;
25311 rtx tmp;
25312 rtx_code_label *align_2_label = NULL;
25313 rtx_code_label *align_3_label = NULL;
25314 rtx_code_label *align_4_label = gen_label_rtx ();
25315 rtx_code_label *end_0_label = gen_label_rtx ();
25316 rtx mem;
25317 rtx tmpreg = gen_reg_rtx (SImode);
25318 rtx scratch = gen_reg_rtx (SImode);
25319 rtx cmp;
25321 align = 0;
25322 if (CONST_INT_P (align_rtx))
25323 align = INTVAL (align_rtx);
25325 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25327 /* Is there a known alignment and is it less than 4? */
25328 if (align < 4)
25330 rtx scratch1 = gen_reg_rtx (Pmode);
25331 emit_move_insn (scratch1, out);
25332 /* Is there a known alignment and is it not 2? */
25333 if (align != 2)
25335 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25336 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25338 /* Leave just the 3 lower bits. */
25339 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25340 NULL_RTX, 0, OPTAB_WIDEN);
25342 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25343 Pmode, 1, align_4_label);
25344 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25345 Pmode, 1, align_2_label);
25346 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25347 Pmode, 1, align_3_label);
25349 else
25351 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25352 check whether it is aligned to 4 bytes. */
25354 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25355 NULL_RTX, 0, OPTAB_WIDEN);
25357 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25358 Pmode, 1, align_4_label);
25361 mem = change_address (src, QImode, out);
25363 /* Now compare the bytes. */
25365 /* Compare the first n unaligned byte on a byte per byte basis. */
25366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25367 QImode, 1, end_0_label);
25369 /* Increment the address. */
25370 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25372 /* Not needed with an alignment of 2 */
25373 if (align != 2)
25375 emit_label (align_2_label);
25377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25378 end_0_label);
25380 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25382 emit_label (align_3_label);
25385 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25386 end_0_label);
25388 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25391 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25392 align this loop: it only makes programs larger and does not help
25393 speed. */
25394 emit_label (align_4_label);
25396 mem = change_address (src, SImode, out);
25397 emit_move_insn (scratch, mem);
25398 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25400 /* This formula yields a nonzero result iff one of the bytes is zero.
25401 This saves three branches inside the loop and many cycles. */
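/* In C terms the sequence below computes
     (x - 0x01010101) & ~x & 0x80808080
   For example (a worked sketch): x = 0x41410041 gives
   0x403FFF40 & 0xBEBEFFBE & 0x80808080 = 0x00008000, so a zero byte is
   detected, while x = 0x41414141 gives 0.  */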
25403 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25404 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25405 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25406 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25407 gen_int_mode (0x80808080, SImode)));
25408 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25409 align_4_label);
25411 if (TARGET_CMOVE)
25413 rtx reg = gen_reg_rtx (SImode);
25414 rtx reg2 = gen_reg_rtx (Pmode);
25415 emit_move_insn (reg, tmpreg);
25416 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25418 /* If zero is not in the first two bytes, move two bytes forward. */
25419 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25420 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25421 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25422 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25423 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25424 reg,
25425 tmpreg)));
25426 /* Emit lea manually to avoid clobbering of flags. */
25427 emit_insn (gen_rtx_SET (SImode, reg2,
25428 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25430 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25431 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25432 emit_insn (gen_rtx_SET (VOIDmode, out,
25433 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25434 reg2,
25435 out)));
25437 else
25439 rtx_code_label *end_2_label = gen_label_rtx ();
25440 /* Is zero in the first two bytes? */
25442 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25443 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25444 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25445 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25446 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25447 pc_rtx);
25448 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25449 JUMP_LABEL (tmp) = end_2_label;
25451 /* Not in the first two. Move two bytes forward. */
25452 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25453 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25455 emit_label (end_2_label);
25459 /* Avoid branch in fixing the byte. */
25460 tmpreg = gen_lowpart (QImode, tmpreg);
25461 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25462 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25463 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25464 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25466 emit_label (end_0_label);
25469 /* Expand strlen. */
25471 bool
25472 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25474 rtx addr, scratch1, scratch2, scratch3, scratch4;
25476 /* The generic case of the strlen expander is long. Avoid
25477 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25479 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25480 && !TARGET_INLINE_ALL_STRINGOPS
25481 && !optimize_insn_for_size_p ()
25482 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25483 return false;
25485 addr = force_reg (Pmode, XEXP (src, 0));
25486 scratch1 = gen_reg_rtx (Pmode);
25488 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25489 && !optimize_insn_for_size_p ())
25491 /* Well it seems that some optimizer does not combine a call like
25492 foo(strlen(bar), strlen(bar));
25493 when the move and the subtraction are done here. It does calculate
25494 the length just once when these instructions are done inside of
25495 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25496 often used and I use one fewer register for the lifetime of
25497 output_strlen_unroll() this is better. */
25499 emit_move_insn (out, addr);
25501 ix86_expand_strlensi_unroll_1 (out, src, align);
25503 /* strlensi_unroll_1 returns the address of the zero at the end of
25504 the string, like memchr(), so compute the length by subtracting
25505 the start address. */
25506 emit_insn (ix86_gen_sub3 (out, out, addr));
25508 else
25510 rtx unspec;
25512 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25513 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25514 return false;
25516 scratch2 = gen_reg_rtx (Pmode);
25517 scratch3 = gen_reg_rtx (Pmode);
25518 scratch4 = force_reg (Pmode, constm1_rtx);
25520 emit_move_insn (scratch3, addr);
25521 eoschar = force_reg (QImode, eoschar);
25523 src = replace_equiv_address_nv (src, scratch3);
25525 /* If .md starts supporting :P, this can be done in .md. */
25526 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25527 scratch4), UNSPEC_SCAS);
25528 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25529 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25530 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25532 return true;
25535 /* For a given symbol (function), construct code to compute the address of its
25536 PLT entry in the large x86-64 PIC model. */
25537 static rtx
25538 construct_plt_address (rtx symbol)
25540 rtx tmp, unspec;
25542 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25543 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25544 gcc_assert (Pmode == DImode);
25546 tmp = gen_reg_rtx (Pmode);
25547 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25549 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25550 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25551 return tmp;
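/* Roughly (illustrative, register names made up), the sequence built
   above amounts to:
     movabs $symbol@PLTOFF, %tmp
     add    %pic_reg, %tmp
   and the caller then performs an indirect call through %tmp.  */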
25555 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25556 rtx callarg2,
25557 rtx pop, bool sibcall)
25559 rtx vec[3];
25560 rtx use = NULL, call;
25561 unsigned int vec_len = 0;
25563 if (pop == const0_rtx)
25564 pop = NULL;
25565 gcc_assert (!TARGET_64BIT || !pop);
25567 if (TARGET_MACHO && !TARGET_64BIT)
25569 #if TARGET_MACHO
25570 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25571 fnaddr = machopic_indirect_call_target (fnaddr);
25572 #endif
25574 else
25576 /* Static functions and indirect calls don't need the pic register. */
25577 if (flag_pic
25578 && (!TARGET_64BIT
25579 || (ix86_cmodel == CM_LARGE_PIC
25580 && DEFAULT_ABI != MS_ABI))
25581 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25582 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25584 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25585 if (ix86_use_pseudo_pic_reg ())
25586 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25587 pic_offset_table_rtx);
25591 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25592 parameters passed in vector registers. */
25593 if (TARGET_64BIT
25594 && (INTVAL (callarg2) > 0
25595 || (INTVAL (callarg2) == 0
25596 && (TARGET_SSE || !flag_skip_rax_setup))))
25598 rtx al = gen_rtx_REG (QImode, AX_REG);
25599 emit_move_insn (al, callarg2);
25600 use_reg (&use, al);
25603 if (ix86_cmodel == CM_LARGE_PIC
25604 && !TARGET_PECOFF
25605 && MEM_P (fnaddr)
25606 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25607 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25608 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25609 else if (sibcall
25610 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25611 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25613 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25614 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25617 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25619 if (retval)
25621 /* We should add bounds as a destination register in case
25622 a pointer with bounds may be returned. */
25623 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25625 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25626 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25627 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25628 chkp_put_regs_to_expr_list (retval);
25631 call = gen_rtx_SET (VOIDmode, retval, call);
25633 vec[vec_len++] = call;
25635 if (pop)
25637 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25638 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25639 vec[vec_len++] = pop;
25642 if (TARGET_64BIT_MS_ABI
25643 && (!callarg2 || INTVAL (callarg2) != -2))
25645 int const cregs_size
25646 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25647 int i;
25649 for (i = 0; i < cregs_size; i++)
25651 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25652 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25654 clobber_reg (&use, gen_rtx_REG (mode, regno));
25658 if (vec_len > 1)
25659 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25660 call = emit_call_insn (call);
25661 if (use)
25662 CALL_INSN_FUNCTION_USAGE (call) = use;
25664 return call;
25667 /* Output the assembly for a call instruction. */
25669 const char *
25670 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25672 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25673 bool seh_nop_p = false;
25674 const char *xasm;
25676 if (SIBLING_CALL_P (insn))
25678 if (direct_p)
25679 xasm = "%!jmp\t%P0";
25680 /* SEH epilogue detection requires the indirect branch case
25681 to include REX.W. */
25682 else if (TARGET_SEH)
25683 xasm = "%!rex.W jmp %A0";
25684 else
25685 xasm = "%!jmp\t%A0";
25687 output_asm_insn (xasm, &call_op);
25688 return "";
25691 /* SEH unwinding can require an extra nop to be emitted in several
25692 circumstances. Determine if we have one of those. */
25693 if (TARGET_SEH)
25695 rtx_insn *i;
25697 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25699 /* If we get to another real insn, we don't need the nop. */
25700 if (INSN_P (i))
25701 break;
25703 /* If we get to the epilogue note, prevent a catch region from
25704 being adjacent to the standard epilogue sequence. If non-
25705 call-exceptions, we'll have done this during epilogue emission. */
25706 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25707 && !flag_non_call_exceptions
25708 && !can_throw_internal (insn))
25710 seh_nop_p = true;
25711 break;
25715 /* If we didn't find a real insn following the call, prevent the
25716 unwinder from looking into the next function. */
25717 if (i == NULL)
25718 seh_nop_p = true;
25721 if (direct_p)
25722 xasm = "%!call\t%P0";
25723 else
25724 xasm = "%!call\t%A0";
25726 output_asm_insn (xasm, &call_op);
25728 if (seh_nop_p)
25729 return "nop";
25731 return "";
25734 /* Clear stack slot assignments remembered from previous functions.
25735 This is called from INIT_EXPANDERS once before RTL is emitted for each
25736 function. */
25738 static struct machine_function *
25739 ix86_init_machine_status (void)
25741 struct machine_function *f;
25743 f = ggc_cleared_alloc<machine_function> ();
25744 f->use_fast_prologue_epilogue_nregs = -1;
25745 f->call_abi = ix86_abi;
25747 return f;
25750 /* Return a MEM corresponding to a stack slot with mode MODE.
25751 Allocate a new slot if necessary.
25753 The RTL for a function can have several slots available: N is
25754 which slot to use. */
25757 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25759 struct stack_local_entry *s;
25761 gcc_assert (n < MAX_386_STACK_LOCALS);
25763 for (s = ix86_stack_locals; s; s = s->next)
25764 if (s->mode == mode && s->n == n)
25765 return validize_mem (copy_rtx (s->rtl));
25767 s = ggc_alloc<stack_local_entry> ();
25768 s->n = n;
25769 s->mode = mode;
25770 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25772 s->next = ix86_stack_locals;
25773 ix86_stack_locals = s;
25774 return validize_mem (copy_rtx (s->rtl));
25777 static void
25778 ix86_instantiate_decls (void)
25780 struct stack_local_entry *s;
25782 for (s = ix86_stack_locals; s; s = s->next)
25783 if (s->rtl != NULL_RTX)
25784 instantiate_decl_rtl (s->rtl);
25787 /* Check whether x86 address PARTS is a pc-relative address. */
25789 static bool
25790 rip_relative_addr_p (struct ix86_address *parts)
25792 rtx base, index, disp;
25794 base = parts->base;
25795 index = parts->index;
25796 disp = parts->disp;
25798 if (disp && !base && !index)
25800 if (TARGET_64BIT)
25802 rtx symbol = disp;
25804 if (GET_CODE (disp) == CONST)
25805 symbol = XEXP (disp, 0);
25806 if (GET_CODE (symbol) == PLUS
25807 && CONST_INT_P (XEXP (symbol, 1)))
25808 symbol = XEXP (symbol, 0);
25810 if (GET_CODE (symbol) == LABEL_REF
25811 || (GET_CODE (symbol) == SYMBOL_REF
25812 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25813 || (GET_CODE (symbol) == UNSPEC
25814 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25815 || XINT (symbol, 1) == UNSPEC_PCREL
25816 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25817 return true;
25820 return false;
25823 /* Calculate the length of the memory address in the instruction encoding.
25824 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25825 or other prefixes. We never generate addr32 prefix for LEA insn. */
25828 memory_address_length (rtx addr, bool lea)
25830 struct ix86_address parts;
25831 rtx base, index, disp;
25832 int len;
25833 int ok;
25835 if (GET_CODE (addr) == PRE_DEC
25836 || GET_CODE (addr) == POST_INC
25837 || GET_CODE (addr) == PRE_MODIFY
25838 || GET_CODE (addr) == POST_MODIFY)
25839 return 0;
25841 ok = ix86_decompose_address (addr, &parts);
25842 gcc_assert (ok);
25844 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25846 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25847 if (TARGET_64BIT && !lea
25848 && (SImode_address_operand (addr, VOIDmode)
25849 || (parts.base && GET_MODE (parts.base) == SImode)
25850 || (parts.index && GET_MODE (parts.index) == SImode)))
25851 len++;
25853 base = parts.base;
25854 index = parts.index;
25855 disp = parts.disp;
25857 if (base && GET_CODE (base) == SUBREG)
25858 base = SUBREG_REG (base);
25859 if (index && GET_CODE (index) == SUBREG)
25860 index = SUBREG_REG (index);
25862 gcc_assert (base == NULL_RTX || REG_P (base));
25863 gcc_assert (index == NULL_RTX || REG_P (index));
25865 /* Rule of thumb:
25866 - esp as the base always wants an index,
25867 - ebp as the base always wants a displacement,
25868 - r12 as the base always wants an index,
25869 - r13 as the base always wants a displacement. */
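/* For instance (illustrative only): "mov (%rsp), %rax" needs a SIB byte
   and "mov (%rbp), %rax" needs a disp8 of zero, so each costs one byte
   more than "mov (%rbx), %rax".  */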
25871 /* Register Indirect. */
25872 if (base && !index && !disp)
25874 /* esp (for its index) and ebp (for its displacement) need
25875 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25876 code. */
25877 if (base == arg_pointer_rtx
25878 || base == frame_pointer_rtx
25879 || REGNO (base) == SP_REG
25880 || REGNO (base) == BP_REG
25881 || REGNO (base) == R12_REG
25882 || REGNO (base) == R13_REG)
25883 len++;
25886 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25887 is not disp32, but disp32(%rip), so for disp32
25888 SIB byte is needed, unless print_operand_address
25889 optimizes it into disp32(%rip) or (%rip) is implied
25890 by UNSPEC. */
25891 else if (disp && !base && !index)
25893 len += 4;
25894 if (rip_relative_addr_p (&parts))
25895 len++;
25897 else
25899 /* Find the length of the displacement constant. */
25900 if (disp)
25902 if (base && satisfies_constraint_K (disp))
25903 len += 1;
25904 else
25905 len += 4;
25907 /* ebp always wants a displacement. Similarly r13. */
25908 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25909 len++;
25911 /* An index requires the two-byte modrm form.... */
25912 if (index
25913 /* ...like esp (or r12), which always wants an index. */
25914 || base == arg_pointer_rtx
25915 || base == frame_pointer_rtx
25916 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25917 len++;
25920 return len;
25923 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25924 is set, expect that the insn has an 8-bit immediate alternative. */
25926 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25928 int len = 0;
25929 int i;
25930 extract_insn_cached (insn);
25931 for (i = recog_data.n_operands - 1; i >= 0; --i)
25932 if (CONSTANT_P (recog_data.operand[i]))
25934 enum attr_mode mode = get_attr_mode (insn);
25936 gcc_assert (!len);
25937 if (shortform && CONST_INT_P (recog_data.operand[i]))
25939 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25940 switch (mode)
25942 case MODE_QI:
25943 len = 1;
25944 continue;
25945 case MODE_HI:
25946 ival = trunc_int_for_mode (ival, HImode);
25947 break;
25948 case MODE_SI:
25949 ival = trunc_int_for_mode (ival, SImode);
25950 break;
25951 default:
25952 break;
25954 if (IN_RANGE (ival, -128, 127))
25956 len = 1;
25957 continue;
25960 switch (mode)
25962 case MODE_QI:
25963 len = 1;
25964 break;
25965 case MODE_HI:
25966 len = 2;
25967 break;
25968 case MODE_SI:
25969 len = 4;
25970 break;
25971 /* Immediates for DImode instructions are encoded
25972 as 32-bit sign-extended values. */
25973 case MODE_DI:
25974 len = 4;
25975 break;
25976 default:
25977 fatal_insn ("unknown insn mode", insn);
25980 return len;
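/* E.g. (illustrative): with SHORTFORM set, "add $3, %eax" counts a
   1-byte immediate, "add $0x12345678, %eax" counts 4 bytes, and DImode
   immediates also count 4 bytes since they are sign-extended imm32.  */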
25983 /* Compute default value for "length_address" attribute. */
25985 ix86_attr_length_address_default (rtx_insn *insn)
25987 int i;
25989 if (get_attr_type (insn) == TYPE_LEA)
25991 rtx set = PATTERN (insn), addr;
25993 if (GET_CODE (set) == PARALLEL)
25994 set = XVECEXP (set, 0, 0);
25996 gcc_assert (GET_CODE (set) == SET);
25998 addr = SET_SRC (set);
26000 return memory_address_length (addr, true);
26003 extract_insn_cached (insn);
26004 for (i = recog_data.n_operands - 1; i >= 0; --i)
26005 if (MEM_P (recog_data.operand[i]))
26007 constrain_operands_cached (insn, reload_completed);
26008 if (which_alternative != -1)
26010 const char *constraints = recog_data.constraints[i];
26011 int alt = which_alternative;
26013 while (*constraints == '=' || *constraints == '+')
26014 constraints++;
26015 while (alt-- > 0)
26016 while (*constraints++ != ',')
26018 /* Skip ignored operands. */
26019 if (*constraints == 'X')
26020 continue;
26022 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26024 return 0;
26027 /* Compute default value for "length_vex" attribute. It includes
26028 2 or 3 byte VEX prefix and 1 opcode byte. */
26031 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26032 bool has_vex_w)
26034 int i;
26036 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
26037 requires the 3-byte VEX prefix. */
26038 if (!has_0f_opcode || has_vex_w)
26039 return 3 + 1;
26041 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26042 if (!TARGET_64BIT)
26043 return 2 + 1;
26045 extract_insn_cached (insn);
26047 for (i = recog_data.n_operands - 1; i >= 0; --i)
26048 if (REG_P (recog_data.operand[i]))
26050 /* REX.W bit uses 3 byte VEX prefix. */
26051 if (GET_MODE (recog_data.operand[i]) == DImode
26052 && GENERAL_REG_P (recog_data.operand[i]))
26053 return 3 + 1;
26055 else
26057 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26058 if (MEM_P (recog_data.operand[i])
26059 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26060 return 3 + 1;
26063 return 2 + 1;
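/* E.g. (illustrative): "vaddps %xmm0, %xmm1, %xmm2" fits the 2-byte VEX
   prefix, so the value is 3; a DImode general-register operand (REX.W)
   or an extended register mentioned in a memory operand forces the
   3-byte prefix, giving 4.  */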
26066 /* Return the maximum number of instructions a cpu can issue. */
26068 static int
26069 ix86_issue_rate (void)
26071 switch (ix86_tune)
26073 case PROCESSOR_PENTIUM:
26074 case PROCESSOR_BONNELL:
26075 case PROCESSOR_SILVERMONT:
26076 case PROCESSOR_KNL:
26077 case PROCESSOR_INTEL:
26078 case PROCESSOR_K6:
26079 case PROCESSOR_BTVER2:
26080 case PROCESSOR_PENTIUM4:
26081 case PROCESSOR_NOCONA:
26082 return 2;
26084 case PROCESSOR_PENTIUMPRO:
26085 case PROCESSOR_ATHLON:
26086 case PROCESSOR_K8:
26087 case PROCESSOR_AMDFAM10:
26088 case PROCESSOR_GENERIC:
26089 case PROCESSOR_BTVER1:
26090 return 3;
26092 case PROCESSOR_BDVER1:
26093 case PROCESSOR_BDVER2:
26094 case PROCESSOR_BDVER3:
26095 case PROCESSOR_BDVER4:
26096 case PROCESSOR_CORE2:
26097 case PROCESSOR_NEHALEM:
26098 case PROCESSOR_SANDYBRIDGE:
26099 case PROCESSOR_HASWELL:
26100 return 4;
26102 default:
26103 return 1;
26107 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26108 by DEP_INSN and nothing else set by DEP_INSN. */
26110 static bool
26111 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26113 rtx set, set2;
26115 /* Simplify the test for uninteresting insns. */
26116 if (insn_type != TYPE_SETCC
26117 && insn_type != TYPE_ICMOV
26118 && insn_type != TYPE_FCMOV
26119 && insn_type != TYPE_IBR)
26120 return false;
26122 if ((set = single_set (dep_insn)) != 0)
26124 set = SET_DEST (set);
26125 set2 = NULL_RTX;
26127 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26128 && XVECLEN (PATTERN (dep_insn), 0) == 2
26129 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26130 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26132 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26133 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26135 else
26136 return false;
26138 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26139 return false;
26141 /* This test is true if the dependent insn reads the flags but
26142 not any other potentially set register. */
26143 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26144 return false;
26146 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26147 return false;
26149 return true;
26152 /* Return true iff USE_INSN has a memory address with operands set by
26153 SET_INSN. */
26155 bool
26156 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26158 int i;
26159 extract_insn_cached (use_insn);
26160 for (i = recog_data.n_operands - 1; i >= 0; --i)
26161 if (MEM_P (recog_data.operand[i]))
26163 rtx addr = XEXP (recog_data.operand[i], 0);
26164 return modified_in_p (addr, set_insn) != 0;
26166 return false;
26169 /* Helper function for exact_store_load_dependency.
26170 Return true if addr is found in insn. */
26171 static bool
26172 exact_dependency_1 (rtx addr, rtx insn)
26174 enum rtx_code code;
26175 const char *format_ptr;
26176 int i, j;
26178 code = GET_CODE (insn);
26179 switch (code)
26181 case MEM:
26182 if (rtx_equal_p (addr, insn))
26183 return true;
26184 break;
26185 case REG:
26186 CASE_CONST_ANY:
26187 case SYMBOL_REF:
26188 case CODE_LABEL:
26189 case PC:
26190 case CC0:
26191 case EXPR_LIST:
26192 return false;
26193 default:
26194 break;
26197 format_ptr = GET_RTX_FORMAT (code);
26198 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26200 switch (*format_ptr++)
26202 case 'e':
26203 if (exact_dependency_1 (addr, XEXP (insn, i)))
26204 return true;
26205 break;
26206 case 'E':
26207 for (j = 0; j < XVECLEN (insn, i); j++)
26208 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26209 return true;
26210 break;
26213 return false;
26216 /* Return true if there exists exact dependency for store & load, i.e.
26217 the same memory address is used in them. */
26218 static bool
26219 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26221 rtx set1, set2;
26223 set1 = single_set (store);
26224 if (!set1)
26225 return false;
26226 if (!MEM_P (SET_DEST (set1)))
26227 return false;
26228 set2 = single_set (load);
26229 if (!set2)
26230 return false;
26231 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26232 return true;
26233 return false;
26236 static int
26237 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26239 enum attr_type insn_type, dep_insn_type;
26240 enum attr_memory memory;
26241 rtx set, set2;
26242 int dep_insn_code_number;
26244 /* Anti and output dependencies have zero cost on all CPUs. */
26245 if (REG_NOTE_KIND (link) != 0)
26246 return 0;
26248 dep_insn_code_number = recog_memoized (dep_insn);
26250 /* If we can't recognize the insns, we can't really do anything. */
26251 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26252 return cost;
26254 insn_type = get_attr_type (insn);
26255 dep_insn_type = get_attr_type (dep_insn);
26257 switch (ix86_tune)
26259 case PROCESSOR_PENTIUM:
26260 /* Address Generation Interlock adds a cycle of latency. */
26261 if (insn_type == TYPE_LEA)
26263 rtx addr = PATTERN (insn);
26265 if (GET_CODE (addr) == PARALLEL)
26266 addr = XVECEXP (addr, 0, 0);
26268 gcc_assert (GET_CODE (addr) == SET);
26270 addr = SET_SRC (addr);
26271 if (modified_in_p (addr, dep_insn))
26272 cost += 1;
26274 else if (ix86_agi_dependent (dep_insn, insn))
26275 cost += 1;
26277 /* ??? Compares pair with jump/setcc. */
26278 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26279 cost = 0;
26281 /* Floating point stores require value to be ready one cycle earlier. */
26282 if (insn_type == TYPE_FMOV
26283 && get_attr_memory (insn) == MEMORY_STORE
26284 && !ix86_agi_dependent (dep_insn, insn))
26285 cost += 1;
26286 break;
26288 case PROCESSOR_PENTIUMPRO:
26289 /* INT->FP conversion is expensive. */
26290 if (get_attr_fp_int_src (dep_insn))
26291 cost += 5;
26293 /* There is one cycle extra latency between an FP op and a store. */
26294 if (insn_type == TYPE_FMOV
26295 && (set = single_set (dep_insn)) != NULL_RTX
26296 && (set2 = single_set (insn)) != NULL_RTX
26297 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26298 && MEM_P (SET_DEST (set2)))
26299 cost += 1;
26301 memory = get_attr_memory (insn);
26303 /* Show the ability of the reorder buffer to hide the latency of a load
26304 by executing it in parallel with the previous instruction when the
26305 previous instruction is not needed to compute the address. */
26306 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26307 && !ix86_agi_dependent (dep_insn, insn))
26309 /* Claim moves take one cycle, as the core can issue one load
26310 at a time and the next load can start a cycle later. */
26311 if (dep_insn_type == TYPE_IMOV
26312 || dep_insn_type == TYPE_FMOV)
26313 cost = 1;
26314 else if (cost > 1)
26315 cost--;
26317 break;
26319 case PROCESSOR_K6:
26320 /* The esp dependency is resolved before
26321 the instruction is really finished. */
26322 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26323 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26324 return 1;
26326 /* INT->FP conversion is expensive. */
26327 if (get_attr_fp_int_src (dep_insn))
26328 cost += 5;
26330 memory = get_attr_memory (insn);
26332 /* Show the ability of the reorder buffer to hide the latency of a load
26333 by executing it in parallel with the previous instruction when the
26334 previous instruction is not needed to compute the address. */
26335 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26336 && !ix86_agi_dependent (dep_insn, insn))
26339 /* Claim moves take one cycle, as the core can issue one load
26340 at a time and the next load can start a cycle later. */
26340 if (dep_insn_type == TYPE_IMOV
26341 || dep_insn_type == TYPE_FMOV)
26342 cost = 1;
26343 else if (cost > 2)
26344 cost -= 2;
26345 else
26346 cost = 1;
26348 break;
26350 case PROCESSOR_AMDFAM10:
26351 case PROCESSOR_BDVER1:
26352 case PROCESSOR_BDVER2:
26353 case PROCESSOR_BDVER3:
26354 case PROCESSOR_BDVER4:
26355 case PROCESSOR_BTVER1:
26356 case PROCESSOR_BTVER2:
26357 case PROCESSOR_GENERIC:
26358 /* The stack engine allows push and pop instructions to execute in parallel. */
26359 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26360 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26361 return 0;
26362 /* FALLTHRU */
26364 case PROCESSOR_ATHLON:
26365 case PROCESSOR_K8:
26366 memory = get_attr_memory (insn);
26368 /* Show the ability of the reorder buffer to hide the latency of a load
26369 by executing it in parallel with the previous instruction when the
26370 previous instruction is not needed to compute the address. */
26371 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26372 && !ix86_agi_dependent (dep_insn, insn))
26374 enum attr_unit unit = get_attr_unit (insn);
26375 int loadcost = 3;
26377 /* Because of the difference between the length of integer and
26378 floating unit pipeline preparation stages, the memory operands
26379 for floating point are cheaper.
26381 ??? For Athlon the difference is most probably 2. */
26382 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26383 loadcost = 3;
26384 else
26385 loadcost = TARGET_ATHLON ? 2 : 0;
26387 if (cost >= loadcost)
26388 cost -= loadcost;
26389 else
26390 cost = 0;
26392 break;
26394 case PROCESSOR_CORE2:
26395 case PROCESSOR_NEHALEM:
26396 case PROCESSOR_SANDYBRIDGE:
26397 case PROCESSOR_HASWELL:
26398 /* The stack engine allows push and pop instructions to execute in parallel. */
26399 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26400 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26401 return 0;
26403 memory = get_attr_memory (insn);
26405 /* Show the ability of the reorder buffer to hide the latency of a load
26406 by executing it in parallel with the previous instruction when the
26407 previous instruction is not needed to compute the address. */
26408 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26409 && !ix86_agi_dependent (dep_insn, insn))
26411 if (cost >= 4)
26412 cost -= 4;
26413 else
26414 cost = 0;
26416 break;
26418 case PROCESSOR_SILVERMONT:
26419 case PROCESSOR_KNL:
26420 case PROCESSOR_INTEL:
26421 if (!reload_completed)
26422 return cost;
26424 /* Increase cost of integer loads. */
26425 memory = get_attr_memory (dep_insn);
26426 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26428 enum attr_unit unit = get_attr_unit (dep_insn);
26429 if (unit == UNIT_INTEGER && cost == 1)
26431 if (memory == MEMORY_LOAD)
26432 cost = 3;
26433 else
26435 /* Increase the cost of ld/st for short int types only,
26436 because of a store-forwarding issue. */
26437 rtx set = single_set (dep_insn);
26438 if (set && (GET_MODE (SET_DEST (set)) == QImode
26439 || GET_MODE (SET_DEST (set)) == HImode))
26441 /* Increase the cost of the store/load insn if an exact
26442 dependence exists and it is a load insn. */
26443 enum attr_memory insn_memory = get_attr_memory (insn);
26444 if (insn_memory == MEMORY_LOAD
26445 && exact_store_load_dependency (dep_insn, insn))
26446 cost = 3;
26452 default:
26453 break;
26456 return cost;
26459 /* How many alternative schedules to try. This should be as wide as the
26460 scheduling freedom in the DFA, but no wider. Making this value too
26461 large results in extra work for the scheduler. */
26463 static int
26464 ia32_multipass_dfa_lookahead (void)
26466 switch (ix86_tune)
26468 case PROCESSOR_PENTIUM:
26469 return 2;
26471 case PROCESSOR_PENTIUMPRO:
26472 case PROCESSOR_K6:
26473 return 1;
26475 case PROCESSOR_BDVER1:
26476 case PROCESSOR_BDVER2:
26477 case PROCESSOR_BDVER3:
26478 case PROCESSOR_BDVER4:
26479 /* We use lookahead value 4 for BD both before and after reload
26480 schedules. The plan is to include value 8 for -O3. */
26481 return 4;
26483 case PROCESSOR_CORE2:
26484 case PROCESSOR_NEHALEM:
26485 case PROCESSOR_SANDYBRIDGE:
26486 case PROCESSOR_HASWELL:
26487 case PROCESSOR_BONNELL:
26488 case PROCESSOR_SILVERMONT:
26489 case PROCESSOR_KNL:
26490 case PROCESSOR_INTEL:
26491 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26492 as the number of instructions that can be executed in one cycle, i.e.,
26493 issue_rate. It is unclear why the tuning for many CPUs does not do this. */
26494 if (reload_completed)
26495 return ix86_issue_rate ();
26496 /* Don't use lookahead for pre-reload schedule to save compile time. */
26497 return 0;
26499 default:
26500 return 0;
26504 /* Return true if target platform supports macro-fusion. */
26506 static bool
26507 ix86_macro_fusion_p ()
26509 return TARGET_FUSE_CMP_AND_BRANCH;
26512 /* Check whether the current microarchitecture supports macro fusion
26513 for insn pair "CONDGEN + CONDJMP". Refer to
26514 "Intel Architectures Optimization Reference Manual". */
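/* Illustrative note, not part of the original source: a typical pair this
   check accepts is a register/immediate compare followed by a conditional
   jump that does not rely on the sign/overflow flags, e.g.

	cmpl	$0, %eax
	je	.L2

   whereas a pair such as "cmpl $0, (%rdi); je .L2" is rejected below,
   since a MEM-IMM compare plus conditional jump cannot be macro-fused.  */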
26516 static bool
26517 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26519 rtx src, dest;
26520 enum rtx_code ccode;
26521 rtx compare_set = NULL_RTX, test_if, cond;
26522 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26524 if (!any_condjump_p (condjmp))
26525 return false;
26527 if (get_attr_type (condgen) != TYPE_TEST
26528 && get_attr_type (condgen) != TYPE_ICMP
26529 && get_attr_type (condgen) != TYPE_INCDEC
26530 && get_attr_type (condgen) != TYPE_ALU)
26531 return false;
26533 compare_set = single_set (condgen);
26534 if (compare_set == NULL_RTX
26535 && !TARGET_FUSE_ALU_AND_BRANCH)
26536 return false;
26538 if (compare_set == NULL_RTX)
26540 int i;
26541 rtx pat = PATTERN (condgen);
26542 for (i = 0; i < XVECLEN (pat, 0); i++)
26543 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26545 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26546 if (GET_CODE (set_src) == COMPARE)
26547 compare_set = XVECEXP (pat, 0, i);
26548 else
26549 alu_set = XVECEXP (pat, 0, i);
26552 if (compare_set == NULL_RTX)
26553 return false;
26554 src = SET_SRC (compare_set);
26555 if (GET_CODE (src) != COMPARE)
26556 return false;
26558 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26559 supported. */
26560 if ((MEM_P (XEXP (src, 0))
26561 && CONST_INT_P (XEXP (src, 1)))
26562 || (MEM_P (XEXP (src, 1))
26563 && CONST_INT_P (XEXP (src, 0))))
26564 return false;
26566 /* No fusion for RIP-relative address. */
26567 if (MEM_P (XEXP (src, 0)))
26568 addr = XEXP (XEXP (src, 0), 0);
26569 else if (MEM_P (XEXP (src, 1)))
26570 addr = XEXP (XEXP (src, 1), 0);
26572 if (addr) {
26573 ix86_address parts;
26574 int ok = ix86_decompose_address (addr, &parts);
26575 gcc_assert (ok);
26577 if (rip_relative_addr_p (&parts))
26578 return false;
26581 test_if = SET_SRC (pc_set (condjmp));
26582 cond = XEXP (test_if, 0);
26583 ccode = GET_CODE (cond);
26584 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26585 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26586 && (ccode == GE
26587 || ccode == GT
26588 || ccode == LE
26589 || ccode == LT))
26590 return false;
26592 /* Return true for TYPE_TEST and TYPE_ICMP. */
26593 if (get_attr_type (condgen) == TYPE_TEST
26594 || get_attr_type (condgen) == TYPE_ICMP)
26595 return true;
26597 /* The following handles the macro-fusion case for alu + jmp. */
26598 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26599 return false;
26601 /* No fusion for alu op with memory destination operand. */
26602 dest = SET_DEST (alu_set);
26603 if (MEM_P (dest))
26604 return false;
26606 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26607 supported. */
26608 if (get_attr_type (condgen) == TYPE_INCDEC
26609 && (ccode == GEU
26610 || ccode == GTU
26611 || ccode == LEU
26612 || ccode == LTU))
26613 return false;
26615 return true;
26618 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26619 execution. It is applied if
26620 (1) an IMUL instruction is on the top of the list;
26621 (2) the ready list contains exactly one producer of an independent
26622 IMUL instruction.
26623 Return the index of the IMUL producer if it was found and -1 otherwise. */
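/* Illustrative example, not from the original source: if the ready list is
   { ..., producer, ..., imul1 } with imul1 on top, and "producer" is the
   only instruction a second, independent SImode IMUL depends on, the index
   of "producer" is returned; the caller then moves it to the top so that
   the second IMUL becomes ready early enough to overlap with imul1 in the
   pipelined multiplier.  */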
26624 static int
26625 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26627 rtx_insn *insn;
26628 rtx set, insn1, insn2;
26629 sd_iterator_def sd_it;
26630 dep_t dep;
26631 int index = -1;
26632 int i;
26634 if (!TARGET_BONNELL)
26635 return index;
26637 /* Check that IMUL instruction is on the top of ready list. */
26638 insn = ready[n_ready - 1];
26639 set = single_set (insn);
26640 if (!set)
26641 return index;
26642 if (!(GET_CODE (SET_SRC (set)) == MULT
26643 && GET_MODE (SET_SRC (set)) == SImode))
26644 return index;
26646 /* Search for producer of independent IMUL instruction. */
26647 for (i = n_ready - 2; i >= 0; i--)
26649 insn = ready[i];
26650 if (!NONDEBUG_INSN_P (insn))
26651 continue;
26652 /* Skip IMUL instruction. */
26653 insn2 = PATTERN (insn);
26654 if (GET_CODE (insn2) == PARALLEL)
26655 insn2 = XVECEXP (insn2, 0, 0);
26656 if (GET_CODE (insn2) == SET
26657 && GET_CODE (SET_SRC (insn2)) == MULT
26658 && GET_MODE (SET_SRC (insn2)) == SImode)
26659 continue;
26661 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26663 rtx con;
26664 con = DEP_CON (dep);
26665 if (!NONDEBUG_INSN_P (con))
26666 continue;
26667 insn1 = PATTERN (con);
26668 if (GET_CODE (insn1) == PARALLEL)
26669 insn1 = XVECEXP (insn1, 0, 0);
26671 if (GET_CODE (insn1) == SET
26672 && GET_CODE (SET_SRC (insn1)) == MULT
26673 && GET_MODE (SET_SRC (insn1)) == SImode)
26675 sd_iterator_def sd_it1;
26676 dep_t dep1;
26677 /* Check if there is no other dependee for IMUL. */
26678 index = i;
26679 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26681 rtx pro;
26682 pro = DEP_PRO (dep1);
26683 if (!NONDEBUG_INSN_P (pro))
26684 continue;
26685 if (pro != insn)
26686 index = -1;
26688 if (index >= 0)
26689 break;
26692 if (index >= 0)
26693 break;
26695 return index;
26698 /* Try to find the best candidate for the top of the ready list if two insns
26699 have the same priority - a candidate is best if the insns it depends on
26700 were scheduled earlier. Applied for Silvermont and Intel tuning only.
26701 Return true if the top 2 insns must be interchanged. */
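/* For illustration, not in the original source: when the two top insns have
   equal priority, the one whose producers completed earlier (smaller maximum
   INSN_TICK over its resolved back dependencies) is preferred; if those
   ticks are equal as well, a load is preferred over a non-load.  */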
26702 static bool
26703 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26705 rtx_insn *top = ready[n_ready - 1];
26706 rtx_insn *next = ready[n_ready - 2];
26707 rtx set;
26708 sd_iterator_def sd_it;
26709 dep_t dep;
26710 int clock1 = -1;
26711 int clock2 = -1;
26712 #define INSN_TICK(INSN) (HID (INSN)->tick)
26714 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26715 return false;
26717 if (!NONDEBUG_INSN_P (top))
26718 return false;
26719 if (!NONJUMP_INSN_P (top))
26720 return false;
26721 if (!NONDEBUG_INSN_P (next))
26722 return false;
26723 if (!NONJUMP_INSN_P (next))
26724 return false;
26725 set = single_set (top);
26726 if (!set)
26727 return false;
26728 set = single_set (next);
26729 if (!set)
26730 return false;
26732 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26734 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26735 return false;
26736 /* Determine the winner more precisely. */
26737 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26739 rtx pro;
26740 pro = DEP_PRO (dep);
26741 if (!NONDEBUG_INSN_P (pro))
26742 continue;
26743 if (INSN_TICK (pro) > clock1)
26744 clock1 = INSN_TICK (pro);
26746 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26748 rtx pro;
26749 pro = DEP_PRO (dep);
26750 if (!NONDEBUG_INSN_P (pro))
26751 continue;
26752 if (INSN_TICK (pro) > clock2)
26753 clock2 = INSN_TICK (pro);
26756 if (clock1 == clock2)
26758 /* Determine winner - load must win. */
26759 enum attr_memory memory1, memory2;
26760 memory1 = get_attr_memory (top);
26761 memory2 = get_attr_memory (next);
26762 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26763 return true;
26765 return (bool) (clock2 < clock1);
26767 return false;
26768 #undef INSN_TICK
26771 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26772 Return the issue rate. */
26773 static int
26774 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26775 int *pn_ready, int clock_var)
26777 int issue_rate = -1;
26778 int n_ready = *pn_ready;
26779 int i;
26780 rtx_insn *insn;
26781 int index = -1;
26783 /* Set up issue rate. */
26784 issue_rate = ix86_issue_rate ();
26786 /* Do reordering for BONNELL/SILVERMONT only. */
26787 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26788 return issue_rate;
26790 /* Nothing to do if ready list contains only 1 instruction. */
26791 if (n_ready <= 1)
26792 return issue_rate;
26794 /* Do reordering for the post-reload scheduler only. */
26795 if (!reload_completed)
26796 return issue_rate;
26798 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26800 if (sched_verbose > 1)
26801 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26802 INSN_UID (ready[index]));
26804 /* Put IMUL producer (ready[index]) at the top of ready list. */
26805 insn = ready[index];
26806 for (i = index; i < n_ready - 1; i++)
26807 ready[i] = ready[i + 1];
26808 ready[n_ready - 1] = insn;
26809 return issue_rate;
26812 /* Skip selective scheduling since HID is not populated in it. */
26813 if (clock_var != 0
26814 && !sel_sched_p ()
26815 && swap_top_of_ready_list (ready, n_ready))
26817 if (sched_verbose > 1)
26818 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26819 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26820 /* Swap 2 top elements of ready list. */
26821 insn = ready[n_ready - 1];
26822 ready[n_ready - 1] = ready[n_ready - 2];
26823 ready[n_ready - 2] = insn;
26825 return issue_rate;
26828 static bool
26829 ix86_class_likely_spilled_p (reg_class_t);
26831 /* Return true if the lhs of INSN is a HW function argument register and set
26832 IS_SPILLED to true if it is a likely spilled HW register. */
26833 static bool
26834 insn_is_function_arg (rtx insn, bool* is_spilled)
26836 rtx dst;
26838 if (!NONDEBUG_INSN_P (insn))
26839 return false;
26840 /* Call instructions are not movable; ignore them. */
26841 if (CALL_P (insn))
26842 return false;
26843 insn = PATTERN (insn);
26844 if (GET_CODE (insn) == PARALLEL)
26845 insn = XVECEXP (insn, 0, 0);
26846 if (GET_CODE (insn) != SET)
26847 return false;
26848 dst = SET_DEST (insn);
26849 if (REG_P (dst) && HARD_REGISTER_P (dst)
26850 && ix86_function_arg_regno_p (REGNO (dst)))
26852 /* Is it a likely spilled HW register? */
26853 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26854 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26855 *is_spilled = true;
26856 return true;
26858 return false;
26861 /* Add output dependencies for a chain of adjacent function arguments, but
26862 only if there is a move to a likely spilled HW register. Return the first
26863 argument if at least one dependence was added, or NULL otherwise. */
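/* Hypothetical illustration, not from the original source: for a call
   preceded by argument set-up such as

	movl	$1, %edi
	movl	$2, %esi
	call	foo

   the two moves into the likely spilled argument registers form a chain,
   and an output dependence is added between them to restrict their motion
   during the pre-reload schedule.  */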
26864 static rtx_insn *
26865 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26867 rtx_insn *insn;
26868 rtx_insn *last = call;
26869 rtx_insn *first_arg = NULL;
26870 bool is_spilled = false;
26872 head = PREV_INSN (head);
26874 /* Find the argument-passing instruction nearest to the call. */
26875 while (true)
26877 last = PREV_INSN (last);
26878 if (last == head)
26879 return NULL;
26880 if (!NONDEBUG_INSN_P (last))
26881 continue;
26882 if (insn_is_function_arg (last, &is_spilled))
26883 break;
26884 return NULL;
26887 first_arg = last;
26888 while (true)
26890 insn = PREV_INSN (last);
26891 if (!INSN_P (insn))
26892 break;
26893 if (insn == head)
26894 break;
26895 if (!NONDEBUG_INSN_P (insn))
26897 last = insn;
26898 continue;
26900 if (insn_is_function_arg (insn, &is_spilled))
26902 /* Add an output dependence between two function arguments if the chain
26903 of output arguments contains likely spilled HW registers. */
26904 if (is_spilled)
26905 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26906 first_arg = last = insn;
26908 else
26909 break;
26911 if (!is_spilled)
26912 return NULL;
26913 return first_arg;
26916 /* Add output or anti dependency from insn to first_arg to restrict its code
26917 motion. */
26918 static void
26919 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26921 rtx set;
26922 rtx tmp;
26924 /* Add anti dependencies for bounds stores. */
26925 if (INSN_P (insn)
26926 && GET_CODE (PATTERN (insn)) == PARALLEL
26927 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26928 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26930 add_dependence (first_arg, insn, REG_DEP_ANTI);
26931 return;
26934 set = single_set (insn);
26935 if (!set)
26936 return;
26937 tmp = SET_DEST (set);
26938 if (REG_P (tmp))
26940 /* Add output dependency to the first function argument. */
26941 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26942 return;
26944 /* Add anti dependency. */
26945 add_dependence (first_arg, insn, REG_DEP_ANTI);
26948 /* Avoid cross-block motion of a function argument by adding a dependency
26949 from the first non-jump instruction in BB. */
26950 static void
26951 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26953 rtx_insn *insn = BB_END (bb);
26955 while (insn)
26957 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26959 rtx set = single_set (insn);
26960 if (set)
26962 avoid_func_arg_motion (arg, insn);
26963 return;
26966 if (insn == BB_HEAD (bb))
26967 return;
26968 insn = PREV_INSN (insn);
26972 /* Hook for pre-reload schedule - avoid motion of function arguments
26973 passed in likely spilled HW registers. */
26974 static void
26975 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26977 rtx_insn *insn;
26978 rtx_insn *first_arg = NULL;
26979 if (reload_completed)
26980 return;
26981 while (head != tail && DEBUG_INSN_P (head))
26982 head = NEXT_INSN (head);
26983 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26984 if (INSN_P (insn) && CALL_P (insn))
26986 first_arg = add_parameter_dependencies (insn, head);
26987 if (first_arg)
26989 /* Add a dependee for the first argument to predecessors, but only if
26990 the region contains more than one block. */
26991 basic_block bb = BLOCK_FOR_INSN (insn);
26992 int rgn = CONTAINING_RGN (bb->index);
26993 int nr_blks = RGN_NR_BLOCKS (rgn);
26994 /* Skip trivial regions and region head blocks that can have
26995 predecessors outside of region. */
26996 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26998 edge e;
26999 edge_iterator ei;
27001 /* Regions are SCCs with the exception of selective
27002 scheduling with pipelining of outer blocks enabled.
27003 So also check that immediate predecessors of a non-head
27004 block are in the same region. */
27005 FOR_EACH_EDGE (e, ei, bb->preds)
27007 /* Avoid creating loop-carried dependencies by using
27008 the topological ordering of the region. */
27009 if (rgn == CONTAINING_RGN (e->src->index)
27010 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27011 add_dependee_for_func_arg (first_arg, e->src);
27014 insn = first_arg;
27015 if (insn == head)
27016 break;
27019 else if (first_arg)
27020 avoid_func_arg_motion (first_arg, insn);
27023 /* Hook for pre-reload schedule - set the priority of moves from likely spilled
27024 HW registers to the maximum, to schedule them as soon as possible. These are
27025 moves from function argument registers at the top of the function entry
27026 and moves from function return value registers after a call. */
27027 static int
27028 ix86_adjust_priority (rtx_insn *insn, int priority)
27030 rtx set;
27032 if (reload_completed)
27033 return priority;
27035 if (!NONDEBUG_INSN_P (insn))
27036 return priority;
27038 set = single_set (insn);
27039 if (set)
27041 rtx tmp = SET_SRC (set);
27042 if (REG_P (tmp)
27043 && HARD_REGISTER_P (tmp)
27044 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27045 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27046 return current_sched_info->sched_max_insns_priority;
27049 return priority;
27052 /* Model decoder of Core 2/i7.
27053 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27054 track the instruction fetch block boundaries and make sure that long
27055 (9+ bytes) instructions are assigned to D0. */
27057 /* Maximum length of an insn that can be handled by
27058 a secondary decoder unit. '8' for Core 2/i7. */
27059 static int core2i7_secondary_decoder_max_insn_size;
27061 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27062 '16' for Core 2/i7. */
27063 static int core2i7_ifetch_block_size;
27065 /* Maximum number of instructions decoder can handle per cycle.
27066 '6' for Core 2/i7. */
27067 static int core2i7_ifetch_block_max_insns;
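/* A rough illustration of how these limits interact (not part of the
   original source): with a 16-byte ifetch block and at most 6 insns per
   cycle, once 14 bytes of the block have been consumed, a 3-byte insn no
   longer fits and is masked out of ready_try; likewise, any insn longer
   than 8 bytes can only be chosen as the first insn of a cycle, since the
   secondary decoders cannot handle it.  */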
27069 typedef struct ix86_first_cycle_multipass_data_ *
27070 ix86_first_cycle_multipass_data_t;
27071 typedef const struct ix86_first_cycle_multipass_data_ *
27072 const_ix86_first_cycle_multipass_data_t;
27074 /* A variable to store target state across calls to max_issue within
27075 one cycle. */
27076 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27077 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27079 /* Initialize DATA. */
27080 static void
27081 core2i7_first_cycle_multipass_init (void *_data)
27083 ix86_first_cycle_multipass_data_t data
27084 = (ix86_first_cycle_multipass_data_t) _data;
27086 data->ifetch_block_len = 0;
27087 data->ifetch_block_n_insns = 0;
27088 data->ready_try_change = NULL;
27089 data->ready_try_change_size = 0;
27092 /* Advancing the cycle; reset ifetch block counts. */
27093 static void
27094 core2i7_dfa_post_advance_cycle (void)
27096 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27098 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27100 data->ifetch_block_len = 0;
27101 data->ifetch_block_n_insns = 0;
27104 static int min_insn_size (rtx_insn *);
27106 /* Filter out insns from ready_try that the core will not be able to issue
27107 on current cycle due to decoder. */
27108 static void
27109 core2i7_first_cycle_multipass_filter_ready_try
27110 (const_ix86_first_cycle_multipass_data_t data,
27111 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27113 while (n_ready--)
27115 rtx_insn *insn;
27116 int insn_size;
27118 if (ready_try[n_ready])
27119 continue;
27121 insn = get_ready_element (n_ready);
27122 insn_size = min_insn_size (insn);
27124 if (/* If this insn is too long for a secondary decoder ... */
27125 (!first_cycle_insn_p
27126 && insn_size > core2i7_secondary_decoder_max_insn_size)
27127 /* ... or it would not fit into the ifetch block ... */
27128 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27129 /* ... or the decoder is full already ... */
27130 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27131 /* ... mask the insn out. */
27133 ready_try[n_ready] = 1;
27135 if (data->ready_try_change)
27136 bitmap_set_bit (data->ready_try_change, n_ready);
27141 /* Prepare for a new round of multipass lookahead scheduling. */
27142 static void
27143 core2i7_first_cycle_multipass_begin (void *_data,
27144 signed char *ready_try, int n_ready,
27145 bool first_cycle_insn_p)
27147 ix86_first_cycle_multipass_data_t data
27148 = (ix86_first_cycle_multipass_data_t) _data;
27149 const_ix86_first_cycle_multipass_data_t prev_data
27150 = ix86_first_cycle_multipass_data;
27152 /* Restore the state from the end of the previous round. */
27153 data->ifetch_block_len = prev_data->ifetch_block_len;
27154 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27156 /* Filter instructions that cannot be issued on current cycle due to
27157 decoder restrictions. */
27158 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27159 first_cycle_insn_p);
27162 /* INSN is being issued in current solution. Account for its impact on
27163 the decoder model. */
27164 static void
27165 core2i7_first_cycle_multipass_issue (void *_data,
27166 signed char *ready_try, int n_ready,
27167 rtx_insn *insn, const void *_prev_data)
27169 ix86_first_cycle_multipass_data_t data
27170 = (ix86_first_cycle_multipass_data_t) _data;
27171 const_ix86_first_cycle_multipass_data_t prev_data
27172 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27174 int insn_size = min_insn_size (insn);
27176 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27177 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27178 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27179 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27181 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27182 if (!data->ready_try_change)
27184 data->ready_try_change = sbitmap_alloc (n_ready);
27185 data->ready_try_change_size = n_ready;
27187 else if (data->ready_try_change_size < n_ready)
27189 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27190 n_ready, 0);
27191 data->ready_try_change_size = n_ready;
27193 bitmap_clear (data->ready_try_change);
27195 /* Filter out insns from ready_try that the core will not be able to issue
27196 on current cycle due to decoder. */
27197 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27198 false);
27201 /* Revert the effect on ready_try. */
27202 static void
27203 core2i7_first_cycle_multipass_backtrack (const void *_data,
27204 signed char *ready_try,
27205 int n_ready ATTRIBUTE_UNUSED)
27207 const_ix86_first_cycle_multipass_data_t data
27208 = (const_ix86_first_cycle_multipass_data_t) _data;
27209 unsigned int i = 0;
27210 sbitmap_iterator sbi;
27212 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27213 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27215 ready_try[i] = 0;
27219 /* Save the result of multipass lookahead scheduling for the next round. */
27220 static void
27221 core2i7_first_cycle_multipass_end (const void *_data)
27223 const_ix86_first_cycle_multipass_data_t data
27224 = (const_ix86_first_cycle_multipass_data_t) _data;
27225 ix86_first_cycle_multipass_data_t next_data
27226 = ix86_first_cycle_multipass_data;
27228 if (data != NULL)
27230 next_data->ifetch_block_len = data->ifetch_block_len;
27231 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27235 /* Deallocate target data. */
27236 static void
27237 core2i7_first_cycle_multipass_fini (void *_data)
27239 ix86_first_cycle_multipass_data_t data
27240 = (ix86_first_cycle_multipass_data_t) _data;
27242 if (data->ready_try_change)
27244 sbitmap_free (data->ready_try_change);
27245 data->ready_try_change = NULL;
27246 data->ready_try_change_size = 0;
27250 /* Prepare for scheduling pass. */
27251 static void
27252 ix86_sched_init_global (FILE *, int, int)
27254 /* Install scheduling hooks for current CPU. Some of these hooks are used
27255 in time-critical parts of the scheduler, so we only set them up when
27256 they are actually used. */
27257 switch (ix86_tune)
27259 case PROCESSOR_CORE2:
27260 case PROCESSOR_NEHALEM:
27261 case PROCESSOR_SANDYBRIDGE:
27262 case PROCESSOR_HASWELL:
27263 /* Do not perform multipass scheduling for pre-reload schedule
27264 to save compile time. */
27265 if (reload_completed)
27267 targetm.sched.dfa_post_advance_cycle
27268 = core2i7_dfa_post_advance_cycle;
27269 targetm.sched.first_cycle_multipass_init
27270 = core2i7_first_cycle_multipass_init;
27271 targetm.sched.first_cycle_multipass_begin
27272 = core2i7_first_cycle_multipass_begin;
27273 targetm.sched.first_cycle_multipass_issue
27274 = core2i7_first_cycle_multipass_issue;
27275 targetm.sched.first_cycle_multipass_backtrack
27276 = core2i7_first_cycle_multipass_backtrack;
27277 targetm.sched.first_cycle_multipass_end
27278 = core2i7_first_cycle_multipass_end;
27279 targetm.sched.first_cycle_multipass_fini
27280 = core2i7_first_cycle_multipass_fini;
27282 /* Set decoder parameters. */
27283 core2i7_secondary_decoder_max_insn_size = 8;
27284 core2i7_ifetch_block_size = 16;
27285 core2i7_ifetch_block_max_insns = 6;
27286 break;
27288 /* ... Fall through ... */
27289 default:
27290 targetm.sched.dfa_post_advance_cycle = NULL;
27291 targetm.sched.first_cycle_multipass_init = NULL;
27292 targetm.sched.first_cycle_multipass_begin = NULL;
27293 targetm.sched.first_cycle_multipass_issue = NULL;
27294 targetm.sched.first_cycle_multipass_backtrack = NULL;
27295 targetm.sched.first_cycle_multipass_end = NULL;
27296 targetm.sched.first_cycle_multipass_fini = NULL;
27297 break;
27302 /* Compute the alignment given to a constant that is being placed in memory.
27303 EXP is the constant and ALIGN is the alignment that the object would
27304 ordinarily have.
27305 The value of this function is used instead of that alignment to align
27306 the object. */
27309 ix86_constant_alignment (tree exp, int align)
27311 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27312 || TREE_CODE (exp) == INTEGER_CST)
27314 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27315 return 64;
27316 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27317 return 128;
27319 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27320 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27321 return BITS_PER_WORD;
27323 return align;
27326 /* Compute the alignment for a static variable.
27327 TYPE is the data type, and ALIGN is the alignment that
27328 the object would ordinarily have. The value of this function is used
27329 instead of that alignment to align the object. */
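/* Hypothetical example, not from the original source: for a 64-byte global
   struct (with OPT true), the GCC 4.8 compatibility rule below raises the
   alignment to at least 256 bits; with -malign-data=cacheline the
   cache-line limit derived from ix86_tune_cost->prefetch_block can raise
   it further, while -malign-data=abi keeps only the ABI alignment.  */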
27332 ix86_data_alignment (tree type, int align, bool opt)
27334 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27335 for symbols from other compilation units or symbols that don't need
27336 to bind locally. In order to preserve some ABI compatibility with
27337 those compilers, ensure we don't decrease alignment from what we
27338 used to assume. */
27340 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27342 /* A data structure equal to or greater than the size of a cache line
27343 (64 bytes in the Pentium 4 and other recent Intel processors, including
27344 processors based on the Intel Core microarchitecture) should be aligned
27345 so that its base address is a multiple of the cache line size. */
27347 int max_align
27348 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27350 if (max_align < BITS_PER_WORD)
27351 max_align = BITS_PER_WORD;
27353 switch (ix86_align_data_type)
27355 case ix86_align_data_type_abi: opt = false; break;
27356 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27357 case ix86_align_data_type_cacheline: break;
27360 if (opt
27361 && AGGREGATE_TYPE_P (type)
27362 && TYPE_SIZE (type)
27363 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27365 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27366 && align < max_align_compat)
27367 align = max_align_compat;
27368 if (wi::geu_p (TYPE_SIZE (type), max_align)
27369 && align < max_align)
27370 align = max_align;
27373 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27374 to a 16-byte boundary. */
27375 if (TARGET_64BIT)
27377 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27378 && TYPE_SIZE (type)
27379 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27380 && wi::geu_p (TYPE_SIZE (type), 128)
27381 && align < 128)
27382 return 128;
27385 if (!opt)
27386 return align;
27388 if (TREE_CODE (type) == ARRAY_TYPE)
27390 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27391 return 64;
27392 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27393 return 128;
27395 else if (TREE_CODE (type) == COMPLEX_TYPE)
27398 if (TYPE_MODE (type) == DCmode && align < 64)
27399 return 64;
27400 if ((TYPE_MODE (type) == XCmode
27401 || TYPE_MODE (type) == TCmode) && align < 128)
27402 return 128;
27404 else if ((TREE_CODE (type) == RECORD_TYPE
27405 || TREE_CODE (type) == UNION_TYPE
27406 || TREE_CODE (type) == QUAL_UNION_TYPE)
27407 && TYPE_FIELDS (type))
27409 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27410 return 64;
27411 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27412 return 128;
27414 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27415 || TREE_CODE (type) == INTEGER_TYPE)
27417 if (TYPE_MODE (type) == DFmode && align < 64)
27418 return 64;
27419 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27420 return 128;
27423 return align;
27426 /* Compute the alignment for a local variable or a stack slot. EXP is
27427 the data type or decl itself, MODE is the widest mode available and
27428 ALIGN is the alignment that the object would ordinarily have. The
27429 value of this macro is used instead of that alignment to align the
27430 object. */
27432 unsigned int
27433 ix86_local_alignment (tree exp, machine_mode mode,
27434 unsigned int align)
27436 tree type, decl;
27438 if (exp && DECL_P (exp))
27440 type = TREE_TYPE (exp);
27441 decl = exp;
27443 else
27445 type = exp;
27446 decl = NULL;
27449 /* Don't do dynamic stack realignment for long long objects with
27450 -mpreferred-stack-boundary=2. */
27451 if (!TARGET_64BIT
27452 && align == 64
27453 && ix86_preferred_stack_boundary < 64
27454 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27455 && (!type || !TYPE_USER_ALIGN (type))
27456 && (!decl || !DECL_USER_ALIGN (decl)))
27457 align = 32;
27459 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27460 register in MODE. We will return the largest alignment of XF
27461 and DF. */
27462 if (!type)
27464 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27465 align = GET_MODE_ALIGNMENT (DFmode);
27466 return align;
27469 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27470 to a 16-byte boundary. The exact wording is:
27472 An array uses the same alignment as its elements, except that a local or
27473 global array variable of length at least 16 bytes or
27474 a C99 variable-length array variable always has alignment of at least 16 bytes.
27476 This was added to allow use of aligned SSE instructions on arrays. The
27477 rule is meant for static storage (where the compiler cannot do the analysis
27478 by itself). We follow it for automatic variables only when convenient.
27479 We fully control everything in the function being compiled, and functions
27480 from other units cannot rely on the alignment.
27482 Exclude the va_list type. It is the common case of a local array where
27483 we cannot benefit from the alignment.
27485 TODO: Probably one should optimize for size only when the variable does not escape. */
27486 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27487 && TARGET_SSE)
27489 if (AGGREGATE_TYPE_P (type)
27490 && (va_list_type_node == NULL_TREE
27491 || (TYPE_MAIN_VARIANT (type)
27492 != TYPE_MAIN_VARIANT (va_list_type_node)))
27493 && TYPE_SIZE (type)
27494 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27495 && wi::geu_p (TYPE_SIZE (type), 16)
27496 && align < 128)
27497 return 128;
27499 if (TREE_CODE (type) == ARRAY_TYPE)
27501 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27502 return 64;
27503 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27504 return 128;
27506 else if (TREE_CODE (type) == COMPLEX_TYPE)
27508 if (TYPE_MODE (type) == DCmode && align < 64)
27509 return 64;
27510 if ((TYPE_MODE (type) == XCmode
27511 || TYPE_MODE (type) == TCmode) && align < 128)
27512 return 128;
27514 else if ((TREE_CODE (type) == RECORD_TYPE
27515 || TREE_CODE (type) == UNION_TYPE
27516 || TREE_CODE (type) == QUAL_UNION_TYPE)
27517 && TYPE_FIELDS (type))
27519 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27520 return 64;
27521 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27522 return 128;
27524 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27525 || TREE_CODE (type) == INTEGER_TYPE)
27528 if (TYPE_MODE (type) == DFmode && align < 64)
27529 return 64;
27530 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27531 return 128;
27533 return align;
27536 /* Compute the minimum required alignment for dynamic stack realignment
27537 purposes for a local variable, parameter or a stack slot. EXP is
27538 the data type or decl itself, MODE is its mode and ALIGN is the
27539 alignment that the object would ordinarily have. */
27541 unsigned int
27542 ix86_minimum_alignment (tree exp, machine_mode mode,
27543 unsigned int align)
27545 tree type, decl;
27547 if (exp && DECL_P (exp))
27549 type = TREE_TYPE (exp);
27550 decl = exp;
27552 else
27554 type = exp;
27555 decl = NULL;
27558 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27559 return align;
27561 /* Don't do dynamic stack realignment for long long objects with
27562 -mpreferred-stack-boundary=2. */
27563 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27564 && (!type || !TYPE_USER_ALIGN (type))
27565 && (!decl || !DECL_USER_ALIGN (decl)))
27566 return 32;
27568 return align;
27571 /* Find a location for the static chain incoming to a nested function.
27572 This is a register, unless all free registers are used by arguments. */
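/* Summary of the choices made below, for illustration only:
     64-bit				-> R10
     32-bit, default			-> ECX
     32-bit, fastcall or thiscall	-> EAX
     32-bit, regparm (3)		-> pushed on the stack by the
					   trampoline; ESI at the alternate
					   entry point.  */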
27574 static rtx
27575 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27577 unsigned regno;
27579 /* While this function won't be called by the middle-end when a static
27580 chain isn't needed, it's also used throughout the backend so it's
27581 easiest to keep this check centralized. */
27582 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27583 return NULL;
27585 if (TARGET_64BIT)
27587 /* We always use R10 in 64-bit mode. */
27588 regno = R10_REG;
27590 else
27592 const_tree fntype, fndecl;
27593 unsigned int ccvt;
27595 /* By default in 32-bit mode we use ECX to pass the static chain. */
27596 regno = CX_REG;
27598 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27600 fntype = TREE_TYPE (fndecl_or_type);
27601 fndecl = fndecl_or_type;
27603 else
27605 fntype = fndecl_or_type;
27606 fndecl = NULL;
27609 ccvt = ix86_get_callcvt (fntype);
27610 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27612 /* Fastcall functions use ecx/edx for arguments, which leaves
27613 us with EAX for the static chain.
27614 Thiscall functions use ecx for arguments, which also
27615 leaves us with EAX for the static chain. */
27616 regno = AX_REG;
27618 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27620 /* Thiscall functions use ecx for arguments, which leaves
27621 us with EAX and EDX for the static chain.
27622 We use EAX for ABI compatibility. */
27623 regno = AX_REG;
27625 else if (ix86_function_regparm (fntype, fndecl) == 3)
27627 /* For regparm 3, we have no free call-clobbered registers in
27628 which to store the static chain. In order to implement this,
27629 we have the trampoline push the static chain to the stack.
27630 However, we can't push a value below the return address when
27631 we call the nested function directly, so we have to use an
27632 alternate entry point. For this we use ESI, and have the
27633 alternate entry point push ESI, so that things appear the
27634 same once we're executing the nested function. */
27635 if (incoming_p)
27637 if (fndecl == current_function_decl)
27638 ix86_static_chain_on_stack = true;
27639 return gen_frame_mem (SImode,
27640 plus_constant (Pmode,
27641 arg_pointer_rtx, -8));
27643 regno = SI_REG;
27647 return gen_rtx_REG (Pmode, regno);
27650 /* Emit RTL insns to initialize the variable parts of a trampoline.
27651 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27652 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27653 to be passed to the target function. */
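/* For reference only (not part of the original source), the 64-bit
   trampoline emitted below is roughly:

	49 bb <8-byte fnaddr>	movabs	$fnaddr, %r11
	49 ba <8-byte chain>	movabs	$chain,  %r10
	49 ff e3		jmp	*%r11
	90			nop	; pads the final 32-bit store

   with the shorter "41 bb"/"41 ba" movl forms used when a 32-bit
   immediate suffices, as described in the comments below.  */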
27655 static void
27656 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27658 rtx mem, fnaddr;
27659 int opcode;
27660 int offset = 0;
27662 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27664 if (TARGET_64BIT)
27666 int size;
27668 /* Load the function address into r11. Try to load the address using
27669 the shorter movl instead of movabs. We may want to support
27670 movq for kernel mode, but the kernel does not use trampolines at
27671 the moment. FNADDR is a 32-bit address and may not be in
27672 DImode when ptr_mode == SImode. Always use movl in this
27673 case. */
27674 if (ptr_mode == SImode
27675 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27677 fnaddr = copy_addr_to_reg (fnaddr);
27679 mem = adjust_address (m_tramp, HImode, offset);
27680 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27682 mem = adjust_address (m_tramp, SImode, offset + 2);
27683 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27684 offset += 6;
27686 else
27688 mem = adjust_address (m_tramp, HImode, offset);
27689 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27691 mem = adjust_address (m_tramp, DImode, offset + 2);
27692 emit_move_insn (mem, fnaddr);
27693 offset += 10;
27696 /* Load static chain using movabs to r10. Use the shorter movl
27697 instead of movabs when ptr_mode == SImode. */
27698 if (ptr_mode == SImode)
27700 opcode = 0xba41;
27701 size = 6;
27703 else
27705 opcode = 0xba49;
27706 size = 10;
27709 mem = adjust_address (m_tramp, HImode, offset);
27710 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27712 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27713 emit_move_insn (mem, chain_value);
27714 offset += size;
27716 /* Jump to r11; the last (unused) byte is a nop, only there to
27717 pad the write out to a single 32-bit store. */
27718 mem = adjust_address (m_tramp, SImode, offset);
27719 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27720 offset += 4;
27722 else
27724 rtx disp, chain;
27726 /* Depending on the static chain location, either load a register
27727 with a constant, or push the constant to the stack. All of the
27728 instructions are the same size. */
27729 chain = ix86_static_chain (fndecl, true);
27730 if (REG_P (chain))
27732 switch (REGNO (chain))
27734 case AX_REG:
27735 opcode = 0xb8; break;
27736 case CX_REG:
27737 opcode = 0xb9; break;
27738 default:
27739 gcc_unreachable ();
27742 else
27743 opcode = 0x68;
27745 mem = adjust_address (m_tramp, QImode, offset);
27746 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27748 mem = adjust_address (m_tramp, SImode, offset + 1);
27749 emit_move_insn (mem, chain_value);
27750 offset += 5;
27752 mem = adjust_address (m_tramp, QImode, offset);
27753 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27755 mem = adjust_address (m_tramp, SImode, offset + 1);
27757 /* Compute offset from the end of the jmp to the target function.
27758 In the case in which the trampoline stores the static chain on
27759 the stack, we need to skip the first insn which pushes the
27760 (call-saved) register static chain; this push is 1 byte. */
27761 offset += 5;
27762 disp = expand_binop (SImode, sub_optab, fnaddr,
27763 plus_constant (Pmode, XEXP (m_tramp, 0),
27764 offset - (MEM_P (chain) ? 1 : 0)),
27765 NULL_RTX, 1, OPTAB_DIRECT);
27766 emit_move_insn (mem, disp);
27769 gcc_assert (offset <= TRAMPOLINE_SIZE);
27771 #ifdef HAVE_ENABLE_EXECUTE_STACK
27772 #ifdef CHECK_EXECUTE_STACK_ENABLED
27773 if (CHECK_EXECUTE_STACK_ENABLED)
27774 #endif
27775 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27776 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27777 #endif
27780 /* The following file contains several enumerations and data structures
27781 built from the definitions in i386-builtin-types.def. */
27783 #include "i386-builtin-types.inc"
27785 /* Table for the ix86 builtin non-function types. */
27786 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27788 /* Retrieve an element from the above table, building some of
27789 the types lazily. */
27791 static tree
27792 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27794 unsigned int index;
27795 tree type, itype;
27797 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27799 type = ix86_builtin_type_tab[(int) tcode];
27800 if (type != NULL)
27801 return type;
27803 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27804 if (tcode <= IX86_BT_LAST_VECT)
27806 machine_mode mode;
27808 index = tcode - IX86_BT_LAST_PRIM - 1;
27809 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27810 mode = ix86_builtin_type_vect_mode[index];
27812 type = build_vector_type_for_mode (itype, mode);
27814 else
27816 int quals;
27818 index = tcode - IX86_BT_LAST_VECT - 1;
27819 if (tcode <= IX86_BT_LAST_PTR)
27820 quals = TYPE_UNQUALIFIED;
27821 else
27822 quals = TYPE_QUAL_CONST;
27824 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27825 if (quals != TYPE_UNQUALIFIED)
27826 itype = build_qualified_type (itype, quals);
27828 type = build_pointer_type (itype);
27831 ix86_builtin_type_tab[(int) tcode] = type;
27832 return type;
27835 /* Table for the ix86 builtin function types. */
27836 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27838 /* Retrieve an element from the above table, building some of
27839 the types lazily. */
27841 static tree
27842 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27844 tree type;
27846 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27848 type = ix86_builtin_func_type_tab[(int) tcode];
27849 if (type != NULL)
27850 return type;
27852 if (tcode <= IX86_BT_LAST_FUNC)
27854 unsigned start = ix86_builtin_func_start[(int) tcode];
27855 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27856 tree rtype, atype, args = void_list_node;
27857 unsigned i;
27859 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27860 for (i = after - 1; i > start; --i)
27862 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27863 args = tree_cons (NULL, atype, args);
27866 type = build_function_type (rtype, args);
27868 else
27870 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27871 enum ix86_builtin_func_type icode;
27873 icode = ix86_builtin_func_alias_base[index];
27874 type = ix86_get_builtin_func_type (icode);
27877 ix86_builtin_func_type_tab[(int) tcode] = type;
27878 return type;
27882 /* Codes for all the SSE/MMX builtins. */
27883 enum ix86_builtins
27885 IX86_BUILTIN_ADDPS,
27886 IX86_BUILTIN_ADDSS,
27887 IX86_BUILTIN_DIVPS,
27888 IX86_BUILTIN_DIVSS,
27889 IX86_BUILTIN_MULPS,
27890 IX86_BUILTIN_MULSS,
27891 IX86_BUILTIN_SUBPS,
27892 IX86_BUILTIN_SUBSS,
27894 IX86_BUILTIN_CMPEQPS,
27895 IX86_BUILTIN_CMPLTPS,
27896 IX86_BUILTIN_CMPLEPS,
27897 IX86_BUILTIN_CMPGTPS,
27898 IX86_BUILTIN_CMPGEPS,
27899 IX86_BUILTIN_CMPNEQPS,
27900 IX86_BUILTIN_CMPNLTPS,
27901 IX86_BUILTIN_CMPNLEPS,
27902 IX86_BUILTIN_CMPNGTPS,
27903 IX86_BUILTIN_CMPNGEPS,
27904 IX86_BUILTIN_CMPORDPS,
27905 IX86_BUILTIN_CMPUNORDPS,
27906 IX86_BUILTIN_CMPEQSS,
27907 IX86_BUILTIN_CMPLTSS,
27908 IX86_BUILTIN_CMPLESS,
27909 IX86_BUILTIN_CMPNEQSS,
27910 IX86_BUILTIN_CMPNLTSS,
27911 IX86_BUILTIN_CMPNLESS,
27912 IX86_BUILTIN_CMPORDSS,
27913 IX86_BUILTIN_CMPUNORDSS,
27915 IX86_BUILTIN_COMIEQSS,
27916 IX86_BUILTIN_COMILTSS,
27917 IX86_BUILTIN_COMILESS,
27918 IX86_BUILTIN_COMIGTSS,
27919 IX86_BUILTIN_COMIGESS,
27920 IX86_BUILTIN_COMINEQSS,
27921 IX86_BUILTIN_UCOMIEQSS,
27922 IX86_BUILTIN_UCOMILTSS,
27923 IX86_BUILTIN_UCOMILESS,
27924 IX86_BUILTIN_UCOMIGTSS,
27925 IX86_BUILTIN_UCOMIGESS,
27926 IX86_BUILTIN_UCOMINEQSS,
27928 IX86_BUILTIN_CVTPI2PS,
27929 IX86_BUILTIN_CVTPS2PI,
27930 IX86_BUILTIN_CVTSI2SS,
27931 IX86_BUILTIN_CVTSI642SS,
27932 IX86_BUILTIN_CVTSS2SI,
27933 IX86_BUILTIN_CVTSS2SI64,
27934 IX86_BUILTIN_CVTTPS2PI,
27935 IX86_BUILTIN_CVTTSS2SI,
27936 IX86_BUILTIN_CVTTSS2SI64,
27938 IX86_BUILTIN_MAXPS,
27939 IX86_BUILTIN_MAXSS,
27940 IX86_BUILTIN_MINPS,
27941 IX86_BUILTIN_MINSS,
27943 IX86_BUILTIN_LOADUPS,
27944 IX86_BUILTIN_STOREUPS,
27945 IX86_BUILTIN_MOVSS,
27947 IX86_BUILTIN_MOVHLPS,
27948 IX86_BUILTIN_MOVLHPS,
27949 IX86_BUILTIN_LOADHPS,
27950 IX86_BUILTIN_LOADLPS,
27951 IX86_BUILTIN_STOREHPS,
27952 IX86_BUILTIN_STORELPS,
27954 IX86_BUILTIN_MASKMOVQ,
27955 IX86_BUILTIN_MOVMSKPS,
27956 IX86_BUILTIN_PMOVMSKB,
27958 IX86_BUILTIN_MOVNTPS,
27959 IX86_BUILTIN_MOVNTQ,
27961 IX86_BUILTIN_LOADDQU,
27962 IX86_BUILTIN_STOREDQU,
27964 IX86_BUILTIN_PACKSSWB,
27965 IX86_BUILTIN_PACKSSDW,
27966 IX86_BUILTIN_PACKUSWB,
27968 IX86_BUILTIN_PADDB,
27969 IX86_BUILTIN_PADDW,
27970 IX86_BUILTIN_PADDD,
27971 IX86_BUILTIN_PADDQ,
27972 IX86_BUILTIN_PADDSB,
27973 IX86_BUILTIN_PADDSW,
27974 IX86_BUILTIN_PADDUSB,
27975 IX86_BUILTIN_PADDUSW,
27976 IX86_BUILTIN_PSUBB,
27977 IX86_BUILTIN_PSUBW,
27978 IX86_BUILTIN_PSUBD,
27979 IX86_BUILTIN_PSUBQ,
27980 IX86_BUILTIN_PSUBSB,
27981 IX86_BUILTIN_PSUBSW,
27982 IX86_BUILTIN_PSUBUSB,
27983 IX86_BUILTIN_PSUBUSW,
27985 IX86_BUILTIN_PAND,
27986 IX86_BUILTIN_PANDN,
27987 IX86_BUILTIN_POR,
27988 IX86_BUILTIN_PXOR,
27990 IX86_BUILTIN_PAVGB,
27991 IX86_BUILTIN_PAVGW,
27993 IX86_BUILTIN_PCMPEQB,
27994 IX86_BUILTIN_PCMPEQW,
27995 IX86_BUILTIN_PCMPEQD,
27996 IX86_BUILTIN_PCMPGTB,
27997 IX86_BUILTIN_PCMPGTW,
27998 IX86_BUILTIN_PCMPGTD,
28000 IX86_BUILTIN_PMADDWD,
28002 IX86_BUILTIN_PMAXSW,
28003 IX86_BUILTIN_PMAXUB,
28004 IX86_BUILTIN_PMINSW,
28005 IX86_BUILTIN_PMINUB,
28007 IX86_BUILTIN_PMULHUW,
28008 IX86_BUILTIN_PMULHW,
28009 IX86_BUILTIN_PMULLW,
28011 IX86_BUILTIN_PSADBW,
28012 IX86_BUILTIN_PSHUFW,
28014 IX86_BUILTIN_PSLLW,
28015 IX86_BUILTIN_PSLLD,
28016 IX86_BUILTIN_PSLLQ,
28017 IX86_BUILTIN_PSRAW,
28018 IX86_BUILTIN_PSRAD,
28019 IX86_BUILTIN_PSRLW,
28020 IX86_BUILTIN_PSRLD,
28021 IX86_BUILTIN_PSRLQ,
28022 IX86_BUILTIN_PSLLWI,
28023 IX86_BUILTIN_PSLLDI,
28024 IX86_BUILTIN_PSLLQI,
28025 IX86_BUILTIN_PSRAWI,
28026 IX86_BUILTIN_PSRADI,
28027 IX86_BUILTIN_PSRLWI,
28028 IX86_BUILTIN_PSRLDI,
28029 IX86_BUILTIN_PSRLQI,
28031 IX86_BUILTIN_PUNPCKHBW,
28032 IX86_BUILTIN_PUNPCKHWD,
28033 IX86_BUILTIN_PUNPCKHDQ,
28034 IX86_BUILTIN_PUNPCKLBW,
28035 IX86_BUILTIN_PUNPCKLWD,
28036 IX86_BUILTIN_PUNPCKLDQ,
28038 IX86_BUILTIN_SHUFPS,
28040 IX86_BUILTIN_RCPPS,
28041 IX86_BUILTIN_RCPSS,
28042 IX86_BUILTIN_RSQRTPS,
28043 IX86_BUILTIN_RSQRTPS_NR,
28044 IX86_BUILTIN_RSQRTSS,
28045 IX86_BUILTIN_RSQRTF,
28046 IX86_BUILTIN_SQRTPS,
28047 IX86_BUILTIN_SQRTPS_NR,
28048 IX86_BUILTIN_SQRTSS,
28050 IX86_BUILTIN_UNPCKHPS,
28051 IX86_BUILTIN_UNPCKLPS,
28053 IX86_BUILTIN_ANDPS,
28054 IX86_BUILTIN_ANDNPS,
28055 IX86_BUILTIN_ORPS,
28056 IX86_BUILTIN_XORPS,
28058 IX86_BUILTIN_EMMS,
28059 IX86_BUILTIN_LDMXCSR,
28060 IX86_BUILTIN_STMXCSR,
28061 IX86_BUILTIN_SFENCE,
28063 IX86_BUILTIN_FXSAVE,
28064 IX86_BUILTIN_FXRSTOR,
28065 IX86_BUILTIN_FXSAVE64,
28066 IX86_BUILTIN_FXRSTOR64,
28068 IX86_BUILTIN_XSAVE,
28069 IX86_BUILTIN_XRSTOR,
28070 IX86_BUILTIN_XSAVE64,
28071 IX86_BUILTIN_XRSTOR64,
28073 IX86_BUILTIN_XSAVEOPT,
28074 IX86_BUILTIN_XSAVEOPT64,
28076 IX86_BUILTIN_XSAVEC,
28077 IX86_BUILTIN_XSAVEC64,
28079 IX86_BUILTIN_XSAVES,
28080 IX86_BUILTIN_XRSTORS,
28081 IX86_BUILTIN_XSAVES64,
28082 IX86_BUILTIN_XRSTORS64,
28084 /* 3DNow! Original */
28085 IX86_BUILTIN_FEMMS,
28086 IX86_BUILTIN_PAVGUSB,
28087 IX86_BUILTIN_PF2ID,
28088 IX86_BUILTIN_PFACC,
28089 IX86_BUILTIN_PFADD,
28090 IX86_BUILTIN_PFCMPEQ,
28091 IX86_BUILTIN_PFCMPGE,
28092 IX86_BUILTIN_PFCMPGT,
28093 IX86_BUILTIN_PFMAX,
28094 IX86_BUILTIN_PFMIN,
28095 IX86_BUILTIN_PFMUL,
28096 IX86_BUILTIN_PFRCP,
28097 IX86_BUILTIN_PFRCPIT1,
28098 IX86_BUILTIN_PFRCPIT2,
28099 IX86_BUILTIN_PFRSQIT1,
28100 IX86_BUILTIN_PFRSQRT,
28101 IX86_BUILTIN_PFSUB,
28102 IX86_BUILTIN_PFSUBR,
28103 IX86_BUILTIN_PI2FD,
28104 IX86_BUILTIN_PMULHRW,
28106 /* 3DNow! Athlon Extensions */
28107 IX86_BUILTIN_PF2IW,
28108 IX86_BUILTIN_PFNACC,
28109 IX86_BUILTIN_PFPNACC,
28110 IX86_BUILTIN_PI2FW,
28111 IX86_BUILTIN_PSWAPDSI,
28112 IX86_BUILTIN_PSWAPDSF,
28114 /* SSE2 */
28115 IX86_BUILTIN_ADDPD,
28116 IX86_BUILTIN_ADDSD,
28117 IX86_BUILTIN_DIVPD,
28118 IX86_BUILTIN_DIVSD,
28119 IX86_BUILTIN_MULPD,
28120 IX86_BUILTIN_MULSD,
28121 IX86_BUILTIN_SUBPD,
28122 IX86_BUILTIN_SUBSD,
28124 IX86_BUILTIN_CMPEQPD,
28125 IX86_BUILTIN_CMPLTPD,
28126 IX86_BUILTIN_CMPLEPD,
28127 IX86_BUILTIN_CMPGTPD,
28128 IX86_BUILTIN_CMPGEPD,
28129 IX86_BUILTIN_CMPNEQPD,
28130 IX86_BUILTIN_CMPNLTPD,
28131 IX86_BUILTIN_CMPNLEPD,
28132 IX86_BUILTIN_CMPNGTPD,
28133 IX86_BUILTIN_CMPNGEPD,
28134 IX86_BUILTIN_CMPORDPD,
28135 IX86_BUILTIN_CMPUNORDPD,
28136 IX86_BUILTIN_CMPEQSD,
28137 IX86_BUILTIN_CMPLTSD,
28138 IX86_BUILTIN_CMPLESD,
28139 IX86_BUILTIN_CMPNEQSD,
28140 IX86_BUILTIN_CMPNLTSD,
28141 IX86_BUILTIN_CMPNLESD,
28142 IX86_BUILTIN_CMPORDSD,
28143 IX86_BUILTIN_CMPUNORDSD,
28145 IX86_BUILTIN_COMIEQSD,
28146 IX86_BUILTIN_COMILTSD,
28147 IX86_BUILTIN_COMILESD,
28148 IX86_BUILTIN_COMIGTSD,
28149 IX86_BUILTIN_COMIGESD,
28150 IX86_BUILTIN_COMINEQSD,
28151 IX86_BUILTIN_UCOMIEQSD,
28152 IX86_BUILTIN_UCOMILTSD,
28153 IX86_BUILTIN_UCOMILESD,
28154 IX86_BUILTIN_UCOMIGTSD,
28155 IX86_BUILTIN_UCOMIGESD,
28156 IX86_BUILTIN_UCOMINEQSD,
28158 IX86_BUILTIN_MAXPD,
28159 IX86_BUILTIN_MAXSD,
28160 IX86_BUILTIN_MINPD,
28161 IX86_BUILTIN_MINSD,
28163 IX86_BUILTIN_ANDPD,
28164 IX86_BUILTIN_ANDNPD,
28165 IX86_BUILTIN_ORPD,
28166 IX86_BUILTIN_XORPD,
28168 IX86_BUILTIN_SQRTPD,
28169 IX86_BUILTIN_SQRTSD,
28171 IX86_BUILTIN_UNPCKHPD,
28172 IX86_BUILTIN_UNPCKLPD,
28174 IX86_BUILTIN_SHUFPD,
28176 IX86_BUILTIN_LOADUPD,
28177 IX86_BUILTIN_STOREUPD,
28178 IX86_BUILTIN_MOVSD,
28180 IX86_BUILTIN_LOADHPD,
28181 IX86_BUILTIN_LOADLPD,
28183 IX86_BUILTIN_CVTDQ2PD,
28184 IX86_BUILTIN_CVTDQ2PS,
28186 IX86_BUILTIN_CVTPD2DQ,
28187 IX86_BUILTIN_CVTPD2PI,
28188 IX86_BUILTIN_CVTPD2PS,
28189 IX86_BUILTIN_CVTTPD2DQ,
28190 IX86_BUILTIN_CVTTPD2PI,
28192 IX86_BUILTIN_CVTPI2PD,
28193 IX86_BUILTIN_CVTSI2SD,
28194 IX86_BUILTIN_CVTSI642SD,
28196 IX86_BUILTIN_CVTSD2SI,
28197 IX86_BUILTIN_CVTSD2SI64,
28198 IX86_BUILTIN_CVTSD2SS,
28199 IX86_BUILTIN_CVTSS2SD,
28200 IX86_BUILTIN_CVTTSD2SI,
28201 IX86_BUILTIN_CVTTSD2SI64,
28203 IX86_BUILTIN_CVTPS2DQ,
28204 IX86_BUILTIN_CVTPS2PD,
28205 IX86_BUILTIN_CVTTPS2DQ,
28207 IX86_BUILTIN_MOVNTI,
28208 IX86_BUILTIN_MOVNTI64,
28209 IX86_BUILTIN_MOVNTPD,
28210 IX86_BUILTIN_MOVNTDQ,
28212 IX86_BUILTIN_MOVQ128,
28214 /* SSE2 MMX */
28215 IX86_BUILTIN_MASKMOVDQU,
28216 IX86_BUILTIN_MOVMSKPD,
28217 IX86_BUILTIN_PMOVMSKB128,
28219 IX86_BUILTIN_PACKSSWB128,
28220 IX86_BUILTIN_PACKSSDW128,
28221 IX86_BUILTIN_PACKUSWB128,
28223 IX86_BUILTIN_PADDB128,
28224 IX86_BUILTIN_PADDW128,
28225 IX86_BUILTIN_PADDD128,
28226 IX86_BUILTIN_PADDQ128,
28227 IX86_BUILTIN_PADDSB128,
28228 IX86_BUILTIN_PADDSW128,
28229 IX86_BUILTIN_PADDUSB128,
28230 IX86_BUILTIN_PADDUSW128,
28231 IX86_BUILTIN_PSUBB128,
28232 IX86_BUILTIN_PSUBW128,
28233 IX86_BUILTIN_PSUBD128,
28234 IX86_BUILTIN_PSUBQ128,
28235 IX86_BUILTIN_PSUBSB128,
28236 IX86_BUILTIN_PSUBSW128,
28237 IX86_BUILTIN_PSUBUSB128,
28238 IX86_BUILTIN_PSUBUSW128,
28240 IX86_BUILTIN_PAND128,
28241 IX86_BUILTIN_PANDN128,
28242 IX86_BUILTIN_POR128,
28243 IX86_BUILTIN_PXOR128,
28245 IX86_BUILTIN_PAVGB128,
28246 IX86_BUILTIN_PAVGW128,
28248 IX86_BUILTIN_PCMPEQB128,
28249 IX86_BUILTIN_PCMPEQW128,
28250 IX86_BUILTIN_PCMPEQD128,
28251 IX86_BUILTIN_PCMPGTB128,
28252 IX86_BUILTIN_PCMPGTW128,
28253 IX86_BUILTIN_PCMPGTD128,
28255 IX86_BUILTIN_PMADDWD128,
28257 IX86_BUILTIN_PMAXSW128,
28258 IX86_BUILTIN_PMAXUB128,
28259 IX86_BUILTIN_PMINSW128,
28260 IX86_BUILTIN_PMINUB128,
28262 IX86_BUILTIN_PMULUDQ,
28263 IX86_BUILTIN_PMULUDQ128,
28264 IX86_BUILTIN_PMULHUW128,
28265 IX86_BUILTIN_PMULHW128,
28266 IX86_BUILTIN_PMULLW128,
28268 IX86_BUILTIN_PSADBW128,
28269 IX86_BUILTIN_PSHUFHW,
28270 IX86_BUILTIN_PSHUFLW,
28271 IX86_BUILTIN_PSHUFD,
28273 IX86_BUILTIN_PSLLDQI128,
28274 IX86_BUILTIN_PSLLWI128,
28275 IX86_BUILTIN_PSLLDI128,
28276 IX86_BUILTIN_PSLLQI128,
28277 IX86_BUILTIN_PSRAWI128,
28278 IX86_BUILTIN_PSRADI128,
28279 IX86_BUILTIN_PSRLDQI128,
28280 IX86_BUILTIN_PSRLWI128,
28281 IX86_BUILTIN_PSRLDI128,
28282 IX86_BUILTIN_PSRLQI128,
28284 IX86_BUILTIN_PSLLDQ128,
28285 IX86_BUILTIN_PSLLW128,
28286 IX86_BUILTIN_PSLLD128,
28287 IX86_BUILTIN_PSLLQ128,
28288 IX86_BUILTIN_PSRAW128,
28289 IX86_BUILTIN_PSRAD128,
28290 IX86_BUILTIN_PSRLW128,
28291 IX86_BUILTIN_PSRLD128,
28292 IX86_BUILTIN_PSRLQ128,
28294 IX86_BUILTIN_PUNPCKHBW128,
28295 IX86_BUILTIN_PUNPCKHWD128,
28296 IX86_BUILTIN_PUNPCKHDQ128,
28297 IX86_BUILTIN_PUNPCKHQDQ128,
28298 IX86_BUILTIN_PUNPCKLBW128,
28299 IX86_BUILTIN_PUNPCKLWD128,
28300 IX86_BUILTIN_PUNPCKLDQ128,
28301 IX86_BUILTIN_PUNPCKLQDQ128,
28303 IX86_BUILTIN_CLFLUSH,
28304 IX86_BUILTIN_MFENCE,
28305 IX86_BUILTIN_LFENCE,
28306 IX86_BUILTIN_PAUSE,
28308 IX86_BUILTIN_FNSTENV,
28309 IX86_BUILTIN_FLDENV,
28310 IX86_BUILTIN_FNSTSW,
28311 IX86_BUILTIN_FNCLEX,
28313 IX86_BUILTIN_BSRSI,
28314 IX86_BUILTIN_BSRDI,
28315 IX86_BUILTIN_RDPMC,
28316 IX86_BUILTIN_RDTSC,
28317 IX86_BUILTIN_RDTSCP,
28318 IX86_BUILTIN_ROLQI,
28319 IX86_BUILTIN_ROLHI,
28320 IX86_BUILTIN_RORQI,
28321 IX86_BUILTIN_RORHI,
28323 /* SSE3. */
28324 IX86_BUILTIN_ADDSUBPS,
28325 IX86_BUILTIN_HADDPS,
28326 IX86_BUILTIN_HSUBPS,
28327 IX86_BUILTIN_MOVSHDUP,
28328 IX86_BUILTIN_MOVSLDUP,
28329 IX86_BUILTIN_ADDSUBPD,
28330 IX86_BUILTIN_HADDPD,
28331 IX86_BUILTIN_HSUBPD,
28332 IX86_BUILTIN_LDDQU,
28334 IX86_BUILTIN_MONITOR,
28335 IX86_BUILTIN_MWAIT,
28337 /* SSSE3. */
28338 IX86_BUILTIN_PHADDW,
28339 IX86_BUILTIN_PHADDD,
28340 IX86_BUILTIN_PHADDSW,
28341 IX86_BUILTIN_PHSUBW,
28342 IX86_BUILTIN_PHSUBD,
28343 IX86_BUILTIN_PHSUBSW,
28344 IX86_BUILTIN_PMADDUBSW,
28345 IX86_BUILTIN_PMULHRSW,
28346 IX86_BUILTIN_PSHUFB,
28347 IX86_BUILTIN_PSIGNB,
28348 IX86_BUILTIN_PSIGNW,
28349 IX86_BUILTIN_PSIGND,
28350 IX86_BUILTIN_PALIGNR,
28351 IX86_BUILTIN_PABSB,
28352 IX86_BUILTIN_PABSW,
28353 IX86_BUILTIN_PABSD,
28355 IX86_BUILTIN_PHADDW128,
28356 IX86_BUILTIN_PHADDD128,
28357 IX86_BUILTIN_PHADDSW128,
28358 IX86_BUILTIN_PHSUBW128,
28359 IX86_BUILTIN_PHSUBD128,
28360 IX86_BUILTIN_PHSUBSW128,
28361 IX86_BUILTIN_PMADDUBSW128,
28362 IX86_BUILTIN_PMULHRSW128,
28363 IX86_BUILTIN_PSHUFB128,
28364 IX86_BUILTIN_PSIGNB128,
28365 IX86_BUILTIN_PSIGNW128,
28366 IX86_BUILTIN_PSIGND128,
28367 IX86_BUILTIN_PALIGNR128,
28368 IX86_BUILTIN_PABSB128,
28369 IX86_BUILTIN_PABSW128,
28370 IX86_BUILTIN_PABSD128,
28372 /* AMDFAM10 - SSE4A New Instructions. */
28373 IX86_BUILTIN_MOVNTSD,
28374 IX86_BUILTIN_MOVNTSS,
28375 IX86_BUILTIN_EXTRQI,
28376 IX86_BUILTIN_EXTRQ,
28377 IX86_BUILTIN_INSERTQI,
28378 IX86_BUILTIN_INSERTQ,
28380 /* SSE4.1. */
28381 IX86_BUILTIN_BLENDPD,
28382 IX86_BUILTIN_BLENDPS,
28383 IX86_BUILTIN_BLENDVPD,
28384 IX86_BUILTIN_BLENDVPS,
28385 IX86_BUILTIN_PBLENDVB128,
28386 IX86_BUILTIN_PBLENDW128,
28388 IX86_BUILTIN_DPPD,
28389 IX86_BUILTIN_DPPS,
28391 IX86_BUILTIN_INSERTPS128,
28393 IX86_BUILTIN_MOVNTDQA,
28394 IX86_BUILTIN_MPSADBW128,
28395 IX86_BUILTIN_PACKUSDW128,
28396 IX86_BUILTIN_PCMPEQQ,
28397 IX86_BUILTIN_PHMINPOSUW128,
28399 IX86_BUILTIN_PMAXSB128,
28400 IX86_BUILTIN_PMAXSD128,
28401 IX86_BUILTIN_PMAXUD128,
28402 IX86_BUILTIN_PMAXUW128,
28404 IX86_BUILTIN_PMINSB128,
28405 IX86_BUILTIN_PMINSD128,
28406 IX86_BUILTIN_PMINUD128,
28407 IX86_BUILTIN_PMINUW128,
28409 IX86_BUILTIN_PMOVSXBW128,
28410 IX86_BUILTIN_PMOVSXBD128,
28411 IX86_BUILTIN_PMOVSXBQ128,
28412 IX86_BUILTIN_PMOVSXWD128,
28413 IX86_BUILTIN_PMOVSXWQ128,
28414 IX86_BUILTIN_PMOVSXDQ128,
28416 IX86_BUILTIN_PMOVZXBW128,
28417 IX86_BUILTIN_PMOVZXBD128,
28418 IX86_BUILTIN_PMOVZXBQ128,
28419 IX86_BUILTIN_PMOVZXWD128,
28420 IX86_BUILTIN_PMOVZXWQ128,
28421 IX86_BUILTIN_PMOVZXDQ128,
28423 IX86_BUILTIN_PMULDQ128,
28424 IX86_BUILTIN_PMULLD128,
28426 IX86_BUILTIN_ROUNDSD,
28427 IX86_BUILTIN_ROUNDSS,
28429 IX86_BUILTIN_ROUNDPD,
28430 IX86_BUILTIN_ROUNDPS,
28432 IX86_BUILTIN_FLOORPD,
28433 IX86_BUILTIN_CEILPD,
28434 IX86_BUILTIN_TRUNCPD,
28435 IX86_BUILTIN_RINTPD,
28436 IX86_BUILTIN_ROUNDPD_AZ,
28438 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28439 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28440 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28442 IX86_BUILTIN_FLOORPS,
28443 IX86_BUILTIN_CEILPS,
28444 IX86_BUILTIN_TRUNCPS,
28445 IX86_BUILTIN_RINTPS,
28446 IX86_BUILTIN_ROUNDPS_AZ,
28448 IX86_BUILTIN_FLOORPS_SFIX,
28449 IX86_BUILTIN_CEILPS_SFIX,
28450 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28452 IX86_BUILTIN_PTESTZ,
28453 IX86_BUILTIN_PTESTC,
28454 IX86_BUILTIN_PTESTNZC,
28456 IX86_BUILTIN_VEC_INIT_V2SI,
28457 IX86_BUILTIN_VEC_INIT_V4HI,
28458 IX86_BUILTIN_VEC_INIT_V8QI,
28459 IX86_BUILTIN_VEC_EXT_V2DF,
28460 IX86_BUILTIN_VEC_EXT_V2DI,
28461 IX86_BUILTIN_VEC_EXT_V4SF,
28462 IX86_BUILTIN_VEC_EXT_V4SI,
28463 IX86_BUILTIN_VEC_EXT_V8HI,
28464 IX86_BUILTIN_VEC_EXT_V2SI,
28465 IX86_BUILTIN_VEC_EXT_V4HI,
28466 IX86_BUILTIN_VEC_EXT_V16QI,
28467 IX86_BUILTIN_VEC_SET_V2DI,
28468 IX86_BUILTIN_VEC_SET_V4SF,
28469 IX86_BUILTIN_VEC_SET_V4SI,
28470 IX86_BUILTIN_VEC_SET_V8HI,
28471 IX86_BUILTIN_VEC_SET_V4HI,
28472 IX86_BUILTIN_VEC_SET_V16QI,
28474 IX86_BUILTIN_VEC_PACK_SFIX,
28475 IX86_BUILTIN_VEC_PACK_SFIX256,
28477 /* SSE4.2. */
28478 IX86_BUILTIN_CRC32QI,
28479 IX86_BUILTIN_CRC32HI,
28480 IX86_BUILTIN_CRC32SI,
28481 IX86_BUILTIN_CRC32DI,
28483 IX86_BUILTIN_PCMPESTRI128,
28484 IX86_BUILTIN_PCMPESTRM128,
28485 IX86_BUILTIN_PCMPESTRA128,
28486 IX86_BUILTIN_PCMPESTRC128,
28487 IX86_BUILTIN_PCMPESTRO128,
28488 IX86_BUILTIN_PCMPESTRS128,
28489 IX86_BUILTIN_PCMPESTRZ128,
28490 IX86_BUILTIN_PCMPISTRI128,
28491 IX86_BUILTIN_PCMPISTRM128,
28492 IX86_BUILTIN_PCMPISTRA128,
28493 IX86_BUILTIN_PCMPISTRC128,
28494 IX86_BUILTIN_PCMPISTRO128,
28495 IX86_BUILTIN_PCMPISTRS128,
28496 IX86_BUILTIN_PCMPISTRZ128,
28498 IX86_BUILTIN_PCMPGTQ,
28500   /* AES instructions.  */
28501 IX86_BUILTIN_AESENC128,
28502 IX86_BUILTIN_AESENCLAST128,
28503 IX86_BUILTIN_AESDEC128,
28504 IX86_BUILTIN_AESDECLAST128,
28505 IX86_BUILTIN_AESIMC128,
28506 IX86_BUILTIN_AESKEYGENASSIST128,
28508   /* PCLMUL instruction.  */
28509 IX86_BUILTIN_PCLMULQDQ128,
28511   /* AVX.  */
28512 IX86_BUILTIN_ADDPD256,
28513 IX86_BUILTIN_ADDPS256,
28514 IX86_BUILTIN_ADDSUBPD256,
28515 IX86_BUILTIN_ADDSUBPS256,
28516 IX86_BUILTIN_ANDPD256,
28517 IX86_BUILTIN_ANDPS256,
28518 IX86_BUILTIN_ANDNPD256,
28519 IX86_BUILTIN_ANDNPS256,
28520 IX86_BUILTIN_BLENDPD256,
28521 IX86_BUILTIN_BLENDPS256,
28522 IX86_BUILTIN_BLENDVPD256,
28523 IX86_BUILTIN_BLENDVPS256,
28524 IX86_BUILTIN_DIVPD256,
28525 IX86_BUILTIN_DIVPS256,
28526 IX86_BUILTIN_DPPS256,
28527 IX86_BUILTIN_HADDPD256,
28528 IX86_BUILTIN_HADDPS256,
28529 IX86_BUILTIN_HSUBPD256,
28530 IX86_BUILTIN_HSUBPS256,
28531 IX86_BUILTIN_MAXPD256,
28532 IX86_BUILTIN_MAXPS256,
28533 IX86_BUILTIN_MINPD256,
28534 IX86_BUILTIN_MINPS256,
28535 IX86_BUILTIN_MULPD256,
28536 IX86_BUILTIN_MULPS256,
28537 IX86_BUILTIN_ORPD256,
28538 IX86_BUILTIN_ORPS256,
28539 IX86_BUILTIN_SHUFPD256,
28540 IX86_BUILTIN_SHUFPS256,
28541 IX86_BUILTIN_SUBPD256,
28542 IX86_BUILTIN_SUBPS256,
28543 IX86_BUILTIN_XORPD256,
28544 IX86_BUILTIN_XORPS256,
28545 IX86_BUILTIN_CMPSD,
28546 IX86_BUILTIN_CMPSS,
28547 IX86_BUILTIN_CMPPD,
28548 IX86_BUILTIN_CMPPS,
28549 IX86_BUILTIN_CMPPD256,
28550 IX86_BUILTIN_CMPPS256,
28551 IX86_BUILTIN_CVTDQ2PD256,
28552 IX86_BUILTIN_CVTDQ2PS256,
28553 IX86_BUILTIN_CVTPD2PS256,
28554 IX86_BUILTIN_CVTPS2DQ256,
28555 IX86_BUILTIN_CVTPS2PD256,
28556 IX86_BUILTIN_CVTTPD2DQ256,
28557 IX86_BUILTIN_CVTPD2DQ256,
28558 IX86_BUILTIN_CVTTPS2DQ256,
28559 IX86_BUILTIN_EXTRACTF128PD256,
28560 IX86_BUILTIN_EXTRACTF128PS256,
28561 IX86_BUILTIN_EXTRACTF128SI256,
28562 IX86_BUILTIN_VZEROALL,
28563 IX86_BUILTIN_VZEROUPPER,
28564 IX86_BUILTIN_VPERMILVARPD,
28565 IX86_BUILTIN_VPERMILVARPS,
28566 IX86_BUILTIN_VPERMILVARPD256,
28567 IX86_BUILTIN_VPERMILVARPS256,
28568 IX86_BUILTIN_VPERMILPD,
28569 IX86_BUILTIN_VPERMILPS,
28570 IX86_BUILTIN_VPERMILPD256,
28571 IX86_BUILTIN_VPERMILPS256,
28572 IX86_BUILTIN_VPERMIL2PD,
28573 IX86_BUILTIN_VPERMIL2PS,
28574 IX86_BUILTIN_VPERMIL2PD256,
28575 IX86_BUILTIN_VPERMIL2PS256,
28576 IX86_BUILTIN_VPERM2F128PD256,
28577 IX86_BUILTIN_VPERM2F128PS256,
28578 IX86_BUILTIN_VPERM2F128SI256,
28579 IX86_BUILTIN_VBROADCASTSS,
28580 IX86_BUILTIN_VBROADCASTSD256,
28581 IX86_BUILTIN_VBROADCASTSS256,
28582 IX86_BUILTIN_VBROADCASTPD256,
28583 IX86_BUILTIN_VBROADCASTPS256,
28584 IX86_BUILTIN_VINSERTF128PD256,
28585 IX86_BUILTIN_VINSERTF128PS256,
28586 IX86_BUILTIN_VINSERTF128SI256,
28587 IX86_BUILTIN_LOADUPD256,
28588 IX86_BUILTIN_LOADUPS256,
28589 IX86_BUILTIN_STOREUPD256,
28590 IX86_BUILTIN_STOREUPS256,
28591 IX86_BUILTIN_LDDQU256,
28592 IX86_BUILTIN_MOVNTDQ256,
28593 IX86_BUILTIN_MOVNTPD256,
28594 IX86_BUILTIN_MOVNTPS256,
28595 IX86_BUILTIN_LOADDQU256,
28596 IX86_BUILTIN_STOREDQU256,
28597 IX86_BUILTIN_MASKLOADPD,
28598 IX86_BUILTIN_MASKLOADPS,
28599 IX86_BUILTIN_MASKSTOREPD,
28600 IX86_BUILTIN_MASKSTOREPS,
28601 IX86_BUILTIN_MASKLOADPD256,
28602 IX86_BUILTIN_MASKLOADPS256,
28603 IX86_BUILTIN_MASKSTOREPD256,
28604 IX86_BUILTIN_MASKSTOREPS256,
28605 IX86_BUILTIN_MOVSHDUP256,
28606 IX86_BUILTIN_MOVSLDUP256,
28607 IX86_BUILTIN_MOVDDUP256,
28609 IX86_BUILTIN_SQRTPD256,
28610 IX86_BUILTIN_SQRTPS256,
28611 IX86_BUILTIN_SQRTPS_NR256,
28612 IX86_BUILTIN_RSQRTPS256,
28613 IX86_BUILTIN_RSQRTPS_NR256,
28615 IX86_BUILTIN_RCPPS256,
28617 IX86_BUILTIN_ROUNDPD256,
28618 IX86_BUILTIN_ROUNDPS256,
28620 IX86_BUILTIN_FLOORPD256,
28621 IX86_BUILTIN_CEILPD256,
28622 IX86_BUILTIN_TRUNCPD256,
28623 IX86_BUILTIN_RINTPD256,
28624 IX86_BUILTIN_ROUNDPD_AZ256,
28626 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28627 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28628 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28630 IX86_BUILTIN_FLOORPS256,
28631 IX86_BUILTIN_CEILPS256,
28632 IX86_BUILTIN_TRUNCPS256,
28633 IX86_BUILTIN_RINTPS256,
28634 IX86_BUILTIN_ROUNDPS_AZ256,
28636 IX86_BUILTIN_FLOORPS_SFIX256,
28637 IX86_BUILTIN_CEILPS_SFIX256,
28638 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28640 IX86_BUILTIN_UNPCKHPD256,
28641 IX86_BUILTIN_UNPCKLPD256,
28642 IX86_BUILTIN_UNPCKHPS256,
28643 IX86_BUILTIN_UNPCKLPS256,
28645 IX86_BUILTIN_SI256_SI,
28646 IX86_BUILTIN_PS256_PS,
28647 IX86_BUILTIN_PD256_PD,
28648 IX86_BUILTIN_SI_SI256,
28649 IX86_BUILTIN_PS_PS256,
28650 IX86_BUILTIN_PD_PD256,
28652 IX86_BUILTIN_VTESTZPD,
28653 IX86_BUILTIN_VTESTCPD,
28654 IX86_BUILTIN_VTESTNZCPD,
28655 IX86_BUILTIN_VTESTZPS,
28656 IX86_BUILTIN_VTESTCPS,
28657 IX86_BUILTIN_VTESTNZCPS,
28658 IX86_BUILTIN_VTESTZPD256,
28659 IX86_BUILTIN_VTESTCPD256,
28660 IX86_BUILTIN_VTESTNZCPD256,
28661 IX86_BUILTIN_VTESTZPS256,
28662 IX86_BUILTIN_VTESTCPS256,
28663 IX86_BUILTIN_VTESTNZCPS256,
28664 IX86_BUILTIN_PTESTZ256,
28665 IX86_BUILTIN_PTESTC256,
28666 IX86_BUILTIN_PTESTNZC256,
28668 IX86_BUILTIN_MOVMSKPD256,
28669 IX86_BUILTIN_MOVMSKPS256,
28671   /* AVX2.  */
28672 IX86_BUILTIN_MPSADBW256,
28673 IX86_BUILTIN_PABSB256,
28674 IX86_BUILTIN_PABSW256,
28675 IX86_BUILTIN_PABSD256,
28676 IX86_BUILTIN_PACKSSDW256,
28677 IX86_BUILTIN_PACKSSWB256,
28678 IX86_BUILTIN_PACKUSDW256,
28679 IX86_BUILTIN_PACKUSWB256,
28680 IX86_BUILTIN_PADDB256,
28681 IX86_BUILTIN_PADDW256,
28682 IX86_BUILTIN_PADDD256,
28683 IX86_BUILTIN_PADDQ256,
28684 IX86_BUILTIN_PADDSB256,
28685 IX86_BUILTIN_PADDSW256,
28686 IX86_BUILTIN_PADDUSB256,
28687 IX86_BUILTIN_PADDUSW256,
28688 IX86_BUILTIN_PALIGNR256,
28689 IX86_BUILTIN_AND256I,
28690 IX86_BUILTIN_ANDNOT256I,
28691 IX86_BUILTIN_PAVGB256,
28692 IX86_BUILTIN_PAVGW256,
28693 IX86_BUILTIN_PBLENDVB256,
28694 IX86_BUILTIN_PBLENDVW256,
28695 IX86_BUILTIN_PCMPEQB256,
28696 IX86_BUILTIN_PCMPEQW256,
28697 IX86_BUILTIN_PCMPEQD256,
28698 IX86_BUILTIN_PCMPEQQ256,
28699 IX86_BUILTIN_PCMPGTB256,
28700 IX86_BUILTIN_PCMPGTW256,
28701 IX86_BUILTIN_PCMPGTD256,
28702 IX86_BUILTIN_PCMPGTQ256,
28703 IX86_BUILTIN_PHADDW256,
28704 IX86_BUILTIN_PHADDD256,
28705 IX86_BUILTIN_PHADDSW256,
28706 IX86_BUILTIN_PHSUBW256,
28707 IX86_BUILTIN_PHSUBD256,
28708 IX86_BUILTIN_PHSUBSW256,
28709 IX86_BUILTIN_PMADDUBSW256,
28710 IX86_BUILTIN_PMADDWD256,
28711 IX86_BUILTIN_PMAXSB256,
28712 IX86_BUILTIN_PMAXSW256,
28713 IX86_BUILTIN_PMAXSD256,
28714 IX86_BUILTIN_PMAXUB256,
28715 IX86_BUILTIN_PMAXUW256,
28716 IX86_BUILTIN_PMAXUD256,
28717 IX86_BUILTIN_PMINSB256,
28718 IX86_BUILTIN_PMINSW256,
28719 IX86_BUILTIN_PMINSD256,
28720 IX86_BUILTIN_PMINUB256,
28721 IX86_BUILTIN_PMINUW256,
28722 IX86_BUILTIN_PMINUD256,
28723 IX86_BUILTIN_PMOVMSKB256,
28724 IX86_BUILTIN_PMOVSXBW256,
28725 IX86_BUILTIN_PMOVSXBD256,
28726 IX86_BUILTIN_PMOVSXBQ256,
28727 IX86_BUILTIN_PMOVSXWD256,
28728 IX86_BUILTIN_PMOVSXWQ256,
28729 IX86_BUILTIN_PMOVSXDQ256,
28730 IX86_BUILTIN_PMOVZXBW256,
28731 IX86_BUILTIN_PMOVZXBD256,
28732 IX86_BUILTIN_PMOVZXBQ256,
28733 IX86_BUILTIN_PMOVZXWD256,
28734 IX86_BUILTIN_PMOVZXWQ256,
28735 IX86_BUILTIN_PMOVZXDQ256,
28736 IX86_BUILTIN_PMULDQ256,
28737 IX86_BUILTIN_PMULHRSW256,
28738 IX86_BUILTIN_PMULHUW256,
28739 IX86_BUILTIN_PMULHW256,
28740 IX86_BUILTIN_PMULLW256,
28741 IX86_BUILTIN_PMULLD256,
28742 IX86_BUILTIN_PMULUDQ256,
28743 IX86_BUILTIN_POR256,
28744 IX86_BUILTIN_PSADBW256,
28745 IX86_BUILTIN_PSHUFB256,
28746 IX86_BUILTIN_PSHUFD256,
28747 IX86_BUILTIN_PSHUFHW256,
28748 IX86_BUILTIN_PSHUFLW256,
28749 IX86_BUILTIN_PSIGNB256,
28750 IX86_BUILTIN_PSIGNW256,
28751 IX86_BUILTIN_PSIGND256,
28752 IX86_BUILTIN_PSLLDQI256,
28753 IX86_BUILTIN_PSLLWI256,
28754 IX86_BUILTIN_PSLLW256,
28755 IX86_BUILTIN_PSLLDI256,
28756 IX86_BUILTIN_PSLLD256,
28757 IX86_BUILTIN_PSLLQI256,
28758 IX86_BUILTIN_PSLLQ256,
28759 IX86_BUILTIN_PSRAWI256,
28760 IX86_BUILTIN_PSRAW256,
28761 IX86_BUILTIN_PSRADI256,
28762 IX86_BUILTIN_PSRAD256,
28763 IX86_BUILTIN_PSRLDQI256,
28764 IX86_BUILTIN_PSRLWI256,
28765 IX86_BUILTIN_PSRLW256,
28766 IX86_BUILTIN_PSRLDI256,
28767 IX86_BUILTIN_PSRLD256,
28768 IX86_BUILTIN_PSRLQI256,
28769 IX86_BUILTIN_PSRLQ256,
28770 IX86_BUILTIN_PSUBB256,
28771 IX86_BUILTIN_PSUBW256,
28772 IX86_BUILTIN_PSUBD256,
28773 IX86_BUILTIN_PSUBQ256,
28774 IX86_BUILTIN_PSUBSB256,
28775 IX86_BUILTIN_PSUBSW256,
28776 IX86_BUILTIN_PSUBUSB256,
28777 IX86_BUILTIN_PSUBUSW256,
28778 IX86_BUILTIN_PUNPCKHBW256,
28779 IX86_BUILTIN_PUNPCKHWD256,
28780 IX86_BUILTIN_PUNPCKHDQ256,
28781 IX86_BUILTIN_PUNPCKHQDQ256,
28782 IX86_BUILTIN_PUNPCKLBW256,
28783 IX86_BUILTIN_PUNPCKLWD256,
28784 IX86_BUILTIN_PUNPCKLDQ256,
28785 IX86_BUILTIN_PUNPCKLQDQ256,
28786 IX86_BUILTIN_PXOR256,
28787 IX86_BUILTIN_MOVNTDQA256,
28788 IX86_BUILTIN_VBROADCASTSS_PS,
28789 IX86_BUILTIN_VBROADCASTSS_PS256,
28790 IX86_BUILTIN_VBROADCASTSD_PD256,
28791 IX86_BUILTIN_VBROADCASTSI256,
28792 IX86_BUILTIN_PBLENDD256,
28793 IX86_BUILTIN_PBLENDD128,
28794 IX86_BUILTIN_PBROADCASTB256,
28795 IX86_BUILTIN_PBROADCASTW256,
28796 IX86_BUILTIN_PBROADCASTD256,
28797 IX86_BUILTIN_PBROADCASTQ256,
28798 IX86_BUILTIN_PBROADCASTB128,
28799 IX86_BUILTIN_PBROADCASTW128,
28800 IX86_BUILTIN_PBROADCASTD128,
28801 IX86_BUILTIN_PBROADCASTQ128,
28802 IX86_BUILTIN_VPERMVARSI256,
28803 IX86_BUILTIN_VPERMDF256,
28804 IX86_BUILTIN_VPERMVARSF256,
28805 IX86_BUILTIN_VPERMDI256,
28806 IX86_BUILTIN_VPERMTI256,
28807 IX86_BUILTIN_VEXTRACT128I256,
28808 IX86_BUILTIN_VINSERT128I256,
28809 IX86_BUILTIN_MASKLOADD,
28810 IX86_BUILTIN_MASKLOADQ,
28811 IX86_BUILTIN_MASKLOADD256,
28812 IX86_BUILTIN_MASKLOADQ256,
28813 IX86_BUILTIN_MASKSTORED,
28814 IX86_BUILTIN_MASKSTOREQ,
28815 IX86_BUILTIN_MASKSTORED256,
28816 IX86_BUILTIN_MASKSTOREQ256,
28817 IX86_BUILTIN_PSLLVV4DI,
28818 IX86_BUILTIN_PSLLVV2DI,
28819 IX86_BUILTIN_PSLLVV8SI,
28820 IX86_BUILTIN_PSLLVV4SI,
28821 IX86_BUILTIN_PSRAVV8SI,
28822 IX86_BUILTIN_PSRAVV4SI,
28823 IX86_BUILTIN_PSRLVV4DI,
28824 IX86_BUILTIN_PSRLVV2DI,
28825 IX86_BUILTIN_PSRLVV8SI,
28826 IX86_BUILTIN_PSRLVV4SI,
28828 IX86_BUILTIN_GATHERSIV2DF,
28829 IX86_BUILTIN_GATHERSIV4DF,
28830 IX86_BUILTIN_GATHERDIV2DF,
28831 IX86_BUILTIN_GATHERDIV4DF,
28832 IX86_BUILTIN_GATHERSIV4SF,
28833 IX86_BUILTIN_GATHERSIV8SF,
28834 IX86_BUILTIN_GATHERDIV4SF,
28835 IX86_BUILTIN_GATHERDIV8SF,
28836 IX86_BUILTIN_GATHERSIV2DI,
28837 IX86_BUILTIN_GATHERSIV4DI,
28838 IX86_BUILTIN_GATHERDIV2DI,
28839 IX86_BUILTIN_GATHERDIV4DI,
28840 IX86_BUILTIN_GATHERSIV4SI,
28841 IX86_BUILTIN_GATHERSIV8SI,
28842 IX86_BUILTIN_GATHERDIV4SI,
28843 IX86_BUILTIN_GATHERDIV8SI,
28845   /* AVX512F.  */
28846 IX86_BUILTIN_SI512_SI256,
28847 IX86_BUILTIN_PD512_PD256,
28848 IX86_BUILTIN_PS512_PS256,
28849 IX86_BUILTIN_SI512_SI,
28850 IX86_BUILTIN_PD512_PD,
28851 IX86_BUILTIN_PS512_PS,
28852 IX86_BUILTIN_ADDPD512,
28853 IX86_BUILTIN_ADDPS512,
28854 IX86_BUILTIN_ADDSD_ROUND,
28855 IX86_BUILTIN_ADDSS_ROUND,
28856 IX86_BUILTIN_ALIGND512,
28857 IX86_BUILTIN_ALIGNQ512,
28858 IX86_BUILTIN_BLENDMD512,
28859 IX86_BUILTIN_BLENDMPD512,
28860 IX86_BUILTIN_BLENDMPS512,
28861 IX86_BUILTIN_BLENDMQ512,
28862 IX86_BUILTIN_BROADCASTF32X4_512,
28863 IX86_BUILTIN_BROADCASTF64X4_512,
28864 IX86_BUILTIN_BROADCASTI32X4_512,
28865 IX86_BUILTIN_BROADCASTI64X4_512,
28866 IX86_BUILTIN_BROADCASTSD512,
28867 IX86_BUILTIN_BROADCASTSS512,
28868 IX86_BUILTIN_CMPD512,
28869 IX86_BUILTIN_CMPPD512,
28870 IX86_BUILTIN_CMPPS512,
28871 IX86_BUILTIN_CMPQ512,
28872 IX86_BUILTIN_CMPSD_MASK,
28873 IX86_BUILTIN_CMPSS_MASK,
28874 IX86_BUILTIN_COMIDF,
28875 IX86_BUILTIN_COMISF,
28876 IX86_BUILTIN_COMPRESSPD512,
28877 IX86_BUILTIN_COMPRESSPDSTORE512,
28878 IX86_BUILTIN_COMPRESSPS512,
28879 IX86_BUILTIN_COMPRESSPSSTORE512,
28880 IX86_BUILTIN_CVTDQ2PD512,
28881 IX86_BUILTIN_CVTDQ2PS512,
28882 IX86_BUILTIN_CVTPD2DQ512,
28883 IX86_BUILTIN_CVTPD2PS512,
28884 IX86_BUILTIN_CVTPD2UDQ512,
28885 IX86_BUILTIN_CVTPH2PS512,
28886 IX86_BUILTIN_CVTPS2DQ512,
28887 IX86_BUILTIN_CVTPS2PD512,
28888 IX86_BUILTIN_CVTPS2PH512,
28889 IX86_BUILTIN_CVTPS2UDQ512,
28890 IX86_BUILTIN_CVTSD2SS_ROUND,
28891 IX86_BUILTIN_CVTSI2SD64,
28892 IX86_BUILTIN_CVTSI2SS32,
28893 IX86_BUILTIN_CVTSI2SS64,
28894 IX86_BUILTIN_CVTSS2SD_ROUND,
28895 IX86_BUILTIN_CVTTPD2DQ512,
28896 IX86_BUILTIN_CVTTPD2UDQ512,
28897 IX86_BUILTIN_CVTTPS2DQ512,
28898 IX86_BUILTIN_CVTTPS2UDQ512,
28899 IX86_BUILTIN_CVTUDQ2PD512,
28900 IX86_BUILTIN_CVTUDQ2PS512,
28901 IX86_BUILTIN_CVTUSI2SD32,
28902 IX86_BUILTIN_CVTUSI2SD64,
28903 IX86_BUILTIN_CVTUSI2SS32,
28904 IX86_BUILTIN_CVTUSI2SS64,
28905 IX86_BUILTIN_DIVPD512,
28906 IX86_BUILTIN_DIVPS512,
28907 IX86_BUILTIN_DIVSD_ROUND,
28908 IX86_BUILTIN_DIVSS_ROUND,
28909 IX86_BUILTIN_EXPANDPD512,
28910 IX86_BUILTIN_EXPANDPD512Z,
28911 IX86_BUILTIN_EXPANDPDLOAD512,
28912 IX86_BUILTIN_EXPANDPDLOAD512Z,
28913 IX86_BUILTIN_EXPANDPS512,
28914 IX86_BUILTIN_EXPANDPS512Z,
28915 IX86_BUILTIN_EXPANDPSLOAD512,
28916 IX86_BUILTIN_EXPANDPSLOAD512Z,
28917 IX86_BUILTIN_EXTRACTF32X4,
28918 IX86_BUILTIN_EXTRACTF64X4,
28919 IX86_BUILTIN_EXTRACTI32X4,
28920 IX86_BUILTIN_EXTRACTI64X4,
28921 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28922 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28923 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28924 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28925 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28926 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28927 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28928 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28929 IX86_BUILTIN_GETEXPPD512,
28930 IX86_BUILTIN_GETEXPPS512,
28931 IX86_BUILTIN_GETEXPSD128,
28932 IX86_BUILTIN_GETEXPSS128,
28933 IX86_BUILTIN_GETMANTPD512,
28934 IX86_BUILTIN_GETMANTPS512,
28935 IX86_BUILTIN_GETMANTSD128,
28936 IX86_BUILTIN_GETMANTSS128,
28937 IX86_BUILTIN_INSERTF32X4,
28938 IX86_BUILTIN_INSERTF64X4,
28939 IX86_BUILTIN_INSERTI32X4,
28940 IX86_BUILTIN_INSERTI64X4,
28941 IX86_BUILTIN_LOADAPD512,
28942 IX86_BUILTIN_LOADAPS512,
28943 IX86_BUILTIN_LOADDQUDI512,
28944 IX86_BUILTIN_LOADDQUSI512,
28945 IX86_BUILTIN_LOADUPD512,
28946 IX86_BUILTIN_LOADUPS512,
28947 IX86_BUILTIN_MAXPD512,
28948 IX86_BUILTIN_MAXPS512,
28949 IX86_BUILTIN_MAXSD_ROUND,
28950 IX86_BUILTIN_MAXSS_ROUND,
28951 IX86_BUILTIN_MINPD512,
28952 IX86_BUILTIN_MINPS512,
28953 IX86_BUILTIN_MINSD_ROUND,
28954 IX86_BUILTIN_MINSS_ROUND,
28955 IX86_BUILTIN_MOVAPD512,
28956 IX86_BUILTIN_MOVAPS512,
28957 IX86_BUILTIN_MOVDDUP512,
28958 IX86_BUILTIN_MOVDQA32LOAD512,
28959 IX86_BUILTIN_MOVDQA32STORE512,
28960 IX86_BUILTIN_MOVDQA32_512,
28961 IX86_BUILTIN_MOVDQA64LOAD512,
28962 IX86_BUILTIN_MOVDQA64STORE512,
28963 IX86_BUILTIN_MOVDQA64_512,
28964 IX86_BUILTIN_MOVNTDQ512,
28965 IX86_BUILTIN_MOVNTDQA512,
28966 IX86_BUILTIN_MOVNTPD512,
28967 IX86_BUILTIN_MOVNTPS512,
28968 IX86_BUILTIN_MOVSHDUP512,
28969 IX86_BUILTIN_MOVSLDUP512,
28970 IX86_BUILTIN_MULPD512,
28971 IX86_BUILTIN_MULPS512,
28972 IX86_BUILTIN_MULSD_ROUND,
28973 IX86_BUILTIN_MULSS_ROUND,
28974 IX86_BUILTIN_PABSD512,
28975 IX86_BUILTIN_PABSQ512,
28976 IX86_BUILTIN_PADDD512,
28977 IX86_BUILTIN_PADDQ512,
28978 IX86_BUILTIN_PANDD512,
28979 IX86_BUILTIN_PANDND512,
28980 IX86_BUILTIN_PANDNQ512,
28981 IX86_BUILTIN_PANDQ512,
28982 IX86_BUILTIN_PBROADCASTD512,
28983 IX86_BUILTIN_PBROADCASTD512_GPR,
28984 IX86_BUILTIN_PBROADCASTMB512,
28985 IX86_BUILTIN_PBROADCASTMW512,
28986 IX86_BUILTIN_PBROADCASTQ512,
28987 IX86_BUILTIN_PBROADCASTQ512_GPR,
28988 IX86_BUILTIN_PCMPEQD512_MASK,
28989 IX86_BUILTIN_PCMPEQQ512_MASK,
28990 IX86_BUILTIN_PCMPGTD512_MASK,
28991 IX86_BUILTIN_PCMPGTQ512_MASK,
28992 IX86_BUILTIN_PCOMPRESSD512,
28993 IX86_BUILTIN_PCOMPRESSDSTORE512,
28994 IX86_BUILTIN_PCOMPRESSQ512,
28995 IX86_BUILTIN_PCOMPRESSQSTORE512,
28996 IX86_BUILTIN_PEXPANDD512,
28997 IX86_BUILTIN_PEXPANDD512Z,
28998 IX86_BUILTIN_PEXPANDDLOAD512,
28999 IX86_BUILTIN_PEXPANDDLOAD512Z,
29000 IX86_BUILTIN_PEXPANDQ512,
29001 IX86_BUILTIN_PEXPANDQ512Z,
29002 IX86_BUILTIN_PEXPANDQLOAD512,
29003 IX86_BUILTIN_PEXPANDQLOAD512Z,
29004 IX86_BUILTIN_PMAXSD512,
29005 IX86_BUILTIN_PMAXSQ512,
29006 IX86_BUILTIN_PMAXUD512,
29007 IX86_BUILTIN_PMAXUQ512,
29008 IX86_BUILTIN_PMINSD512,
29009 IX86_BUILTIN_PMINSQ512,
29010 IX86_BUILTIN_PMINUD512,
29011 IX86_BUILTIN_PMINUQ512,
29012 IX86_BUILTIN_PMOVDB512,
29013 IX86_BUILTIN_PMOVDB512_MEM,
29014 IX86_BUILTIN_PMOVDW512,
29015 IX86_BUILTIN_PMOVDW512_MEM,
29016 IX86_BUILTIN_PMOVQB512,
29017 IX86_BUILTIN_PMOVQB512_MEM,
29018 IX86_BUILTIN_PMOVQD512,
29019 IX86_BUILTIN_PMOVQD512_MEM,
29020 IX86_BUILTIN_PMOVQW512,
29021 IX86_BUILTIN_PMOVQW512_MEM,
29022 IX86_BUILTIN_PMOVSDB512,
29023 IX86_BUILTIN_PMOVSDB512_MEM,
29024 IX86_BUILTIN_PMOVSDW512,
29025 IX86_BUILTIN_PMOVSDW512_MEM,
29026 IX86_BUILTIN_PMOVSQB512,
29027 IX86_BUILTIN_PMOVSQB512_MEM,
29028 IX86_BUILTIN_PMOVSQD512,
29029 IX86_BUILTIN_PMOVSQD512_MEM,
29030 IX86_BUILTIN_PMOVSQW512,
29031 IX86_BUILTIN_PMOVSQW512_MEM,
29032 IX86_BUILTIN_PMOVSXBD512,
29033 IX86_BUILTIN_PMOVSXBQ512,
29034 IX86_BUILTIN_PMOVSXDQ512,
29035 IX86_BUILTIN_PMOVSXWD512,
29036 IX86_BUILTIN_PMOVSXWQ512,
29037 IX86_BUILTIN_PMOVUSDB512,
29038 IX86_BUILTIN_PMOVUSDB512_MEM,
29039 IX86_BUILTIN_PMOVUSDW512,
29040 IX86_BUILTIN_PMOVUSDW512_MEM,
29041 IX86_BUILTIN_PMOVUSQB512,
29042 IX86_BUILTIN_PMOVUSQB512_MEM,
29043 IX86_BUILTIN_PMOVUSQD512,
29044 IX86_BUILTIN_PMOVUSQD512_MEM,
29045 IX86_BUILTIN_PMOVUSQW512,
29046 IX86_BUILTIN_PMOVUSQW512_MEM,
29047 IX86_BUILTIN_PMOVZXBD512,
29048 IX86_BUILTIN_PMOVZXBQ512,
29049 IX86_BUILTIN_PMOVZXDQ512,
29050 IX86_BUILTIN_PMOVZXWD512,
29051 IX86_BUILTIN_PMOVZXWQ512,
29052 IX86_BUILTIN_PMULDQ512,
29053 IX86_BUILTIN_PMULLD512,
29054 IX86_BUILTIN_PMULUDQ512,
29055 IX86_BUILTIN_PORD512,
29056 IX86_BUILTIN_PORQ512,
29057 IX86_BUILTIN_PROLD512,
29058 IX86_BUILTIN_PROLQ512,
29059 IX86_BUILTIN_PROLVD512,
29060 IX86_BUILTIN_PROLVQ512,
29061 IX86_BUILTIN_PRORD512,
29062 IX86_BUILTIN_PRORQ512,
29063 IX86_BUILTIN_PRORVD512,
29064 IX86_BUILTIN_PRORVQ512,
29065 IX86_BUILTIN_PSHUFD512,
29066 IX86_BUILTIN_PSLLD512,
29067 IX86_BUILTIN_PSLLDI512,
29068 IX86_BUILTIN_PSLLQ512,
29069 IX86_BUILTIN_PSLLQI512,
29070 IX86_BUILTIN_PSLLVV16SI,
29071 IX86_BUILTIN_PSLLVV8DI,
29072 IX86_BUILTIN_PSRAD512,
29073 IX86_BUILTIN_PSRADI512,
29074 IX86_BUILTIN_PSRAQ512,
29075 IX86_BUILTIN_PSRAQI512,
29076 IX86_BUILTIN_PSRAVV16SI,
29077 IX86_BUILTIN_PSRAVV8DI,
29078 IX86_BUILTIN_PSRLD512,
29079 IX86_BUILTIN_PSRLDI512,
29080 IX86_BUILTIN_PSRLQ512,
29081 IX86_BUILTIN_PSRLQI512,
29082 IX86_BUILTIN_PSRLVV16SI,
29083 IX86_BUILTIN_PSRLVV8DI,
29084 IX86_BUILTIN_PSUBD512,
29085 IX86_BUILTIN_PSUBQ512,
29086 IX86_BUILTIN_PTESTMD512,
29087 IX86_BUILTIN_PTESTMQ512,
29088 IX86_BUILTIN_PTESTNMD512,
29089 IX86_BUILTIN_PTESTNMQ512,
29090 IX86_BUILTIN_PUNPCKHDQ512,
29091 IX86_BUILTIN_PUNPCKHQDQ512,
29092 IX86_BUILTIN_PUNPCKLDQ512,
29093 IX86_BUILTIN_PUNPCKLQDQ512,
29094 IX86_BUILTIN_PXORD512,
29095 IX86_BUILTIN_PXORQ512,
29096 IX86_BUILTIN_RCP14PD512,
29097 IX86_BUILTIN_RCP14PS512,
29098 IX86_BUILTIN_RCP14SD,
29099 IX86_BUILTIN_RCP14SS,
29100 IX86_BUILTIN_RNDSCALEPD,
29101 IX86_BUILTIN_RNDSCALEPS,
29102 IX86_BUILTIN_RNDSCALESD,
29103 IX86_BUILTIN_RNDSCALESS,
29104 IX86_BUILTIN_RSQRT14PD512,
29105 IX86_BUILTIN_RSQRT14PS512,
29106 IX86_BUILTIN_RSQRT14SD,
29107 IX86_BUILTIN_RSQRT14SS,
29108 IX86_BUILTIN_SCALEFPD512,
29109 IX86_BUILTIN_SCALEFPS512,
29110 IX86_BUILTIN_SCALEFSD,
29111 IX86_BUILTIN_SCALEFSS,
29112 IX86_BUILTIN_SHUFPD512,
29113 IX86_BUILTIN_SHUFPS512,
29114 IX86_BUILTIN_SHUF_F32x4,
29115 IX86_BUILTIN_SHUF_F64x2,
29116 IX86_BUILTIN_SHUF_I32x4,
29117 IX86_BUILTIN_SHUF_I64x2,
29118 IX86_BUILTIN_SQRTPD512,
29119 IX86_BUILTIN_SQRTPD512_MASK,
29120 IX86_BUILTIN_SQRTPS512_MASK,
29121 IX86_BUILTIN_SQRTPS_NR512,
29122 IX86_BUILTIN_SQRTSD_ROUND,
29123 IX86_BUILTIN_SQRTSS_ROUND,
29124 IX86_BUILTIN_STOREAPD512,
29125 IX86_BUILTIN_STOREAPS512,
29126 IX86_BUILTIN_STOREDQUDI512,
29127 IX86_BUILTIN_STOREDQUSI512,
29128 IX86_BUILTIN_STOREUPD512,
29129 IX86_BUILTIN_STOREUPS512,
29130 IX86_BUILTIN_SUBPD512,
29131 IX86_BUILTIN_SUBPS512,
29132 IX86_BUILTIN_SUBSD_ROUND,
29133 IX86_BUILTIN_SUBSS_ROUND,
29134 IX86_BUILTIN_UCMPD512,
29135 IX86_BUILTIN_UCMPQ512,
29136 IX86_BUILTIN_UNPCKHPD512,
29137 IX86_BUILTIN_UNPCKHPS512,
29138 IX86_BUILTIN_UNPCKLPD512,
29139 IX86_BUILTIN_UNPCKLPS512,
29140 IX86_BUILTIN_VCVTSD2SI32,
29141 IX86_BUILTIN_VCVTSD2SI64,
29142 IX86_BUILTIN_VCVTSD2USI32,
29143 IX86_BUILTIN_VCVTSD2USI64,
29144 IX86_BUILTIN_VCVTSS2SI32,
29145 IX86_BUILTIN_VCVTSS2SI64,
29146 IX86_BUILTIN_VCVTSS2USI32,
29147 IX86_BUILTIN_VCVTSS2USI64,
29148 IX86_BUILTIN_VCVTTSD2SI32,
29149 IX86_BUILTIN_VCVTTSD2SI64,
29150 IX86_BUILTIN_VCVTTSD2USI32,
29151 IX86_BUILTIN_VCVTTSD2USI64,
29152 IX86_BUILTIN_VCVTTSS2SI32,
29153 IX86_BUILTIN_VCVTTSS2SI64,
29154 IX86_BUILTIN_VCVTTSS2USI32,
29155 IX86_BUILTIN_VCVTTSS2USI64,
29156 IX86_BUILTIN_VFMADDPD512_MASK,
29157 IX86_BUILTIN_VFMADDPD512_MASK3,
29158 IX86_BUILTIN_VFMADDPD512_MASKZ,
29159 IX86_BUILTIN_VFMADDPS512_MASK,
29160 IX86_BUILTIN_VFMADDPS512_MASK3,
29161 IX86_BUILTIN_VFMADDPS512_MASKZ,
29162 IX86_BUILTIN_VFMADDSD3_ROUND,
29163 IX86_BUILTIN_VFMADDSS3_ROUND,
29164 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29165 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29166 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29167 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29168 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29169 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29170 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29171 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29172 IX86_BUILTIN_VFMSUBPD512_MASK3,
29173 IX86_BUILTIN_VFMSUBPS512_MASK3,
29174 IX86_BUILTIN_VFMSUBSD3_MASK3,
29175 IX86_BUILTIN_VFMSUBSS3_MASK3,
29176 IX86_BUILTIN_VFNMADDPD512_MASK,
29177 IX86_BUILTIN_VFNMADDPS512_MASK,
29178 IX86_BUILTIN_VFNMSUBPD512_MASK,
29179 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29180 IX86_BUILTIN_VFNMSUBPS512_MASK,
29181 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29182 IX86_BUILTIN_VPCLZCNTD512,
29183 IX86_BUILTIN_VPCLZCNTQ512,
29184 IX86_BUILTIN_VPCONFLICTD512,
29185 IX86_BUILTIN_VPCONFLICTQ512,
29186 IX86_BUILTIN_VPERMDF512,
29187 IX86_BUILTIN_VPERMDI512,
29188 IX86_BUILTIN_VPERMI2VARD512,
29189 IX86_BUILTIN_VPERMI2VARPD512,
29190 IX86_BUILTIN_VPERMI2VARPS512,
29191 IX86_BUILTIN_VPERMI2VARQ512,
29192 IX86_BUILTIN_VPERMILPD512,
29193 IX86_BUILTIN_VPERMILPS512,
29194 IX86_BUILTIN_VPERMILVARPD512,
29195 IX86_BUILTIN_VPERMILVARPS512,
29196 IX86_BUILTIN_VPERMT2VARD512,
29197 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29198 IX86_BUILTIN_VPERMT2VARPD512,
29199 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29200 IX86_BUILTIN_VPERMT2VARPS512,
29201 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29202 IX86_BUILTIN_VPERMT2VARQ512,
29203 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29204 IX86_BUILTIN_VPERMVARDF512,
29205 IX86_BUILTIN_VPERMVARDI512,
29206 IX86_BUILTIN_VPERMVARSF512,
29207 IX86_BUILTIN_VPERMVARSI512,
29208 IX86_BUILTIN_VTERNLOGD512_MASK,
29209 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29210 IX86_BUILTIN_VTERNLOGQ512_MASK,
29211 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29213   /* Mask arithmetic operations.  */
29214 IX86_BUILTIN_KAND16,
29215 IX86_BUILTIN_KANDN16,
29216 IX86_BUILTIN_KNOT16,
29217 IX86_BUILTIN_KOR16,
29218 IX86_BUILTIN_KORTESTC16,
29219 IX86_BUILTIN_KORTESTZ16,
29220 IX86_BUILTIN_KUNPCKBW,
29221 IX86_BUILTIN_KXNOR16,
29222 IX86_BUILTIN_KXOR16,
29223 IX86_BUILTIN_KMOV16,
29225 /* AVX512VL. */
29226 IX86_BUILTIN_PMOVUSQD256_MEM,
29227 IX86_BUILTIN_PMOVUSQD128_MEM,
29228 IX86_BUILTIN_PMOVSQD256_MEM,
29229 IX86_BUILTIN_PMOVSQD128_MEM,
29230 IX86_BUILTIN_PMOVQD256_MEM,
29231 IX86_BUILTIN_PMOVQD128_MEM,
29232 IX86_BUILTIN_PMOVUSQW256_MEM,
29233 IX86_BUILTIN_PMOVUSQW128_MEM,
29234 IX86_BUILTIN_PMOVSQW256_MEM,
29235 IX86_BUILTIN_PMOVSQW128_MEM,
29236 IX86_BUILTIN_PMOVQW256_MEM,
29237 IX86_BUILTIN_PMOVQW128_MEM,
29238 IX86_BUILTIN_PMOVUSQB256_MEM,
29239 IX86_BUILTIN_PMOVUSQB128_MEM,
29240 IX86_BUILTIN_PMOVSQB256_MEM,
29241 IX86_BUILTIN_PMOVSQB128_MEM,
29242 IX86_BUILTIN_PMOVQB256_MEM,
29243 IX86_BUILTIN_PMOVQB128_MEM,
29244 IX86_BUILTIN_PMOVUSDW256_MEM,
29245 IX86_BUILTIN_PMOVUSDW128_MEM,
29246 IX86_BUILTIN_PMOVSDW256_MEM,
29247 IX86_BUILTIN_PMOVSDW128_MEM,
29248 IX86_BUILTIN_PMOVDW256_MEM,
29249 IX86_BUILTIN_PMOVDW128_MEM,
29250 IX86_BUILTIN_PMOVUSDB256_MEM,
29251 IX86_BUILTIN_PMOVUSDB128_MEM,
29252 IX86_BUILTIN_PMOVSDB256_MEM,
29253 IX86_BUILTIN_PMOVSDB128_MEM,
29254 IX86_BUILTIN_PMOVDB256_MEM,
29255 IX86_BUILTIN_PMOVDB128_MEM,
29256 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29257 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29258 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29259 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29260 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29261 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29262 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29263 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29264 IX86_BUILTIN_LOADAPD256_MASK,
29265 IX86_BUILTIN_LOADAPD128_MASK,
29266 IX86_BUILTIN_LOADAPS256_MASK,
29267 IX86_BUILTIN_LOADAPS128_MASK,
29268 IX86_BUILTIN_STOREAPD256_MASK,
29269 IX86_BUILTIN_STOREAPD128_MASK,
29270 IX86_BUILTIN_STOREAPS256_MASK,
29271 IX86_BUILTIN_STOREAPS128_MASK,
29272 IX86_BUILTIN_LOADUPD256_MASK,
29273 IX86_BUILTIN_LOADUPD128_MASK,
29274 IX86_BUILTIN_LOADUPS256_MASK,
29275 IX86_BUILTIN_LOADUPS128_MASK,
29276 IX86_BUILTIN_STOREUPD256_MASK,
29277 IX86_BUILTIN_STOREUPD128_MASK,
29278 IX86_BUILTIN_STOREUPS256_MASK,
29279 IX86_BUILTIN_STOREUPS128_MASK,
29280 IX86_BUILTIN_LOADDQUDI256_MASK,
29281 IX86_BUILTIN_LOADDQUDI128_MASK,
29282 IX86_BUILTIN_LOADDQUSI256_MASK,
29283 IX86_BUILTIN_LOADDQUSI128_MASK,
29284 IX86_BUILTIN_LOADDQUHI256_MASK,
29285 IX86_BUILTIN_LOADDQUHI128_MASK,
29286 IX86_BUILTIN_LOADDQUQI256_MASK,
29287 IX86_BUILTIN_LOADDQUQI128_MASK,
29288 IX86_BUILTIN_STOREDQUDI256_MASK,
29289 IX86_BUILTIN_STOREDQUDI128_MASK,
29290 IX86_BUILTIN_STOREDQUSI256_MASK,
29291 IX86_BUILTIN_STOREDQUSI128_MASK,
29292 IX86_BUILTIN_STOREDQUHI256_MASK,
29293 IX86_BUILTIN_STOREDQUHI128_MASK,
29294 IX86_BUILTIN_STOREDQUQI256_MASK,
29295 IX86_BUILTIN_STOREDQUQI128_MASK,
29296 IX86_BUILTIN_COMPRESSPDSTORE256,
29297 IX86_BUILTIN_COMPRESSPDSTORE128,
29298 IX86_BUILTIN_COMPRESSPSSTORE256,
29299 IX86_BUILTIN_COMPRESSPSSTORE128,
29300 IX86_BUILTIN_PCOMPRESSQSTORE256,
29301 IX86_BUILTIN_PCOMPRESSQSTORE128,
29302 IX86_BUILTIN_PCOMPRESSDSTORE256,
29303 IX86_BUILTIN_PCOMPRESSDSTORE128,
29304 IX86_BUILTIN_EXPANDPDLOAD256,
29305 IX86_BUILTIN_EXPANDPDLOAD128,
29306 IX86_BUILTIN_EXPANDPSLOAD256,
29307 IX86_BUILTIN_EXPANDPSLOAD128,
29308 IX86_BUILTIN_PEXPANDQLOAD256,
29309 IX86_BUILTIN_PEXPANDQLOAD128,
29310 IX86_BUILTIN_PEXPANDDLOAD256,
29311 IX86_BUILTIN_PEXPANDDLOAD128,
29312 IX86_BUILTIN_EXPANDPDLOAD256Z,
29313 IX86_BUILTIN_EXPANDPDLOAD128Z,
29314 IX86_BUILTIN_EXPANDPSLOAD256Z,
29315 IX86_BUILTIN_EXPANDPSLOAD128Z,
29316 IX86_BUILTIN_PEXPANDQLOAD256Z,
29317 IX86_BUILTIN_PEXPANDQLOAD128Z,
29318 IX86_BUILTIN_PEXPANDDLOAD256Z,
29319 IX86_BUILTIN_PEXPANDDLOAD128Z,
29320 IX86_BUILTIN_PALIGNR256_MASK,
29321 IX86_BUILTIN_PALIGNR128_MASK,
29322 IX86_BUILTIN_MOVDQA64_256_MASK,
29323 IX86_BUILTIN_MOVDQA64_128_MASK,
29324 IX86_BUILTIN_MOVDQA32_256_MASK,
29325 IX86_BUILTIN_MOVDQA32_128_MASK,
29326 IX86_BUILTIN_MOVAPD256_MASK,
29327 IX86_BUILTIN_MOVAPD128_MASK,
29328 IX86_BUILTIN_MOVAPS256_MASK,
29329 IX86_BUILTIN_MOVAPS128_MASK,
29330 IX86_BUILTIN_MOVDQUHI256_MASK,
29331 IX86_BUILTIN_MOVDQUHI128_MASK,
29332 IX86_BUILTIN_MOVDQUQI256_MASK,
29333 IX86_BUILTIN_MOVDQUQI128_MASK,
29334 IX86_BUILTIN_MINPS128_MASK,
29335 IX86_BUILTIN_MAXPS128_MASK,
29336 IX86_BUILTIN_MINPD128_MASK,
29337 IX86_BUILTIN_MAXPD128_MASK,
29338 IX86_BUILTIN_MAXPD256_MASK,
29339 IX86_BUILTIN_MAXPS256_MASK,
29340 IX86_BUILTIN_MINPD256_MASK,
29341 IX86_BUILTIN_MINPS256_MASK,
29342 IX86_BUILTIN_MULPS128_MASK,
29343 IX86_BUILTIN_DIVPS128_MASK,
29344 IX86_BUILTIN_MULPD128_MASK,
29345 IX86_BUILTIN_DIVPD128_MASK,
29346 IX86_BUILTIN_DIVPD256_MASK,
29347 IX86_BUILTIN_DIVPS256_MASK,
29348 IX86_BUILTIN_MULPD256_MASK,
29349 IX86_BUILTIN_MULPS256_MASK,
29350 IX86_BUILTIN_ADDPD128_MASK,
29351 IX86_BUILTIN_ADDPD256_MASK,
29352 IX86_BUILTIN_ADDPS128_MASK,
29353 IX86_BUILTIN_ADDPS256_MASK,
29354 IX86_BUILTIN_SUBPD128_MASK,
29355 IX86_BUILTIN_SUBPD256_MASK,
29356 IX86_BUILTIN_SUBPS128_MASK,
29357 IX86_BUILTIN_SUBPS256_MASK,
29358 IX86_BUILTIN_XORPD256_MASK,
29359 IX86_BUILTIN_XORPD128_MASK,
29360 IX86_BUILTIN_XORPS256_MASK,
29361 IX86_BUILTIN_XORPS128_MASK,
29362 IX86_BUILTIN_ORPD256_MASK,
29363 IX86_BUILTIN_ORPD128_MASK,
29364 IX86_BUILTIN_ORPS256_MASK,
29365 IX86_BUILTIN_ORPS128_MASK,
29366 IX86_BUILTIN_BROADCASTF32x2_256,
29367 IX86_BUILTIN_BROADCASTI32x2_256,
29368 IX86_BUILTIN_BROADCASTI32x2_128,
29369 IX86_BUILTIN_BROADCASTF64X2_256,
29370 IX86_BUILTIN_BROADCASTI64X2_256,
29371 IX86_BUILTIN_BROADCASTF32X4_256,
29372 IX86_BUILTIN_BROADCASTI32X4_256,
29373 IX86_BUILTIN_EXTRACTF32X4_256,
29374 IX86_BUILTIN_EXTRACTI32X4_256,
29375 IX86_BUILTIN_DBPSADBW256,
29376 IX86_BUILTIN_DBPSADBW128,
29377 IX86_BUILTIN_CVTTPD2QQ256,
29378 IX86_BUILTIN_CVTTPD2QQ128,
29379 IX86_BUILTIN_CVTTPD2UQQ256,
29380 IX86_BUILTIN_CVTTPD2UQQ128,
29381 IX86_BUILTIN_CVTPD2QQ256,
29382 IX86_BUILTIN_CVTPD2QQ128,
29383 IX86_BUILTIN_CVTPD2UQQ256,
29384 IX86_BUILTIN_CVTPD2UQQ128,
29385 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29386 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29387 IX86_BUILTIN_CVTTPS2QQ256,
29388 IX86_BUILTIN_CVTTPS2QQ128,
29389 IX86_BUILTIN_CVTTPS2UQQ256,
29390 IX86_BUILTIN_CVTTPS2UQQ128,
29391 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29392 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29393 IX86_BUILTIN_CVTTPS2UDQ256,
29394 IX86_BUILTIN_CVTTPS2UDQ128,
29395 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29396 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29397 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29398 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29399 IX86_BUILTIN_CVTPD2DQ256_MASK,
29400 IX86_BUILTIN_CVTPD2DQ128_MASK,
29401 IX86_BUILTIN_CVTDQ2PD256_MASK,
29402 IX86_BUILTIN_CVTDQ2PD128_MASK,
29403 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29404 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29405 IX86_BUILTIN_CVTDQ2PS256_MASK,
29406 IX86_BUILTIN_CVTDQ2PS128_MASK,
29407 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29408 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29409 IX86_BUILTIN_CVTPS2PD256_MASK,
29410 IX86_BUILTIN_CVTPS2PD128_MASK,
29411 IX86_BUILTIN_PBROADCASTB256_MASK,
29412 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29413 IX86_BUILTIN_PBROADCASTB128_MASK,
29414 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29415 IX86_BUILTIN_PBROADCASTW256_MASK,
29416 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29417 IX86_BUILTIN_PBROADCASTW128_MASK,
29418 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29419 IX86_BUILTIN_PBROADCASTD256_MASK,
29420 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29421 IX86_BUILTIN_PBROADCASTD128_MASK,
29422 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29423 IX86_BUILTIN_PBROADCASTQ256_MASK,
29424 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29425 IX86_BUILTIN_PBROADCASTQ128_MASK,
29426 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29427 IX86_BUILTIN_BROADCASTSS256,
29428 IX86_BUILTIN_BROADCASTSS128,
29429 IX86_BUILTIN_BROADCASTSD256,
29430 IX86_BUILTIN_EXTRACTF64X2_256,
29431 IX86_BUILTIN_EXTRACTI64X2_256,
29432 IX86_BUILTIN_INSERTF32X4_256,
29433 IX86_BUILTIN_INSERTI32X4_256,
29434 IX86_BUILTIN_PMOVSXBW256_MASK,
29435 IX86_BUILTIN_PMOVSXBW128_MASK,
29436 IX86_BUILTIN_PMOVSXBD256_MASK,
29437 IX86_BUILTIN_PMOVSXBD128_MASK,
29438 IX86_BUILTIN_PMOVSXBQ256_MASK,
29439 IX86_BUILTIN_PMOVSXBQ128_MASK,
29440 IX86_BUILTIN_PMOVSXWD256_MASK,
29441 IX86_BUILTIN_PMOVSXWD128_MASK,
29442 IX86_BUILTIN_PMOVSXWQ256_MASK,
29443 IX86_BUILTIN_PMOVSXWQ128_MASK,
29444 IX86_BUILTIN_PMOVSXDQ256_MASK,
29445 IX86_BUILTIN_PMOVSXDQ128_MASK,
29446 IX86_BUILTIN_PMOVZXBW256_MASK,
29447 IX86_BUILTIN_PMOVZXBW128_MASK,
29448 IX86_BUILTIN_PMOVZXBD256_MASK,
29449 IX86_BUILTIN_PMOVZXBD128_MASK,
29450 IX86_BUILTIN_PMOVZXBQ256_MASK,
29451 IX86_BUILTIN_PMOVZXBQ128_MASK,
29452 IX86_BUILTIN_PMOVZXWD256_MASK,
29453 IX86_BUILTIN_PMOVZXWD128_MASK,
29454 IX86_BUILTIN_PMOVZXWQ256_MASK,
29455 IX86_BUILTIN_PMOVZXWQ128_MASK,
29456 IX86_BUILTIN_PMOVZXDQ256_MASK,
29457 IX86_BUILTIN_PMOVZXDQ128_MASK,
29458 IX86_BUILTIN_REDUCEPD256_MASK,
29459 IX86_BUILTIN_REDUCEPD128_MASK,
29460 IX86_BUILTIN_REDUCEPS256_MASK,
29461 IX86_BUILTIN_REDUCEPS128_MASK,
29462 IX86_BUILTIN_REDUCESD_MASK,
29463 IX86_BUILTIN_REDUCESS_MASK,
29464 IX86_BUILTIN_VPERMVARHI256_MASK,
29465 IX86_BUILTIN_VPERMVARHI128_MASK,
29466 IX86_BUILTIN_VPERMT2VARHI256,
29467 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29468 IX86_BUILTIN_VPERMT2VARHI128,
29469 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29470 IX86_BUILTIN_VPERMI2VARHI256,
29471 IX86_BUILTIN_VPERMI2VARHI128,
29472 IX86_BUILTIN_RCP14PD256,
29473 IX86_BUILTIN_RCP14PD128,
29474 IX86_BUILTIN_RCP14PS256,
29475 IX86_BUILTIN_RCP14PS128,
29476 IX86_BUILTIN_RSQRT14PD256_MASK,
29477 IX86_BUILTIN_RSQRT14PD128_MASK,
29478 IX86_BUILTIN_RSQRT14PS256_MASK,
29479 IX86_BUILTIN_RSQRT14PS128_MASK,
29480 IX86_BUILTIN_SQRTPD256_MASK,
29481 IX86_BUILTIN_SQRTPD128_MASK,
29482 IX86_BUILTIN_SQRTPS256_MASK,
29483 IX86_BUILTIN_SQRTPS128_MASK,
29484 IX86_BUILTIN_PADDB128_MASK,
29485 IX86_BUILTIN_PADDW128_MASK,
29486 IX86_BUILTIN_PADDD128_MASK,
29487 IX86_BUILTIN_PADDQ128_MASK,
29488 IX86_BUILTIN_PSUBB128_MASK,
29489 IX86_BUILTIN_PSUBW128_MASK,
29490 IX86_BUILTIN_PSUBD128_MASK,
29491 IX86_BUILTIN_PSUBQ128_MASK,
29492 IX86_BUILTIN_PADDSB128_MASK,
29493 IX86_BUILTIN_PADDSW128_MASK,
29494 IX86_BUILTIN_PSUBSB128_MASK,
29495 IX86_BUILTIN_PSUBSW128_MASK,
29496 IX86_BUILTIN_PADDUSB128_MASK,
29497 IX86_BUILTIN_PADDUSW128_MASK,
29498 IX86_BUILTIN_PSUBUSB128_MASK,
29499 IX86_BUILTIN_PSUBUSW128_MASK,
29500 IX86_BUILTIN_PADDB256_MASK,
29501 IX86_BUILTIN_PADDW256_MASK,
29502 IX86_BUILTIN_PADDD256_MASK,
29503 IX86_BUILTIN_PADDQ256_MASK,
29504 IX86_BUILTIN_PADDSB256_MASK,
29505 IX86_BUILTIN_PADDSW256_MASK,
29506 IX86_BUILTIN_PADDUSB256_MASK,
29507 IX86_BUILTIN_PADDUSW256_MASK,
29508 IX86_BUILTIN_PSUBB256_MASK,
29509 IX86_BUILTIN_PSUBW256_MASK,
29510 IX86_BUILTIN_PSUBD256_MASK,
29511 IX86_BUILTIN_PSUBQ256_MASK,
29512 IX86_BUILTIN_PSUBSB256_MASK,
29513 IX86_BUILTIN_PSUBSW256_MASK,
29514 IX86_BUILTIN_PSUBUSB256_MASK,
29515 IX86_BUILTIN_PSUBUSW256_MASK,
29516 IX86_BUILTIN_SHUF_F64x2_256,
29517 IX86_BUILTIN_SHUF_I64x2_256,
29518 IX86_BUILTIN_SHUF_I32x4_256,
29519 IX86_BUILTIN_SHUF_F32x4_256,
29520 IX86_BUILTIN_PMOVWB128,
29521 IX86_BUILTIN_PMOVWB256,
29522 IX86_BUILTIN_PMOVSWB128,
29523 IX86_BUILTIN_PMOVSWB256,
29524 IX86_BUILTIN_PMOVUSWB128,
29525 IX86_BUILTIN_PMOVUSWB256,
29526 IX86_BUILTIN_PMOVDB128,
29527 IX86_BUILTIN_PMOVDB256,
29528 IX86_BUILTIN_PMOVSDB128,
29529 IX86_BUILTIN_PMOVSDB256,
29530 IX86_BUILTIN_PMOVUSDB128,
29531 IX86_BUILTIN_PMOVUSDB256,
29532 IX86_BUILTIN_PMOVDW128,
29533 IX86_BUILTIN_PMOVDW256,
29534 IX86_BUILTIN_PMOVSDW128,
29535 IX86_BUILTIN_PMOVSDW256,
29536 IX86_BUILTIN_PMOVUSDW128,
29537 IX86_BUILTIN_PMOVUSDW256,
29538 IX86_BUILTIN_PMOVQB128,
29539 IX86_BUILTIN_PMOVQB256,
29540 IX86_BUILTIN_PMOVSQB128,
29541 IX86_BUILTIN_PMOVSQB256,
29542 IX86_BUILTIN_PMOVUSQB128,
29543 IX86_BUILTIN_PMOVUSQB256,
29544 IX86_BUILTIN_PMOVQW128,
29545 IX86_BUILTIN_PMOVQW256,
29546 IX86_BUILTIN_PMOVSQW128,
29547 IX86_BUILTIN_PMOVSQW256,
29548 IX86_BUILTIN_PMOVUSQW128,
29549 IX86_BUILTIN_PMOVUSQW256,
29550 IX86_BUILTIN_PMOVQD128,
29551 IX86_BUILTIN_PMOVQD256,
29552 IX86_BUILTIN_PMOVSQD128,
29553 IX86_BUILTIN_PMOVSQD256,
29554 IX86_BUILTIN_PMOVUSQD128,
29555 IX86_BUILTIN_PMOVUSQD256,
29556 IX86_BUILTIN_RANGEPD256,
29557 IX86_BUILTIN_RANGEPD128,
29558 IX86_BUILTIN_RANGEPS256,
29559 IX86_BUILTIN_RANGEPS128,
29560 IX86_BUILTIN_GETEXPPS256,
29561 IX86_BUILTIN_GETEXPPD256,
29562 IX86_BUILTIN_GETEXPPS128,
29563 IX86_BUILTIN_GETEXPPD128,
29564 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29565 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29566 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29567 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29568 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29569 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29570 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29571 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29572 IX86_BUILTIN_PABSQ256,
29573 IX86_BUILTIN_PABSQ128,
29574 IX86_BUILTIN_PABSD256_MASK,
29575 IX86_BUILTIN_PABSD128_MASK,
29576 IX86_BUILTIN_PMULHRSW256_MASK,
29577 IX86_BUILTIN_PMULHRSW128_MASK,
29578 IX86_BUILTIN_PMULHUW128_MASK,
29579 IX86_BUILTIN_PMULHUW256_MASK,
29580 IX86_BUILTIN_PMULHW256_MASK,
29581 IX86_BUILTIN_PMULHW128_MASK,
29582 IX86_BUILTIN_PMULLW256_MASK,
29583 IX86_BUILTIN_PMULLW128_MASK,
29584 IX86_BUILTIN_PMULLQ256,
29585 IX86_BUILTIN_PMULLQ128,
29586 IX86_BUILTIN_ANDPD256_MASK,
29587 IX86_BUILTIN_ANDPD128_MASK,
29588 IX86_BUILTIN_ANDPS256_MASK,
29589 IX86_BUILTIN_ANDPS128_MASK,
29590 IX86_BUILTIN_ANDNPD256_MASK,
29591 IX86_BUILTIN_ANDNPD128_MASK,
29592 IX86_BUILTIN_ANDNPS256_MASK,
29593 IX86_BUILTIN_ANDNPS128_MASK,
29594 IX86_BUILTIN_PSLLWI128_MASK,
29595 IX86_BUILTIN_PSLLDI128_MASK,
29596 IX86_BUILTIN_PSLLQI128_MASK,
29597 IX86_BUILTIN_PSLLW128_MASK,
29598 IX86_BUILTIN_PSLLD128_MASK,
29599 IX86_BUILTIN_PSLLQ128_MASK,
29600   IX86_BUILTIN_PSLLWI256_MASK,
29601 IX86_BUILTIN_PSLLW256_MASK,
29602 IX86_BUILTIN_PSLLDI256_MASK,
29603 IX86_BUILTIN_PSLLD256_MASK,
29604 IX86_BUILTIN_PSLLQI256_MASK,
29605 IX86_BUILTIN_PSLLQ256_MASK,
29606 IX86_BUILTIN_PSRADI128_MASK,
29607 IX86_BUILTIN_PSRAD128_MASK,
29608 IX86_BUILTIN_PSRADI256_MASK,
29609 IX86_BUILTIN_PSRAD256_MASK,
29610 IX86_BUILTIN_PSRAQI128_MASK,
29611 IX86_BUILTIN_PSRAQ128_MASK,
29612 IX86_BUILTIN_PSRAQI256_MASK,
29613 IX86_BUILTIN_PSRAQ256_MASK,
29614 IX86_BUILTIN_PANDD256,
29615 IX86_BUILTIN_PANDD128,
29616 IX86_BUILTIN_PSRLDI128_MASK,
29617 IX86_BUILTIN_PSRLD128_MASK,
29618 IX86_BUILTIN_PSRLDI256_MASK,
29619 IX86_BUILTIN_PSRLD256_MASK,
29620 IX86_BUILTIN_PSRLQI128_MASK,
29621 IX86_BUILTIN_PSRLQ128_MASK,
29622 IX86_BUILTIN_PSRLQI256_MASK,
29623 IX86_BUILTIN_PSRLQ256_MASK,
29624 IX86_BUILTIN_PANDQ256,
29625 IX86_BUILTIN_PANDQ128,
29626 IX86_BUILTIN_PANDND256,
29627 IX86_BUILTIN_PANDND128,
29628 IX86_BUILTIN_PANDNQ256,
29629 IX86_BUILTIN_PANDNQ128,
29630 IX86_BUILTIN_PORD256,
29631 IX86_BUILTIN_PORD128,
29632 IX86_BUILTIN_PORQ256,
29633 IX86_BUILTIN_PORQ128,
29634 IX86_BUILTIN_PXORD256,
29635 IX86_BUILTIN_PXORD128,
29636 IX86_BUILTIN_PXORQ256,
29637 IX86_BUILTIN_PXORQ128,
29638 IX86_BUILTIN_PACKSSWB256_MASK,
29639 IX86_BUILTIN_PACKSSWB128_MASK,
29640 IX86_BUILTIN_PACKUSWB256_MASK,
29641 IX86_BUILTIN_PACKUSWB128_MASK,
29642 IX86_BUILTIN_RNDSCALEPS256,
29643 IX86_BUILTIN_RNDSCALEPD256,
29644 IX86_BUILTIN_RNDSCALEPS128,
29645 IX86_BUILTIN_RNDSCALEPD128,
29646 IX86_BUILTIN_VTERNLOGQ256_MASK,
29647 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29648 IX86_BUILTIN_VTERNLOGD256_MASK,
29649 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29650 IX86_BUILTIN_VTERNLOGQ128_MASK,
29651 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29652 IX86_BUILTIN_VTERNLOGD128_MASK,
29653 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29654 IX86_BUILTIN_SCALEFPD256,
29655 IX86_BUILTIN_SCALEFPS256,
29656 IX86_BUILTIN_SCALEFPD128,
29657 IX86_BUILTIN_SCALEFPS128,
29658 IX86_BUILTIN_VFMADDPD256_MASK,
29659 IX86_BUILTIN_VFMADDPD256_MASK3,
29660 IX86_BUILTIN_VFMADDPD256_MASKZ,
29661 IX86_BUILTIN_VFMADDPD128_MASK,
29662 IX86_BUILTIN_VFMADDPD128_MASK3,
29663 IX86_BUILTIN_VFMADDPD128_MASKZ,
29664 IX86_BUILTIN_VFMADDPS256_MASK,
29665 IX86_BUILTIN_VFMADDPS256_MASK3,
29666 IX86_BUILTIN_VFMADDPS256_MASKZ,
29667 IX86_BUILTIN_VFMADDPS128_MASK,
29668 IX86_BUILTIN_VFMADDPS128_MASK3,
29669 IX86_BUILTIN_VFMADDPS128_MASKZ,
29670 IX86_BUILTIN_VFMSUBPD256_MASK3,
29671 IX86_BUILTIN_VFMSUBPD128_MASK3,
29672 IX86_BUILTIN_VFMSUBPS256_MASK3,
29673 IX86_BUILTIN_VFMSUBPS128_MASK3,
29674 IX86_BUILTIN_VFNMADDPD256_MASK,
29675 IX86_BUILTIN_VFNMADDPD128_MASK,
29676 IX86_BUILTIN_VFNMADDPS256_MASK,
29677 IX86_BUILTIN_VFNMADDPS128_MASK,
29678 IX86_BUILTIN_VFNMSUBPD256_MASK,
29679 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29680 IX86_BUILTIN_VFNMSUBPD128_MASK,
29681 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29682 IX86_BUILTIN_VFNMSUBPS256_MASK,
29683 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29684 IX86_BUILTIN_VFNMSUBPS128_MASK,
29685 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29686 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29687 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29688 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29689 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29690 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29691 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29692 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29693 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29694 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29695 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29696 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29697 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29698 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29699 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29700 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29701 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29702 IX86_BUILTIN_INSERTF64X2_256,
29703 IX86_BUILTIN_INSERTI64X2_256,
29704 IX86_BUILTIN_PSRAVV16HI,
29705 IX86_BUILTIN_PSRAVV8HI,
29706 IX86_BUILTIN_PMADDUBSW256_MASK,
29707 IX86_BUILTIN_PMADDUBSW128_MASK,
29708 IX86_BUILTIN_PMADDWD256_MASK,
29709 IX86_BUILTIN_PMADDWD128_MASK,
29710 IX86_BUILTIN_PSRLVV16HI,
29711 IX86_BUILTIN_PSRLVV8HI,
29712 IX86_BUILTIN_CVTPS2DQ256_MASK,
29713 IX86_BUILTIN_CVTPS2DQ128_MASK,
29714 IX86_BUILTIN_CVTPS2UDQ256,
29715 IX86_BUILTIN_CVTPS2UDQ128,
29716 IX86_BUILTIN_CVTPS2QQ256,
29717 IX86_BUILTIN_CVTPS2QQ128,
29718 IX86_BUILTIN_CVTPS2UQQ256,
29719 IX86_BUILTIN_CVTPS2UQQ128,
29720 IX86_BUILTIN_GETMANTPS256,
29721 IX86_BUILTIN_GETMANTPS128,
29722 IX86_BUILTIN_GETMANTPD256,
29723 IX86_BUILTIN_GETMANTPD128,
29724 IX86_BUILTIN_MOVDDUP256_MASK,
29725 IX86_BUILTIN_MOVDDUP128_MASK,
29726 IX86_BUILTIN_MOVSHDUP256_MASK,
29727 IX86_BUILTIN_MOVSHDUP128_MASK,
29728 IX86_BUILTIN_MOVSLDUP256_MASK,
29729 IX86_BUILTIN_MOVSLDUP128_MASK,
29730 IX86_BUILTIN_CVTQQ2PS256,
29731 IX86_BUILTIN_CVTQQ2PS128,
29732 IX86_BUILTIN_CVTUQQ2PS256,
29733 IX86_BUILTIN_CVTUQQ2PS128,
29734 IX86_BUILTIN_CVTQQ2PD256,
29735 IX86_BUILTIN_CVTQQ2PD128,
29736 IX86_BUILTIN_CVTUQQ2PD256,
29737 IX86_BUILTIN_CVTUQQ2PD128,
29738 IX86_BUILTIN_VPERMT2VARQ256,
29739 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29740 IX86_BUILTIN_VPERMT2VARD256,
29741 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29742 IX86_BUILTIN_VPERMI2VARQ256,
29743 IX86_BUILTIN_VPERMI2VARD256,
29744 IX86_BUILTIN_VPERMT2VARPD256,
29745 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29746 IX86_BUILTIN_VPERMT2VARPS256,
29747 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29748 IX86_BUILTIN_VPERMI2VARPD256,
29749 IX86_BUILTIN_VPERMI2VARPS256,
29750 IX86_BUILTIN_VPERMT2VARQ128,
29751 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29752 IX86_BUILTIN_VPERMT2VARD128,
29753 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29754 IX86_BUILTIN_VPERMI2VARQ128,
29755 IX86_BUILTIN_VPERMI2VARD128,
29756 IX86_BUILTIN_VPERMT2VARPD128,
29757 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29758 IX86_BUILTIN_VPERMT2VARPS128,
29759 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29760 IX86_BUILTIN_VPERMI2VARPD128,
29761 IX86_BUILTIN_VPERMI2VARPS128,
29762 IX86_BUILTIN_PSHUFB256_MASK,
29763 IX86_BUILTIN_PSHUFB128_MASK,
29764 IX86_BUILTIN_PSHUFHW256_MASK,
29765 IX86_BUILTIN_PSHUFHW128_MASK,
29766 IX86_BUILTIN_PSHUFLW256_MASK,
29767 IX86_BUILTIN_PSHUFLW128_MASK,
29768 IX86_BUILTIN_PSHUFD256_MASK,
29769 IX86_BUILTIN_PSHUFD128_MASK,
29770 IX86_BUILTIN_SHUFPD256_MASK,
29771 IX86_BUILTIN_SHUFPD128_MASK,
29772 IX86_BUILTIN_SHUFPS256_MASK,
29773 IX86_BUILTIN_SHUFPS128_MASK,
29774 IX86_BUILTIN_PROLVQ256,
29775 IX86_BUILTIN_PROLVQ128,
29776 IX86_BUILTIN_PROLQ256,
29777 IX86_BUILTIN_PROLQ128,
29778 IX86_BUILTIN_PRORVQ256,
29779 IX86_BUILTIN_PRORVQ128,
29780 IX86_BUILTIN_PRORQ256,
29781 IX86_BUILTIN_PRORQ128,
29782 IX86_BUILTIN_PSRAVQ128,
29783 IX86_BUILTIN_PSRAVQ256,
29784 IX86_BUILTIN_PSLLVV4DI_MASK,
29785 IX86_BUILTIN_PSLLVV2DI_MASK,
29786 IX86_BUILTIN_PSLLVV8SI_MASK,
29787 IX86_BUILTIN_PSLLVV4SI_MASK,
29788 IX86_BUILTIN_PSRAVV8SI_MASK,
29789 IX86_BUILTIN_PSRAVV4SI_MASK,
29790 IX86_BUILTIN_PSRLVV4DI_MASK,
29791 IX86_BUILTIN_PSRLVV2DI_MASK,
29792 IX86_BUILTIN_PSRLVV8SI_MASK,
29793 IX86_BUILTIN_PSRLVV4SI_MASK,
29794 IX86_BUILTIN_PSRAWI256_MASK,
29795 IX86_BUILTIN_PSRAW256_MASK,
29796 IX86_BUILTIN_PSRAWI128_MASK,
29797 IX86_BUILTIN_PSRAW128_MASK,
29798 IX86_BUILTIN_PSRLWI256_MASK,
29799 IX86_BUILTIN_PSRLW256_MASK,
29800 IX86_BUILTIN_PSRLWI128_MASK,
29801 IX86_BUILTIN_PSRLW128_MASK,
29802 IX86_BUILTIN_PRORVD256,
29803 IX86_BUILTIN_PROLVD256,
29804 IX86_BUILTIN_PRORD256,
29805 IX86_BUILTIN_PROLD256,
29806 IX86_BUILTIN_PRORVD128,
29807 IX86_BUILTIN_PROLVD128,
29808 IX86_BUILTIN_PRORD128,
29809 IX86_BUILTIN_PROLD128,
29810 IX86_BUILTIN_FPCLASSPD256,
29811 IX86_BUILTIN_FPCLASSPD128,
29812 IX86_BUILTIN_FPCLASSSD,
29813 IX86_BUILTIN_FPCLASSPS256,
29814 IX86_BUILTIN_FPCLASSPS128,
29815 IX86_BUILTIN_FPCLASSSS,
29816 IX86_BUILTIN_CVTB2MASK128,
29817 IX86_BUILTIN_CVTB2MASK256,
29818 IX86_BUILTIN_CVTW2MASK128,
29819 IX86_BUILTIN_CVTW2MASK256,
29820 IX86_BUILTIN_CVTD2MASK128,
29821 IX86_BUILTIN_CVTD2MASK256,
29822 IX86_BUILTIN_CVTQ2MASK128,
29823 IX86_BUILTIN_CVTQ2MASK256,
29824 IX86_BUILTIN_CVTMASK2B128,
29825 IX86_BUILTIN_CVTMASK2B256,
29826 IX86_BUILTIN_CVTMASK2W128,
29827 IX86_BUILTIN_CVTMASK2W256,
29828 IX86_BUILTIN_CVTMASK2D128,
29829 IX86_BUILTIN_CVTMASK2D256,
29830 IX86_BUILTIN_CVTMASK2Q128,
29831 IX86_BUILTIN_CVTMASK2Q256,
29832 IX86_BUILTIN_PCMPEQB128_MASK,
29833 IX86_BUILTIN_PCMPEQB256_MASK,
29834 IX86_BUILTIN_PCMPEQW128_MASK,
29835 IX86_BUILTIN_PCMPEQW256_MASK,
29836 IX86_BUILTIN_PCMPEQD128_MASK,
29837 IX86_BUILTIN_PCMPEQD256_MASK,
29838 IX86_BUILTIN_PCMPEQQ128_MASK,
29839 IX86_BUILTIN_PCMPEQQ256_MASK,
29840 IX86_BUILTIN_PCMPGTB128_MASK,
29841 IX86_BUILTIN_PCMPGTB256_MASK,
29842 IX86_BUILTIN_PCMPGTW128_MASK,
29843 IX86_BUILTIN_PCMPGTW256_MASK,
29844 IX86_BUILTIN_PCMPGTD128_MASK,
29845 IX86_BUILTIN_PCMPGTD256_MASK,
29846 IX86_BUILTIN_PCMPGTQ128_MASK,
29847 IX86_BUILTIN_PCMPGTQ256_MASK,
29848 IX86_BUILTIN_PTESTMB128,
29849 IX86_BUILTIN_PTESTMB256,
29850 IX86_BUILTIN_PTESTMW128,
29851 IX86_BUILTIN_PTESTMW256,
29852 IX86_BUILTIN_PTESTMD128,
29853 IX86_BUILTIN_PTESTMD256,
29854 IX86_BUILTIN_PTESTMQ128,
29855 IX86_BUILTIN_PTESTMQ256,
29856 IX86_BUILTIN_PTESTNMB128,
29857 IX86_BUILTIN_PTESTNMB256,
29858 IX86_BUILTIN_PTESTNMW128,
29859 IX86_BUILTIN_PTESTNMW256,
29860 IX86_BUILTIN_PTESTNMD128,
29861 IX86_BUILTIN_PTESTNMD256,
29862 IX86_BUILTIN_PTESTNMQ128,
29863 IX86_BUILTIN_PTESTNMQ256,
29864 IX86_BUILTIN_PBROADCASTMB128,
29865 IX86_BUILTIN_PBROADCASTMB256,
29866 IX86_BUILTIN_PBROADCASTMW128,
29867 IX86_BUILTIN_PBROADCASTMW256,
29868 IX86_BUILTIN_COMPRESSPD256,
29869 IX86_BUILTIN_COMPRESSPD128,
29870 IX86_BUILTIN_COMPRESSPS256,
29871 IX86_BUILTIN_COMPRESSPS128,
29872 IX86_BUILTIN_PCOMPRESSQ256,
29873 IX86_BUILTIN_PCOMPRESSQ128,
29874 IX86_BUILTIN_PCOMPRESSD256,
29875 IX86_BUILTIN_PCOMPRESSD128,
29876 IX86_BUILTIN_EXPANDPD256,
29877 IX86_BUILTIN_EXPANDPD128,
29878 IX86_BUILTIN_EXPANDPS256,
29879 IX86_BUILTIN_EXPANDPS128,
29880 IX86_BUILTIN_PEXPANDQ256,
29881 IX86_BUILTIN_PEXPANDQ128,
29882 IX86_BUILTIN_PEXPANDD256,
29883 IX86_BUILTIN_PEXPANDD128,
29884 IX86_BUILTIN_EXPANDPD256Z,
29885 IX86_BUILTIN_EXPANDPD128Z,
29886 IX86_BUILTIN_EXPANDPS256Z,
29887 IX86_BUILTIN_EXPANDPS128Z,
29888 IX86_BUILTIN_PEXPANDQ256Z,
29889 IX86_BUILTIN_PEXPANDQ128Z,
29890 IX86_BUILTIN_PEXPANDD256Z,
29891 IX86_BUILTIN_PEXPANDD128Z,
29892 IX86_BUILTIN_PMAXSD256_MASK,
29893 IX86_BUILTIN_PMINSD256_MASK,
29894 IX86_BUILTIN_PMAXUD256_MASK,
29895 IX86_BUILTIN_PMINUD256_MASK,
29896 IX86_BUILTIN_PMAXSD128_MASK,
29897 IX86_BUILTIN_PMINSD128_MASK,
29898 IX86_BUILTIN_PMAXUD128_MASK,
29899 IX86_BUILTIN_PMINUD128_MASK,
29900 IX86_BUILTIN_PMAXSQ256_MASK,
29901 IX86_BUILTIN_PMINSQ256_MASK,
29902 IX86_BUILTIN_PMAXUQ256_MASK,
29903 IX86_BUILTIN_PMINUQ256_MASK,
29904 IX86_BUILTIN_PMAXSQ128_MASK,
29905 IX86_BUILTIN_PMINSQ128_MASK,
29906 IX86_BUILTIN_PMAXUQ128_MASK,
29907 IX86_BUILTIN_PMINUQ128_MASK,
29908 IX86_BUILTIN_PMINSB256_MASK,
29909 IX86_BUILTIN_PMINUB256_MASK,
29910 IX86_BUILTIN_PMAXSB256_MASK,
29911 IX86_BUILTIN_PMAXUB256_MASK,
29912 IX86_BUILTIN_PMINSB128_MASK,
29913 IX86_BUILTIN_PMINUB128_MASK,
29914 IX86_BUILTIN_PMAXSB128_MASK,
29915 IX86_BUILTIN_PMAXUB128_MASK,
29916 IX86_BUILTIN_PMINSW256_MASK,
29917 IX86_BUILTIN_PMINUW256_MASK,
29918 IX86_BUILTIN_PMAXSW256_MASK,
29919 IX86_BUILTIN_PMAXUW256_MASK,
29920 IX86_BUILTIN_PMINSW128_MASK,
29921 IX86_BUILTIN_PMINUW128_MASK,
29922 IX86_BUILTIN_PMAXSW128_MASK,
29923 IX86_BUILTIN_PMAXUW128_MASK,
29924 IX86_BUILTIN_VPCONFLICTQ256,
29925 IX86_BUILTIN_VPCONFLICTD256,
29926 IX86_BUILTIN_VPCLZCNTQ256,
29927 IX86_BUILTIN_VPCLZCNTD256,
29928 IX86_BUILTIN_UNPCKHPD256_MASK,
29929 IX86_BUILTIN_UNPCKHPD128_MASK,
29930 IX86_BUILTIN_UNPCKHPS256_MASK,
29931 IX86_BUILTIN_UNPCKHPS128_MASK,
29932 IX86_BUILTIN_UNPCKLPD256_MASK,
29933 IX86_BUILTIN_UNPCKLPD128_MASK,
29934 IX86_BUILTIN_UNPCKLPS256_MASK,
29935 IX86_BUILTIN_VPCONFLICTQ128,
29936 IX86_BUILTIN_VPCONFLICTD128,
29937 IX86_BUILTIN_VPCLZCNTQ128,
29938 IX86_BUILTIN_VPCLZCNTD128,
29939 IX86_BUILTIN_UNPCKLPS128_MASK,
29940 IX86_BUILTIN_ALIGND256,
29941 IX86_BUILTIN_ALIGNQ256,
29942 IX86_BUILTIN_ALIGND128,
29943 IX86_BUILTIN_ALIGNQ128,
29944 IX86_BUILTIN_CVTPS2PH256_MASK,
29945 IX86_BUILTIN_CVTPS2PH_MASK,
29946 IX86_BUILTIN_CVTPH2PS_MASK,
29947 IX86_BUILTIN_CVTPH2PS256_MASK,
29948 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29949 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29950 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29951 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29952 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29953 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29954 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29955 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29956 IX86_BUILTIN_PUNPCKHBW128_MASK,
29957 IX86_BUILTIN_PUNPCKHBW256_MASK,
29958 IX86_BUILTIN_PUNPCKHWD128_MASK,
29959 IX86_BUILTIN_PUNPCKHWD256_MASK,
29960 IX86_BUILTIN_PUNPCKLBW128_MASK,
29961 IX86_BUILTIN_PUNPCKLBW256_MASK,
29962 IX86_BUILTIN_PUNPCKLWD128_MASK,
29963 IX86_BUILTIN_PUNPCKLWD256_MASK,
29964 IX86_BUILTIN_PSLLVV16HI,
29965 IX86_BUILTIN_PSLLVV8HI,
29966 IX86_BUILTIN_PACKSSDW256_MASK,
29967 IX86_BUILTIN_PACKSSDW128_MASK,
29968 IX86_BUILTIN_PACKUSDW256_MASK,
29969 IX86_BUILTIN_PACKUSDW128_MASK,
29970 IX86_BUILTIN_PAVGB256_MASK,
29971 IX86_BUILTIN_PAVGW256_MASK,
29972 IX86_BUILTIN_PAVGB128_MASK,
29973 IX86_BUILTIN_PAVGW128_MASK,
29974 IX86_BUILTIN_VPERMVARSF256_MASK,
29975 IX86_BUILTIN_VPERMVARDF256_MASK,
29976 IX86_BUILTIN_VPERMDF256_MASK,
29977 IX86_BUILTIN_PABSB256_MASK,
29978 IX86_BUILTIN_PABSB128_MASK,
29979 IX86_BUILTIN_PABSW256_MASK,
29980 IX86_BUILTIN_PABSW128_MASK,
29981 IX86_BUILTIN_VPERMILVARPD_MASK,
29982 IX86_BUILTIN_VPERMILVARPS_MASK,
29983 IX86_BUILTIN_VPERMILVARPD256_MASK,
29984 IX86_BUILTIN_VPERMILVARPS256_MASK,
29985 IX86_BUILTIN_VPERMILPD_MASK,
29986 IX86_BUILTIN_VPERMILPS_MASK,
29987 IX86_BUILTIN_VPERMILPD256_MASK,
29988 IX86_BUILTIN_VPERMILPS256_MASK,
29989 IX86_BUILTIN_BLENDMQ256,
29990 IX86_BUILTIN_BLENDMD256,
29991 IX86_BUILTIN_BLENDMPD256,
29992 IX86_BUILTIN_BLENDMPS256,
29993 IX86_BUILTIN_BLENDMQ128,
29994 IX86_BUILTIN_BLENDMD128,
29995 IX86_BUILTIN_BLENDMPD128,
29996 IX86_BUILTIN_BLENDMPS128,
29997 IX86_BUILTIN_BLENDMW256,
29998 IX86_BUILTIN_BLENDMB256,
29999 IX86_BUILTIN_BLENDMW128,
30000 IX86_BUILTIN_BLENDMB128,
30001 IX86_BUILTIN_PMULLD256_MASK,
30002 IX86_BUILTIN_PMULLD128_MASK,
30003 IX86_BUILTIN_PMULUDQ256_MASK,
30004 IX86_BUILTIN_PMULDQ256_MASK,
30005 IX86_BUILTIN_PMULDQ128_MASK,
30006 IX86_BUILTIN_PMULUDQ128_MASK,
30007 IX86_BUILTIN_CVTPD2PS256_MASK,
30008 IX86_BUILTIN_CVTPD2PS_MASK,
30009 IX86_BUILTIN_VPERMVARSI256_MASK,
30010 IX86_BUILTIN_VPERMVARDI256_MASK,
30011 IX86_BUILTIN_VPERMDI256_MASK,
30012 IX86_BUILTIN_CMPQ256,
30013 IX86_BUILTIN_CMPD256,
30014 IX86_BUILTIN_UCMPQ256,
30015 IX86_BUILTIN_UCMPD256,
30016 IX86_BUILTIN_CMPB256,
30017 IX86_BUILTIN_CMPW256,
30018 IX86_BUILTIN_UCMPB256,
30019 IX86_BUILTIN_UCMPW256,
30020 IX86_BUILTIN_CMPPD256_MASK,
30021 IX86_BUILTIN_CMPPS256_MASK,
30022 IX86_BUILTIN_CMPQ128,
30023 IX86_BUILTIN_CMPD128,
30024 IX86_BUILTIN_UCMPQ128,
30025 IX86_BUILTIN_UCMPD128,
30026 IX86_BUILTIN_CMPB128,
30027 IX86_BUILTIN_CMPW128,
30028 IX86_BUILTIN_UCMPB128,
30029 IX86_BUILTIN_UCMPW128,
30030 IX86_BUILTIN_CMPPD128_MASK,
30031 IX86_BUILTIN_CMPPS128_MASK,
30033 IX86_BUILTIN_GATHER3SIV8SF,
30034 IX86_BUILTIN_GATHER3SIV4SF,
30035 IX86_BUILTIN_GATHER3SIV4DF,
30036 IX86_BUILTIN_GATHER3SIV2DF,
30037 IX86_BUILTIN_GATHER3DIV8SF,
30038 IX86_BUILTIN_GATHER3DIV4SF,
30039 IX86_BUILTIN_GATHER3DIV4DF,
30040 IX86_BUILTIN_GATHER3DIV2DF,
30041 IX86_BUILTIN_GATHER3SIV8SI,
30042 IX86_BUILTIN_GATHER3SIV4SI,
30043 IX86_BUILTIN_GATHER3SIV4DI,
30044 IX86_BUILTIN_GATHER3SIV2DI,
30045 IX86_BUILTIN_GATHER3DIV8SI,
30046 IX86_BUILTIN_GATHER3DIV4SI,
30047 IX86_BUILTIN_GATHER3DIV4DI,
30048 IX86_BUILTIN_GATHER3DIV2DI,
30049 IX86_BUILTIN_SCATTERSIV8SF,
30050 IX86_BUILTIN_SCATTERSIV4SF,
30051 IX86_BUILTIN_SCATTERSIV4DF,
30052 IX86_BUILTIN_SCATTERSIV2DF,
30053 IX86_BUILTIN_SCATTERDIV8SF,
30054 IX86_BUILTIN_SCATTERDIV4SF,
30055 IX86_BUILTIN_SCATTERDIV4DF,
30056 IX86_BUILTIN_SCATTERDIV2DF,
30057 IX86_BUILTIN_SCATTERSIV8SI,
30058 IX86_BUILTIN_SCATTERSIV4SI,
30059 IX86_BUILTIN_SCATTERSIV4DI,
30060 IX86_BUILTIN_SCATTERSIV2DI,
30061 IX86_BUILTIN_SCATTERDIV8SI,
30062 IX86_BUILTIN_SCATTERDIV4SI,
30063 IX86_BUILTIN_SCATTERDIV4DI,
30064 IX86_BUILTIN_SCATTERDIV2DI,
30066 /* AVX512DQ. */
30067 IX86_BUILTIN_RANGESD128,
30068 IX86_BUILTIN_RANGESS128,
30069 IX86_BUILTIN_KUNPCKWD,
30070 IX86_BUILTIN_KUNPCKDQ,
30071 IX86_BUILTIN_BROADCASTF32x2_512,
30072 IX86_BUILTIN_BROADCASTI32x2_512,
30073 IX86_BUILTIN_BROADCASTF64X2_512,
30074 IX86_BUILTIN_BROADCASTI64X2_512,
30075 IX86_BUILTIN_BROADCASTF32X8_512,
30076 IX86_BUILTIN_BROADCASTI32X8_512,
30077 IX86_BUILTIN_EXTRACTF64X2_512,
30078 IX86_BUILTIN_EXTRACTF32X8,
30079 IX86_BUILTIN_EXTRACTI64X2_512,
30080 IX86_BUILTIN_EXTRACTI32X8,
30081 IX86_BUILTIN_REDUCEPD512_MASK,
30082 IX86_BUILTIN_REDUCEPS512_MASK,
30083 IX86_BUILTIN_PMULLQ512,
30084 IX86_BUILTIN_XORPD512,
30085 IX86_BUILTIN_XORPS512,
30086 IX86_BUILTIN_ORPD512,
30087 IX86_BUILTIN_ORPS512,
30088 IX86_BUILTIN_ANDPD512,
30089 IX86_BUILTIN_ANDPS512,
30090 IX86_BUILTIN_ANDNPD512,
30091 IX86_BUILTIN_ANDNPS512,
30092 IX86_BUILTIN_INSERTF32X8,
30093 IX86_BUILTIN_INSERTI32X8,
30094 IX86_BUILTIN_INSERTF64X2_512,
30095 IX86_BUILTIN_INSERTI64X2_512,
30096 IX86_BUILTIN_FPCLASSPD512,
30097 IX86_BUILTIN_FPCLASSPS512,
30098 IX86_BUILTIN_CVTD2MASK512,
30099 IX86_BUILTIN_CVTQ2MASK512,
30100 IX86_BUILTIN_CVTMASK2D512,
30101 IX86_BUILTIN_CVTMASK2Q512,
30102 IX86_BUILTIN_CVTPD2QQ512,
30103 IX86_BUILTIN_CVTPS2QQ512,
30104 IX86_BUILTIN_CVTPD2UQQ512,
30105 IX86_BUILTIN_CVTPS2UQQ512,
30106 IX86_BUILTIN_CVTQQ2PS512,
30107 IX86_BUILTIN_CVTUQQ2PS512,
30108 IX86_BUILTIN_CVTQQ2PD512,
30109 IX86_BUILTIN_CVTUQQ2PD512,
30110 IX86_BUILTIN_CVTTPS2QQ512,
30111 IX86_BUILTIN_CVTTPS2UQQ512,
30112 IX86_BUILTIN_CVTTPD2QQ512,
30113 IX86_BUILTIN_CVTTPD2UQQ512,
30114 IX86_BUILTIN_RANGEPS512,
30115 IX86_BUILTIN_RANGEPD512,
30117 /* AVX512BW. */
30118 IX86_BUILTIN_PACKUSDW512,
30119 IX86_BUILTIN_PACKSSDW512,
30120 IX86_BUILTIN_LOADDQUHI512_MASK,
30121 IX86_BUILTIN_LOADDQUQI512_MASK,
30122 IX86_BUILTIN_PSLLDQ512,
30123 IX86_BUILTIN_PSRLDQ512,
30124 IX86_BUILTIN_STOREDQUHI512_MASK,
30125 IX86_BUILTIN_STOREDQUQI512_MASK,
30126 IX86_BUILTIN_PALIGNR512,
30127 IX86_BUILTIN_PALIGNR512_MASK,
30128 IX86_BUILTIN_MOVDQUHI512_MASK,
30129 IX86_BUILTIN_MOVDQUQI512_MASK,
30130 IX86_BUILTIN_PSADBW512,
30131 IX86_BUILTIN_DBPSADBW512,
30132 IX86_BUILTIN_PBROADCASTB512,
30133 IX86_BUILTIN_PBROADCASTB512_GPR,
30134 IX86_BUILTIN_PBROADCASTW512,
30135 IX86_BUILTIN_PBROADCASTW512_GPR,
30136 IX86_BUILTIN_PMOVSXBW512_MASK,
30137 IX86_BUILTIN_PMOVZXBW512_MASK,
30138 IX86_BUILTIN_VPERMVARHI512_MASK,
30139 IX86_BUILTIN_VPERMT2VARHI512,
30140 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30141 IX86_BUILTIN_VPERMI2VARHI512,
30142 IX86_BUILTIN_PAVGB512,
30143 IX86_BUILTIN_PAVGW512,
30144 IX86_BUILTIN_PADDB512,
30145 IX86_BUILTIN_PSUBB512,
30146 IX86_BUILTIN_PSUBSB512,
30147 IX86_BUILTIN_PADDSB512,
30148 IX86_BUILTIN_PSUBUSB512,
30149 IX86_BUILTIN_PADDUSB512,
30150 IX86_BUILTIN_PSUBW512,
30151 IX86_BUILTIN_PADDW512,
30152 IX86_BUILTIN_PSUBSW512,
30153 IX86_BUILTIN_PADDSW512,
30154 IX86_BUILTIN_PSUBUSW512,
30155 IX86_BUILTIN_PADDUSW512,
30156 IX86_BUILTIN_PMAXUW512,
30157 IX86_BUILTIN_PMAXSW512,
30158 IX86_BUILTIN_PMINUW512,
30159 IX86_BUILTIN_PMINSW512,
30160 IX86_BUILTIN_PMAXUB512,
30161 IX86_BUILTIN_PMAXSB512,
30162 IX86_BUILTIN_PMINUB512,
30163 IX86_BUILTIN_PMINSB512,
30164 IX86_BUILTIN_PMOVWB512,
30165 IX86_BUILTIN_PMOVSWB512,
30166 IX86_BUILTIN_PMOVUSWB512,
30167 IX86_BUILTIN_PMULHRSW512_MASK,
30168 IX86_BUILTIN_PMULHUW512_MASK,
30169 IX86_BUILTIN_PMULHW512_MASK,
30170 IX86_BUILTIN_PMULLW512_MASK,
30171 IX86_BUILTIN_PSLLWI512_MASK,
30172 IX86_BUILTIN_PSLLW512_MASK,
30173 IX86_BUILTIN_PACKSSWB512,
30174 IX86_BUILTIN_PACKUSWB512,
30175 IX86_BUILTIN_PSRAVV32HI,
30176 IX86_BUILTIN_PMADDUBSW512_MASK,
30177 IX86_BUILTIN_PMADDWD512_MASK,
30178 IX86_BUILTIN_PSRLVV32HI,
30179 IX86_BUILTIN_PUNPCKHBW512,
30180 IX86_BUILTIN_PUNPCKHWD512,
30181 IX86_BUILTIN_PUNPCKLBW512,
30182 IX86_BUILTIN_PUNPCKLWD512,
30183 IX86_BUILTIN_PSHUFB512,
30184 IX86_BUILTIN_PSHUFHW512,
30185 IX86_BUILTIN_PSHUFLW512,
30186 IX86_BUILTIN_PSRAWI512,
30187 IX86_BUILTIN_PSRAW512,
30188 IX86_BUILTIN_PSRLWI512,
30189 IX86_BUILTIN_PSRLW512,
30190 IX86_BUILTIN_CVTB2MASK512,
30191 IX86_BUILTIN_CVTW2MASK512,
30192 IX86_BUILTIN_CVTMASK2B512,
30193 IX86_BUILTIN_CVTMASK2W512,
30194 IX86_BUILTIN_PCMPEQB512_MASK,
30195 IX86_BUILTIN_PCMPEQW512_MASK,
30196 IX86_BUILTIN_PCMPGTB512_MASK,
30197 IX86_BUILTIN_PCMPGTW512_MASK,
30198 IX86_BUILTIN_PTESTMB512,
30199 IX86_BUILTIN_PTESTMW512,
30200 IX86_BUILTIN_PTESTNMB512,
30201 IX86_BUILTIN_PTESTNMW512,
30202 IX86_BUILTIN_PSLLVV32HI,
30203 IX86_BUILTIN_PABSB512,
30204 IX86_BUILTIN_PABSW512,
30205 IX86_BUILTIN_BLENDMW512,
30206 IX86_BUILTIN_BLENDMB512,
30207 IX86_BUILTIN_CMPB512,
30208 IX86_BUILTIN_CMPW512,
30209 IX86_BUILTIN_UCMPB512,
30210 IX86_BUILTIN_UCMPW512,
30212 /* Alternate 4- and 8-element gather/scatter for the vectorizer
30213 where all operands are 32-byte or 64-byte wide respectively. */
30214 IX86_BUILTIN_GATHERALTSIV4DF,
30215 IX86_BUILTIN_GATHERALTDIV8SF,
30216 IX86_BUILTIN_GATHERALTSIV4DI,
30217 IX86_BUILTIN_GATHERALTDIV8SI,
30218 IX86_BUILTIN_GATHER3ALTDIV16SF,
30219 IX86_BUILTIN_GATHER3ALTDIV16SI,
30220 IX86_BUILTIN_GATHER3ALTSIV4DF,
30221 IX86_BUILTIN_GATHER3ALTDIV8SF,
30222 IX86_BUILTIN_GATHER3ALTSIV4DI,
30223 IX86_BUILTIN_GATHER3ALTDIV8SI,
30224 IX86_BUILTIN_GATHER3ALTSIV8DF,
30225 IX86_BUILTIN_GATHER3ALTSIV8DI,
30226 IX86_BUILTIN_GATHER3DIV16SF,
30227 IX86_BUILTIN_GATHER3DIV16SI,
30228 IX86_BUILTIN_GATHER3DIV8DF,
30229 IX86_BUILTIN_GATHER3DIV8DI,
30230 IX86_BUILTIN_GATHER3SIV16SF,
30231 IX86_BUILTIN_GATHER3SIV16SI,
30232 IX86_BUILTIN_GATHER3SIV8DF,
30233 IX86_BUILTIN_GATHER3SIV8DI,
30234 IX86_BUILTIN_SCATTERDIV16SF,
30235 IX86_BUILTIN_SCATTERDIV16SI,
30236 IX86_BUILTIN_SCATTERDIV8DF,
30237 IX86_BUILTIN_SCATTERDIV8DI,
30238 IX86_BUILTIN_SCATTERSIV16SF,
30239 IX86_BUILTIN_SCATTERSIV16SI,
30240 IX86_BUILTIN_SCATTERSIV8DF,
30241 IX86_BUILTIN_SCATTERSIV8DI,
30243 /* AVX512PF */
30244 IX86_BUILTIN_GATHERPFQPD,
30245 IX86_BUILTIN_GATHERPFDPS,
30246 IX86_BUILTIN_GATHERPFDPD,
30247 IX86_BUILTIN_GATHERPFQPS,
30248 IX86_BUILTIN_SCATTERPFDPD,
30249 IX86_BUILTIN_SCATTERPFDPS,
30250 IX86_BUILTIN_SCATTERPFQPD,
30251 IX86_BUILTIN_SCATTERPFQPS,
30253 /* AVX512ER */
30254 IX86_BUILTIN_EXP2PD_MASK,
30255 IX86_BUILTIN_EXP2PS_MASK,
30256 IX86_BUILTIN_EXP2PS,
30257 IX86_BUILTIN_RCP28PD,
30258 IX86_BUILTIN_RCP28PS,
30259 IX86_BUILTIN_RCP28SD,
30260 IX86_BUILTIN_RCP28SS,
30261 IX86_BUILTIN_RSQRT28PD,
30262 IX86_BUILTIN_RSQRT28PS,
30263 IX86_BUILTIN_RSQRT28SD,
30264 IX86_BUILTIN_RSQRT28SS,
30266 /* AVX512IFMA */
30267 IX86_BUILTIN_VPMADD52LUQ512,
30268 IX86_BUILTIN_VPMADD52HUQ512,
30269 IX86_BUILTIN_VPMADD52LUQ256,
30270 IX86_BUILTIN_VPMADD52HUQ256,
30271 IX86_BUILTIN_VPMADD52LUQ128,
30272 IX86_BUILTIN_VPMADD52HUQ128,
30273 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30274 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30275 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30276 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30277 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30278 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30280 /* AVX512VBMI */
30281 IX86_BUILTIN_VPMULTISHIFTQB512,
30282 IX86_BUILTIN_VPMULTISHIFTQB256,
30283 IX86_BUILTIN_VPMULTISHIFTQB128,
30284 IX86_BUILTIN_VPERMVARQI512_MASK,
30285 IX86_BUILTIN_VPERMT2VARQI512,
30286 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30287 IX86_BUILTIN_VPERMI2VARQI512,
30288 IX86_BUILTIN_VPERMVARQI256_MASK,
30289 IX86_BUILTIN_VPERMVARQI128_MASK,
30290 IX86_BUILTIN_VPERMT2VARQI256,
30291 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30292 IX86_BUILTIN_VPERMT2VARQI128,
30293 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30294 IX86_BUILTIN_VPERMI2VARQI256,
30295 IX86_BUILTIN_VPERMI2VARQI128,
30297 /* SHA builtins. */
30298 IX86_BUILTIN_SHA1MSG1,
30299 IX86_BUILTIN_SHA1MSG2,
30300 IX86_BUILTIN_SHA1NEXTE,
30301 IX86_BUILTIN_SHA1RNDS4,
30302 IX86_BUILTIN_SHA256MSG1,
30303 IX86_BUILTIN_SHA256MSG2,
30304 IX86_BUILTIN_SHA256RNDS2,
30306 /* CLWB instructions. */
30307 IX86_BUILTIN_CLWB,
30309 /* PCOMMIT instructions. */
30310 IX86_BUILTIN_PCOMMIT,
30312 /* CLFLUSHOPT instructions. */
30313 IX86_BUILTIN_CLFLUSHOPT,
30315 /* TFmode support builtins. */
30316 IX86_BUILTIN_INFQ,
30317 IX86_BUILTIN_HUGE_VALQ,
30318 IX86_BUILTIN_FABSQ,
30319 IX86_BUILTIN_COPYSIGNQ,
30321 /* Vectorizer support builtins. */
30322 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30323 IX86_BUILTIN_CPYSGNPS,
30324 IX86_BUILTIN_CPYSGNPD,
30325 IX86_BUILTIN_CPYSGNPS256,
30326 IX86_BUILTIN_CPYSGNPS512,
30327 IX86_BUILTIN_CPYSGNPD256,
30328 IX86_BUILTIN_CPYSGNPD512,
30329 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30330 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30333 /* FMA4 instructions. */
30334 IX86_BUILTIN_VFMADDSS,
30335 IX86_BUILTIN_VFMADDSD,
30336 IX86_BUILTIN_VFMADDPS,
30337 IX86_BUILTIN_VFMADDPD,
30338 IX86_BUILTIN_VFMADDPS256,
30339 IX86_BUILTIN_VFMADDPD256,
30340 IX86_BUILTIN_VFMADDSUBPS,
30341 IX86_BUILTIN_VFMADDSUBPD,
30342 IX86_BUILTIN_VFMADDSUBPS256,
30343 IX86_BUILTIN_VFMADDSUBPD256,
30345 /* FMA3 instructions. */
30346 IX86_BUILTIN_VFMADDSS3,
30347 IX86_BUILTIN_VFMADDSD3,
30349 /* XOP instructions. */
30350 IX86_BUILTIN_VPCMOV,
30351 IX86_BUILTIN_VPCMOV_V2DI,
30352 IX86_BUILTIN_VPCMOV_V4SI,
30353 IX86_BUILTIN_VPCMOV_V8HI,
30354 IX86_BUILTIN_VPCMOV_V16QI,
30355 IX86_BUILTIN_VPCMOV_V4SF,
30356 IX86_BUILTIN_VPCMOV_V2DF,
30357 IX86_BUILTIN_VPCMOV256,
30358 IX86_BUILTIN_VPCMOV_V4DI256,
30359 IX86_BUILTIN_VPCMOV_V8SI256,
30360 IX86_BUILTIN_VPCMOV_V16HI256,
30361 IX86_BUILTIN_VPCMOV_V32QI256,
30362 IX86_BUILTIN_VPCMOV_V8SF256,
30363 IX86_BUILTIN_VPCMOV_V4DF256,
30365 IX86_BUILTIN_VPPERM,
30367 IX86_BUILTIN_VPMACSSWW,
30368 IX86_BUILTIN_VPMACSWW,
30369 IX86_BUILTIN_VPMACSSWD,
30370 IX86_BUILTIN_VPMACSWD,
30371 IX86_BUILTIN_VPMACSSDD,
30372 IX86_BUILTIN_VPMACSDD,
30373 IX86_BUILTIN_VPMACSSDQL,
30374 IX86_BUILTIN_VPMACSSDQH,
30375 IX86_BUILTIN_VPMACSDQL,
30376 IX86_BUILTIN_VPMACSDQH,
30377 IX86_BUILTIN_VPMADCSSWD,
30378 IX86_BUILTIN_VPMADCSWD,
30380 IX86_BUILTIN_VPHADDBW,
30381 IX86_BUILTIN_VPHADDBD,
30382 IX86_BUILTIN_VPHADDBQ,
30383 IX86_BUILTIN_VPHADDWD,
30384 IX86_BUILTIN_VPHADDWQ,
30385 IX86_BUILTIN_VPHADDDQ,
30386 IX86_BUILTIN_VPHADDUBW,
30387 IX86_BUILTIN_VPHADDUBD,
30388 IX86_BUILTIN_VPHADDUBQ,
30389 IX86_BUILTIN_VPHADDUWD,
30390 IX86_BUILTIN_VPHADDUWQ,
30391 IX86_BUILTIN_VPHADDUDQ,
30392 IX86_BUILTIN_VPHSUBBW,
30393 IX86_BUILTIN_VPHSUBWD,
30394 IX86_BUILTIN_VPHSUBDQ,
30396 IX86_BUILTIN_VPROTB,
30397 IX86_BUILTIN_VPROTW,
30398 IX86_BUILTIN_VPROTD,
30399 IX86_BUILTIN_VPROTQ,
30400 IX86_BUILTIN_VPROTB_IMM,
30401 IX86_BUILTIN_VPROTW_IMM,
30402 IX86_BUILTIN_VPROTD_IMM,
30403 IX86_BUILTIN_VPROTQ_IMM,
30405 IX86_BUILTIN_VPSHLB,
30406 IX86_BUILTIN_VPSHLW,
30407 IX86_BUILTIN_VPSHLD,
30408 IX86_BUILTIN_VPSHLQ,
30409 IX86_BUILTIN_VPSHAB,
30410 IX86_BUILTIN_VPSHAW,
30411 IX86_BUILTIN_VPSHAD,
30412 IX86_BUILTIN_VPSHAQ,
30414 IX86_BUILTIN_VFRCZSS,
30415 IX86_BUILTIN_VFRCZSD,
30416 IX86_BUILTIN_VFRCZPS,
30417 IX86_BUILTIN_VFRCZPD,
30418 IX86_BUILTIN_VFRCZPS256,
30419 IX86_BUILTIN_VFRCZPD256,
30421 IX86_BUILTIN_VPCOMEQUB,
30422 IX86_BUILTIN_VPCOMNEUB,
30423 IX86_BUILTIN_VPCOMLTUB,
30424 IX86_BUILTIN_VPCOMLEUB,
30425 IX86_BUILTIN_VPCOMGTUB,
30426 IX86_BUILTIN_VPCOMGEUB,
30427 IX86_BUILTIN_VPCOMFALSEUB,
30428 IX86_BUILTIN_VPCOMTRUEUB,
30430 IX86_BUILTIN_VPCOMEQUW,
30431 IX86_BUILTIN_VPCOMNEUW,
30432 IX86_BUILTIN_VPCOMLTUW,
30433 IX86_BUILTIN_VPCOMLEUW,
30434 IX86_BUILTIN_VPCOMGTUW,
30435 IX86_BUILTIN_VPCOMGEUW,
30436 IX86_BUILTIN_VPCOMFALSEUW,
30437 IX86_BUILTIN_VPCOMTRUEUW,
30439 IX86_BUILTIN_VPCOMEQUD,
30440 IX86_BUILTIN_VPCOMNEUD,
30441 IX86_BUILTIN_VPCOMLTUD,
30442 IX86_BUILTIN_VPCOMLEUD,
30443 IX86_BUILTIN_VPCOMGTUD,
30444 IX86_BUILTIN_VPCOMGEUD,
30445 IX86_BUILTIN_VPCOMFALSEUD,
30446 IX86_BUILTIN_VPCOMTRUEUD,
30448 IX86_BUILTIN_VPCOMEQUQ,
30449 IX86_BUILTIN_VPCOMNEUQ,
30450 IX86_BUILTIN_VPCOMLTUQ,
30451 IX86_BUILTIN_VPCOMLEUQ,
30452 IX86_BUILTIN_VPCOMGTUQ,
30453 IX86_BUILTIN_VPCOMGEUQ,
30454 IX86_BUILTIN_VPCOMFALSEUQ,
30455 IX86_BUILTIN_VPCOMTRUEUQ,
30457 IX86_BUILTIN_VPCOMEQB,
30458 IX86_BUILTIN_VPCOMNEB,
30459 IX86_BUILTIN_VPCOMLTB,
30460 IX86_BUILTIN_VPCOMLEB,
30461 IX86_BUILTIN_VPCOMGTB,
30462 IX86_BUILTIN_VPCOMGEB,
30463 IX86_BUILTIN_VPCOMFALSEB,
30464 IX86_BUILTIN_VPCOMTRUEB,
30466 IX86_BUILTIN_VPCOMEQW,
30467 IX86_BUILTIN_VPCOMNEW,
30468 IX86_BUILTIN_VPCOMLTW,
30469 IX86_BUILTIN_VPCOMLEW,
30470 IX86_BUILTIN_VPCOMGTW,
30471 IX86_BUILTIN_VPCOMGEW,
30472 IX86_BUILTIN_VPCOMFALSEW,
30473 IX86_BUILTIN_VPCOMTRUEW,
30475 IX86_BUILTIN_VPCOMEQD,
30476 IX86_BUILTIN_VPCOMNED,
30477 IX86_BUILTIN_VPCOMLTD,
30478 IX86_BUILTIN_VPCOMLED,
30479 IX86_BUILTIN_VPCOMGTD,
30480 IX86_BUILTIN_VPCOMGED,
30481 IX86_BUILTIN_VPCOMFALSED,
30482 IX86_BUILTIN_VPCOMTRUED,
30484 IX86_BUILTIN_VPCOMEQQ,
30485 IX86_BUILTIN_VPCOMNEQ,
30486 IX86_BUILTIN_VPCOMLTQ,
30487 IX86_BUILTIN_VPCOMLEQ,
30488 IX86_BUILTIN_VPCOMGTQ,
30489 IX86_BUILTIN_VPCOMGEQ,
30490 IX86_BUILTIN_VPCOMFALSEQ,
30491 IX86_BUILTIN_VPCOMTRUEQ,
30493 /* LWP instructions. */
30494 IX86_BUILTIN_LLWPCB,
30495 IX86_BUILTIN_SLWPCB,
30496 IX86_BUILTIN_LWPVAL32,
30497 IX86_BUILTIN_LWPVAL64,
30498 IX86_BUILTIN_LWPINS32,
30499 IX86_BUILTIN_LWPINS64,
30501 IX86_BUILTIN_CLZS,
30503 /* RTM */
30504 IX86_BUILTIN_XBEGIN,
30505 IX86_BUILTIN_XEND,
30506 IX86_BUILTIN_XABORT,
30507 IX86_BUILTIN_XTEST,
30509 /* MPX */
30510 IX86_BUILTIN_BNDMK,
30511 IX86_BUILTIN_BNDSTX,
30512 IX86_BUILTIN_BNDLDX,
30513 IX86_BUILTIN_BNDCL,
30514 IX86_BUILTIN_BNDCU,
30515 IX86_BUILTIN_BNDRET,
30516 IX86_BUILTIN_BNDNARROW,
30517 IX86_BUILTIN_BNDINT,
30518 IX86_BUILTIN_SIZEOF,
30519 IX86_BUILTIN_BNDLOWER,
30520 IX86_BUILTIN_BNDUPPER,
30522 /* BMI instructions. */
30523 IX86_BUILTIN_BEXTR32,
30524 IX86_BUILTIN_BEXTR64,
30525 IX86_BUILTIN_CTZS,
30527 /* TBM instructions. */
30528 IX86_BUILTIN_BEXTRI32,
30529 IX86_BUILTIN_BEXTRI64,
30531 /* BMI2 instructions. */
30532 IX86_BUILTIN_BZHI32,
30533 IX86_BUILTIN_BZHI64,
30534 IX86_BUILTIN_PDEP32,
30535 IX86_BUILTIN_PDEP64,
30536 IX86_BUILTIN_PEXT32,
30537 IX86_BUILTIN_PEXT64,
30539 /* ADX instructions. */
30540 IX86_BUILTIN_ADDCARRYX32,
30541 IX86_BUILTIN_ADDCARRYX64,
30543 /* SBB instructions. */
30544 IX86_BUILTIN_SBB32,
30545 IX86_BUILTIN_SBB64,
30547 /* FSGSBASE instructions. */
30548 IX86_BUILTIN_RDFSBASE32,
30549 IX86_BUILTIN_RDFSBASE64,
30550 IX86_BUILTIN_RDGSBASE32,
30551 IX86_BUILTIN_RDGSBASE64,
30552 IX86_BUILTIN_WRFSBASE32,
30553 IX86_BUILTIN_WRFSBASE64,
30554 IX86_BUILTIN_WRGSBASE32,
30555 IX86_BUILTIN_WRGSBASE64,
30557 /* RDRND instructions. */
30558 IX86_BUILTIN_RDRAND16_STEP,
30559 IX86_BUILTIN_RDRAND32_STEP,
30560 IX86_BUILTIN_RDRAND64_STEP,
30562 /* RDSEED instructions. */
30563 IX86_BUILTIN_RDSEED16_STEP,
30564 IX86_BUILTIN_RDSEED32_STEP,
30565 IX86_BUILTIN_RDSEED64_STEP,
30567 /* F16C instructions. */
30568 IX86_BUILTIN_CVTPH2PS,
30569 IX86_BUILTIN_CVTPH2PS256,
30570 IX86_BUILTIN_CVTPS2PH,
30571 IX86_BUILTIN_CVTPS2PH256,
30573 /* CFString built-in for Darwin. */
30574 IX86_BUILTIN_CFSTRING,
30576 /* Builtins to get CPU type and supported features. */
30577 IX86_BUILTIN_CPU_INIT,
30578 IX86_BUILTIN_CPU_IS,
30579 IX86_BUILTIN_CPU_SUPPORTS,
30581 /* Read/write FLAGS register built-ins. */
30582 IX86_BUILTIN_READ_FLAGS,
30583 IX86_BUILTIN_WRITE_FLAGS,
30585 IX86_BUILTIN_MAX
30588 /* Table for the ix86 builtin decls. */
30589 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30591 /* Table of all the builtin functions that are possible with different ISAs
30592 but are waiting to be built until a function is declared to use that
30593 ISA. */
30594 struct builtin_isa {
30595 const char *name; /* function name */
30596 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30597 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30598 bool const_p; /* true if the declaration is constant */
30599 bool leaf_p; /* true if the declaration has leaf attribute */
30600 bool nothrow_p; /* true if the declaration has nothrow attribute */
30601 bool set_and_not_built_p;
30604 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30607 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30608 of isa_flags the builtin requires in the ix86_builtins_isa array. Stores the
30609 function decl in the ix86_builtins array. Returns the function decl, or
30610 NULL_TREE if the builtin was not added.
30612 If the front end has a special hook for builtin functions, delay adding
30613 builtin functions that aren't in the current ISA until the ISA is changed
30614 with function-specific optimization. Doing so can save about 300K for the
30615 default compiler. When the builtin is expanded, check at that time whether
30616 it is valid.
30618 If the front end doesn't have a special hook, record all builtins, even those
30619 not in the current ISA, in case the user uses function-specific options for
30620 a different ISA, so that we don't get scope errors if a builtin is added in
30621 the middle of a function scope. */
30623 static inline tree
30624 def_builtin (HOST_WIDE_INT mask, const char *name,
30625 enum ix86_builtin_func_type tcode,
30626 enum ix86_builtins code)
30628 tree decl = NULL_TREE;
30630 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30632 ix86_builtins_isa[(int) code].isa = mask;
30634 mask &= ~OPTION_MASK_ISA_64BIT;
30635 if (mask == 0
30636 || (mask & ix86_isa_flags) != 0
30637 || (lang_hooks.builtin_function
30638 == lang_hooks.builtin_function_ext_scope))
30641 tree type = ix86_get_builtin_func_type (tcode);
30642 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30643 NULL, NULL_TREE);
30644 ix86_builtins[(int) code] = decl;
30645 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30647 else
30649 ix86_builtins[(int) code] = NULL_TREE;
30650 ix86_builtins_isa[(int) code].tcode = tcode;
30651 ix86_builtins_isa[(int) code].name = name;
30652 ix86_builtins_isa[(int) code].leaf_p = false;
30653 ix86_builtins_isa[(int) code].nothrow_p = false;
30654 ix86_builtins_isa[(int) code].const_p = false;
30655 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30659 return decl;
30662 /* Like def_builtin, but also marks the function decl "const". */
30664 static inline tree
30665 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30666 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30668 tree decl = def_builtin (mask, name, tcode, code);
30669 if (decl)
30670 TREE_READONLY (decl) = 1;
30671 else
30672 ix86_builtins_isa[(int) code].const_p = true;
30674 return decl;
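/* Illustrative sketch only (not part of the original file): roughly how
   def_builtin and def_builtin_const are invoked when the target builtins are
   registered.  The builtin name and the enum values below are placeholders
   chosen for the example, not entries this file necessarily creates.  */
#if 0
static void
example_register_two_builtins (void)
{
  /* Available only when SSE2 is enabled (or deferred until it is).  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
	       VOID_FTYPE_VOID, IX86_BUILTIN_MAX);

  /* Same registration, but the decl is additionally marked TREE_READONLY,
     i.e. the builtin behaves like a "const" function.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_const",
		     VOID_FTYPE_VOID, IX86_BUILTIN_MAX);
}
#endif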
30677 /* Add any new builtin functions for a given ISA that may not have been
30678 declared yet. This saves a bit of space compared to adding all of the
30679 declarations to the tree up front, even those that are never used. */
30681 static void
30682 ix86_add_new_builtins (HOST_WIDE_INT isa)
30684 int i;
30685 tree saved_current_target_pragma = current_target_pragma;
30686 current_target_pragma = NULL_TREE;
30688 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30690 if ((ix86_builtins_isa[i].isa & isa) != 0
30691 && ix86_builtins_isa[i].set_and_not_built_p)
30693 tree decl, type;
30695 /* Don't define the builtin again. */
30696 ix86_builtins_isa[i].set_and_not_built_p = false;
30698 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30699 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30700 type, i, BUILT_IN_MD, NULL,
30701 NULL_TREE);
30703 ix86_builtins[i] = decl;
30704 if (ix86_builtins_isa[i].const_p)
30705 TREE_READONLY (decl) = 1;
30706 if (ix86_builtins_isa[i].leaf_p)
30707 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30708 NULL_TREE);
30709 if (ix86_builtins_isa[i].nothrow_p)
30710 TREE_NOTHROW (decl) = 1;
30714 current_target_pragma = saved_current_target_pragma;
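/* Illustrative sketch only (not part of the original file): the function
   above is intended to run after the set of enabled ISAs grows, e.g. when a
   target attribute or #pragma GCC target turns on additional instruction
   sets, so that the builtins deferred by def_builtin become visible.  A call
   would look roughly like this (a sketch, not a quote of the real call
   site):  */
#if 0
  ix86_add_new_builtins (ix86_isa_flags);
#endif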
30717 /* Bits for builtin_description.flag. */
30719 /* Set when we don't support the comparison natively, and should
30720 swap the comparison operands in order to support it. */
30721 #define BUILTIN_DESC_SWAP_OPERANDS 1
30723 struct builtin_description
30725 const HOST_WIDE_INT mask;
30726 const enum insn_code icode;
30727 const char *const name;
30728 const enum ix86_builtins code;
30729 const enum rtx_code comparison;
30730 const int flag;
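/* Illustrative sketch only (not part of the original file): one annotated
   entry in the builtin_description format, with a made-up builtin name; the
   real tables follow below.  */
#if 0
  { OPTION_MASK_ISA_SSE,       /* mask: ISA flags the builtin requires */
    CODE_FOR_sse_comi,         /* icode: insn pattern used at expansion */
    "__builtin_ia32_example",  /* name exposed to the front end */
    IX86_BUILTIN_COMIEQSS,     /* code: enum ix86_builtins identifier */
    UNEQ,                      /* comparison: rtx comparison code, if any */
    0 },                       /* flag: e.g. BUILTIN_DESC_SWAP_OPERANDS */
#endif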
30733 static const struct builtin_description bdesc_comi[] =
30735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30737 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
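/* Illustrative sketch only (not part of the original file): tables such as
   bdesc_comi are typically walked once at initialization time and each named
   entry is handed to def_builtin_const.  The loop below sketches that
   pattern; the function type used here is a stand-in, and this is not a
   quote of the code used elsewhere in this file.  */
#if 0
  const struct builtin_description *d;
  size_t i;

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->name)
      def_builtin_const (d->mask, d->name, INT_FTYPE_V4SF_V4SF, d->code);
#endif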
30761 static const struct builtin_description bdesc_pcmpestr[] =
30763 /* SSE4.2 */
30764 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30765 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30766 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30767 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30768 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30769 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30770 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30773 static const struct builtin_description bdesc_pcmpistr[] =
30775 /* SSE4.2 */
30776 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30777 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30778 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30779 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30780 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30781 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30782 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30785 /* Special builtins with a variable number of arguments. */
30786 static const struct builtin_description bdesc_special_args[] =
30788 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30789 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30790 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30792 /* 80387 (used internally for atomic compound assignment). */
30793 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30794 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30795 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30796 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30798 /* MMX */
30799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30801 /* 3DNow! */
30802 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30804 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30805 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30806 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30807 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30808 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30809 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30810 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30811 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30812 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30814 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30815 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30816 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30817 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30818 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30819 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30820 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30821 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30823 /* SSE */
30824 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30825 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30826 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30828 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30829 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30830 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30831 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30833 /* SSE or 3DNow!A */
30834 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30835 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30837 /* SSE2 */
30838 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30840 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30841 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30842 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30843 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30844 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30845 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30846 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30847 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30850 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30852 /* SSE3 */
30853 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30855 /* SSE4.1 */
30856 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30858 /* SSE4A */
30859 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30860 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30862 /* AVX */
30863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30866 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30867 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30868 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30893 /* AVX2 */
30894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30904 /* AVX512F */
30905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30953 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30954 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30955 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30956 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30957 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30958 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30960 /* FSGSBASE */
30961 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30962 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30963 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30964 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30965 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30966 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30967 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30968 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30970 /* RTM */
30971 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30972 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30973 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30975 /* AVX512BW */
30976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30977 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30978 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30979 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30981 /* AVX512VL */
30982 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30985 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
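/* Note: the *_mask_store entries above expand to masked narrowing stores;
   the source vector is truncated (the ss_/us_ variants with signed or
   unsigned saturation) and only the lanes selected by the mask operand
   are written to memory.  */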
31077 /* PCOMMIT. */
31078 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31079 };
31081 /* Builtins with variable number of arguments. */
31082 static const struct builtin_description bdesc_args[] =
31083 {
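/* Each entry below pairs the ISA option mask that must be enabled with the
   insn pattern used to expand the builtin, followed by the user-visible
   builtin name, its IX86_BUILTIN_* code, an RTL comparison code (UNKNOWN
   when the pattern needs none), and the function-type flag that selects
   the argument and return signature.  */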
31084 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31085 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31086 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31087 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31088 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31089 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31090 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31092 /* MMX */
31093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31094 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31095 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
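/* The paired *_SI_COUNT and *_V..._COUNT entries above share one insn
   pattern: the psllwi-style builtins take the shift count as a plain
   integer, the psllw-style ones as a vector operand, and the _COUNT
   suffix tells the expander to treat that last operand as a shift count
   rather than an ordinary vector argument.  */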
31156 /* 3DNow! */
31157 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31158 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31159 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31160 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31162 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31163 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31164 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31165 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31166 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31167 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31168 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31169 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31170 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31171 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31172 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31173 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31174 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31175 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31176 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31178 /* 3DNow!A */
31179 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31180 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31181 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31182 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31183 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31184 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31186 /* SSE */
31187 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31188 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31189 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31190 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31191 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31192 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31193 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31195 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31196 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31197 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31198 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31202 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31203 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31204 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
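/* The cmpgt/cmpge and cmpngt/cmpnge entries above carry a *_SWAP
   function-type: there is no direct greater-than pattern, so the expander
   swaps the two operands and uses the reversed predicate instead
   (a > b is emitted as b < a, for example).  */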
31232 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31233 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31237 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31242 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31247 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31248 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31252 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31254 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31260 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31261 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
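/* A minimal usage sketch (illustrative only, not part of this table): once
   registered, these names are directly callable from user code compiled
   with the matching ISA flags, e.g.

     typedef float v4sf __attribute__ ((vector_size (16)));
     v4sf a = { 1, 2, 3, 4 }, b = { 4, 3, 2, 1 };
     v4sf c = __builtin_ia32_addps (a, b);

   The __builtin_ia32_addps entry above expands through CODE_FOR_addv4sf3,
   and <xmmintrin.h>'s _mm_add_ps is a thin wrapper around this builtin.  */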
31263 /* SSE MMX or 3DNow!A */
31264 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31265 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31266 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31268 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31269 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31270 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31271 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31273 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31274 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31276 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31278 /* SSE2 */
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31297 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31298 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31370 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31371 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31373 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31376 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31388 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31394 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31395 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31397 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31399 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31400 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31415 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31420 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31428 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31429 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31432 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31433 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31435 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31436 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31437 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31438 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31448 /* SSE2 MMX */
31449 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31452 /* SSE3 */
31453 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31454 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31456 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31457 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31458 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31459 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31460 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31461 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31463 /* SSSE3 */
31464 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31465 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31466 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31467 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31468 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31469 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31471 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31472 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31473 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31474 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31475 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31476 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31477 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31478 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31479 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31480 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31481 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31482 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31483 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31484 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31485 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31496 /* SSSE3. */
31497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31500 /* SSE4.1 */
31501 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31502 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31503 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31504 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31505 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31506 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31507 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31508 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31509 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31510 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31512 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31513 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31514 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31515 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31516 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31517 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31518 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31519 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31520 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31521 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31522 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31539 /* SSE4.1 */
31540 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31541 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31542 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31543 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
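/* In the floor/ceil/trunc/rint entries below the comparison slot is reused to
   carry the ROUND_* control value (cast to rtx_code), which the expander passes
   as the rounding immediate of the roundpd/roundps patterns.  */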
31545 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31546 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31547 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31548 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31550 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31551 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31553 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31554 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31556 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31557 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31558 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31559 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31561 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31562 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31564 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31565 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
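/* For the ptest entries (and the AVX vtest and ptest256 entries further down)
   the comparison field selects the flag the intrinsic returns: EQ tests ZF
   (testz), LTU tests CF (testc) and GTU tests that both are clear (testnzc).  */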
31571 /* SSE4.2 */
31572 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31573 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31574 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31575 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31576 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31578 /* SSE4A */
31579 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31580 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31581 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31582 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31584 /* AES */
31585 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31586 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31588 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31589 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31590 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31591 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31593 /* PCLMUL */
31594 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
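/* The AES and PCLMUL rows above have a null name because those builtins appear
   to be declared separately with their real ISA masks (AES resp. PCLMUL rather
   than plain SSE2); these rows only supply the insn code and function type used
   when the calls are expanded.  */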
31596 /* AVX */
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31617 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31672 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31684 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31685 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31698 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31709 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31710 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31711 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
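/* Cast helpers backing the _mm256_cast* intrinsics: si256_si, ps256_ps and
   pd256_pd place a 128-bit value in the low half of a 256-bit vector (the upper
   half is left undefined), while the vec_extract_lo_* entries extract the low
   128 bits again.  */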
31713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31732 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31733 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31735 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31737 /* AVX2 */
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
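/* Each AVX2 shift below comes in two flavours: an immediate count
   (..._SI_COUNT / ..._INT_COUNT) and a count taken from the low element of an
   XMM register (..._V8HI_COUNT, ..._V4SI_COUNT, ..._V2DI_COUNT).  pslldqi256
   and psrldqi256 shift whole 128-bit lanes by bytes, hence the V2TImode
   patterns and the ..._INT_CONVERT type.  */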
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31885 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31887 /* BMI */
31888 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31889 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31890 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31892 /* TBM */
31893 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31894 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31896 /* F16C */
31897 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31898 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31899 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31900 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
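/* The INT operand of vcvtps2ph and vcvtps2ph256 is the instruction's imm8
   rounding-control field.  */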
31902 /* BMI2 */
31903 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31904 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31905 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31906 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31907 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31908 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31910 /* AVX512F */
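/* In the AVX512F entries the trailing QI/HI operand is the write-mask (QI for
   8-element, HI for 16-element vectors).  The _mask forms merge masked-off
   elements from the extra vector operand given before the mask, while the
   _maskz forms zero them.  */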
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31966 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31967 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32077 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32078 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32079 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32080 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
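  /* Editorial note, not part of the original table: the handful of
     entries that follow take no write-mask operand (plain *_FTYPE_*
     prototypes), and the two *_vec_pack_sfix entries reuse the
     comparison field to carry a ROUND_FLOOR / ROUND_CEIL constant,
     which the expansion code is expected to pass down as the
     rounding-mode immediate.  */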
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32112 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
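  /* Illustrative sketch, not taken from this file: the AVX512F wrappers
     in avx512fintrin.h conventionally reach the masked builtins above
     with an explicit pass-through vector and write-mask, e.g.

	 __m512i a, b;
	 __m512i sum = (__m512i) __builtin_ia32_paddd512_mask
			 ((__v16si) a, (__v16si) b,
			  (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);

     An all-ones mask selects every element, so the pass-through operand
     is irrelevant in the unmasked intrinsic; the exact wrapper shape is
     a header detail and varies between releases.  */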
32117 /* Mask arithmetic operations */
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
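  /* Illustrative sketch, not taken from this file: the mask-arithmetic
     builtins above operate on 16-bit mask values (HI_FTYPE_HI_HI), and
     the corresponding intrinsics are conventionally thin wrappers, e.g.

	 __mmask16 m = (__mmask16) __builtin_ia32_kandhi ((__mmask16) a,
							  (__mmask16) b);

     roughly how _mm512_kand is typically implemented in avx512fintrin.h.  */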
32129 /* SHA */
32130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
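  /* Editorial note, not part of the original table: the SHA entries
     above deliberately leave the name field 0.  Rows with a null name
     are skipped when builtin declarations are created from this table,
     so the __builtin_ia32_sha* functions are expected to be declared
     elsewhere in this file; these rows only supply the insn codes and
     prototypes consulted at expansion time.  */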
32138 /* AVX512VL. */
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32177 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32178 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32179 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32180 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32181 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32182 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32183 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32184 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32185 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32186 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32187 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32188 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32189 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32194 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32195 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32196 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32197 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32198 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32199 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32200 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32201 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32202 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32203 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32206 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32235 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32236 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32249 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32250 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32265 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32266 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32277 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32279 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32280 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32281 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32282 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32283 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32284 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32285 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32286 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32287 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32290 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32303 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32304 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32308 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32317 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32318 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32319 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32320 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32324 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32325 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32327 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32328 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32332 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32333 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32375 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32376 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32395 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32396 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32397 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32398 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32399 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32400 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32403 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32404 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32406 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32407 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32408 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32409 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32410 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32411 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32412 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32413 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32416 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32459 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32460 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32521 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32522 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32535 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32536 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32537 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32538 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32549 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32550 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32551 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32552 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32553 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32554 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32555 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32556 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32581 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32585 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32586 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32615 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32629 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32630 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32631 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32632 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32633 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32634 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32638 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32639 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32640 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32641 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32642 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32647 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32648 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32649 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32650 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
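  /* The cvt*2mask / cvtmask2* rows above cover the AVX512BW/AVX512DQ
     conversions between vector lanes and mask registers (vpmovb2m, vpmovw2m,
     vpmovd2m, vpmovq2m and vpmovm2b, vpmovm2w, vpmovm2d, vpmovm2q).  The
     scalar QI/HI/SI types in their prototypes denote the kmask operand,
     whose width matches the vector element count.  */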
32651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32661 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32668 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32669 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32683 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32684 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32685 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32686 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32727 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32728 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32729 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32730 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32732 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32733 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32734 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32735 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32736 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32737 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32738 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32739 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32740 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32743 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32744 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32745 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32746 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32754 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32755 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32756 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32757 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32775 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32776 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32777 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32778 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32779 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32780 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32781 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32782 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32783 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32784 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32785 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32786 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32787 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32788 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32789 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32790 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32792 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32796 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32797 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32798 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32799 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32817 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32818 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32819 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32835 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32836 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32837 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
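  /* Each row in this table supplies, in order: the ISA option mask that must
     be enabled, the insn code the builtin expands to, the user-visible
     builtin name, its IX86_BUILTIN_* enumerator, an rtx comparison code
     (UNKNOWN for most entries), and the prototype enumerator that tells
     ix86_expand_args_builtin how to marshal the operands.  */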
32852 /* AVX512DQ. */
32853 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32854 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32855 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32856 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32857 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32858 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32859 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32860 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32861 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32862 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32863 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32864 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32865 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32866 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32867 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32868 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32869 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32870 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32871 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32872 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32873 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32874 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32875 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32876 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32877 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32878 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32879 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
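/* In these AVX512 entries the trailing QI/HI/SI/DI argument of the
   *_FTYPE_* prototype is the write mask; its width matches the element
   count of the destination vector (QI for 8 elements, HI for 16, SI for
   32, DI for 64).  The unmasked intrinsic wrappers simply pass an
   all-ones mask; as a rough sketch (not the literal header text):

     _mm512_xor_pd (A, B)
       => __builtin_ia32_xorpd512_mask ((__v8df) A, (__v8df) B,
					<pass-through operand>,
					(__mmask8) -1);  */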
32885 /* AVX512BW. */
32886 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32887 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32888 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32889 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32890 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32891 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32892 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32893 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32894 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32895 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32896 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32897 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32898 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32899 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32900 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32901 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32902 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32903 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32904 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32905 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32906 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32907 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32908 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32909 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32910 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32911 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32912 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32969 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32970 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32971 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32972 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32973 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32974 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32975 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32978 /* AVX512IFMA */
32979 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32980 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32981 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32982 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32983 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32984 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32985 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32986 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32987 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32988 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32989 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32990 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32992 /* AVX512VBMI */
32993 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32994 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32995 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32996 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32997 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32998 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32999 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33000 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33001 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33002 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33003 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33004 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33005 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33006 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33007 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33008 };

33010 /* Builtins with rounding support. */
33011 static const struct builtin_description bdesc_round_args[] =
33012 {
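/* Each entry supplies the required ISA option mask, the insn pattern, the
   builtin's name and IX86_BUILTIN_* code, a comparison code (UNKNOWN when
   unused), and the *_FTYPE_* prototype.  The trailing _INT in these
   prototypes is the rounding/SAE immediate the builtin takes as its last
   argument.  As a rough illustration (not the literal intrinsic header
   text), avx512fintrin.h implements

     _mm512_add_round_pd (A, B, R)

   approximately as

     (__m512d) __builtin_ia32_addpd512_mask ((__v8df) A, (__v8df) B,
					     (__v8df) _mm512_undefined_pd (),
					     (__mmask8) -1, R);

   where R is one of the _MM_FROUND_* constants.  */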
33013 /* AVX512F */
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33033 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33035 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33042 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33044 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33094 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33096 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33098 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33100 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33102 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33104 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33106 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33108 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33134 /* AVX512ER */
33135 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33136 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33137 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33138 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33139 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33140 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33141 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33142 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33143 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33144 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33146 /* AVX512DQ. */
33147 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33148 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33149 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33150 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33151 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33152 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33153 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33154 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33155 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33156 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33157 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33158 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33159 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33160 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33161 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33162 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33163 };

33165 /* Builtins for MPX. */
33166 static const struct builtin_description bdesc_mpx[] =
33167 {
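/* The insn code is 0 for the MPX builtins: they have no single named
   pattern and are instead expanded by dedicated code in
   ix86_expand_builtin.  */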
33168 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33169 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33170 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33171 };

33173 /* Const builtins for MPX. */
33174 static const struct builtin_description bdesc_mpx_const[] =
33175 {
33176 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33177 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33178 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33179 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33180 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33181 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33182 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33183 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33184 };

33186 /* FMA4 and XOP. */
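/* The MULTI_ARG_* macros below are local shorthand for the *_FTYPE_*
   prototype codes, keeping the FMA4/XOP table entries readable.  */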
33187 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33188 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33189 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33190 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33191 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33192 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33193 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33194 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33195 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33196 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33197 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33198 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33199 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33200 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33201 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33202 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33203 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33204 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33205 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33206 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33207 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33208 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33209 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33210 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33211 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33212 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33213 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33214 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33215 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33216 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33217 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33218 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33219 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33220 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33221 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33222 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33223 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33224 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33225 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33226 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33227 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33228 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33229 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33230 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33231 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33232 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33233 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33234 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33235 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33236 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33237 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33238 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
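/* Entries in bdesc_multi_arg carry a MULTI_ARG_* code as their flag field;
   ix86_expand_multi_arg_builtin uses it to determine the operand count and
   modes.  As a rough sketch (not the literal header text), xopintrin.h maps
   the user-level intrinsics onto these builtins, e.g.

     _mm_macc_epi16 (A, B, C)
       => (__m128i) __builtin_ia32_vpmacsww ((__v8hi) A, (__v8hi) B,
					     (__v8hi) C);  */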
33240 static const struct builtin_description bdesc_multi_arg[] =
33241 {
33242 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33243 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33244 UNKNOWN, (int)MULTI_ARG_3_SF },
33245 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33246 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33247 UNKNOWN, (int)MULTI_ARG_3_DF },
33249 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33250 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33251 UNKNOWN, (int)MULTI_ARG_3_SF },
33252 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33253 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33254 UNKNOWN, (int)MULTI_ARG_3_DF },
33256 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33257 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33258 UNKNOWN, (int)MULTI_ARG_3_SF },
33259 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33260 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33261 UNKNOWN, (int)MULTI_ARG_3_DF },
33262 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33263 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33264 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33265 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33266 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33267 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33269 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33270 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33271 UNKNOWN, (int)MULTI_ARG_3_SF },
33272 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33273 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33274 UNKNOWN, (int)MULTI_ARG_3_DF },
33275 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33276 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33277 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33278 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33279 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33280 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33442 /* TM vector builtins. */
33444 /* Reuse the existing x86-specific `struct builtin_description' because
33445    we're lazy.  Add casts to make them fit. */
33446 static const struct builtin_description bdesc_tm[] =
33448 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33449 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33450 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33451 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33452 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33453 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33454 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33456 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33457 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33458 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33459 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33460 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33461 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33462 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33464 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33465 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33466 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33467 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33468 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33469 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33470 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33472 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33473 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33474 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
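/* Illustrative sketch (not part of this file): under -fgnu-tm the
   trans-mem passes route vector memory accesses inside a transaction
   through the entries above; the "__builtin_" prefix is stripped when
   they are registered below, so a 16-byte vector store such as

     __transaction_atomic { *p = v; }     p is __m128 *, v is __m128

   is, conceptually, instrumented as a call to

     _ITM_WM128 (p, v);                   BUILT_IN_TM_STORE_M128 above

   Which variant is chosen (plain, write-after-read, write-after-write,
   logging) is decided by the generic TM instrumentation, not here.  */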
33477 /* TM callbacks. */
33479 /* Return the builtin decl needed to load a vector of TYPE. */
33481 static tree
33482 ix86_builtin_tm_load (tree type)
33484 if (TREE_CODE (type) == VECTOR_TYPE)
33486 switch (tree_to_uhwi (TYPE_SIZE (type)))
33488 case 64:
33489 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33490 case 128:
33491 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33492 case 256:
33493 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33496 return NULL_TREE;
33499 /* Return the builtin decl needed to store a vector of TYPE. */
33501 static tree
33502 ix86_builtin_tm_store (tree type)
33504 if (TREE_CODE (type) == VECTOR_TYPE)
33506 switch (tree_to_uhwi (TYPE_SIZE (type)))
33508 case 64:
33509 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33510 case 128:
33511 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33512 case 256:
33513 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33516 return NULL_TREE;
33519 /* Initialize the transactional memory vector load/store builtins. */
33521 static void
33522 ix86_init_tm_builtins (void)
33524 enum ix86_builtin_func_type ftype;
33525 const struct builtin_description *d;
33526 size_t i;
33527 tree decl;
33528 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33529 tree attrs_log, attrs_type_log;
33531 if (!flag_tm)
33532 return;
33534 /* If there are no builtins defined, we must be compiling in a
33535 language without trans-mem support. */
33536 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33537 return;
33539 /* Use whatever attributes a normal TM load has. */
33540 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33541 attrs_load = DECL_ATTRIBUTES (decl);
33542 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33543 /* Use whatever attributes a normal TM store has. */
33544 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33545 attrs_store = DECL_ATTRIBUTES (decl);
33546 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33547 /* Use whatever attributes a normal TM log has. */
33548 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33549 attrs_log = DECL_ATTRIBUTES (decl);
33550 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33552 for (i = 0, d = bdesc_tm;
33553 i < ARRAY_SIZE (bdesc_tm);
33554 i++, d++)
33556 if ((d->mask & ix86_isa_flags) != 0
33557 || (lang_hooks.builtin_function
33558 == lang_hooks.builtin_function_ext_scope))
33560 tree type, attrs, attrs_type;
33561 enum built_in_function code = (enum built_in_function) d->code;
33563 ftype = (enum ix86_builtin_func_type) d->flag;
33564 type = ix86_get_builtin_func_type (ftype);
33566 if (BUILTIN_TM_LOAD_P (code))
33568 attrs = attrs_load;
33569 attrs_type = attrs_type_load;
33571 else if (BUILTIN_TM_STORE_P (code))
33573 attrs = attrs_store;
33574 attrs_type = attrs_type_store;
33576 else
33578 attrs = attrs_log;
33579 attrs_type = attrs_type_log;
33581 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33582 /* The builtin's name without the "__builtin_" prefix,
33583 for calling it directly. */
33584 d->name + strlen ("__builtin_"),
33585 attrs);
33586 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33587 set the TYPE_ATTRIBUTES. */
33588 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33590 set_builtin_decl (code, decl, false);
33595 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33596 in the current target ISA to allow the user to compile particular modules
33597 with different target specific options that differ from the command line
33598 options. */
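/* For illustration only (a user-level sketch, assuming <immintrin.h>):
   even when a file is compiled with plain -msse2, one function can opt
   into a later ISA and use its intrinsics,

     __attribute__ ((target ("avx2")))
     __m256i
     sum256 (__m256i a, __m256i b)
     {
       return _mm256_add_epi32 (a, b);    expands to an AVX2 builtin
     }

   which is why every builtin is registered up front here rather than
   only those enabled by the command-line ISA.  */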
33599 static void
33600 ix86_init_mmx_sse_builtins (void)
33602 const struct builtin_description * d;
33603 enum ix86_builtin_func_type ftype;
33604 size_t i;
33606 /* Add all special builtins with variable number of operands. */
33607 for (i = 0, d = bdesc_special_args;
33608 i < ARRAY_SIZE (bdesc_special_args);
33609 i++, d++)
33611 if (d->name == 0)
33612 continue;
33614 ftype = (enum ix86_builtin_func_type) d->flag;
33615 def_builtin (d->mask, d->name, ftype, d->code);
33618 /* Add all builtins with variable number of operands. */
33619 for (i = 0, d = bdesc_args;
33620 i < ARRAY_SIZE (bdesc_args);
33621 i++, d++)
33623 if (d->name == 0)
33624 continue;
33626 ftype = (enum ix86_builtin_func_type) d->flag;
33627 def_builtin_const (d->mask, d->name, ftype, d->code);
33630 /* Add all builtins with rounding. */
33631 for (i = 0, d = bdesc_round_args;
33632 i < ARRAY_SIZE (bdesc_round_args);
33633 i++, d++)
33635 if (d->name == 0)
33636 continue;
33638 ftype = (enum ix86_builtin_func_type) d->flag;
33639 def_builtin_const (d->mask, d->name, ftype, d->code);
33642 /* pcmpestr[im] insns. */
33643 for (i = 0, d = bdesc_pcmpestr;
33644 i < ARRAY_SIZE (bdesc_pcmpestr);
33645 i++, d++)
33647 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33648 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33649 else
33650 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33651 def_builtin_const (d->mask, d->name, ftype, d->code);
33654 /* pcmpistr[im] insns. */
33655 for (i = 0, d = bdesc_pcmpistr;
33656 i < ARRAY_SIZE (bdesc_pcmpistr);
33657 i++, d++)
33659 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33660 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33661 else
33662 ftype = INT_FTYPE_V16QI_V16QI_INT;
33663 def_builtin_const (d->mask, d->name, ftype, d->code);
33666 /* comi/ucomi insns. */
33667 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33669 if (d->mask == OPTION_MASK_ISA_SSE2)
33670 ftype = INT_FTYPE_V2DF_V2DF;
33671 else
33672 ftype = INT_FTYPE_V4SF_V4SF;
33673 def_builtin_const (d->mask, d->name, ftype, d->code);
33676 /* SSE */
33677 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33678 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33679 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33680 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33682 /* SSE or 3DNow!A */
33683 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33684 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33685 IX86_BUILTIN_MASKMOVQ);
33687 /* SSE2 */
33688 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33689 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33691 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33692 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33693 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33694 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33696 /* SSE3. */
33697 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33698 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33699 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33700 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33702 /* AES */
33703 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33704 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33705 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33706 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33707 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33708 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33709 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33710 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33711 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33712 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33713 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33714 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33716 /* PCLMUL */
33717 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33718 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33720 /* RDRND */
33721 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33722 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33723 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33724 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33725 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33726 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33727 IX86_BUILTIN_RDRAND64_STEP);
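/* Usage sketch (illustration, not GCC code): the *_step builtins store
   the hardware random number through the pointer argument and return
   non-zero on success, zero if no value was available, so a caller
   typically retries:

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;                                  retry until a value arrives
*/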
33729 /* AVX2 */
33730 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33731 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33732 IX86_BUILTIN_GATHERSIV2DF);
33734 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33735 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33736 IX86_BUILTIN_GATHERSIV4DF);
33738 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33739 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33740 IX86_BUILTIN_GATHERDIV2DF);
33742 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33743 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33744 IX86_BUILTIN_GATHERDIV4DF);
33746 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33747 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33748 IX86_BUILTIN_GATHERSIV4SF);
33750 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33751 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33752 IX86_BUILTIN_GATHERSIV8SF);
33754 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33755 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33756 IX86_BUILTIN_GATHERDIV4SF);
33758 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33759 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33760 IX86_BUILTIN_GATHERDIV8SF);
33762 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33763 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33764 IX86_BUILTIN_GATHERSIV2DI);
33766 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33767 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33768 IX86_BUILTIN_GATHERSIV4DI);
33770 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33771 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33772 IX86_BUILTIN_GATHERDIV2DI);
33774 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33775 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33776 IX86_BUILTIN_GATHERDIV4DI);
33778 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33779 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33780 IX86_BUILTIN_GATHERSIV4SI);
33782 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33783 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33784 IX86_BUILTIN_GATHERSIV8SI);
33786 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33787 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33788 IX86_BUILTIN_GATHERDIV4SI);
33790 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33791 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33792 IX86_BUILTIN_GATHERDIV8SI);
33794 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33795 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33796 IX86_BUILTIN_GATHERALTSIV4DF);
33798 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33799 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33800 IX86_BUILTIN_GATHERALTDIV8SF);
33802 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33803 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33804 IX86_BUILTIN_GATHERALTSIV4DI);
33806 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33807 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33808 IX86_BUILTIN_GATHERALTDIV8SI);
33810 /* AVX512F */
33811 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33812 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33813 IX86_BUILTIN_GATHER3SIV16SF);
33815 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33816 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33817 IX86_BUILTIN_GATHER3SIV8DF);
33819 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33820 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33821 IX86_BUILTIN_GATHER3DIV16SF);
33823 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33824 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33825 IX86_BUILTIN_GATHER3DIV8DF);
33827 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33828 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33829 IX86_BUILTIN_GATHER3SIV16SI);
33831 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33832 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33833 IX86_BUILTIN_GATHER3SIV8DI);
33835 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33836 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33837 IX86_BUILTIN_GATHER3DIV16SI);
33839 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33840 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33841 IX86_BUILTIN_GATHER3DIV8DI);
33843 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33844 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33845 IX86_BUILTIN_GATHER3ALTSIV8DF);
33847 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33848 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33849 IX86_BUILTIN_GATHER3ALTDIV16SF);
33851 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33852 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33853 IX86_BUILTIN_GATHER3ALTSIV8DI);
33855 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33856 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33857 IX86_BUILTIN_GATHER3ALTDIV16SI);
33859 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33860 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33861 IX86_BUILTIN_SCATTERSIV16SF);
33863 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33864 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33865 IX86_BUILTIN_SCATTERSIV8DF);
33867 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33868 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33869 IX86_BUILTIN_SCATTERDIV16SF);
33871 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33872 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33873 IX86_BUILTIN_SCATTERDIV8DF);
33875 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33876 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33877 IX86_BUILTIN_SCATTERSIV16SI);
33879 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33880 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33881 IX86_BUILTIN_SCATTERSIV8DI);
33883 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33884 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33885 IX86_BUILTIN_SCATTERDIV16SI);
33887 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33888 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33889 IX86_BUILTIN_SCATTERDIV8DI);
33891 /* AVX512VL */
33892 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33893 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33894 IX86_BUILTIN_GATHER3SIV2DF);
33896 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33897 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33898 IX86_BUILTIN_GATHER3SIV4DF);
33900 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33901 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33902 IX86_BUILTIN_GATHER3DIV2DF);
33904 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33905 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33906 IX86_BUILTIN_GATHER3DIV4DF);
33908 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33909 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33910 IX86_BUILTIN_GATHER3SIV4SF);
33912 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33913 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33914 IX86_BUILTIN_GATHER3SIV8SF);
33916 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33917 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33918 IX86_BUILTIN_GATHER3DIV4SF);
33920 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33921 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33922 IX86_BUILTIN_GATHER3DIV8SF);
33924 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33925 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33926 IX86_BUILTIN_GATHER3SIV2DI);
33928 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33929 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33930 IX86_BUILTIN_GATHER3SIV4DI);
33932 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33933 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33934 IX86_BUILTIN_GATHER3DIV2DI);
33936 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33937 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33938 IX86_BUILTIN_GATHER3DIV4DI);
33940 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33941 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33942 IX86_BUILTIN_GATHER3SIV4SI);
33944 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33945 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33946 IX86_BUILTIN_GATHER3SIV8SI);
33948 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33949 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33950 IX86_BUILTIN_GATHER3DIV4SI);
33952 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33953 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33954 IX86_BUILTIN_GATHER3DIV8SI);
33956 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33957 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33958 IX86_BUILTIN_GATHER3ALTSIV4DF);
33960 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33961 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33962 IX86_BUILTIN_GATHER3ALTDIV8SF);
33964 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33965 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33966 IX86_BUILTIN_GATHER3ALTSIV4DI);
33968 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33969 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33970 IX86_BUILTIN_GATHER3ALTDIV8SI);
33972 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33973 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33974 IX86_BUILTIN_SCATTERSIV8SF);
33976 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33977 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33978 IX86_BUILTIN_SCATTERSIV4SF);
33980 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33981 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33982 IX86_BUILTIN_SCATTERSIV4DF);
33984 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33985 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33986 IX86_BUILTIN_SCATTERSIV2DF);
33988 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33989 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33990 IX86_BUILTIN_SCATTERDIV8SF);
33992 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33993 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33994 IX86_BUILTIN_SCATTERDIV4SF);
33996 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33997 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33998 IX86_BUILTIN_SCATTERDIV4DF);
34000 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34001 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34002 IX86_BUILTIN_SCATTERDIV2DF);
34004 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34005 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34006 IX86_BUILTIN_SCATTERSIV8SI);
34008 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34009 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34010 IX86_BUILTIN_SCATTERSIV4SI);
34012 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34013 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34014 IX86_BUILTIN_SCATTERSIV4DI);
34016 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34017 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34018 IX86_BUILTIN_SCATTERSIV2DI);
34020 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34021 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34022 IX86_BUILTIN_SCATTERDIV8SI);
34024 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34025 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34026 IX86_BUILTIN_SCATTERDIV4SI);
34028 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34029 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34030 IX86_BUILTIN_SCATTERDIV4DI);
34032 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34033 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34034 IX86_BUILTIN_SCATTERDIV2DI);
34036 /* AVX512PF */
34037 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34038 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34039 IX86_BUILTIN_GATHERPFDPD);
34040 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34041 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34042 IX86_BUILTIN_GATHERPFDPS);
34043 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34044 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34045 IX86_BUILTIN_GATHERPFQPD);
34046 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34047 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34048 IX86_BUILTIN_GATHERPFQPS);
34049 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34050 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34051 IX86_BUILTIN_SCATTERPFDPD);
34052 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34053 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34054 IX86_BUILTIN_SCATTERPFDPS);
34055 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34056 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34057 IX86_BUILTIN_SCATTERPFQPD);
34058 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34059 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34060 IX86_BUILTIN_SCATTERPFQPS);
34062 /* SHA */
34063 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34064 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34065 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34066 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34067 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34068 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34069 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34070 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34071 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34072 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34073 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34074 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34075 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34076 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34078 /* RTM. */
34079 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34080 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34082 /* MMX access to the vec_init patterns. */
34083 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34084 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34086 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34087 V4HI_FTYPE_HI_HI_HI_HI,
34088 IX86_BUILTIN_VEC_INIT_V4HI);
34090 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34091 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34092 IX86_BUILTIN_VEC_INIT_V8QI);
34094 /* Access to the vec_extract patterns. */
34095 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34096 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34097 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34098 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34099 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34100 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34101 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34102 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34103 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34104 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34106 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34107 "__builtin_ia32_vec_ext_v4hi",
34108 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34110 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34111 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34113 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34114 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
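/* Illustrative sketch (user code, assuming the __v4sf vector typedef
   from <xmmintrin.h> and a constant element index):

     __v4sf v = { 1.0f, 2.0f, 3.0f, 4.0f };
     float x = __builtin_ia32_vec_ext_v4sf (v, 0);    element 0 of v
*/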
34116 /* Access to the vec_set patterns. */
34117 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34118 "__builtin_ia32_vec_set_v2di",
34119 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34121 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34122 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34124 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34125 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34127 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34128 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34130 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34131 "__builtin_ia32_vec_set_v4hi",
34132 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34134 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34135 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34137 /* RDSEED */
34138 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34139 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34140 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34141 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34142 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34143 "__builtin_ia32_rdseed_di_step",
34144 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34146 /* ADCX */
34147 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34148 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34149 def_builtin (OPTION_MASK_ISA_64BIT,
34150 "__builtin_ia32_addcarryx_u64",
34151 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34152 IX86_BUILTIN_ADDCARRYX64);
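/* Usage sketch (illustration; a0/b0 and a1/b1 are placeholder operand
   words, not GCC symbols): the carry-out of one step feeds the
   carry-in of the next, e.g. a 128-bit addition built from two 64-bit
   halves:

     unsigned long long lo, hi;
     unsigned char c = __builtin_ia32_addcarryx_u64 (0, a0, b0, &lo);
     (void) __builtin_ia32_addcarryx_u64 (c, a1, b1, &hi);
*/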
34154 /* SBB */
34155 def_builtin (0, "__builtin_ia32_sbb_u32",
34156 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34157 def_builtin (OPTION_MASK_ISA_64BIT,
34158 "__builtin_ia32_sbb_u64",
34159 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34160 IX86_BUILTIN_SBB64);
34162 /* Read/write FLAGS. */
34163 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34164 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34165 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34166 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34167 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34168 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34169 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34170 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34172 /* CLFLUSHOPT. */
34173 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34174 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34176 /* CLWB. */
34177 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34178 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34180 /* Add the multi-argument builtins (FMA4, FMA and XOP). */
34181 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34183 if (d->name == 0)
34184 continue;
34186 ftype = (enum ix86_builtin_func_type) d->flag;
34187 def_builtin_const (d->mask, d->name, ftype, d->code);
34191 static void
34192 ix86_init_mpx_builtins ()
34194 const struct builtin_description * d;
34195 enum ix86_builtin_func_type ftype;
34196 tree decl;
34197 size_t i;
34199 for (i = 0, d = bdesc_mpx;
34200 i < ARRAY_SIZE (bdesc_mpx);
34201 i++, d++)
34203 if (d->name == 0)
34204 continue;
34206 ftype = (enum ix86_builtin_func_type) d->flag;
34207 decl = def_builtin (d->mask, d->name, ftype, d->code);
34209 /* Without the leaf and nothrow flags, abnormal edges may
34210 follow calls to MPX builtins when setjmp is present in
34211 the function. Since we may have many MPX builtin calls,
34212 this causes lots of useless edges and enormous PHI
34213 nodes. To avoid this we mark MPX builtins as leaf
34214 and nothrow. */
34215 if (decl)
34217 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34218 NULL_TREE);
34219 TREE_NOTHROW (decl) = 1;
34221 else
34223 ix86_builtins_isa[(int)d->code].leaf_p = true;
34224 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34228 for (i = 0, d = bdesc_mpx_const;
34229 i < ARRAY_SIZE (bdesc_mpx_const);
34230 i++, d++)
34232 if (d->name == 0)
34233 continue;
34235 ftype = (enum ix86_builtin_func_type) d->flag;
34236 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34238 if (decl)
34240 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34241 NULL_TREE);
34242 TREE_NOTHROW (decl) = 1;
34244 else
34246 ix86_builtins_isa[(int)d->code].leaf_p = true;
34247 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34252 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34253 to return a pointer to VERSION_DECL if the outcome of the expression
34254 formed by PREDICATE_CHAIN is true. This function will be called during
34255 version dispatch to decide which function version to execute. It returns
34256 the basic block at the end, to which more conditions can be added. */
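/* Rough shape of what gets built in NEW_BB (an illustrative sketch
   only; the real statements are GIMPLE and the predicates come from
   PREDICATE_CHAIN):

     cond_1 = predicate_1 (arg_1);      e.g. __builtin_cpu_is ("corei7")
     cond_2 = predicate_2 (arg_2);
     and_tmp = MIN (cond_1, cond_2);    all predicates must be non-zero
     if (and_tmp > 0)
       return (void *) &version_decl;

   On the false edge control continues in the basic block that this
   function returns, where the next condition can be added.  */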
34258 static basic_block
34259 add_condition_to_bb (tree function_decl, tree version_decl,
34260 tree predicate_chain, basic_block new_bb)
34262 gimple return_stmt;
34263 tree convert_expr, result_var;
34264 gimple convert_stmt;
34265 gimple call_cond_stmt;
34266 gimple if_else_stmt;
34268 basic_block bb1, bb2, bb3;
34269 edge e12, e23;
34271 tree cond_var, and_expr_var = NULL_TREE;
34272 gimple_seq gseq;
34274 tree predicate_decl, predicate_arg;
34276 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34278 gcc_assert (new_bb != NULL);
34279 gseq = bb_seq (new_bb);
34282 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34283 build_fold_addr_expr (version_decl));
34284 result_var = create_tmp_var (ptr_type_node);
34285 convert_stmt = gimple_build_assign (result_var, convert_expr);
34286 return_stmt = gimple_build_return (result_var);
34288 if (predicate_chain == NULL_TREE)
34290 gimple_seq_add_stmt (&gseq, convert_stmt);
34291 gimple_seq_add_stmt (&gseq, return_stmt);
34292 set_bb_seq (new_bb, gseq);
34293 gimple_set_bb (convert_stmt, new_bb);
34294 gimple_set_bb (return_stmt, new_bb);
34295 pop_cfun ();
34296 return new_bb;
34299 while (predicate_chain != NULL)
34301 cond_var = create_tmp_var (integer_type_node);
34302 predicate_decl = TREE_PURPOSE (predicate_chain);
34303 predicate_arg = TREE_VALUE (predicate_chain);
34304 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34305 gimple_call_set_lhs (call_cond_stmt, cond_var);
34307 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34308 gimple_set_bb (call_cond_stmt, new_bb);
34309 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34311 predicate_chain = TREE_CHAIN (predicate_chain);
34313 if (and_expr_var == NULL)
34314 and_expr_var = cond_var;
34315 else
34317 gimple assign_stmt;
34318 /* Use MIN_EXPR to check whether any of the integers is zero:
34319 and_expr_var = min_expr <cond_var, and_expr_var>. */
34320 assign_stmt = gimple_build_assign (and_expr_var,
34321 build2 (MIN_EXPR, integer_type_node,
34322 cond_var, and_expr_var));
34324 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34325 gimple_set_bb (assign_stmt, new_bb);
34326 gimple_seq_add_stmt (&gseq, assign_stmt);
34330 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34331 integer_zero_node,
34332 NULL_TREE, NULL_TREE);
34333 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34334 gimple_set_bb (if_else_stmt, new_bb);
34335 gimple_seq_add_stmt (&gseq, if_else_stmt);
34337 gimple_seq_add_stmt (&gseq, convert_stmt);
34338 gimple_seq_add_stmt (&gseq, return_stmt);
34339 set_bb_seq (new_bb, gseq);
34341 bb1 = new_bb;
34342 e12 = split_block (bb1, if_else_stmt);
34343 bb2 = e12->dest;
34344 e12->flags &= ~EDGE_FALLTHRU;
34345 e12->flags |= EDGE_TRUE_VALUE;
34347 e23 = split_block (bb2, return_stmt);
34349 gimple_set_bb (convert_stmt, bb2);
34350 gimple_set_bb (return_stmt, bb2);
34352 bb3 = e23->dest;
34353 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34355 remove_edge (e23);
34356 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34358 pop_cfun ();
34360 return bb3;
34363 /* This parses the attribute arguments to target in DECL and determines
34364 the right builtin to use to match the platform specification.
34365 It returns the priority value for this version decl. If PREDICATE_LIST
34366 is not NULL, it stores the list of cpu features that need to be checked
34367 before dispatching this function. */
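/* For illustration (a user-level C++ sketch; foo and its versions are
   placeholders, not anything defined in GCC):

     __attribute__ ((target ("default")))      int foo () { return 0; }
     __attribute__ ((target ("sse4.2")))       int foo () { return 1; }
     __attribute__ ((target ("arch=haswell"))) int foo () { return 2; }

   this routine yields priority 0 for the default version, P_SSE4_2 for
   the sse4.2 one and P_PROC_AVX2 for the arch=haswell one, so the
   dispatcher checks the arch=haswell version first and falls back to
   the default last.  */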
34369 static unsigned int
34370 get_builtin_code_for_version (tree decl, tree *predicate_list)
34372 tree attrs;
34373 struct cl_target_option cur_target;
34374 tree target_node;
34375 struct cl_target_option *new_target;
34376 const char *arg_str = NULL;
34377 const char *attrs_str = NULL;
34378 char *tok_str = NULL;
34379 char *token;
34381 /* Priority of i386 features, greater value is higher priority. This is
34382 used to decide the order in which function dispatch must happen. For
34383 instance, a version specialized for SSE4.2 should be checked for dispatch
34384 before a version for SSE3, as SSE4.2 implies SSE3. */
34385 enum feature_priority
34387 P_ZERO = 0,
34388 P_MMX,
34389 P_SSE,
34390 P_SSE2,
34391 P_SSE3,
34392 P_SSSE3,
34393 P_PROC_SSSE3,
34394 P_SSE4_A,
34395 P_PROC_SSE4_A,
34396 P_SSE4_1,
34397 P_SSE4_2,
34398 P_PROC_SSE4_2,
34399 P_POPCNT,
34400 P_AVX,
34401 P_PROC_AVX,
34402 P_BMI,
34403 P_PROC_BMI,
34404 P_FMA4,
34405 P_XOP,
34406 P_PROC_XOP,
34407 P_FMA,
34408 P_PROC_FMA,
34409 P_BMI2,
34410 P_AVX2,
34411 P_PROC_AVX2,
34412 P_AVX512F,
34413 P_PROC_AVX512F
34416 enum feature_priority priority = P_ZERO;
34418 /* These are the target attribute strings for which a dispatcher is
34419 available, from fold_builtin_cpu. */
34421 static struct _feature_list
34423 const char *const name;
34424 const enum feature_priority priority;
34426 const feature_list[] =
34428 {"mmx", P_MMX},
34429 {"sse", P_SSE},
34430 {"sse2", P_SSE2},
34431 {"sse3", P_SSE3},
34432 {"sse4a", P_SSE4_A},
34433 {"ssse3", P_SSSE3},
34434 {"sse4.1", P_SSE4_1},
34435 {"sse4.2", P_SSE4_2},
34436 {"popcnt", P_POPCNT},
34437 {"avx", P_AVX},
34438 {"bmi", P_BMI},
34439 {"fma4", P_FMA4},
34440 {"xop", P_XOP},
34441 {"fma", P_FMA},
34442 {"bmi2", P_BMI2},
34443 {"avx2", P_AVX2},
34444 {"avx512f", P_AVX512F}
34448 static unsigned int NUM_FEATURES
34449 = sizeof (feature_list) / sizeof (struct _feature_list);
34451 unsigned int i;
34453 tree predicate_chain = NULL_TREE;
34454 tree predicate_decl, predicate_arg;
34456 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34457 gcc_assert (attrs != NULL);
34459 attrs = TREE_VALUE (TREE_VALUE (attrs));
34461 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34462 attrs_str = TREE_STRING_POINTER (attrs);
34464 /* Return priority zero for default function. */
34465 if (strcmp (attrs_str, "default") == 0)
34466 return 0;
34468 /* Handle arch= if specified. For priority, set it to be 1 more than
34469 the best instruction set the processor can handle. For instance, if
34470 there is a version for atom and a version for ssse3 (the highest ISA
34471 priority for atom), the atom version must be checked for dispatch
34472 before the ssse3 version. */
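/* Note (an illustration derived from the code below, not original text):
   for "arch=" the predicate recorded in PREDICATE_LIST is a call to
   __builtin_cpu_is ("<arch>"), while plain ISA names such as "avx" are
   recorded as __builtin_cpu_supports ("avx") checks.  */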
34473 if (strstr (attrs_str, "arch=") != NULL)
34475 cl_target_option_save (&cur_target, &global_options);
34476 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34477 &global_options_set);
34479 gcc_assert (target_node);
34480 new_target = TREE_TARGET_OPTION (target_node);
34481 gcc_assert (new_target);
34483 if (new_target->arch_specified && new_target->arch > 0)
34485 switch (new_target->arch)
34487 case PROCESSOR_CORE2:
34488 arg_str = "core2";
34489 priority = P_PROC_SSSE3;
34490 break;
34491 case PROCESSOR_NEHALEM:
34492 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34493 arg_str = "westmere";
34494 else
34495 /* We translate "arch=corei7" and "arch=nehalem" to
34496 "corei7" so that it will be mapped to M_INTEL_COREI7
34497 as the cpu type to cover all M_INTEL_COREI7_XXXs. */
34498 arg_str = "corei7";
34499 priority = P_PROC_SSE4_2;
34500 break;
34501 case PROCESSOR_SANDYBRIDGE:
34502 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34503 arg_str = "ivybridge";
34504 else
34505 arg_str = "sandybridge";
34506 priority = P_PROC_AVX;
34507 break;
34508 case PROCESSOR_HASWELL:
34509 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34510 arg_str = "broadwell";
34511 else
34512 arg_str = "haswell";
34513 priority = P_PROC_AVX2;
34514 break;
34515 case PROCESSOR_BONNELL:
34516 arg_str = "bonnell";
34517 priority = P_PROC_SSSE3;
34518 break;
34519 case PROCESSOR_KNL:
34520 arg_str = "knl";
34521 priority = P_PROC_AVX512F;
34522 break;
34523 case PROCESSOR_SILVERMONT:
34524 arg_str = "silvermont";
34525 priority = P_PROC_SSE4_2;
34526 break;
34527 case PROCESSOR_AMDFAM10:
34528 arg_str = "amdfam10h";
34529 priority = P_PROC_SSE4_A;
34530 break;
34531 case PROCESSOR_BTVER1:
34532 arg_str = "btver1";
34533 priority = P_PROC_SSE4_A;
34534 break;
34535 case PROCESSOR_BTVER2:
34536 arg_str = "btver2";
34537 priority = P_PROC_BMI;
34538 break;
34539 case PROCESSOR_BDVER1:
34540 arg_str = "bdver1";
34541 priority = P_PROC_XOP;
34542 break;
34543 case PROCESSOR_BDVER2:
34544 arg_str = "bdver2";
34545 priority = P_PROC_FMA;
34546 break;
34547 case PROCESSOR_BDVER3:
34548 arg_str = "bdver3";
34549 priority = P_PROC_FMA;
34550 break;
34551 case PROCESSOR_BDVER4:
34552 arg_str = "bdver4";
34553 priority = P_PROC_AVX2;
34554 break;
34558 cl_target_option_restore (&global_options, &cur_target);
34560 if (predicate_list && arg_str == NULL)
34562 error_at (DECL_SOURCE_LOCATION (decl),
34563 "No dispatcher found for the versioning attributes");
34564 return 0;
34567 if (predicate_list)
34569 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34570 /* For a C string literal the length includes the trailing NULL. */
34571 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34572 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34573 predicate_chain);
34577 /* Process feature name. */
34578 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34579 strcpy (tok_str, attrs_str);
34580 token = strtok (tok_str, ",");
34581 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34583 while (token != NULL)
34585 /* Do not process "arch=" */
34586 if (strncmp (token, "arch=", 5) == 0)
34588 token = strtok (NULL, ",");
34589 continue;
34591 for (i = 0; i < NUM_FEATURES; ++i)
34593 if (strcmp (token, feature_list[i].name) == 0)
34595 if (predicate_list)
34597 predicate_arg = build_string_literal (
34598 strlen (feature_list[i].name) + 1,
34599 feature_list[i].name);
34600 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34601 predicate_chain);
34603 /* Find the maximum priority feature. */
34604 if (feature_list[i].priority > priority)
34605 priority = feature_list[i].priority;
34607 break;
34610 if (predicate_list && i == NUM_FEATURES)
34612 error_at (DECL_SOURCE_LOCATION (decl),
34613 "No dispatcher found for %s", token);
34614 return 0;
34616 token = strtok (NULL, ",");
34618 free (tok_str);
34620 if (predicate_list && predicate_chain == NULL_TREE)
34622 error_at (DECL_SOURCE_LOCATION (decl),
34623 "No dispatcher found for the versioning attributes : %s",
34624 attrs_str);
34625 return 0;
34627 else if (predicate_list)
34629 predicate_chain = nreverse (predicate_chain);
34630 *predicate_list = predicate_chain;
34633 return priority;
34636 /* This compares the priority of target features in function DECL1
34637 and DECL2. It returns positive value if DECL1 is higher priority,
34638 negative value if DECL2 is higher priority and 0 if they are the
34639 same. */
34641 static int
34642 ix86_compare_version_priority (tree decl1, tree decl2)
34644 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34645 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34647 return (int)priority1 - (int)priority2;
34650 /* V1 and V2 point to function versions with different priorities
34651 based on the target ISA. This function compares their priorities. */
34653 static int
34654 feature_compare (const void *v1, const void *v2)
34656 typedef struct _function_version_info
34658 tree version_decl;
34659 tree predicate_chain;
34660 unsigned int dispatch_priority;
34661 } function_version_info;
34663 const function_version_info c1 = *(const function_version_info *)v1;
34664 const function_version_info c2 = *(const function_version_info *)v2;
34665 return (c2.dispatch_priority - c1.dispatch_priority);
34668 /* This function generates the dispatch function for
34669 multi-versioned functions. DISPATCH_DECL is the function which will
34670 contain the dispatch logic. FNDECLS are the function choices for
34671 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34672 in DISPATCH_DECL in which the dispatch code is generated. */
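/* Rough shape of the resolver body assembled here (an illustrative
   sketch, not generated verbatim):

     __builtin_cpu_init ();
     if (<cpu predicates of the highest-priority version hold>)
       return <that version>;
     ...
     return <default version>;

   The individual checks are emitted by add_condition_to_bb.  */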
34674 static int
34675 dispatch_function_versions (tree dispatch_decl,
34676 void *fndecls_p,
34677 basic_block *empty_bb)
34679 tree default_decl;
34680 gimple ifunc_cpu_init_stmt;
34681 gimple_seq gseq;
34682 int ix;
34683 tree ele;
34684 vec<tree> *fndecls;
34685 unsigned int num_versions = 0;
34686 unsigned int actual_versions = 0;
34687 unsigned int i;
34689 struct _function_version_info
34691 tree version_decl;
34692 tree predicate_chain;
34693 unsigned int dispatch_priority;
34694 }*function_version_info;
34696 gcc_assert (dispatch_decl != NULL
34697 && fndecls_p != NULL
34698 && empty_bb != NULL);
34700 /* fndecls_p is actually a vector.  */
34701 fndecls = static_cast<vec<tree> *> (fndecls_p);
34703 /* At least one more version other than the default. */
34704 num_versions = fndecls->length ();
34705 gcc_assert (num_versions >= 2);
34707 function_version_info = (struct _function_version_info *)
34708 XNEWVEC (struct _function_version_info, (num_versions - 1));
34710 /* The first version in the vector is the default decl. */
34711 default_decl = (*fndecls)[0];
34713 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34715 gseq = bb_seq (*empty_bb);
34716 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34717 constructors, so explicitly call __builtin_cpu_init here. */
34718 ifunc_cpu_init_stmt = gimple_build_call_vec (
34719 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34720 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34721 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34722 set_bb_seq (*empty_bb, gseq);
34724 pop_cfun ();
34727 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34729 tree version_decl = ele;
34730 tree predicate_chain = NULL_TREE;
34731 unsigned int priority;
34732 /* Get attribute string, parse it and find the right predicate decl.
34733 The predicate function could be a lengthy combination of many
34734 features, like arch-type and various isa-variants. */
34735 priority = get_builtin_code_for_version (version_decl,
34736 &predicate_chain);
34738 if (predicate_chain == NULL_TREE)
34739 continue;
34741 function_version_info [actual_versions].version_decl = version_decl;
34742 function_version_info [actual_versions].predicate_chain
34743 = predicate_chain;
34744 function_version_info [actual_versions].dispatch_priority = priority;
34745 actual_versions++;
34748 /* Sort the versions according to descending order of dispatch priority. The
34749 priority is based on the ISA. This is not a perfect solution. There
34750 could still be ambiguity. If more than one function version is suitable
34751 to execute, which one should be dispatched? In the future, allow the user
34752 to specify a dispatch priority next to the version. */
34753 qsort (function_version_info, actual_versions,
34754 sizeof (struct _function_version_info), feature_compare);
34756 for (i = 0; i < actual_versions; ++i)
34757 *empty_bb = add_condition_to_bb (dispatch_decl,
34758 function_version_info[i].version_decl,
34759 function_version_info[i].predicate_chain,
34760 *empty_bb);
34762 /* Dispatch the default version at the end. */
34763 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34764 NULL, *empty_bb);
34766 free (function_version_info);
34767 return 0;
34770 /* Comparator function, used by qsort, to sort the attribute
34771 specification strings of "target". */
34773 static int
34774 attr_strcmp (const void *v1, const void *v2)
34776 const char *c1 = *(char *const*)v1;
34777 const char *c2 = *(char *const*)v2;
34778 return strcmp (c1, c2);
34781 /* ARGLIST is the argument to the target attribute. This function tokenizes
34782 the comma-separated arguments, sorts them, and returns a string that
34783 uniquely identifies the argument list. It also
34784 replaces the non-identifier characters "=,-" with "_". */
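/* Worked example (illustrative): the argument "arch=core2,sse4.2" is
   split into "arch=core2" and "sse4.2", '=' and '-' are rewritten to '_',
   the tokens are sorted, and the result is "arch_core2_sse4.2".  */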
34786 static char *
34787 sorted_attr_string (tree arglist)
34789 tree arg;
34790 size_t str_len_sum = 0;
34791 char **args = NULL;
34792 char *attr_str, *ret_str;
34793 char *attr = NULL;
34794 unsigned int argnum = 1;
34795 unsigned int i;
34797 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34799 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34800 size_t len = strlen (str);
34801 str_len_sum += len + 1;
34802 if (arg != arglist)
34803 argnum++;
34804 for (i = 0; i < strlen (str); i++)
34805 if (str[i] == ',')
34806 argnum++;
34809 attr_str = XNEWVEC (char, str_len_sum);
34810 str_len_sum = 0;
34811 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34813 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34814 size_t len = strlen (str);
34815 memcpy (attr_str + str_len_sum, str, len);
34816 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34817 str_len_sum += len + 1;
34820 /* Replace "=,-" with "_". */
34821 for (i = 0; i < strlen (attr_str); i++)
34822 if (attr_str[i] == '=' || attr_str[i]== '-')
34823 attr_str[i] = '_';
34825 if (argnum == 1)
34826 return attr_str;
34828 args = XNEWVEC (char *, argnum);
34830 i = 0;
34831 attr = strtok (attr_str, ",");
34832 while (attr != NULL)
34834 args[i] = attr;
34835 i++;
34836 attr = strtok (NULL, ",");
34839 qsort (args, argnum, sizeof (char *), attr_strcmp);
34841 ret_str = XNEWVEC (char, str_len_sum);
34842 str_len_sum = 0;
34843 for (i = 0; i < argnum; i++)
34845 size_t len = strlen (args[i]);
34846 memcpy (ret_str + str_len_sum, args[i], len);
34847 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34848 str_len_sum += len + 1;
34851 XDELETEVEC (args);
34852 XDELETEVEC (attr_str);
34853 return ret_str;
34856 /* This function changes the assembler name for functions that are
34857 versions. If DECL is a function version and has a "target"
34858 attribute, it appends the attribute string to its assembler name. */
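/* Illustrative example: a version of foo declared with
   __attribute__ ((target ("avx2"))) gets the assembler name "foo.avx2";
   the "default" version keeps its original assembler name.  */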
34860 static tree
34861 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34863 tree version_attr;
34864 const char *orig_name, *version_string;
34865 char *attr_str, *assembler_name;
34867 if (DECL_DECLARED_INLINE_P (decl)
34868 && lookup_attribute ("gnu_inline",
34869 DECL_ATTRIBUTES (decl)))
34870 error_at (DECL_SOURCE_LOCATION (decl),
34871 "Function versions cannot be marked as gnu_inline,"
34872 " bodies have to be generated");
34874 if (DECL_VIRTUAL_P (decl)
34875 || DECL_VINDEX (decl))
34876 sorry ("Virtual function multiversioning not supported");
34878 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34880 /* target attribute string cannot be NULL. */
34881 gcc_assert (version_attr != NULL_TREE);
34883 orig_name = IDENTIFIER_POINTER (id);
34884 version_string
34885 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34887 if (strcmp (version_string, "default") == 0)
34888 return id;
34890 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34891 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34893 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34895 /* Allow assembler name to be modified if already set. */
34896 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34897 SET_DECL_RTL (decl, NULL);
34899 tree ret = get_identifier (assembler_name);
34900 XDELETEVEC (attr_str);
34901 XDELETEVEC (assembler_name);
34902 return ret;
34905 /* This function returns true if FN1 and FN2 are versions of the same function,
34906 that is, the target strings of the function decls are different. This assumes
34907 that FN1 and FN2 have the same signature. */
34909 static bool
34910 ix86_function_versions (tree fn1, tree fn2)
34912 tree attr1, attr2;
34913 char *target1, *target2;
34914 bool result;
34916 if (TREE_CODE (fn1) != FUNCTION_DECL
34917 || TREE_CODE (fn2) != FUNCTION_DECL)
34918 return false;
34920 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34921 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34923 /* At least one function decl should have the target attribute specified. */
34924 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34925 return false;
34927 /* Diagnose missing target attribute if one of the decls is already
34928 multi-versioned. */
34929 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34931 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34933 if (attr2 != NULL_TREE)
34935 tree tem = fn1;
34936 fn1 = fn2;
34937 fn2 = tem;
34938 attr1 = attr2;
34940 error_at (DECL_SOURCE_LOCATION (fn2),
34941 "missing %<target%> attribute for multi-versioned %D",
34942 fn2);
34943 inform (DECL_SOURCE_LOCATION (fn1),
34944 "previous declaration of %D", fn1);
34945 /* Prevent diagnosing of the same error multiple times. */
34946 DECL_ATTRIBUTES (fn2)
34947 = tree_cons (get_identifier ("target"),
34948 copy_node (TREE_VALUE (attr1)),
34949 DECL_ATTRIBUTES (fn2));
34951 return false;
34954 target1 = sorted_attr_string (TREE_VALUE (attr1));
34955 target2 = sorted_attr_string (TREE_VALUE (attr2));
34957 /* The sorted target strings must be different for fn1 and fn2
34958 to be versions. */
34959 if (strcmp (target1, target2) == 0)
34960 result = false;
34961 else
34962 result = true;
34964 XDELETEVEC (target1);
34965 XDELETEVEC (target2);
34967 return result;
34970 static tree
34971 ix86_mangle_decl_assembler_name (tree decl, tree id)
34973 /* For function version, add the target suffix to the assembler name. */
34974 if (TREE_CODE (decl) == FUNCTION_DECL
34975 && DECL_FUNCTION_VERSIONED (decl))
34976 id = ix86_mangle_function_version_assembler_name (decl, id);
34977 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34978 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34979 #endif
34981 return id;
34984 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34985 is true, append the full path name of the source file. */
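/* Illustrative example: for a decl whose assembler name is "foo",
   make_name (decl, "resolver", false) returns "foo.resolver"; with
   MAKE_UNIQUE set, the name from get_file_function_name is spliced in
   between to avoid link-time collisions.  */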
34987 static char *
34988 make_name (tree decl, const char *suffix, bool make_unique)
34990 char *global_var_name;
34991 int name_len;
34992 const char *name;
34993 const char *unique_name = NULL;
34995 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34997 /* Get a unique name that can be used globally without any chances
34998 of collision at link time. */
34999 if (make_unique)
35000 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35002 name_len = strlen (name) + strlen (suffix) + 2;
35004 if (make_unique)
35005 name_len += strlen (unique_name) + 1;
35006 global_var_name = XNEWVEC (char, name_len);
35008 /* Use '.' to concatenate names as it is demangler friendly. */
35009 if (make_unique)
35010 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35011 suffix);
35012 else
35013 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35015 return global_var_name;
35018 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35020 /* Make a dispatcher declaration for the multi-versioned function DECL.
35021 Calls to DECL function will be replaced with calls to the dispatcher
35022 by the front-end. Return the decl created. */
35024 static tree
35025 make_dispatcher_decl (const tree decl)
35027 tree func_decl;
35028 char *func_name;
35029 tree fn_type, func_type;
35030 bool is_uniq = false;
35032 if (TREE_PUBLIC (decl) == 0)
35033 is_uniq = true;
35035 func_name = make_name (decl, "ifunc", is_uniq);
35037 fn_type = TREE_TYPE (decl);
35038 func_type = build_function_type (TREE_TYPE (fn_type),
35039 TYPE_ARG_TYPES (fn_type));
35041 func_decl = build_fn_decl (func_name, func_type);
35042 XDELETEVEC (func_name);
35043 TREE_USED (func_decl) = 1;
35044 DECL_CONTEXT (func_decl) = NULL_TREE;
35045 DECL_INITIAL (func_decl) = error_mark_node;
35046 DECL_ARTIFICIAL (func_decl) = 1;
35047 /* Mark this func as external, the resolver will flip it again if
35048 it gets generated. */
35049 DECL_EXTERNAL (func_decl) = 1;
35050 /* IFUNCs have to be externally visible. */
35051 TREE_PUBLIC (func_decl) = 1;
35053 return func_decl;
35056 #endif
35058 /* Returns true if DECL is multi-versioned and is the default function,
35059 that is, it is not tagged with a target-specific optimization. */
35061 static bool
35062 is_function_default_version (const tree decl)
35064 if (TREE_CODE (decl) != FUNCTION_DECL
35065 || !DECL_FUNCTION_VERSIONED (decl))
35066 return false;
35067 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35068 gcc_assert (attr);
35069 attr = TREE_VALUE (TREE_VALUE (attr));
35070 return (TREE_CODE (attr) == STRING_CST
35071 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35074 /* Make a dispatcher declaration for the multi-versioned function DECL.
35075 Calls to DECL function will be replaced with calls to the dispatcher
35076 by the front-end. Returns the decl of the dispatcher function. */
35078 static tree
35079 ix86_get_function_versions_dispatcher (void *decl)
35081 tree fn = (tree) decl;
35082 struct cgraph_node *node = NULL;
35083 struct cgraph_node *default_node = NULL;
35084 struct cgraph_function_version_info *node_v = NULL;
35085 struct cgraph_function_version_info *first_v = NULL;
35087 tree dispatch_decl = NULL;
35089 struct cgraph_function_version_info *default_version_info = NULL;
35091 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35093 node = cgraph_node::get (fn);
35094 gcc_assert (node != NULL);
35096 node_v = node->function_version ();
35097 gcc_assert (node_v != NULL);
35099 if (node_v->dispatcher_resolver != NULL)
35100 return node_v->dispatcher_resolver;
35102 /* Find the default version and make it the first node. */
35103 first_v = node_v;
35104 /* Go to the beginning of the chain. */
35105 while (first_v->prev != NULL)
35106 first_v = first_v->prev;
35107 default_version_info = first_v;
35108 while (default_version_info != NULL)
35110 if (is_function_default_version
35111 (default_version_info->this_node->decl))
35112 break;
35113 default_version_info = default_version_info->next;
35116 /* If there is no default node, just return NULL. */
35117 if (default_version_info == NULL)
35118 return NULL;
35120 /* Make default info the first node. */
35121 if (first_v != default_version_info)
35123 default_version_info->prev->next = default_version_info->next;
35124 if (default_version_info->next)
35125 default_version_info->next->prev = default_version_info->prev;
35126 first_v->prev = default_version_info;
35127 default_version_info->next = first_v;
35128 default_version_info->prev = NULL;
35131 default_node = default_version_info->this_node;
35133 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35134 if (targetm.has_ifunc_p ())
35136 struct cgraph_function_version_info *it_v = NULL;
35137 struct cgraph_node *dispatcher_node = NULL;
35138 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35140 /* Right now, the dispatching is done via ifunc. */
35141 dispatch_decl = make_dispatcher_decl (default_node->decl);
35143 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35144 gcc_assert (dispatcher_node != NULL);
35145 dispatcher_node->dispatcher_function = 1;
35146 dispatcher_version_info
35147 = dispatcher_node->insert_new_function_version ();
35148 dispatcher_version_info->next = default_version_info;
35149 dispatcher_node->definition = 1;
35151 /* Set the dispatcher for all the versions. */
35152 it_v = default_version_info;
35153 while (it_v != NULL)
35155 it_v->dispatcher_resolver = dispatch_decl;
35156 it_v = it_v->next;
35159 else
35160 #endif
35162 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35163 "multiversioning needs ifunc which is not supported "
35164 "on this target");
35167 return dispatch_decl;
35170 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35171 it to CHAIN. */
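/* Illustrative example: make_attribute ("ifunc", "foo.resolver", NULL_TREE)
   builds an attribute list equivalent to __attribute__ ((ifunc ("foo.resolver"))),
   which is how make_resolver_func below marks the dispatcher decl.  */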
35173 static tree
35174 make_attribute (const char *name, const char *arg_name, tree chain)
35176 tree attr_name;
35177 tree attr_arg_name;
35178 tree attr_args;
35179 tree attr;
35181 attr_name = get_identifier (name);
35182 attr_arg_name = build_string (strlen (arg_name), arg_name);
35183 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35184 attr = tree_cons (attr_name, attr_args, chain);
35185 return attr;
35188 /* Make the resolver function decl to dispatch the versions of
35189 a multi-versioned function, DEFAULT_DECL. Create an
35190 empty basic block in the resolver and store the pointer in
35191 EMPTY_BB. Return the decl of the resolver function. */
35193 static tree
35194 make_resolver_func (const tree default_decl,
35195 const tree dispatch_decl,
35196 basic_block *empty_bb)
35198 char *resolver_name;
35199 tree decl, type, decl_name, t;
35200 bool is_uniq = false;
35202 /* IFUNCs have to be globally visible. So, if the default_decl is
35203 not, then the name of the IFUNC should be made unique. */
35204 if (TREE_PUBLIC (default_decl) == 0)
35205 is_uniq = true;
35207 /* Append the filename to the resolver function if the versions are
35208 not externally visible. This is because the resolver function has
35209 to be externally visible for the loader to find it. So, appending
35210 the filename will prevent conflicts with a resolver function from
35211 another module which is based on the same version name. */
35212 resolver_name = make_name (default_decl, "resolver", is_uniq);
35214 /* The resolver function should return a (void *). */
35215 type = build_function_type_list (ptr_type_node, NULL_TREE);
35217 decl = build_fn_decl (resolver_name, type);
35218 decl_name = get_identifier (resolver_name);
35219 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35221 DECL_NAME (decl) = decl_name;
35222 TREE_USED (decl) = 1;
35223 DECL_ARTIFICIAL (decl) = 1;
35224 DECL_IGNORED_P (decl) = 0;
35225 /* IFUNC resolvers have to be externally visible. */
35226 TREE_PUBLIC (decl) = 1;
35227 DECL_UNINLINABLE (decl) = 1;
35229 /* Resolver is not external, body is generated. */
35230 DECL_EXTERNAL (decl) = 0;
35231 DECL_EXTERNAL (dispatch_decl) = 0;
35233 DECL_CONTEXT (decl) = NULL_TREE;
35234 DECL_INITIAL (decl) = make_node (BLOCK);
35235 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35237 if (DECL_COMDAT_GROUP (default_decl)
35238 || TREE_PUBLIC (default_decl))
35240 /* In this case, each translation unit with a call to this
35241 versioned function will put out a resolver. Ensure it
35242 is comdat to keep just one copy. */
35243 DECL_COMDAT (decl) = 1;
35244 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35246 /* Build result decl and add to function_decl. */
35247 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35248 DECL_ARTIFICIAL (t) = 1;
35249 DECL_IGNORED_P (t) = 1;
35250 DECL_RESULT (decl) = t;
35252 gimplify_function_tree (decl);
35253 push_cfun (DECL_STRUCT_FUNCTION (decl));
35254 *empty_bb = init_lowered_empty_function (decl, false, 0);
35256 cgraph_node::add_new_function (decl, true);
35257 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35259 pop_cfun ();
35261 gcc_assert (dispatch_decl != NULL);
35262 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35263 DECL_ATTRIBUTES (dispatch_decl)
35264 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35266 /* Create the alias for dispatch to resolver here. */
35267 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35268 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35269 XDELETEVEC (resolver_name);
35270 return decl;
35273 /* Generate the dispatching code body to dispatch multi-versioned function
35274 DECL. The target hook is called to process the "target" attributes and
35275 provide the code to dispatch the right function at run-time. NODE points
35276 to the dispatcher decl whose body will be created. */
35278 static tree
35279 ix86_generate_version_dispatcher_body (void *node_p)
35281 tree resolver_decl;
35282 basic_block empty_bb;
35283 tree default_ver_decl;
35284 struct cgraph_node *versn;
35285 struct cgraph_node *node;
35287 struct cgraph_function_version_info *node_version_info = NULL;
35288 struct cgraph_function_version_info *versn_info = NULL;
35290 node = (cgraph_node *)node_p;
35292 node_version_info = node->function_version ();
35293 gcc_assert (node->dispatcher_function
35294 && node_version_info != NULL);
35296 if (node_version_info->dispatcher_resolver)
35297 return node_version_info->dispatcher_resolver;
35299 /* The first version in the chain corresponds to the default version. */
35300 default_ver_decl = node_version_info->next->this_node->decl;
35302 /* node is going to be an alias, so remove the finalized bit. */
35303 node->definition = false;
35305 resolver_decl = make_resolver_func (default_ver_decl,
35306 node->decl, &empty_bb);
35308 node_version_info->dispatcher_resolver = resolver_decl;
35310 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35312 auto_vec<tree, 2> fn_ver_vec;
35314 for (versn_info = node_version_info->next; versn_info;
35315 versn_info = versn_info->next)
35317 versn = versn_info->this_node;
35318 /* Check for virtual functions here again, as by this time it should
35319 have been determined if this function needs a vtable index or
35320 not. This happens for methods in derived classes that override
35321 virtual methods in base classes but are not explicitly marked as
35322 virtual. */
35323 if (DECL_VINDEX (versn->decl))
35324 sorry ("Virtual function multiversioning not supported");
35326 fn_ver_vec.safe_push (versn->decl);
35329 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35330 cgraph_edge::rebuild_edges ();
35331 pop_cfun ();
35332 return resolver_decl;
35334 /* This builds the processor_model struct type defined in
35335 libgcc/config/i386/cpuinfo.c. */
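/* For reference, the type built below corresponds to a struct of roughly
   this shape (a sketch, not a verbatim copy of the libgcc source):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */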
35337 static tree
35338 build_processor_model_struct (void)
35340 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35341 "__cpu_features"};
35342 tree field = NULL_TREE, field_chain = NULL_TREE;
35343 int i;
35344 tree type = make_node (RECORD_TYPE);
35346 /* The first 3 fields are unsigned int. */
35347 for (i = 0; i < 3; ++i)
35349 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35350 get_identifier (field_name[i]), unsigned_type_node);
35351 if (field_chain != NULL_TREE)
35352 DECL_CHAIN (field) = field_chain;
35353 field_chain = field;
35356 /* The last field is an array of unsigned integers of size one. */
35357 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35358 get_identifier (field_name[3]),
35359 build_array_type (unsigned_type_node,
35360 build_index_type (size_one_node)));
35361 if (field_chain != NULL_TREE)
35362 DECL_CHAIN (field) = field_chain;
35363 field_chain = field;
35365 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35366 return type;
35369 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35371 static tree
35372 make_var_decl (tree type, const char *name)
35374 tree new_decl;
35376 new_decl = build_decl (UNKNOWN_LOCATION,
35377 VAR_DECL,
35378 get_identifier (name),
35379 type);
35381 DECL_EXTERNAL (new_decl) = 1;
35382 TREE_STATIC (new_decl) = 1;
35383 TREE_PUBLIC (new_decl) = 1;
35384 DECL_INITIAL (new_decl) = 0;
35385 DECL_ARTIFICIAL (new_decl) = 0;
35386 DECL_PRESERVE_P (new_decl) = 1;
35388 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35389 assemble_variable (new_decl, 0, 0, 0);
35391 return new_decl;
35394 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35395 into an integer defined in libgcc/config/i386/cpuinfo.c */
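/* Illustrative folding (an approximation of the trees built below):

     __builtin_cpu_is ("amd")        ->  (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_supports ("avx")  ->  (int) (__cpu_model.__cpu_features[0]
                                                & (1 << F_AVX))  */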
35397 static tree
35398 fold_builtin_cpu (tree fndecl, tree *args)
35400 unsigned int i;
35401 enum ix86_builtins fn_code = (enum ix86_builtins)
35402 DECL_FUNCTION_CODE (fndecl);
35403 tree param_string_cst = NULL;
35405 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35406 enum processor_features
35408 F_CMOV = 0,
35409 F_MMX,
35410 F_POPCNT,
35411 F_SSE,
35412 F_SSE2,
35413 F_SSE3,
35414 F_SSSE3,
35415 F_SSE4_1,
35416 F_SSE4_2,
35417 F_AVX,
35418 F_AVX2,
35419 F_SSE4_A,
35420 F_FMA4,
35421 F_XOP,
35422 F_FMA,
35423 F_AVX512F,
35424 F_BMI,
35425 F_BMI2,
35426 F_MAX
35429 /* These are the values for vendor types and cpu types and subtypes
35430 in cpuinfo.c. Cpu types and subtypes should have the corresponding
35431 start value subtracted from them. */
35432 enum processor_model
35434 M_INTEL = 1,
35435 M_AMD,
35436 M_CPU_TYPE_START,
35437 M_INTEL_BONNELL,
35438 M_INTEL_CORE2,
35439 M_INTEL_COREI7,
35440 M_AMDFAM10H,
35441 M_AMDFAM15H,
35442 M_INTEL_SILVERMONT,
35443 M_INTEL_KNL,
35444 M_AMD_BTVER1,
35445 M_AMD_BTVER2,
35446 M_CPU_SUBTYPE_START,
35447 M_INTEL_COREI7_NEHALEM,
35448 M_INTEL_COREI7_WESTMERE,
35449 M_INTEL_COREI7_SANDYBRIDGE,
35450 M_AMDFAM10H_BARCELONA,
35451 M_AMDFAM10H_SHANGHAI,
35452 M_AMDFAM10H_ISTANBUL,
35453 M_AMDFAM15H_BDVER1,
35454 M_AMDFAM15H_BDVER2,
35455 M_AMDFAM15H_BDVER3,
35456 M_AMDFAM15H_BDVER4,
35457 M_INTEL_COREI7_IVYBRIDGE,
35458 M_INTEL_COREI7_HASWELL,
35459 M_INTEL_COREI7_BROADWELL
35462 static struct _arch_names_table
35464 const char *const name;
35465 const enum processor_model model;
35467 const arch_names_table[] =
35469 {"amd", M_AMD},
35470 {"intel", M_INTEL},
35471 {"atom", M_INTEL_BONNELL},
35472 {"slm", M_INTEL_SILVERMONT},
35473 {"core2", M_INTEL_CORE2},
35474 {"corei7", M_INTEL_COREI7},
35475 {"nehalem", M_INTEL_COREI7_NEHALEM},
35476 {"westmere", M_INTEL_COREI7_WESTMERE},
35477 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35478 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35479 {"haswell", M_INTEL_COREI7_HASWELL},
35480 {"broadwell", M_INTEL_COREI7_BROADWELL},
35481 {"bonnell", M_INTEL_BONNELL},
35482 {"silvermont", M_INTEL_SILVERMONT},
35483 {"knl", M_INTEL_KNL},
35484 {"amdfam10h", M_AMDFAM10H},
35485 {"barcelona", M_AMDFAM10H_BARCELONA},
35486 {"shanghai", M_AMDFAM10H_SHANGHAI},
35487 {"istanbul", M_AMDFAM10H_ISTANBUL},
35488 {"btver1", M_AMD_BTVER1},
35489 {"amdfam15h", M_AMDFAM15H},
35490 {"bdver1", M_AMDFAM15H_BDVER1},
35491 {"bdver2", M_AMDFAM15H_BDVER2},
35492 {"bdver3", M_AMDFAM15H_BDVER3},
35493 {"bdver4", M_AMDFAM15H_BDVER4},
35494 {"btver2", M_AMD_BTVER2},
35497 static struct _isa_names_table
35499 const char *const name;
35500 const enum processor_features feature;
35502 const isa_names_table[] =
35504 {"cmov", F_CMOV},
35505 {"mmx", F_MMX},
35506 {"popcnt", F_POPCNT},
35507 {"sse", F_SSE},
35508 {"sse2", F_SSE2},
35509 {"sse3", F_SSE3},
35510 {"ssse3", F_SSSE3},
35511 {"sse4a", F_SSE4_A},
35512 {"sse4.1", F_SSE4_1},
35513 {"sse4.2", F_SSE4_2},
35514 {"avx", F_AVX},
35515 {"fma4", F_FMA4},
35516 {"xop", F_XOP},
35517 {"fma", F_FMA},
35518 {"avx2", F_AVX2},
35519 {"avx512f",F_AVX512F},
35520 {"bmi", F_BMI},
35521 {"bmi2", F_BMI2}
35524 tree __processor_model_type = build_processor_model_struct ();
35525 tree __cpu_model_var = make_var_decl (__processor_model_type,
35526 "__cpu_model");
35529 varpool_node::add (__cpu_model_var);
35531 gcc_assert ((args != NULL) && (*args != NULL));
35533 param_string_cst = *args;
35534 while (param_string_cst
35535 && TREE_CODE (param_string_cst) != STRING_CST)
35537 /* *args must be an expr that can contain other EXPRs leading to a
35538 STRING_CST. */
35539 if (!EXPR_P (param_string_cst))
35541 error ("Parameter to builtin must be a string constant or literal");
35542 return integer_zero_node;
35544 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35547 gcc_assert (param_string_cst);
35549 if (fn_code == IX86_BUILTIN_CPU_IS)
35551 tree ref;
35552 tree field;
35553 tree final;
35555 unsigned int field_val = 0;
35556 unsigned int NUM_ARCH_NAMES
35557 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35559 for (i = 0; i < NUM_ARCH_NAMES; i++)
35560 if (strcmp (arch_names_table[i].name,
35561 TREE_STRING_POINTER (param_string_cst)) == 0)
35562 break;
35564 if (i == NUM_ARCH_NAMES)
35566 error ("Parameter to builtin not valid: %s",
35567 TREE_STRING_POINTER (param_string_cst));
35568 return integer_zero_node;
35571 field = TYPE_FIELDS (__processor_model_type);
35572 field_val = arch_names_table[i].model;
35574 /* CPU types are stored in the next field. */
35575 if (field_val > M_CPU_TYPE_START
35576 && field_val < M_CPU_SUBTYPE_START)
35578 field = DECL_CHAIN (field);
35579 field_val -= M_CPU_TYPE_START;
35582 /* CPU subtypes are stored in the next field. */
35583 if (field_val > M_CPU_SUBTYPE_START)
35585 field = DECL_CHAIN (DECL_CHAIN (field));
35586 field_val -= M_CPU_SUBTYPE_START;
35589 /* Get the appropriate field in __cpu_model. */
35590 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35591 field, NULL_TREE);
35593 /* Check the value. */
35594 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35595 build_int_cstu (unsigned_type_node, field_val));
35596 return build1 (CONVERT_EXPR, integer_type_node, final);
35598 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35600 tree ref;
35601 tree array_elt;
35602 tree field;
35603 tree final;
35605 unsigned int field_val = 0;
35606 unsigned int NUM_ISA_NAMES
35607 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35609 for (i = 0; i < NUM_ISA_NAMES; i++)
35610 if (strcmp (isa_names_table[i].name,
35611 TREE_STRING_POINTER (param_string_cst)) == 0)
35612 break;
35614 if (i == NUM_ISA_NAMES)
35616 error ("Parameter to builtin not valid: %s",
35617 TREE_STRING_POINTER (param_string_cst));
35618 return integer_zero_node;
35621 field = TYPE_FIELDS (__processor_model_type);
35622 /* Get the last field, which is __cpu_features. */
35623 while (DECL_CHAIN (field))
35624 field = DECL_CHAIN (field);
35626 /* Get the appropriate field: __cpu_model.__cpu_features */
35627 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35628 field, NULL_TREE);
35630 /* Access the 0th element of __cpu_features array. */
35631 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35632 integer_zero_node, NULL_TREE, NULL_TREE);
35634 field_val = (1 << isa_names_table[i].feature);
35635 /* Return __cpu_model.__cpu_features[0] & field_val */
35636 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35637 build_int_cstu (unsigned_type_node, field_val));
35638 return build1 (CONVERT_EXPR, integer_type_node, final);
35640 gcc_unreachable ();
35643 static tree
35644 ix86_fold_builtin (tree fndecl, int n_args,
35645 tree *args, bool ignore ATTRIBUTE_UNUSED)
35647 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35649 enum ix86_builtins fn_code = (enum ix86_builtins)
35650 DECL_FUNCTION_CODE (fndecl);
35651 if (fn_code == IX86_BUILTIN_CPU_IS
35652 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35654 gcc_assert (n_args == 1);
35655 return fold_builtin_cpu (fndecl, args);
35659 #ifdef SUBTARGET_FOLD_BUILTIN
35660 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35661 #endif
35663 return NULL_TREE;
35666 /* Make builtins to detect cpu type and features supported. NAME is
35667 the builtin name, CODE is the builtin code, and FTYPE is the function
35668 type of the builtin. */
35670 static void
35671 make_cpu_type_builtin (const char* name, int code,
35672 enum ix86_builtin_func_type ftype, bool is_const)
35674 tree decl;
35675 tree type;
35677 type = ix86_get_builtin_func_type (ftype);
35678 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35679 NULL, NULL_TREE);
35680 gcc_assert (decl != NULL_TREE);
35681 ix86_builtins[(int) code] = decl;
35682 TREE_READONLY (decl) = is_const;
35685 /* Make builtins to get CPU type and features supported. The created
35686 builtins are:
35688 __builtin_cpu_init (), to detect cpu type and features,
35689 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35690 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
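/* Typical user-level usage of these builtins (illustrative only; the
   helper names below are hypothetical):

     if (__builtin_cpu_supports ("sse4.2"))
       crc = crc32_sse42 (buf, len);
     else
       crc = crc32_generic (buf, len);  */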
35693 static void
35694 ix86_init_platform_type_builtins (void)
35696 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35697 INT_FTYPE_VOID, false);
35698 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35699 INT_FTYPE_PCCHAR, true);
35700 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35701 INT_FTYPE_PCCHAR, true);
35704 /* Internal method for ix86_init_builtins. */
35706 static void
35707 ix86_init_builtins_va_builtins_abi (void)
35709 tree ms_va_ref, sysv_va_ref;
35710 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35711 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35712 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35713 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35715 if (!TARGET_64BIT)
35716 return;
35717 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35718 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35719 ms_va_ref = build_reference_type (ms_va_list_type_node);
35720 sysv_va_ref =
35721 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35723 fnvoid_va_end_ms =
35724 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35725 fnvoid_va_start_ms =
35726 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35727 fnvoid_va_end_sysv =
35728 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35729 fnvoid_va_start_sysv =
35730 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35731 NULL_TREE);
35732 fnvoid_va_copy_ms =
35733 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35734 NULL_TREE);
35735 fnvoid_va_copy_sysv =
35736 build_function_type_list (void_type_node, sysv_va_ref,
35737 sysv_va_ref, NULL_TREE);
35739 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35740 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35741 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35742 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35743 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35744 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35745 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35746 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35747 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35748 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35749 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35750 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35753 static void
35754 ix86_init_builtin_types (void)
35756 tree float128_type_node, float80_type_node;
35758 /* The __float80 type. */
35759 float80_type_node = long_double_type_node;
35760 if (TYPE_MODE (float80_type_node) != XFmode)
35762 /* The __float80 type. */
35763 float80_type_node = make_node (REAL_TYPE);
35765 TYPE_PRECISION (float80_type_node) = 80;
35766 layout_type (float80_type_node);
35768 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35770 /* The __float128 type. */
35771 float128_type_node = make_node (REAL_TYPE);
35772 TYPE_PRECISION (float128_type_node) = 128;
35773 layout_type (float128_type_node);
35774 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35776 /* This macro is built by i386-builtin-types.awk. */
35777 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35780 static void
35781 ix86_init_builtins (void)
35783 tree t;
35785 ix86_init_builtin_types ();
35787 /* Builtins to get CPU type and features. */
35788 ix86_init_platform_type_builtins ();
35790 /* TFmode support builtins. */
35791 def_builtin_const (0, "__builtin_infq",
35792 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35793 def_builtin_const (0, "__builtin_huge_valq",
35794 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35796 /* We will expand them to a normal call if SSE isn't available since
35797 they are used by libgcc. */
35798 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35799 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35800 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35801 TREE_READONLY (t) = 1;
35802 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35804 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35805 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35806 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35807 TREE_READONLY (t) = 1;
35808 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35810 ix86_init_tm_builtins ();
35811 ix86_init_mmx_sse_builtins ();
35812 ix86_init_mpx_builtins ();
35814 if (TARGET_LP64)
35815 ix86_init_builtins_va_builtins_abi ();
35817 #ifdef SUBTARGET_INIT_BUILTINS
35818 SUBTARGET_INIT_BUILTINS;
35819 #endif
35822 /* Return the ix86 builtin for CODE. */
35824 static tree
35825 ix86_builtin_decl (unsigned code, bool)
35827 if (code >= IX86_BUILTIN_MAX)
35828 return error_mark_node;
35830 return ix86_builtins[code];
35833 /* Errors in the source file can cause expand_expr to return const0_rtx
35834 where we expect a vector. To avoid crashing, use one of the vector
35835 clear instructions. */
35836 static rtx
35837 safe_vector_operand (rtx x, machine_mode mode)
35839 if (x == const0_rtx)
35840 x = CONST0_RTX (mode);
35841 return x;
35844 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35846 static rtx
35847 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35849 rtx pat;
35850 tree arg0 = CALL_EXPR_ARG (exp, 0);
35851 tree arg1 = CALL_EXPR_ARG (exp, 1);
35852 rtx op0 = expand_normal (arg0);
35853 rtx op1 = expand_normal (arg1);
35854 machine_mode tmode = insn_data[icode].operand[0].mode;
35855 machine_mode mode0 = insn_data[icode].operand[1].mode;
35856 machine_mode mode1 = insn_data[icode].operand[2].mode;
35858 if (VECTOR_MODE_P (mode0))
35859 op0 = safe_vector_operand (op0, mode0);
35860 if (VECTOR_MODE_P (mode1))
35861 op1 = safe_vector_operand (op1, mode1);
35863 if (optimize || !target
35864 || GET_MODE (target) != tmode
35865 || !insn_data[icode].operand[0].predicate (target, tmode))
35866 target = gen_reg_rtx (tmode);
35868 if (GET_MODE (op1) == SImode && mode1 == TImode)
35870 rtx x = gen_reg_rtx (V4SImode);
35871 emit_insn (gen_sse2_loadd (x, op1));
35872 op1 = gen_lowpart (TImode, x);
35875 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35876 op0 = copy_to_mode_reg (mode0, op0);
35877 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35878 op1 = copy_to_mode_reg (mode1, op1);
35880 pat = GEN_FCN (icode) (target, op0, op1);
35881 if (! pat)
35882 return 0;
35884 emit_insn (pat);
35886 return target;
35889 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35891 static rtx
35892 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35893 enum ix86_builtin_func_type m_type,
35894 enum rtx_code sub_code)
35896 rtx pat;
35897 int i;
35898 int nargs;
35899 bool comparison_p = false;
35900 bool tf_p = false;
35901 bool last_arg_constant = false;
35902 int num_memory = 0;
35903 struct {
35904 rtx op;
35905 machine_mode mode;
35906 } args[4];
35908 machine_mode tmode = insn_data[icode].operand[0].mode;
35910 switch (m_type)
35912 case MULTI_ARG_4_DF2_DI_I:
35913 case MULTI_ARG_4_DF2_DI_I1:
35914 case MULTI_ARG_4_SF2_SI_I:
35915 case MULTI_ARG_4_SF2_SI_I1:
35916 nargs = 4;
35917 last_arg_constant = true;
35918 break;
35920 case MULTI_ARG_3_SF:
35921 case MULTI_ARG_3_DF:
35922 case MULTI_ARG_3_SF2:
35923 case MULTI_ARG_3_DF2:
35924 case MULTI_ARG_3_DI:
35925 case MULTI_ARG_3_SI:
35926 case MULTI_ARG_3_SI_DI:
35927 case MULTI_ARG_3_HI:
35928 case MULTI_ARG_3_HI_SI:
35929 case MULTI_ARG_3_QI:
35930 case MULTI_ARG_3_DI2:
35931 case MULTI_ARG_3_SI2:
35932 case MULTI_ARG_3_HI2:
35933 case MULTI_ARG_3_QI2:
35934 nargs = 3;
35935 break;
35937 case MULTI_ARG_2_SF:
35938 case MULTI_ARG_2_DF:
35939 case MULTI_ARG_2_DI:
35940 case MULTI_ARG_2_SI:
35941 case MULTI_ARG_2_HI:
35942 case MULTI_ARG_2_QI:
35943 nargs = 2;
35944 break;
35946 case MULTI_ARG_2_DI_IMM:
35947 case MULTI_ARG_2_SI_IMM:
35948 case MULTI_ARG_2_HI_IMM:
35949 case MULTI_ARG_2_QI_IMM:
35950 nargs = 2;
35951 last_arg_constant = true;
35952 break;
35954 case MULTI_ARG_1_SF:
35955 case MULTI_ARG_1_DF:
35956 case MULTI_ARG_1_SF2:
35957 case MULTI_ARG_1_DF2:
35958 case MULTI_ARG_1_DI:
35959 case MULTI_ARG_1_SI:
35960 case MULTI_ARG_1_HI:
35961 case MULTI_ARG_1_QI:
35962 case MULTI_ARG_1_SI_DI:
35963 case MULTI_ARG_1_HI_DI:
35964 case MULTI_ARG_1_HI_SI:
35965 case MULTI_ARG_1_QI_DI:
35966 case MULTI_ARG_1_QI_SI:
35967 case MULTI_ARG_1_QI_HI:
35968 nargs = 1;
35969 break;
35971 case MULTI_ARG_2_DI_CMP:
35972 case MULTI_ARG_2_SI_CMP:
35973 case MULTI_ARG_2_HI_CMP:
35974 case MULTI_ARG_2_QI_CMP:
35975 nargs = 2;
35976 comparison_p = true;
35977 break;
35979 case MULTI_ARG_2_SF_TF:
35980 case MULTI_ARG_2_DF_TF:
35981 case MULTI_ARG_2_DI_TF:
35982 case MULTI_ARG_2_SI_TF:
35983 case MULTI_ARG_2_HI_TF:
35984 case MULTI_ARG_2_QI_TF:
35985 nargs = 2;
35986 tf_p = true;
35987 break;
35989 default:
35990 gcc_unreachable ();
35993 if (optimize || !target
35994 || GET_MODE (target) != tmode
35995 || !insn_data[icode].operand[0].predicate (target, tmode))
35996 target = gen_reg_rtx (tmode);
35998 gcc_assert (nargs <= 4);
36000 for (i = 0; i < nargs; i++)
36002 tree arg = CALL_EXPR_ARG (exp, i);
36003 rtx op = expand_normal (arg);
36004 int adjust = (comparison_p) ? 1 : 0;
36005 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36007 if (last_arg_constant && i == nargs - 1)
36009 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36011 enum insn_code new_icode = icode;
36012 switch (icode)
36014 case CODE_FOR_xop_vpermil2v2df3:
36015 case CODE_FOR_xop_vpermil2v4sf3:
36016 case CODE_FOR_xop_vpermil2v4df3:
36017 case CODE_FOR_xop_vpermil2v8sf3:
36018 error ("the last argument must be a 2-bit immediate");
36019 return gen_reg_rtx (tmode);
36020 case CODE_FOR_xop_rotlv2di3:
36021 new_icode = CODE_FOR_rotlv2di3;
36022 goto xop_rotl;
36023 case CODE_FOR_xop_rotlv4si3:
36024 new_icode = CODE_FOR_rotlv4si3;
36025 goto xop_rotl;
36026 case CODE_FOR_xop_rotlv8hi3:
36027 new_icode = CODE_FOR_rotlv8hi3;
36028 goto xop_rotl;
36029 case CODE_FOR_xop_rotlv16qi3:
36030 new_icode = CODE_FOR_rotlv16qi3;
36031 xop_rotl:
36032 if (CONST_INT_P (op))
36034 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36035 op = GEN_INT (INTVAL (op) & mask);
36036 gcc_checking_assert
36037 (insn_data[icode].operand[i + 1].predicate (op, mode));
36039 else
36041 gcc_checking_assert
36042 (nargs == 2
36043 && insn_data[new_icode].operand[0].mode == tmode
36044 && insn_data[new_icode].operand[1].mode == tmode
36045 && insn_data[new_icode].operand[2].mode == mode
36046 && insn_data[new_icode].operand[0].predicate
36047 == insn_data[icode].operand[0].predicate
36048 && insn_data[new_icode].operand[1].predicate
36049 == insn_data[icode].operand[1].predicate);
36050 icode = new_icode;
36051 goto non_constant;
36053 break;
36054 default:
36055 gcc_unreachable ();
36059 else
36061 non_constant:
36062 if (VECTOR_MODE_P (mode))
36063 op = safe_vector_operand (op, mode);
36065 /* If we aren't optimizing, only allow one memory operand to be
36066 generated. */
36067 if (memory_operand (op, mode))
36068 num_memory++;
36070 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36072 if (optimize
36073 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36074 || num_memory > 1)
36075 op = force_reg (mode, op);
36078 args[i].op = op;
36079 args[i].mode = mode;
36082 switch (nargs)
36084 case 1:
36085 pat = GEN_FCN (icode) (target, args[0].op);
36086 break;
36088 case 2:
36089 if (tf_p)
36090 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36091 GEN_INT ((int)sub_code));
36092 else if (! comparison_p)
36093 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36094 else
36096 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36097 args[0].op,
36098 args[1].op);
36100 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36102 break;
36104 case 3:
36105 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36106 break;
36108 case 4:
36109 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36110 break;
36112 default:
36113 gcc_unreachable ();
36116 if (! pat)
36117 return 0;
36119 emit_insn (pat);
36120 return target;
36123 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36124 insns with vec_merge. */
36126 static rtx
36127 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36128 rtx target)
36130 rtx pat;
36131 tree arg0 = CALL_EXPR_ARG (exp, 0);
36132 rtx op1, op0 = expand_normal (arg0);
36133 machine_mode tmode = insn_data[icode].operand[0].mode;
36134 machine_mode mode0 = insn_data[icode].operand[1].mode;
36136 if (optimize || !target
36137 || GET_MODE (target) != tmode
36138 || !insn_data[icode].operand[0].predicate (target, tmode))
36139 target = gen_reg_rtx (tmode);
36141 if (VECTOR_MODE_P (mode0))
36142 op0 = safe_vector_operand (op0, mode0);
36144 if ((optimize && !register_operand (op0, mode0))
36145 || !insn_data[icode].operand[1].predicate (op0, mode0))
36146 op0 = copy_to_mode_reg (mode0, op0);
36148 op1 = op0;
36149 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36150 op1 = copy_to_mode_reg (mode0, op1);
36152 pat = GEN_FCN (icode) (target, op0, op1);
36153 if (! pat)
36154 return 0;
36155 emit_insn (pat);
36156 return target;
36159 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36161 static rtx
36162 ix86_expand_sse_compare (const struct builtin_description *d,
36163 tree exp, rtx target, bool swap)
36165 rtx pat;
36166 tree arg0 = CALL_EXPR_ARG (exp, 0);
36167 tree arg1 = CALL_EXPR_ARG (exp, 1);
36168 rtx op0 = expand_normal (arg0);
36169 rtx op1 = expand_normal (arg1);
36170 rtx op2;
36171 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36172 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36173 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36174 enum rtx_code comparison = d->comparison;
36176 if (VECTOR_MODE_P (mode0))
36177 op0 = safe_vector_operand (op0, mode0);
36178 if (VECTOR_MODE_P (mode1))
36179 op1 = safe_vector_operand (op1, mode1);
36181 /* Swap operands if we have a comparison that isn't available in
36182 hardware. */
36183 if (swap)
36184 std::swap (op0, op1);
36186 if (optimize || !target
36187 || GET_MODE (target) != tmode
36188 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36189 target = gen_reg_rtx (tmode);
36191 if ((optimize && !register_operand (op0, mode0))
36192 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36193 op0 = copy_to_mode_reg (mode0, op0);
36194 if ((optimize && !register_operand (op1, mode1))
36195 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36196 op1 = copy_to_mode_reg (mode1, op1);
36198 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36199 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36200 if (! pat)
36201 return 0;
36202 emit_insn (pat);
36203 return target;
36206 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36208 static rtx
36209 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36210 rtx target)
36212 rtx pat;
36213 tree arg0 = CALL_EXPR_ARG (exp, 0);
36214 tree arg1 = CALL_EXPR_ARG (exp, 1);
36215 rtx op0 = expand_normal (arg0);
36216 rtx op1 = expand_normal (arg1);
36217 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36218 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36219 enum rtx_code comparison = d->comparison;
36221 if (VECTOR_MODE_P (mode0))
36222 op0 = safe_vector_operand (op0, mode0);
36223 if (VECTOR_MODE_P (mode1))
36224 op1 = safe_vector_operand (op1, mode1);
36226 /* Swap operands if we have a comparison that isn't available in
36227 hardware. */
36228 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36229 std::swap (op0, op1);
36231 target = gen_reg_rtx (SImode);
36232 emit_move_insn (target, const0_rtx);
36233 target = gen_rtx_SUBREG (QImode, target, 0);
36235 if ((optimize && !register_operand (op0, mode0))
36236 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36237 op0 = copy_to_mode_reg (mode0, op0);
36238 if ((optimize && !register_operand (op1, mode1))
36239 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36240 op1 = copy_to_mode_reg (mode1, op1);
36242 pat = GEN_FCN (d->icode) (op0, op1);
36243 if (! pat)
36244 return 0;
36245 emit_insn (pat);
36246 emit_insn (gen_rtx_SET (VOIDmode,
36247 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36248 gen_rtx_fmt_ee (comparison, QImode,
36249 SET_DEST (pat),
36250 const0_rtx)));
36252 return SUBREG_REG (target);
36255 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36257 static rtx
36258 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36259 rtx target)
36261 rtx pat;
36262 tree arg0 = CALL_EXPR_ARG (exp, 0);
36263 rtx op1, op0 = expand_normal (arg0);
36264 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36265 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36267 if (optimize || target == 0
36268 || GET_MODE (target) != tmode
36269 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36270 target = gen_reg_rtx (tmode);
36272 if (VECTOR_MODE_P (mode0))
36273 op0 = safe_vector_operand (op0, mode0);
36275 if ((optimize && !register_operand (op0, mode0))
36276 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36277 op0 = copy_to_mode_reg (mode0, op0);
36279 op1 = GEN_INT (d->comparison);
36281 pat = GEN_FCN (d->icode) (target, op0, op1);
36282 if (! pat)
36283 return 0;
36284 emit_insn (pat);
36285 return target;
36288 static rtx
36289 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36290 tree exp, rtx target)
36292 rtx pat;
36293 tree arg0 = CALL_EXPR_ARG (exp, 0);
36294 tree arg1 = CALL_EXPR_ARG (exp, 1);
36295 rtx op0 = expand_normal (arg0);
36296 rtx op1 = expand_normal (arg1);
36297 rtx op2;
36298 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36299 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36300 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36302 if (optimize || target == 0
36303 || GET_MODE (target) != tmode
36304 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36305 target = gen_reg_rtx (tmode);
36307 op0 = safe_vector_operand (op0, mode0);
36308 op1 = safe_vector_operand (op1, mode1);
36310 if ((optimize && !register_operand (op0, mode0))
36311 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36312 op0 = copy_to_mode_reg (mode0, op0);
36313 if ((optimize && !register_operand (op1, mode1))
36314 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36315 op1 = copy_to_mode_reg (mode1, op1);
36317 op2 = GEN_INT (d->comparison);
36319 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36320 if (! pat)
36321 return 0;
36322 emit_insn (pat);
36323 return target;
36326 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36328 static rtx
36329 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36330 rtx target)
36332 rtx pat;
36333 tree arg0 = CALL_EXPR_ARG (exp, 0);
36334 tree arg1 = CALL_EXPR_ARG (exp, 1);
36335 rtx op0 = expand_normal (arg0);
36336 rtx op1 = expand_normal (arg1);
36337 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36338 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36339 enum rtx_code comparison = d->comparison;
36341 if (VECTOR_MODE_P (mode0))
36342 op0 = safe_vector_operand (op0, mode0);
36343 if (VECTOR_MODE_P (mode1))
36344 op1 = safe_vector_operand (op1, mode1);
36346 target = gen_reg_rtx (SImode);
36347 emit_move_insn (target, const0_rtx);
36348 target = gen_rtx_SUBREG (QImode, target, 0);
36350 if ((optimize && !register_operand (op0, mode0))
36351 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36352 op0 = copy_to_mode_reg (mode0, op0);
36353 if ((optimize && !register_operand (op1, mode1))
36354 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36355 op1 = copy_to_mode_reg (mode1, op1);
36357 pat = GEN_FCN (d->icode) (op0, op1);
36358 if (! pat)
36359 return 0;
36360 emit_insn (pat);
36361 emit_insn (gen_rtx_SET (VOIDmode,
36362 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36363 gen_rtx_fmt_ee (comparison, QImode,
36364 SET_DEST (pat),
36365 const0_rtx)));
36367 return SUBREG_REG (target);
36370 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36372 static rtx
36373 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36374 tree exp, rtx target)
36376 rtx pat;
36377 tree arg0 = CALL_EXPR_ARG (exp, 0);
36378 tree arg1 = CALL_EXPR_ARG (exp, 1);
36379 tree arg2 = CALL_EXPR_ARG (exp, 2);
36380 tree arg3 = CALL_EXPR_ARG (exp, 3);
36381 tree arg4 = CALL_EXPR_ARG (exp, 4);
36382 rtx scratch0, scratch1;
36383 rtx op0 = expand_normal (arg0);
36384 rtx op1 = expand_normal (arg1);
36385 rtx op2 = expand_normal (arg2);
36386 rtx op3 = expand_normal (arg3);
36387 rtx op4 = expand_normal (arg4);
36388 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36390 tmode0 = insn_data[d->icode].operand[0].mode;
36391 tmode1 = insn_data[d->icode].operand[1].mode;
36392 modev2 = insn_data[d->icode].operand[2].mode;
36393 modei3 = insn_data[d->icode].operand[3].mode;
36394 modev4 = insn_data[d->icode].operand[4].mode;
36395 modei5 = insn_data[d->icode].operand[5].mode;
36396 modeimm = insn_data[d->icode].operand[6].mode;
36398 if (VECTOR_MODE_P (modev2))
36399 op0 = safe_vector_operand (op0, modev2);
36400 if (VECTOR_MODE_P (modev4))
36401 op2 = safe_vector_operand (op2, modev4);
36403 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36404 op0 = copy_to_mode_reg (modev2, op0);
36405 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36406 op1 = copy_to_mode_reg (modei3, op1);
36407 if ((optimize && !register_operand (op2, modev4))
36408 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36409 op2 = copy_to_mode_reg (modev4, op2);
36410 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36411 op3 = copy_to_mode_reg (modei5, op3);
36413 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36415 error ("the fifth argument must be an 8-bit immediate");
36416 return const0_rtx;
36419 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36421 if (optimize || !target
36422 || GET_MODE (target) != tmode0
36423 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36424 target = gen_reg_rtx (tmode0);
36426 scratch1 = gen_reg_rtx (tmode1);
36428 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36430 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36432 if (optimize || !target
36433 || GET_MODE (target) != tmode1
36434 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36435 target = gen_reg_rtx (tmode1);
36437 scratch0 = gen_reg_rtx (tmode0);
36439 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36441 else
36443 gcc_assert (d->flag);
36445 scratch0 = gen_reg_rtx (tmode0);
36446 scratch1 = gen_reg_rtx (tmode1);
36448 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36451 if (! pat)
36452 return 0;
36454 emit_insn (pat);
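/* When d->flag is nonzero the builtin returns a condition bit instead
   of the index/mask result: d->flag is the CC mode in which to read the
   flags register set by the compare, and the EQ test below extracts
   that condition as a 0/1 value.  */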
36456 if (d->flag)
36458 target = gen_reg_rtx (SImode);
36459 emit_move_insn (target, const0_rtx);
36460 target = gen_rtx_SUBREG (QImode, target, 0);
36462 emit_insn
36463 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36464 gen_rtx_fmt_ee (EQ, QImode,
36465 gen_rtx_REG ((machine_mode) d->flag,
36466 FLAGS_REG),
36467 const0_rtx)));
36468 return SUBREG_REG (target);
36470 else
36471 return target;
36475 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36477 static rtx
36478 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36479 tree exp, rtx target)
36481 rtx pat;
36482 tree arg0 = CALL_EXPR_ARG (exp, 0);
36483 tree arg1 = CALL_EXPR_ARG (exp, 1);
36484 tree arg2 = CALL_EXPR_ARG (exp, 2);
36485 rtx scratch0, scratch1;
36486 rtx op0 = expand_normal (arg0);
36487 rtx op1 = expand_normal (arg1);
36488 rtx op2 = expand_normal (arg2);
36489 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36491 tmode0 = insn_data[d->icode].operand[0].mode;
36492 tmode1 = insn_data[d->icode].operand[1].mode;
36493 modev2 = insn_data[d->icode].operand[2].mode;
36494 modev3 = insn_data[d->icode].operand[3].mode;
36495 modeimm = insn_data[d->icode].operand[4].mode;
36497 if (VECTOR_MODE_P (modev2))
36498 op0 = safe_vector_operand (op0, modev2);
36499 if (VECTOR_MODE_P (modev3))
36500 op1 = safe_vector_operand (op1, modev3);
36502 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36503 op0 = copy_to_mode_reg (modev2, op0);
36504 if ((optimize && !register_operand (op1, modev3))
36505 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36506 op1 = copy_to_mode_reg (modev3, op1);
36508 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36510 error ("the third argument must be an 8-bit immediate");
36511 return const0_rtx;
36514 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36516 if (optimize || !target
36517 || GET_MODE (target) != tmode0
36518 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36519 target = gen_reg_rtx (tmode0);
36521 scratch1 = gen_reg_rtx (tmode1);
36523 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36525 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36527 if (optimize || !target
36528 || GET_MODE (target) != tmode1
36529 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36530 target = gen_reg_rtx (tmode1);
36532 scratch0 = gen_reg_rtx (tmode0);
36534 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36536 else
36538 gcc_assert (d->flag);
36540 scratch0 = gen_reg_rtx (tmode0);
36541 scratch1 = gen_reg_rtx (tmode1);
36543 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36546 if (! pat)
36547 return 0;
36549 emit_insn (pat);
36551 if (d->flag)
36553 target = gen_reg_rtx (SImode);
36554 emit_move_insn (target, const0_rtx);
36555 target = gen_rtx_SUBREG (QImode, target, 0);
36557 emit_insn
36558 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36559 gen_rtx_fmt_ee (EQ, QImode,
36560 gen_rtx_REG ((machine_mode) d->flag,
36561 FLAGS_REG),
36562 const0_rtx)));
36563 return SUBREG_REG (target);
36565 else
36566 return target;
36569 /* Subroutine of ix86_expand_builtin to take care of insns with
36570 variable number of operands. */
36572 static rtx
36573 ix86_expand_args_builtin (const struct builtin_description *d,
36574 tree exp, rtx target)
36576 rtx pat, real_target;
36577 unsigned int i, nargs;
36578 unsigned int nargs_constant = 0;
36579 unsigned int mask_pos = 0;
36580 int num_memory = 0;
36581 struct
36583 rtx op;
36584 machine_mode mode;
36585 } args[6];
36586 bool last_arg_count = false;
36587 enum insn_code icode = d->icode;
36588 const struct insn_data_d *insn_p = &insn_data[icode];
36589 machine_mode tmode = insn_p->operand[0].mode;
36590 machine_mode rmode = VOIDmode;
36591 bool swap = false;
36592 enum rtx_code comparison = d->comparison;
36594 switch ((enum ix86_builtin_func_type) d->flag)
36596 case V2DF_FTYPE_V2DF_ROUND:
36597 case V4DF_FTYPE_V4DF_ROUND:
36598 case V4SF_FTYPE_V4SF_ROUND:
36599 case V8SF_FTYPE_V8SF_ROUND:
36600 case V4SI_FTYPE_V4SF_ROUND:
36601 case V8SI_FTYPE_V8SF_ROUND:
36602 return ix86_expand_sse_round (d, exp, target);
36603 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36604 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36605 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36606 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36607 case INT_FTYPE_V8SF_V8SF_PTEST:
36608 case INT_FTYPE_V4DI_V4DI_PTEST:
36609 case INT_FTYPE_V4DF_V4DF_PTEST:
36610 case INT_FTYPE_V4SF_V4SF_PTEST:
36611 case INT_FTYPE_V2DI_V2DI_PTEST:
36612 case INT_FTYPE_V2DF_V2DF_PTEST:
36613 return ix86_expand_sse_ptest (d, exp, target);
36614 case FLOAT128_FTYPE_FLOAT128:
36615 case FLOAT_FTYPE_FLOAT:
36616 case INT_FTYPE_INT:
36617 case UINT64_FTYPE_INT:
36618 case UINT16_FTYPE_UINT16:
36619 case INT64_FTYPE_INT64:
36620 case INT64_FTYPE_V4SF:
36621 case INT64_FTYPE_V2DF:
36622 case INT_FTYPE_V16QI:
36623 case INT_FTYPE_V8QI:
36624 case INT_FTYPE_V8SF:
36625 case INT_FTYPE_V4DF:
36626 case INT_FTYPE_V4SF:
36627 case INT_FTYPE_V2DF:
36628 case INT_FTYPE_V32QI:
36629 case V16QI_FTYPE_V16QI:
36630 case V8SI_FTYPE_V8SF:
36631 case V8SI_FTYPE_V4SI:
36632 case V8HI_FTYPE_V8HI:
36633 case V8HI_FTYPE_V16QI:
36634 case V8QI_FTYPE_V8QI:
36635 case V8SF_FTYPE_V8SF:
36636 case V8SF_FTYPE_V8SI:
36637 case V8SF_FTYPE_V4SF:
36638 case V8SF_FTYPE_V8HI:
36639 case V4SI_FTYPE_V4SI:
36640 case V4SI_FTYPE_V16QI:
36641 case V4SI_FTYPE_V4SF:
36642 case V4SI_FTYPE_V8SI:
36643 case V4SI_FTYPE_V8HI:
36644 case V4SI_FTYPE_V4DF:
36645 case V4SI_FTYPE_V2DF:
36646 case V4HI_FTYPE_V4HI:
36647 case V4DF_FTYPE_V4DF:
36648 case V4DF_FTYPE_V4SI:
36649 case V4DF_FTYPE_V4SF:
36650 case V4DF_FTYPE_V2DF:
36651 case V4SF_FTYPE_V4SF:
36652 case V4SF_FTYPE_V4SI:
36653 case V4SF_FTYPE_V8SF:
36654 case V4SF_FTYPE_V4DF:
36655 case V4SF_FTYPE_V8HI:
36656 case V4SF_FTYPE_V2DF:
36657 case V2DI_FTYPE_V2DI:
36658 case V2DI_FTYPE_V16QI:
36659 case V2DI_FTYPE_V8HI:
36660 case V2DI_FTYPE_V4SI:
36661 case V2DF_FTYPE_V2DF:
36662 case V2DF_FTYPE_V4SI:
36663 case V2DF_FTYPE_V4DF:
36664 case V2DF_FTYPE_V4SF:
36665 case V2DF_FTYPE_V2SI:
36666 case V2SI_FTYPE_V2SI:
36667 case V2SI_FTYPE_V4SF:
36668 case V2SI_FTYPE_V2SF:
36669 case V2SI_FTYPE_V2DF:
36670 case V2SF_FTYPE_V2SF:
36671 case V2SF_FTYPE_V2SI:
36672 case V32QI_FTYPE_V32QI:
36673 case V32QI_FTYPE_V16QI:
36674 case V16HI_FTYPE_V16HI:
36675 case V16HI_FTYPE_V8HI:
36676 case V8SI_FTYPE_V8SI:
36677 case V16HI_FTYPE_V16QI:
36678 case V8SI_FTYPE_V16QI:
36679 case V4DI_FTYPE_V16QI:
36680 case V8SI_FTYPE_V8HI:
36681 case V4DI_FTYPE_V8HI:
36682 case V4DI_FTYPE_V4SI:
36683 case V4DI_FTYPE_V2DI:
36684 case HI_FTYPE_HI:
36685 case HI_FTYPE_V16QI:
36686 case SI_FTYPE_V32QI:
36687 case DI_FTYPE_V64QI:
36688 case V16QI_FTYPE_HI:
36689 case V32QI_FTYPE_SI:
36690 case V64QI_FTYPE_DI:
36691 case V8HI_FTYPE_QI:
36692 case V16HI_FTYPE_HI:
36693 case V32HI_FTYPE_SI:
36694 case V4SI_FTYPE_QI:
36695 case V8SI_FTYPE_QI:
36696 case V4SI_FTYPE_HI:
36697 case V8SI_FTYPE_HI:
36698 case QI_FTYPE_V8HI:
36699 case HI_FTYPE_V16HI:
36700 case SI_FTYPE_V32HI:
36701 case QI_FTYPE_V4SI:
36702 case QI_FTYPE_V8SI:
36703 case HI_FTYPE_V16SI:
36704 case QI_FTYPE_V2DI:
36705 case QI_FTYPE_V4DI:
36706 case QI_FTYPE_V8DI:
36707 case UINT_FTYPE_V2DF:
36708 case UINT_FTYPE_V4SF:
36709 case UINT64_FTYPE_V2DF:
36710 case UINT64_FTYPE_V4SF:
36711 case V16QI_FTYPE_V8DI:
36712 case V16HI_FTYPE_V16SI:
36713 case V16SI_FTYPE_HI:
36714 case V2DI_FTYPE_QI:
36715 case V4DI_FTYPE_QI:
36716 case V16SI_FTYPE_V16SI:
36717 case V16SI_FTYPE_INT:
36718 case V16SF_FTYPE_FLOAT:
36719 case V16SF_FTYPE_V8SF:
36720 case V16SI_FTYPE_V8SI:
36721 case V16SF_FTYPE_V4SF:
36722 case V16SI_FTYPE_V4SI:
36723 case V16SF_FTYPE_V16SF:
36724 case V8HI_FTYPE_V8DI:
36725 case V8UHI_FTYPE_V8UHI:
36726 case V8SI_FTYPE_V8DI:
36727 case V8SF_FTYPE_V8DF:
36728 case V8DI_FTYPE_QI:
36729 case V8DI_FTYPE_INT64:
36730 case V8DI_FTYPE_V4DI:
36731 case V8DI_FTYPE_V8DI:
36732 case V8DF_FTYPE_DOUBLE:
36733 case V8DF_FTYPE_V4DF:
36734 case V8DF_FTYPE_V2DF:
36735 case V8DF_FTYPE_V8DF:
36736 case V8DF_FTYPE_V8SI:
36737 nargs = 1;
36738 break;
36739 case V4SF_FTYPE_V4SF_VEC_MERGE:
36740 case V2DF_FTYPE_V2DF_VEC_MERGE:
36741 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36742 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36743 case V16QI_FTYPE_V16QI_V16QI:
36744 case V16QI_FTYPE_V8HI_V8HI:
36745 case V16SI_FTYPE_V16SI_V16SI:
36746 case V16SF_FTYPE_V16SF_V16SF:
36747 case V16SF_FTYPE_V16SF_V16SI:
36748 case V8QI_FTYPE_V8QI_V8QI:
36749 case V8QI_FTYPE_V4HI_V4HI:
36750 case V8HI_FTYPE_V8HI_V8HI:
36751 case V8HI_FTYPE_V16QI_V16QI:
36752 case V8HI_FTYPE_V4SI_V4SI:
36753 case V8SF_FTYPE_V8SF_V8SF:
36754 case V8SF_FTYPE_V8SF_V8SI:
36755 case V8DI_FTYPE_V8DI_V8DI:
36756 case V8DF_FTYPE_V8DF_V8DF:
36757 case V8DF_FTYPE_V8DF_V8DI:
36758 case V4SI_FTYPE_V4SI_V4SI:
36759 case V4SI_FTYPE_V8HI_V8HI:
36760 case V4SI_FTYPE_V4SF_V4SF:
36761 case V4SI_FTYPE_V2DF_V2DF:
36762 case V4HI_FTYPE_V4HI_V4HI:
36763 case V4HI_FTYPE_V8QI_V8QI:
36764 case V4HI_FTYPE_V2SI_V2SI:
36765 case V4DF_FTYPE_V4DF_V4DF:
36766 case V4DF_FTYPE_V4DF_V4DI:
36767 case V4SF_FTYPE_V4SF_V4SF:
36768 case V4SF_FTYPE_V4SF_V4SI:
36769 case V4SF_FTYPE_V4SF_V2SI:
36770 case V4SF_FTYPE_V4SF_V2DF:
36771 case V4SF_FTYPE_V4SF_UINT:
36772 case V4SF_FTYPE_V4SF_UINT64:
36773 case V4SF_FTYPE_V4SF_DI:
36774 case V4SF_FTYPE_V4SF_SI:
36775 case V2DI_FTYPE_V2DI_V2DI:
36776 case V2DI_FTYPE_V16QI_V16QI:
36777 case V2DI_FTYPE_V4SI_V4SI:
36778 case V2UDI_FTYPE_V4USI_V4USI:
36779 case V2DI_FTYPE_V2DI_V16QI:
36780 case V2DI_FTYPE_V2DF_V2DF:
36781 case V2SI_FTYPE_V2SI_V2SI:
36782 case V2SI_FTYPE_V4HI_V4HI:
36783 case V2SI_FTYPE_V2SF_V2SF:
36784 case V2DF_FTYPE_V2DF_V2DF:
36785 case V2DF_FTYPE_V2DF_V4SF:
36786 case V2DF_FTYPE_V2DF_V2DI:
36787 case V2DF_FTYPE_V2DF_DI:
36788 case V2DF_FTYPE_V2DF_SI:
36789 case V2DF_FTYPE_V2DF_UINT:
36790 case V2DF_FTYPE_V2DF_UINT64:
36791 case V2SF_FTYPE_V2SF_V2SF:
36792 case V1DI_FTYPE_V1DI_V1DI:
36793 case V1DI_FTYPE_V8QI_V8QI:
36794 case V1DI_FTYPE_V2SI_V2SI:
36795 case V32QI_FTYPE_V16HI_V16HI:
36796 case V16HI_FTYPE_V8SI_V8SI:
36797 case V32QI_FTYPE_V32QI_V32QI:
36798 case V16HI_FTYPE_V32QI_V32QI:
36799 case V16HI_FTYPE_V16HI_V16HI:
36800 case V8SI_FTYPE_V4DF_V4DF:
36801 case V8SI_FTYPE_V8SI_V8SI:
36802 case V8SI_FTYPE_V16HI_V16HI:
36803 case V4DI_FTYPE_V4DI_V4DI:
36804 case V4DI_FTYPE_V8SI_V8SI:
36805 case V4UDI_FTYPE_V8USI_V8USI:
36806 case QI_FTYPE_V8DI_V8DI:
36807 case V8DI_FTYPE_V64QI_V64QI:
36808 case HI_FTYPE_V16SI_V16SI:
36809 if (comparison == UNKNOWN)
36810 return ix86_expand_binop_builtin (icode, exp, target);
36811 nargs = 2;
36812 break;
36813 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36814 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36815 gcc_assert (comparison != UNKNOWN);
36816 nargs = 2;
36817 swap = true;
36818 break;
36819 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36820 case V16HI_FTYPE_V16HI_SI_COUNT:
36821 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36822 case V8SI_FTYPE_V8SI_SI_COUNT:
36823 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36824 case V4DI_FTYPE_V4DI_INT_COUNT:
36825 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36826 case V8HI_FTYPE_V8HI_SI_COUNT:
36827 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36828 case V4SI_FTYPE_V4SI_SI_COUNT:
36829 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36830 case V4HI_FTYPE_V4HI_SI_COUNT:
36831 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36832 case V2DI_FTYPE_V2DI_SI_COUNT:
36833 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36834 case V2SI_FTYPE_V2SI_SI_COUNT:
36835 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36836 case V1DI_FTYPE_V1DI_SI_COUNT:
36837 nargs = 2;
36838 last_arg_count = true;
36839 break;
36840 case UINT64_FTYPE_UINT64_UINT64:
36841 case UINT_FTYPE_UINT_UINT:
36842 case UINT_FTYPE_UINT_USHORT:
36843 case UINT_FTYPE_UINT_UCHAR:
36844 case UINT16_FTYPE_UINT16_INT:
36845 case UINT8_FTYPE_UINT8_INT:
36846 case HI_FTYPE_HI_HI:
36847 case SI_FTYPE_SI_SI:
36848 case DI_FTYPE_DI_DI:
36849 case V16SI_FTYPE_V8DF_V8DF:
36850 nargs = 2;
36851 break;
36852 case V2DI_FTYPE_V2DI_INT_CONVERT:
36853 nargs = 2;
36854 rmode = V1TImode;
36855 nargs_constant = 1;
36856 break;
36857 case V4DI_FTYPE_V4DI_INT_CONVERT:
36858 nargs = 2;
36859 rmode = V2TImode;
36860 nargs_constant = 1;
36861 break;
36862 case V8DI_FTYPE_V8DI_INT_CONVERT:
36863 nargs = 2;
36864 rmode = V4TImode;
36865 nargs_constant = 1;
36866 break;
36867 case V8HI_FTYPE_V8HI_INT:
36868 case V8HI_FTYPE_V8SF_INT:
36869 case V16HI_FTYPE_V16SF_INT:
36870 case V8HI_FTYPE_V4SF_INT:
36871 case V8SF_FTYPE_V8SF_INT:
36872 case V4SF_FTYPE_V16SF_INT:
36873 case V16SF_FTYPE_V16SF_INT:
36874 case V4SI_FTYPE_V4SI_INT:
36875 case V4SI_FTYPE_V8SI_INT:
36876 case V4HI_FTYPE_V4HI_INT:
36877 case V4DF_FTYPE_V4DF_INT:
36878 case V4DF_FTYPE_V8DF_INT:
36879 case V4SF_FTYPE_V4SF_INT:
36880 case V4SF_FTYPE_V8SF_INT:
36881 case V2DI_FTYPE_V2DI_INT:
36882 case V2DF_FTYPE_V2DF_INT:
36883 case V2DF_FTYPE_V4DF_INT:
36884 case V16HI_FTYPE_V16HI_INT:
36885 case V8SI_FTYPE_V8SI_INT:
36886 case V16SI_FTYPE_V16SI_INT:
36887 case V4SI_FTYPE_V16SI_INT:
36888 case V4DI_FTYPE_V4DI_INT:
36889 case V2DI_FTYPE_V4DI_INT:
36890 case V4DI_FTYPE_V8DI_INT:
36891 case HI_FTYPE_HI_INT:
36892 case QI_FTYPE_V4SF_INT:
36893 case QI_FTYPE_V2DF_INT:
36894 nargs = 2;
36895 nargs_constant = 1;
36896 break;
36897 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36898 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36899 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36900 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36901 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36902 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36903 case HI_FTYPE_V16SI_V16SI_HI:
36904 case QI_FTYPE_V8DI_V8DI_QI:
36905 case V16HI_FTYPE_V16SI_V16HI_HI:
36906 case V16QI_FTYPE_V16SI_V16QI_HI:
36907 case V16QI_FTYPE_V8DI_V16QI_QI:
36908 case V16SF_FTYPE_V16SF_V16SF_HI:
36909 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36910 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36911 case V16SF_FTYPE_V16SI_V16SF_HI:
36912 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36913 case V16SF_FTYPE_V4SF_V16SF_HI:
36914 case V16SI_FTYPE_SI_V16SI_HI:
36915 case V16SI_FTYPE_V16HI_V16SI_HI:
36916 case V16SI_FTYPE_V16QI_V16SI_HI:
36917 case V16SI_FTYPE_V16SF_V16SI_HI:
36918 case V8SF_FTYPE_V4SF_V8SF_QI:
36919 case V4DF_FTYPE_V2DF_V4DF_QI:
36920 case V8SI_FTYPE_V4SI_V8SI_QI:
36921 case V8SI_FTYPE_SI_V8SI_QI:
36922 case V4SI_FTYPE_V4SI_V4SI_QI:
36923 case V4SI_FTYPE_SI_V4SI_QI:
36924 case V4DI_FTYPE_V2DI_V4DI_QI:
36925 case V4DI_FTYPE_DI_V4DI_QI:
36926 case V2DI_FTYPE_V2DI_V2DI_QI:
36927 case V2DI_FTYPE_DI_V2DI_QI:
36928 case V64QI_FTYPE_V64QI_V64QI_DI:
36929 case V64QI_FTYPE_V16QI_V64QI_DI:
36930 case V64QI_FTYPE_QI_V64QI_DI:
36931 case V32QI_FTYPE_V32QI_V32QI_SI:
36932 case V32QI_FTYPE_V16QI_V32QI_SI:
36933 case V32QI_FTYPE_QI_V32QI_SI:
36934 case V16QI_FTYPE_V16QI_V16QI_HI:
36935 case V16QI_FTYPE_QI_V16QI_HI:
36936 case V32HI_FTYPE_V8HI_V32HI_SI:
36937 case V32HI_FTYPE_HI_V32HI_SI:
36938 case V16HI_FTYPE_V8HI_V16HI_HI:
36939 case V16HI_FTYPE_HI_V16HI_HI:
36940 case V8HI_FTYPE_V8HI_V8HI_QI:
36941 case V8HI_FTYPE_HI_V8HI_QI:
36942 case V8SF_FTYPE_V8HI_V8SF_QI:
36943 case V4SF_FTYPE_V8HI_V4SF_QI:
36944 case V8SI_FTYPE_V8SF_V8SI_QI:
36945 case V4SI_FTYPE_V4SF_V4SI_QI:
36946 case V8DI_FTYPE_V8SF_V8DI_QI:
36947 case V4DI_FTYPE_V4SF_V4DI_QI:
36948 case V2DI_FTYPE_V4SF_V2DI_QI:
36949 case V8SF_FTYPE_V8DI_V8SF_QI:
36950 case V4SF_FTYPE_V4DI_V4SF_QI:
36951 case V4SF_FTYPE_V2DI_V4SF_QI:
36952 case V8DF_FTYPE_V8DI_V8DF_QI:
36953 case V4DF_FTYPE_V4DI_V4DF_QI:
36954 case V2DF_FTYPE_V2DI_V2DF_QI:
36955 case V16QI_FTYPE_V8HI_V16QI_QI:
36956 case V16QI_FTYPE_V16HI_V16QI_HI:
36957 case V16QI_FTYPE_V4SI_V16QI_QI:
36958 case V16QI_FTYPE_V8SI_V16QI_QI:
36959 case V8HI_FTYPE_V4SI_V8HI_QI:
36960 case V8HI_FTYPE_V8SI_V8HI_QI:
36961 case V16QI_FTYPE_V2DI_V16QI_QI:
36962 case V16QI_FTYPE_V4DI_V16QI_QI:
36963 case V8HI_FTYPE_V2DI_V8HI_QI:
36964 case V8HI_FTYPE_V4DI_V8HI_QI:
36965 case V4SI_FTYPE_V2DI_V4SI_QI:
36966 case V4SI_FTYPE_V4DI_V4SI_QI:
36967 case V32QI_FTYPE_V32HI_V32QI_SI:
36968 case HI_FTYPE_V16QI_V16QI_HI:
36969 case SI_FTYPE_V32QI_V32QI_SI:
36970 case DI_FTYPE_V64QI_V64QI_DI:
36971 case QI_FTYPE_V8HI_V8HI_QI:
36972 case HI_FTYPE_V16HI_V16HI_HI:
36973 case SI_FTYPE_V32HI_V32HI_SI:
36974 case QI_FTYPE_V4SI_V4SI_QI:
36975 case QI_FTYPE_V8SI_V8SI_QI:
36976 case QI_FTYPE_V2DI_V2DI_QI:
36977 case QI_FTYPE_V4DI_V4DI_QI:
36978 case V4SF_FTYPE_V2DF_V4SF_QI:
36979 case V4SF_FTYPE_V4DF_V4SF_QI:
36980 case V16SI_FTYPE_V16SI_V16SI_HI:
36981 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36982 case V16SI_FTYPE_V4SI_V16SI_HI:
36983 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36984 case V2DI_FTYPE_V4SI_V2DI_QI:
36985 case V2DI_FTYPE_V8HI_V2DI_QI:
36986 case V2DI_FTYPE_V16QI_V2DI_QI:
36987 case V4DI_FTYPE_V4DI_V4DI_QI:
36988 case V4DI_FTYPE_V4SI_V4DI_QI:
36989 case V4DI_FTYPE_V8HI_V4DI_QI:
36990 case V4DI_FTYPE_V16QI_V4DI_QI:
36991 case V8DI_FTYPE_V8DF_V8DI_QI:
36992 case V4DI_FTYPE_V4DF_V4DI_QI:
36993 case V2DI_FTYPE_V2DF_V2DI_QI:
36994 case V4SI_FTYPE_V4DF_V4SI_QI:
36995 case V4SI_FTYPE_V2DF_V4SI_QI:
36996 case V4SI_FTYPE_V8HI_V4SI_QI:
36997 case V4SI_FTYPE_V16QI_V4SI_QI:
36998 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36999 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37000 case V8DF_FTYPE_V2DF_V8DF_QI:
37001 case V8DF_FTYPE_V4DF_V8DF_QI:
37002 case V8DF_FTYPE_V8DF_V8DF_QI:
37003 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37004 case V8SF_FTYPE_V8SF_V8SF_QI:
37005 case V8SF_FTYPE_V8SI_V8SF_QI:
37006 case V4DF_FTYPE_V4DF_V4DF_QI:
37007 case V4SF_FTYPE_V4SF_V4SF_QI:
37008 case V2DF_FTYPE_V2DF_V2DF_QI:
37009 case V2DF_FTYPE_V4SF_V2DF_QI:
37010 case V2DF_FTYPE_V4SI_V2DF_QI:
37011 case V4SF_FTYPE_V4SI_V4SF_QI:
37012 case V4DF_FTYPE_V4SF_V4DF_QI:
37013 case V4DF_FTYPE_V4SI_V4DF_QI:
37014 case V8SI_FTYPE_V8SI_V8SI_QI:
37015 case V8SI_FTYPE_V8HI_V8SI_QI:
37016 case V8SI_FTYPE_V16QI_V8SI_QI:
37017 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37018 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37019 case V8DF_FTYPE_V8SF_V8DF_QI:
37020 case V8DF_FTYPE_V8SI_V8DF_QI:
37021 case V8DI_FTYPE_DI_V8DI_QI:
37022 case V16SF_FTYPE_V8SF_V16SF_HI:
37023 case V16SI_FTYPE_V8SI_V16SI_HI:
37024 case V16HI_FTYPE_V16HI_V16HI_HI:
37025 case V8HI_FTYPE_V16QI_V8HI_QI:
37026 case V16HI_FTYPE_V16QI_V16HI_HI:
37027 case V32HI_FTYPE_V32HI_V32HI_SI:
37028 case V32HI_FTYPE_V32QI_V32HI_SI:
37029 case V8DI_FTYPE_V16QI_V8DI_QI:
37030 case V8DI_FTYPE_V2DI_V8DI_QI:
37031 case V8DI_FTYPE_V4DI_V8DI_QI:
37032 case V8DI_FTYPE_V8DI_V8DI_QI:
37033 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37034 case V8DI_FTYPE_V8HI_V8DI_QI:
37035 case V8DI_FTYPE_V8SI_V8DI_QI:
37036 case V8HI_FTYPE_V8DI_V8HI_QI:
37037 case V8SF_FTYPE_V8DF_V8SF_QI:
37038 case V8SI_FTYPE_V8DF_V8SI_QI:
37039 case V8SI_FTYPE_V8DI_V8SI_QI:
37040 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37041 nargs = 3;
37042 break;
37043 case V32QI_FTYPE_V32QI_V32QI_INT:
37044 case V16HI_FTYPE_V16HI_V16HI_INT:
37045 case V16QI_FTYPE_V16QI_V16QI_INT:
37046 case V4DI_FTYPE_V4DI_V4DI_INT:
37047 case V8HI_FTYPE_V8HI_V8HI_INT:
37048 case V8SI_FTYPE_V8SI_V8SI_INT:
37049 case V8SI_FTYPE_V8SI_V4SI_INT:
37050 case V8SF_FTYPE_V8SF_V8SF_INT:
37051 case V8SF_FTYPE_V8SF_V4SF_INT:
37052 case V4SI_FTYPE_V4SI_V4SI_INT:
37053 case V4DF_FTYPE_V4DF_V4DF_INT:
37054 case V16SF_FTYPE_V16SF_V16SF_INT:
37055 case V16SF_FTYPE_V16SF_V4SF_INT:
37056 case V16SI_FTYPE_V16SI_V4SI_INT:
37057 case V4DF_FTYPE_V4DF_V2DF_INT:
37058 case V4SF_FTYPE_V4SF_V4SF_INT:
37059 case V2DI_FTYPE_V2DI_V2DI_INT:
37060 case V4DI_FTYPE_V4DI_V2DI_INT:
37061 case V2DF_FTYPE_V2DF_V2DF_INT:
37062 case QI_FTYPE_V8DI_V8DI_INT:
37063 case QI_FTYPE_V8DF_V8DF_INT:
37064 case QI_FTYPE_V2DF_V2DF_INT:
37065 case QI_FTYPE_V4SF_V4SF_INT:
37066 case HI_FTYPE_V16SI_V16SI_INT:
37067 case HI_FTYPE_V16SF_V16SF_INT:
37068 nargs = 3;
37069 nargs_constant = 1;
37070 break;
37071 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37072 nargs = 3;
37073 rmode = V4DImode;
37074 nargs_constant = 1;
37075 break;
37076 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37077 nargs = 3;
37078 rmode = V2DImode;
37079 nargs_constant = 1;
37080 break;
37081 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37082 nargs = 3;
37083 rmode = DImode;
37084 nargs_constant = 1;
37085 break;
37086 case V2DI_FTYPE_V2DI_UINT_UINT:
37087 nargs = 3;
37088 nargs_constant = 2;
37089 break;
37090 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37091 nargs = 3;
37092 rmode = V8DImode;
37093 nargs_constant = 1;
37094 break;
37095 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37096 nargs = 5;
37097 rmode = V8DImode;
37098 mask_pos = 2;
37099 nargs_constant = 1;
37100 break;
37101 case QI_FTYPE_V8DF_INT_QI:
37102 case QI_FTYPE_V4DF_INT_QI:
37103 case QI_FTYPE_V2DF_INT_QI:
37104 case HI_FTYPE_V16SF_INT_HI:
37105 case QI_FTYPE_V8SF_INT_QI:
37106 case QI_FTYPE_V4SF_INT_QI:
37107 nargs = 3;
37108 mask_pos = 1;
37109 nargs_constant = 1;
37110 break;
37111 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37112 nargs = 5;
37113 rmode = V4DImode;
37114 mask_pos = 2;
37115 nargs_constant = 1;
37116 break;
37117 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37118 nargs = 5;
37119 rmode = V2DImode;
37120 mask_pos = 2;
37121 nargs_constant = 1;
37122 break;
37123 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37124 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37125 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37126 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37127 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37128 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37129 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37130 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37131 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37132 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37133 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37134 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37135 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37136 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37137 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37138 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37139 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37140 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37141 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37142 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37143 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37144 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37145 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37146 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37147 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37148 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37149 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37150 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37151 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37152 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37153 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37154 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37155 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37156 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37157 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37158 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37159 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37160 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37161 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37162 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37163 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37164 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37165 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37166 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37167 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37168 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37169 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37170 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37171 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37172 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37173 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37174 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37175 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37176 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37177 nargs = 4;
37178 break;
37179 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37180 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37181 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37182 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37183 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37184 nargs = 4;
37185 nargs_constant = 1;
37186 break;
37187 case QI_FTYPE_V4DI_V4DI_INT_QI:
37188 case QI_FTYPE_V8SI_V8SI_INT_QI:
37189 case QI_FTYPE_V4DF_V4DF_INT_QI:
37190 case QI_FTYPE_V8SF_V8SF_INT_QI:
37191 case QI_FTYPE_V2DI_V2DI_INT_QI:
37192 case QI_FTYPE_V4SI_V4SI_INT_QI:
37193 case QI_FTYPE_V2DF_V2DF_INT_QI:
37194 case QI_FTYPE_V4SF_V4SF_INT_QI:
37195 case DI_FTYPE_V64QI_V64QI_INT_DI:
37196 case SI_FTYPE_V32QI_V32QI_INT_SI:
37197 case HI_FTYPE_V16QI_V16QI_INT_HI:
37198 case SI_FTYPE_V32HI_V32HI_INT_SI:
37199 case HI_FTYPE_V16HI_V16HI_INT_HI:
37200 case QI_FTYPE_V8HI_V8HI_INT_QI:
37201 nargs = 4;
37202 mask_pos = 1;
37203 nargs_constant = 1;
37204 break;
37205 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37206 nargs = 4;
37207 nargs_constant = 2;
37208 break;
37209 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37210 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37211 nargs = 4;
37212 break;
37213 case QI_FTYPE_V8DI_V8DI_INT_QI:
37214 case HI_FTYPE_V16SI_V16SI_INT_HI:
37215 case QI_FTYPE_V8DF_V8DF_INT_QI:
37216 case HI_FTYPE_V16SF_V16SF_INT_HI:
37217 mask_pos = 1;
37218 nargs = 4;
37219 nargs_constant = 1;
37220 break;
37221 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37222 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37223 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37224 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37225 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37226 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37227 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37228 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37229 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37230 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37231 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37232 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37233 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37234 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37235 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37236 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37237 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37238 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37239 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37240 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37241 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37242 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37243 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37244 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37245 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37246 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37247 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37248 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37249 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37250 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37251 nargs = 4;
37252 mask_pos = 2;
37253 nargs_constant = 1;
37254 break;
37255 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37256 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37257 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37258 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37259 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37260 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37261 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37262 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37263 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37264 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37265 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37266 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37267 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37268 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37269 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37270 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37271 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37272 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37273 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37274 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37275 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37276 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37277 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37278 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37279 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37280 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37281 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37282 nargs = 5;
37283 mask_pos = 2;
37284 nargs_constant = 1;
37285 break;
37286 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37287 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37288 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37289 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37290 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37291 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37292 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37293 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37294 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37295 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37296 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37297 nargs = 5;
37299 mask_pos = 1;
37300 nargs_constant = 1;
37301 break;
37303 default:
37304 gcc_unreachable ();
37307 gcc_assert (nargs <= ARRAY_SIZE (args));
37309 if (comparison != UNKNOWN)
37311 gcc_assert (nargs == 2);
37312 return ix86_expand_sse_compare (d, exp, target, swap);
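/* When RMODE was set above and differs from the insn's result mode
   TMODE, emit the insn into a fresh TMODE register (real_target) and
   return an RMODE subreg of it to the caller; otherwise TARGET and
   REAL_TARGET are the same register.  */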
37315 if (rmode == VOIDmode || rmode == tmode)
37317 if (optimize
37318 || target == 0
37319 || GET_MODE (target) != tmode
37320 || !insn_p->operand[0].predicate (target, tmode))
37321 target = gen_reg_rtx (tmode);
37322 real_target = target;
37324 else
37326 real_target = gen_reg_rtx (tmode);
37327 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37330 for (i = 0; i < nargs; i++)
37332 tree arg = CALL_EXPR_ARG (exp, i);
37333 rtx op = expand_normal (arg);
37334 machine_mode mode = insn_p->operand[i + 1].mode;
37335 bool match = insn_p->operand[i + 1].predicate (op, mode);
37337 if (last_arg_count && (i + 1) == nargs)
37339 /* SIMD shift insns take either an 8-bit immediate or a
37340 register as the count, but the builtin functions take an int.
37341 If the count operand doesn't match, put it in a register. */
37342 if (!match)
37344 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37345 if (!insn_p->operand[i + 1].predicate (op, mode))
37346 op = copy_to_reg (op);
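/* Immediate operands: with a mask operand present (MASK_POS != 0) the
   immediate is the operand NARGS_CONSTANT + MASK_POS places from the
   end; without a mask, the last NARGS_CONSTANT operands are the
   immediates.  */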
37349 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37350 || (!mask_pos && (nargs - i) <= nargs_constant))
37352 if (!match)
37353 switch (icode)
37355 case CODE_FOR_avx_vinsertf128v4di:
37356 case CODE_FOR_avx_vextractf128v4di:
37357 error ("the last argument must be an 1-bit immediate");
37358 return const0_rtx;
37360 case CODE_FOR_avx512f_cmpv8di3_mask:
37361 case CODE_FOR_avx512f_cmpv16si3_mask:
37362 case CODE_FOR_avx512f_ucmpv8di3_mask:
37363 case CODE_FOR_avx512f_ucmpv16si3_mask:
37364 case CODE_FOR_avx512vl_cmpv4di3_mask:
37365 case CODE_FOR_avx512vl_cmpv8si3_mask:
37366 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37367 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37368 case CODE_FOR_avx512vl_cmpv2di3_mask:
37369 case CODE_FOR_avx512vl_cmpv4si3_mask:
37370 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37371 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37372 error ("the last argument must be a 3-bit immediate");
37373 return const0_rtx;
37375 case CODE_FOR_sse4_1_roundsd:
37376 case CODE_FOR_sse4_1_roundss:
37378 case CODE_FOR_sse4_1_roundpd:
37379 case CODE_FOR_sse4_1_roundps:
37380 case CODE_FOR_avx_roundpd256:
37381 case CODE_FOR_avx_roundps256:
37383 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37384 case CODE_FOR_sse4_1_roundps_sfix:
37385 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37386 case CODE_FOR_avx_roundps_sfix256:
37388 case CODE_FOR_sse4_1_blendps:
37389 case CODE_FOR_avx_blendpd256:
37390 case CODE_FOR_avx_vpermilv4df:
37391 case CODE_FOR_avx_vpermilv4df_mask:
37392 case CODE_FOR_avx512f_getmantv8df_mask:
37393 case CODE_FOR_avx512f_getmantv16sf_mask:
37394 case CODE_FOR_avx512vl_getmantv8sf_mask:
37395 case CODE_FOR_avx512vl_getmantv4df_mask:
37396 case CODE_FOR_avx512vl_getmantv4sf_mask:
37397 case CODE_FOR_avx512vl_getmantv2df_mask:
37398 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37399 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37400 case CODE_FOR_avx512dq_rangepv4df_mask:
37401 case CODE_FOR_avx512dq_rangepv8sf_mask:
37402 case CODE_FOR_avx512dq_rangepv2df_mask:
37403 case CODE_FOR_avx512dq_rangepv4sf_mask:
37404 case CODE_FOR_avx_shufpd256_mask:
37405 error ("the last argument must be a 4-bit immediate");
37406 return const0_rtx;
37408 case CODE_FOR_sha1rnds4:
37409 case CODE_FOR_sse4_1_blendpd:
37410 case CODE_FOR_avx_vpermilv2df:
37411 case CODE_FOR_avx_vpermilv2df_mask:
37412 case CODE_FOR_xop_vpermil2v2df3:
37413 case CODE_FOR_xop_vpermil2v4sf3:
37414 case CODE_FOR_xop_vpermil2v4df3:
37415 case CODE_FOR_xop_vpermil2v8sf3:
37416 case CODE_FOR_avx512f_vinsertf32x4_mask:
37417 case CODE_FOR_avx512f_vinserti32x4_mask:
37418 case CODE_FOR_avx512f_vextractf32x4_mask:
37419 case CODE_FOR_avx512f_vextracti32x4_mask:
37420 case CODE_FOR_sse2_shufpd:
37421 case CODE_FOR_sse2_shufpd_mask:
37422 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37423 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37424 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37425 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37426 error ("the last argument must be a 2-bit immediate");
37427 return const0_rtx;
37429 case CODE_FOR_avx_vextractf128v4df:
37430 case CODE_FOR_avx_vextractf128v8sf:
37431 case CODE_FOR_avx_vextractf128v8si:
37432 case CODE_FOR_avx_vinsertf128v4df:
37433 case CODE_FOR_avx_vinsertf128v8sf:
37434 case CODE_FOR_avx_vinsertf128v8si:
37435 case CODE_FOR_avx512f_vinsertf64x4_mask:
37436 case CODE_FOR_avx512f_vinserti64x4_mask:
37437 case CODE_FOR_avx512f_vextractf64x4_mask:
37438 case CODE_FOR_avx512f_vextracti64x4_mask:
37439 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37440 case CODE_FOR_avx512dq_vinserti32x8_mask:
37441 case CODE_FOR_avx512vl_vinsertv4df:
37442 case CODE_FOR_avx512vl_vinsertv4di:
37443 case CODE_FOR_avx512vl_vinsertv8sf:
37444 case CODE_FOR_avx512vl_vinsertv8si:
37445 error ("the last argument must be a 1-bit immediate");
37446 return const0_rtx;
37448 case CODE_FOR_avx_vmcmpv2df3:
37449 case CODE_FOR_avx_vmcmpv4sf3:
37450 case CODE_FOR_avx_cmpv2df3:
37451 case CODE_FOR_avx_cmpv4sf3:
37452 case CODE_FOR_avx_cmpv4df3:
37453 case CODE_FOR_avx_cmpv8sf3:
37454 case CODE_FOR_avx512f_cmpv8df3_mask:
37455 case CODE_FOR_avx512f_cmpv16sf3_mask:
37456 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37457 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37458 error ("the last argument must be a 5-bit immediate");
37459 return const0_rtx;
37461 default:
37462 switch (nargs_constant)
37464 case 2:
37465 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37466 || (!mask_pos && (nargs - i) == nargs_constant))
37468 error ("the next to last argument must be an 8-bit immediate");
37469 break;
37471 case 1:
37472 error ("the last argument must be an 8-bit immediate");
37473 break;
37474 default:
37475 gcc_unreachable ();
37477 return const0_rtx;
37480 else
37482 if (VECTOR_MODE_P (mode))
37483 op = safe_vector_operand (op, mode);
37485 /* If we aren't optimizing, only allow one memory operand to
37486 be generated. */
37487 if (memory_operand (op, mode))
37488 num_memory++;
37490 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37492 if (optimize || !match || num_memory > 1)
37493 op = copy_to_mode_reg (mode, op);
37495 else
37497 op = copy_to_reg (op);
37498 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37502 args[i].op = op;
37503 args[i].mode = mode;
37506 switch (nargs)
37508 case 1:
37509 pat = GEN_FCN (icode) (real_target, args[0].op);
37510 break;
37511 case 2:
37512 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37513 break;
37514 case 3:
37515 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37516 args[2].op);
37517 break;
37518 case 4:
37519 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37520 args[2].op, args[3].op);
37521 break;
37522 case 5:
37523 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37524 args[2].op, args[3].op, args[4].op);
break;
37525 case 6:
37526 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37527 args[2].op, args[3].op, args[4].op,
37528 args[5].op);
37529 break;
37530 default:
37531 gcc_unreachable ();
37534 if (! pat)
37535 return 0;
37537 emit_insn (pat);
37538 return target;
37541 /* Transform a pattern of the following layout:
37542 (parallel [
37543 set (A B)
37544 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37546 into:
37547 (set (A B))
and a pattern of the layout:
37550 (parallel [ A B
...
37552 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37555 into:
37556 (parallel [ A B ... ]) */
37558 static rtx
37559 ix86_erase_embedded_rounding (rtx pat)
37561 if (GET_CODE (pat) == INSN)
37562 pat = PATTERN (pat);
37564 gcc_assert (GET_CODE (pat) == PARALLEL);
37566 if (XVECLEN (pat, 0) == 2)
37568 rtx p0 = XVECEXP (pat, 0, 0);
37569 rtx p1 = XVECEXP (pat, 0, 1);
37571 gcc_assert (GET_CODE (p0) == SET
37572 && GET_CODE (p1) == UNSPEC
37573 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37575 return p0;
37577 else
37579 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37580 int i = 0;
37581 int j = 0;
37583 for (; i < XVECLEN (pat, 0); ++i)
37585 rtx elem = XVECEXP (pat, 0, i);
37586 if (GET_CODE (elem) != UNSPEC
37587 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37588 res [j++] = elem;
37591 /* No more than 1 occurrence was removed. */
37592 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37594 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37598 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37599 with rounding. */
37600 static rtx
37601 ix86_expand_sse_comi_round (const struct builtin_description *d,
37602 tree exp, rtx target)
37604 rtx pat, set_dst;
37605 tree arg0 = CALL_EXPR_ARG (exp, 0);
37606 tree arg1 = CALL_EXPR_ARG (exp, 1);
37607 tree arg2 = CALL_EXPR_ARG (exp, 2);
37608 tree arg3 = CALL_EXPR_ARG (exp, 3);
37609 rtx op0 = expand_normal (arg0);
37610 rtx op1 = expand_normal (arg1);
37611 rtx op2 = expand_normal (arg2);
37612 rtx op3 = expand_normal (arg3);
37613 enum insn_code icode = d->icode;
37614 const struct insn_data_d *insn_p = &insn_data[icode];
37615 machine_mode mode0 = insn_p->operand[0].mode;
37616 machine_mode mode1 = insn_p->operand[1].mode;
37617 enum rtx_code comparison = UNEQ;
37618 bool need_ucomi = false;
37620 /* See avxintrin.h for values. */
37621 enum rtx_code comi_comparisons[32] =
37623 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37624 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37625 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37627 bool need_ucomi_values[32] =
37629 true, false, false, true, true, false, false, true,
37630 true, false, false, true, true, false, false, true,
37631 false, true, true, false, false, true, true, false,
37632 false, true, true, false, false, true, true, false
37635 if (!CONST_INT_P (op2))
37637 error ("the third argument must be comparison constant");
37638 return const0_rtx;
37640 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37642 error ("incorect comparison mode");
37643 return const0_rtx;
37646 if (!insn_p->operand[2].predicate (op3, SImode))
37648 error ("incorrect rounding operand");
37649 return const0_rtx;
37652 comparison = comi_comparisons[INTVAL (op2)];
37653 need_ucomi = need_ucomi_values[INTVAL (op2)];
37655 if (VECTOR_MODE_P (mode0))
37656 op0 = safe_vector_operand (op0, mode0);
37657 if (VECTOR_MODE_P (mode1))
37658 op1 = safe_vector_operand (op1, mode1);
37660 target = gen_reg_rtx (SImode);
37661 emit_move_insn (target, const0_rtx);
37662 target = gen_rtx_SUBREG (QImode, target, 0);
37664 if ((optimize && !register_operand (op0, mode0))
37665 || !insn_p->operand[0].predicate (op0, mode0))
37666 op0 = copy_to_mode_reg (mode0, op0);
37667 if ((optimize && !register_operand (op1, mode1))
37668 || !insn_p->operand[1].predicate (op1, mode1))
37669 op1 = copy_to_mode_reg (mode1, op1);
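/* Comparison predicates that must not signal on quiet NaNs are routed
   to the UCOMI (non-signalling) form of the instruction.  */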
37671 if (need_ucomi)
37672 icode = icode == CODE_FOR_sse_comi_round
37673 ? CODE_FOR_sse_ucomi_round
37674 : CODE_FOR_sse2_ucomi_round;
37676 pat = GEN_FCN (icode) (op0, op1, op3);
37677 if (! pat)
37678 return 0;
37680 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37681 if (INTVAL (op3) == NO_ROUND)
37683 pat = ix86_erase_embedded_rounding (pat);
37684 if (! pat)
37685 return 0;
37687 set_dst = SET_DEST (pat);
37689 else
37691 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37692 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37695 emit_insn (pat);
37696 emit_insn (gen_rtx_SET (VOIDmode,
37697 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37698 gen_rtx_fmt_ee (comparison, QImode,
37699 set_dst,
37700 const0_rtx)));
37702 return SUBREG_REG (target);
37705 static rtx
37706 ix86_expand_round_builtin (const struct builtin_description *d,
37707 tree exp, rtx target)
37709 rtx pat;
37710 unsigned int i, nargs;
37711 struct
37713 rtx op;
37714 machine_mode mode;
37715 } args[6];
37716 enum insn_code icode = d->icode;
37717 const struct insn_data_d *insn_p = &insn_data[icode];
37718 machine_mode tmode = insn_p->operand[0].mode;
37719 unsigned int nargs_constant = 0;
37720 unsigned int redundant_embed_rnd = 0;
37722 switch ((enum ix86_builtin_func_type) d->flag)
37724 case UINT64_FTYPE_V2DF_INT:
37725 case UINT64_FTYPE_V4SF_INT:
37726 case UINT_FTYPE_V2DF_INT:
37727 case UINT_FTYPE_V4SF_INT:
37728 case INT64_FTYPE_V2DF_INT:
37729 case INT64_FTYPE_V4SF_INT:
37730 case INT_FTYPE_V2DF_INT:
37731 case INT_FTYPE_V4SF_INT:
37732 nargs = 2;
37733 break;
37734 case V4SF_FTYPE_V4SF_UINT_INT:
37735 case V4SF_FTYPE_V4SF_UINT64_INT:
37736 case V2DF_FTYPE_V2DF_UINT64_INT:
37737 case V4SF_FTYPE_V4SF_INT_INT:
37738 case V4SF_FTYPE_V4SF_INT64_INT:
37739 case V2DF_FTYPE_V2DF_INT64_INT:
37740 case V4SF_FTYPE_V4SF_V4SF_INT:
37741 case V2DF_FTYPE_V2DF_V2DF_INT:
37742 case V4SF_FTYPE_V4SF_V2DF_INT:
37743 case V2DF_FTYPE_V2DF_V4SF_INT:
37744 nargs = 3;
37745 break;
37746 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37747 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37748 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37749 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37750 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37751 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37752 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37753 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37754 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37755 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37756 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37757 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37758 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37759 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37760 nargs = 4;
37761 break;
37762 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37763 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37764 nargs_constant = 2;
37765 nargs = 4;
37766 break;
37767 case INT_FTYPE_V4SF_V4SF_INT_INT:
37768 case INT_FTYPE_V2DF_V2DF_INT_INT:
37769 return ix86_expand_sse_comi_round (d, exp, target);
37770 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37771 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37772 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37773 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37774 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37775 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37776 nargs = 5;
37777 break;
37778 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37779 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37780 nargs_constant = 4;
37781 nargs = 5;
37782 break;
37783 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37784 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37785 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37786 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37787 nargs_constant = 3;
37788 nargs = 5;
37789 break;
37790 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37791 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37792 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37793 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37794 nargs = 6;
37795 nargs_constant = 4;
37796 break;
37797 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37798 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37799 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37800 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37801 nargs = 6;
37802 nargs_constant = 3;
37803 break;
37804 default:
37805 gcc_unreachable ();
37807 gcc_assert (nargs <= ARRAY_SIZE (args));
37809 if (optimize
37810 || target == 0
37811 || GET_MODE (target) != tmode
37812 || !insn_p->operand[0].predicate (target, tmode))
37813 target = gen_reg_rtx (tmode);
37815 for (i = 0; i < nargs; i++)
37817 tree arg = CALL_EXPR_ARG (exp, i);
37818 rtx op = expand_normal (arg);
37819 machine_mode mode = insn_p->operand[i + 1].mode;
37820 bool match = insn_p->operand[i + 1].predicate (op, mode);
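/* Immediates come at the end of the argument list: operand
   NARGS - NARGS_CONSTANT is checked as the insn's immediate, and the
   very last operand is always the embedded-rounding selector.  */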
37822 if (i == nargs - nargs_constant)
37824 if (!match)
37826 switch (icode)
37828 case CODE_FOR_avx512f_getmantv8df_mask_round:
37829 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37830 case CODE_FOR_avx512f_vgetmantv2df_round:
37831 case CODE_FOR_avx512f_vgetmantv4sf_round:
37832 error ("the immediate argument must be a 4-bit immediate");
37833 return const0_rtx;
37834 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37835 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37836 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37837 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37838 error ("the immediate argument must be a 5-bit immediate");
37839 return const0_rtx;
37840 default:
37841 error ("the immediate argument must be an 8-bit immediate");
37842 return const0_rtx;
37846 else if (i == nargs-1)
37848 if (!insn_p->operand[nargs].predicate (op, SImode))
37850 error ("incorrect rounding operand");
37851 return const0_rtx;
37854 /* If there is no rounding, use the normal version of the pattern. */
37855 if (INTVAL (op) == NO_ROUND)
37856 redundant_embed_rnd = 1;
37858 else
37860 if (VECTOR_MODE_P (mode))
37861 op = safe_vector_operand (op, mode);
37863 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37865 if (optimize || !match)
37866 op = copy_to_mode_reg (mode, op);
37868 else
37870 op = copy_to_reg (op);
37871 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37875 args[i].op = op;
37876 args[i].mode = mode;
37879 switch (nargs)
37881 case 1:
37882 pat = GEN_FCN (icode) (target, args[0].op);
37883 break;
37884 case 2:
37885 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37886 break;
37887 case 3:
37888 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37889 args[2].op);
37890 break;
37891 case 4:
37892 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37893 args[2].op, args[3].op);
37894 break;
37895 case 5:
37896 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37897 args[2].op, args[3].op, args[4].op);
break;
37898 case 6:
37899 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37900 args[2].op, args[3].op, args[4].op,
37901 args[5].op);
37902 break;
37903 default:
37904 gcc_unreachable ();
37907 if (!pat)
37908 return 0;
37910 if (redundant_embed_rnd)
37911 pat = ix86_erase_embedded_rounding (pat);
37913 emit_insn (pat);
37914 return target;
37917 /* Subroutine of ix86_expand_builtin to take care of special insns
37918 with variable number of operands. */
37920 static rtx
37921 ix86_expand_special_args_builtin (const struct builtin_description *d,
37922 tree exp, rtx target)
37924 tree arg;
37925 rtx pat, op;
37926 unsigned int i, nargs, arg_adjust, memory;
37927 bool aligned_mem = false;
37928 struct
37930 rtx op;
37931 machine_mode mode;
37932 } args[3];
37933 enum insn_code icode = d->icode;
37934 bool last_arg_constant = false;
37935 const struct insn_data_d *insn_p = &insn_data[icode];
37936 machine_mode tmode = insn_p->operand[0].mode;
37937 enum { load, store } klass;
37939 switch ((enum ix86_builtin_func_type) d->flag)
37941 case VOID_FTYPE_VOID:
37942 emit_insn (GEN_FCN (icode) (target));
37943 return 0;
37944 case VOID_FTYPE_UINT64:
37945 case VOID_FTYPE_UNSIGNED:
37946 nargs = 0;
37947 klass = store;
37948 memory = 0;
37949 break;
37951 case INT_FTYPE_VOID:
37952 case USHORT_FTYPE_VOID:
37953 case UINT64_FTYPE_VOID:
37954 case UNSIGNED_FTYPE_VOID:
37955 nargs = 0;
37956 klass = load;
37957 memory = 0;
37958 break;
37959 case UINT64_FTYPE_PUNSIGNED:
37960 case V2DI_FTYPE_PV2DI:
37961 case V4DI_FTYPE_PV4DI:
37962 case V32QI_FTYPE_PCCHAR:
37963 case V16QI_FTYPE_PCCHAR:
37964 case V8SF_FTYPE_PCV4SF:
37965 case V8SF_FTYPE_PCFLOAT:
37966 case V4SF_FTYPE_PCFLOAT:
37967 case V4DF_FTYPE_PCV2DF:
37968 case V4DF_FTYPE_PCDOUBLE:
37969 case V2DF_FTYPE_PCDOUBLE:
37970 case VOID_FTYPE_PVOID:
37971 case V16SI_FTYPE_PV4SI:
37972 case V16SF_FTYPE_PV4SF:
37973 case V8DI_FTYPE_PV4DI:
37974 case V8DI_FTYPE_PV8DI:
37975 case V8DF_FTYPE_PV4DF:
37976 nargs = 1;
37977 klass = load;
37978 memory = 0;
37979 switch (icode)
37981 case CODE_FOR_sse4_1_movntdqa:
37982 case CODE_FOR_avx2_movntdqa:
37983 case CODE_FOR_avx512f_movntdqa:
37984 aligned_mem = true;
37985 break;
37986 default:
37987 break;
37989 break;
37990 case VOID_FTYPE_PV2SF_V4SF:
37991 case VOID_FTYPE_PV8DI_V8DI:
37992 case VOID_FTYPE_PV4DI_V4DI:
37993 case VOID_FTYPE_PV2DI_V2DI:
37994 case VOID_FTYPE_PCHAR_V32QI:
37995 case VOID_FTYPE_PCHAR_V16QI:
37996 case VOID_FTYPE_PFLOAT_V16SF:
37997 case VOID_FTYPE_PFLOAT_V8SF:
37998 case VOID_FTYPE_PFLOAT_V4SF:
37999 case VOID_FTYPE_PDOUBLE_V8DF:
38000 case VOID_FTYPE_PDOUBLE_V4DF:
38001 case VOID_FTYPE_PDOUBLE_V2DF:
38002 case VOID_FTYPE_PLONGLONG_LONGLONG:
38003 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38004 case VOID_FTYPE_PINT_INT:
38005 nargs = 1;
38006 klass = store;
38007 /* Reserve memory operand for target. */
38008 memory = ARRAY_SIZE (args);
38009 switch (icode)
38011 /* These builtins and instructions require the memory
38012 to be properly aligned. */
38013 case CODE_FOR_avx_movntv4di:
38014 case CODE_FOR_sse2_movntv2di:
38015 case CODE_FOR_avx_movntv8sf:
38016 case CODE_FOR_sse_movntv4sf:
38017 case CODE_FOR_sse4a_vmmovntv4sf:
38018 case CODE_FOR_avx_movntv4df:
38019 case CODE_FOR_sse2_movntv2df:
38020 case CODE_FOR_sse4a_vmmovntv2df:
38021 case CODE_FOR_sse2_movntidi:
38022 case CODE_FOR_sse_movntq:
38023 case CODE_FOR_sse2_movntisi:
38024 case CODE_FOR_avx512f_movntv16sf:
38025 case CODE_FOR_avx512f_movntv8df:
38026 case CODE_FOR_avx512f_movntv8di:
38027 aligned_mem = true;
38028 break;
38029 default:
38030 break;
38032 break;
38033 case V4SF_FTYPE_V4SF_PCV2SF:
38034 case V2DF_FTYPE_V2DF_PCDOUBLE:
38035 nargs = 2;
38036 klass = load;
38037 memory = 1;
38038 break;
38039 case V8SF_FTYPE_PCV8SF_V8SI:
38040 case V4DF_FTYPE_PCV4DF_V4DI:
38041 case V4SF_FTYPE_PCV4SF_V4SI:
38042 case V2DF_FTYPE_PCV2DF_V2DI:
38043 case V8SI_FTYPE_PCV8SI_V8SI:
38044 case V4DI_FTYPE_PCV4DI_V4DI:
38045 case V4SI_FTYPE_PCV4SI_V4SI:
38046 case V2DI_FTYPE_PCV2DI_V2DI:
38047 nargs = 2;
38048 klass = load;
38049 memory = 0;
38050 break;
38051 case VOID_FTYPE_PV8DF_V8DF_QI:
38052 case VOID_FTYPE_PV16SF_V16SF_HI:
38053 case VOID_FTYPE_PV8DI_V8DI_QI:
38054 case VOID_FTYPE_PV4DI_V4DI_QI:
38055 case VOID_FTYPE_PV2DI_V2DI_QI:
38056 case VOID_FTYPE_PV16SI_V16SI_HI:
38057 case VOID_FTYPE_PV8SI_V8SI_QI:
38058 case VOID_FTYPE_PV4SI_V4SI_QI:
38059 switch (icode)
38061 /* These builtins and instructions require the memory
38062 to be properly aligned. */
38063 case CODE_FOR_avx512f_storev16sf_mask:
38064 case CODE_FOR_avx512f_storev16si_mask:
38065 case CODE_FOR_avx512f_storev8df_mask:
38066 case CODE_FOR_avx512f_storev8di_mask:
38067 case CODE_FOR_avx512vl_storev8sf_mask:
38068 case CODE_FOR_avx512vl_storev8si_mask:
38069 case CODE_FOR_avx512vl_storev4df_mask:
38070 case CODE_FOR_avx512vl_storev4di_mask:
38071 case CODE_FOR_avx512vl_storev4sf_mask:
38072 case CODE_FOR_avx512vl_storev4si_mask:
38073 case CODE_FOR_avx512vl_storev2df_mask:
38074 case CODE_FOR_avx512vl_storev2di_mask:
38075 aligned_mem = true;
38076 break;
38077 default:
38078 break;
38080 /* FALLTHRU */
38081 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38082 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38083 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38084 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38085 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38086 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38087 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38088 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38089 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38090 case VOID_FTYPE_PFLOAT_V4SF_QI:
38091 case VOID_FTYPE_PV8SI_V8DI_QI:
38092 case VOID_FTYPE_PV8HI_V8DI_QI:
38093 case VOID_FTYPE_PV16HI_V16SI_HI:
38094 case VOID_FTYPE_PV16QI_V8DI_QI:
38095 case VOID_FTYPE_PV16QI_V16SI_HI:
38096 case VOID_FTYPE_PV4SI_V4DI_QI:
38097 case VOID_FTYPE_PV4SI_V2DI_QI:
38098 case VOID_FTYPE_PV8HI_V4DI_QI:
38099 case VOID_FTYPE_PV8HI_V2DI_QI:
38100 case VOID_FTYPE_PV8HI_V8SI_QI:
38101 case VOID_FTYPE_PV8HI_V4SI_QI:
38102 case VOID_FTYPE_PV16QI_V4DI_QI:
38103 case VOID_FTYPE_PV16QI_V2DI_QI:
38104 case VOID_FTYPE_PV16QI_V8SI_QI:
38105 case VOID_FTYPE_PV16QI_V4SI_QI:
38106 case VOID_FTYPE_PV8HI_V8HI_QI:
38107 case VOID_FTYPE_PV16HI_V16HI_HI:
38108 case VOID_FTYPE_PV32HI_V32HI_SI:
38109 case VOID_FTYPE_PV16QI_V16QI_HI:
38110 case VOID_FTYPE_PV32QI_V32QI_SI:
38111 case VOID_FTYPE_PV64QI_V64QI_DI:
38112 case VOID_FTYPE_PV4DF_V4DF_QI:
38113 case VOID_FTYPE_PV2DF_V2DF_QI:
38114 case VOID_FTYPE_PV8SF_V8SF_QI:
38115 case VOID_FTYPE_PV4SF_V4SF_QI:
38116 nargs = 2;
38117 klass = store;
38118 /* Reserve memory operand for target. */
38119 memory = ARRAY_SIZE (args);
38120 break;
38121 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38122 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38123 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38124 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38125 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38126 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38127 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38128 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38129 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38130 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38131 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38132 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38133 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38134 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38135 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38136 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38137 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38138 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38139 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38140 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38141 nargs = 3;
38142 klass = load;
38143 memory = 0;
38144 switch (icode)
38146 /* These builtins and instructions require the memory
38147 to be properly aligned. */
38148 case CODE_FOR_avx512f_loadv16sf_mask:
38149 case CODE_FOR_avx512f_loadv16si_mask:
38150 case CODE_FOR_avx512f_loadv8df_mask:
38151 case CODE_FOR_avx512f_loadv8di_mask:
38152 case CODE_FOR_avx512vl_loadv8sf_mask:
38153 case CODE_FOR_avx512vl_loadv8si_mask:
38154 case CODE_FOR_avx512vl_loadv4df_mask:
38155 case CODE_FOR_avx512vl_loadv4di_mask:
38156 case CODE_FOR_avx512vl_loadv4sf_mask:
38157 case CODE_FOR_avx512vl_loadv4si_mask:
38158 case CODE_FOR_avx512vl_loadv2df_mask:
38159 case CODE_FOR_avx512vl_loadv2di_mask:
38160 case CODE_FOR_avx512bw_loadv64qi_mask:
38161 case CODE_FOR_avx512vl_loadv32qi_mask:
38162 case CODE_FOR_avx512vl_loadv16qi_mask:
38163 case CODE_FOR_avx512bw_loadv32hi_mask:
38164 case CODE_FOR_avx512vl_loadv16hi_mask:
38165 case CODE_FOR_avx512vl_loadv8hi_mask:
38166 aligned_mem = true;
38167 break;
38168 default:
38169 break;
38171 break;
38172 case VOID_FTYPE_UINT_UINT_UINT:
38173 case VOID_FTYPE_UINT64_UINT_UINT:
38174 case UCHAR_FTYPE_UINT_UINT_UINT:
38175 case UCHAR_FTYPE_UINT64_UINT_UINT:
38176 nargs = 3;
38177 klass = load;
38178 memory = ARRAY_SIZE (args);
38179 last_arg_constant = true;
38180 break;
38181 default:
38182 gcc_unreachable ();
38185 gcc_assert (nargs <= ARRAY_SIZE (args));
38187 if (klass == store)
38189 arg = CALL_EXPR_ARG (exp, 0);
38190 op = expand_normal (arg);
38191 gcc_assert (target == 0);
38192 if (memory)
38194 op = ix86_zero_extend_to_Pmode (op);
38195 target = gen_rtx_MEM (tmode, op);
38196 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38197 on it. Try to improve it using get_pointer_alignment,
38198 and if the special builtin is one that requires strict
38199 	     mode alignment, also from its GET_MODE_ALIGNMENT.
38200 Failure to do so could lead to ix86_legitimate_combined_insn
38201 rejecting all changes to such insns. */
38202 unsigned int align = get_pointer_alignment (arg);
38203 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38204 align = GET_MODE_ALIGNMENT (tmode);
38205 if (MEM_ALIGN (target) < align)
38206 set_mem_align (target, align);
38208 else
38209 target = force_reg (tmode, op);
38210 arg_adjust = 1;
38212 else
38214 arg_adjust = 0;
38215 if (optimize
38216 || target == 0
38217 || !register_operand (target, tmode)
38218 || GET_MODE (target) != tmode)
38219 target = gen_reg_rtx (tmode);
38222 for (i = 0; i < nargs; i++)
38224 machine_mode mode = insn_p->operand[i + 1].mode;
38225 bool match;
38227 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38228 op = expand_normal (arg);
38229 match = insn_p->operand[i + 1].predicate (op, mode);
38231 if (last_arg_constant && (i + 1) == nargs)
38233 if (!match)
38235 if (icode == CODE_FOR_lwp_lwpvalsi3
38236 || icode == CODE_FOR_lwp_lwpinssi3
38237 || icode == CODE_FOR_lwp_lwpvaldi3
38238 || icode == CODE_FOR_lwp_lwpinsdi3)
38239 error ("the last argument must be a 32-bit immediate");
38240 else
38241 error ("the last argument must be an 8-bit immediate");
38242 return const0_rtx;
38245 else
38247 if (i == memory)
38249 /* This must be the memory operand. */
38250 op = ix86_zero_extend_to_Pmode (op);
38251 op = gen_rtx_MEM (mode, op);
38252 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38253 on it. Try to improve it using get_pointer_alignment,
38254 and if the special builtin is one that requires strict
38255 	     mode alignment, also from its GET_MODE_ALIGNMENT.
38256 Failure to do so could lead to ix86_legitimate_combined_insn
38257 rejecting all changes to such insns. */
38258 unsigned int align = get_pointer_alignment (arg);
38259 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38260 align = GET_MODE_ALIGNMENT (mode);
38261 if (MEM_ALIGN (op) < align)
38262 set_mem_align (op, align);
38264 else
38266 	  /* This must be a register.  */
38267 if (VECTOR_MODE_P (mode))
38268 op = safe_vector_operand (op, mode);
38270 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38271 op = copy_to_mode_reg (mode, op);
38272 else
38274 op = copy_to_reg (op);
38275 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38280 args[i].op = op;
38281 args[i].mode = mode;
38284 switch (nargs)
38286 case 0:
38287 pat = GEN_FCN (icode) (target);
38288 break;
38289 case 1:
38290 pat = GEN_FCN (icode) (target, args[0].op);
38291 break;
38292 case 2:
38293 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38294 break;
38295 case 3:
38296 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38297 break;
38298 default:
38299 gcc_unreachable ();
38302 if (! pat)
38303 return 0;
38304 emit_insn (pat);
38305 return klass == store ? 0 : target;
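/* Editor's note (an illustrative sketch, not part of the original source):
   the store/load classes handled above are what back the masked move
   intrinsics.  A masked AVX-512 store such as _mm512_mask_store_ps is in
   the aligned_mem list, so the expander raises MEM_ALIGN on the
   destination to GET_MODE_ALIGNMENT (V16SFmode), i.e. 64 bytes.
   Compiled with -mavx512f:

     #include <immintrin.h>

     void
     store_masked (float *dst, __mmask16 k, __m512 v)
     {
       _mm512_mask_store_ps (dst, k, v);   // dst must be 64-byte aligned
     }

   The unaligned variant (_mm512_mask_storeu_ps) takes the default path and
   keeps whatever alignment get_pointer_alignment can prove.  */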
38308 /* Return the integer constant in ARG. Constrain it to be in the range
38309 of the subparts of VEC_TYPE; issue an error if not. */
38311 static int
38312 get_element_number (tree vec_type, tree arg)
38314 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38316 if (!tree_fits_uhwi_p (arg)
38317 || (elt = tree_to_uhwi (arg), elt > max))
38319 error ("selector must be an integer constant in the range 0..%wi", max);
38320 return 0;
38323 return elt;
38326 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38327 ix86_expand_vector_init. We DO have language-level syntax for this, in
38328 the form of (type){ init-list }. Except that since we can't place emms
38329 instructions from inside the compiler, we can't allow the use of MMX
38330 registers unless the user explicitly asks for it. So we do *not* define
38331 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38332    we have builtins invoked by mmintrin.h that give us license to emit
38333 these sorts of instructions. */
38335 static rtx
38336 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38338 machine_mode tmode = TYPE_MODE (type);
38339 machine_mode inner_mode = GET_MODE_INNER (tmode);
38340 int i, n_elt = GET_MODE_NUNITS (tmode);
38341 rtvec v = rtvec_alloc (n_elt);
38343 gcc_assert (VECTOR_MODE_P (tmode));
38344 gcc_assert (call_expr_nargs (exp) == n_elt);
38346 for (i = 0; i < n_elt; ++i)
38348 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38349 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38352 if (!target || !register_operand (target, tmode))
38353 target = gen_reg_rtx (tmode);
38355 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38356 return target;
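/* Editor's sketch (an assumption about the user-level view, not part of
   the original source): the MMX init builtins are reached through
   mmintrin.h wrappers, e.g.

     #include <mmintrin.h>

     __m64
     make_pair (int hi, int lo)
     {
       __m64 v = _mm_set_pi32 (hi, lo);   // wraps __builtin_ia32_vec_init_v2si
       _mm_empty ();                      // the user must emit emms explicitly
       return v;
     }

   which is why the compiler never emits these patterns on its own.  */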
38359 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38360 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38361 had a language-level syntax for referencing vector elements. */
38363 static rtx
38364 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38366 machine_mode tmode, mode0;
38367 tree arg0, arg1;
38368 int elt;
38369 rtx op0;
38371 arg0 = CALL_EXPR_ARG (exp, 0);
38372 arg1 = CALL_EXPR_ARG (exp, 1);
38374 op0 = expand_normal (arg0);
38375 elt = get_element_number (TREE_TYPE (arg0), arg1);
38377 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38378 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38379 gcc_assert (VECTOR_MODE_P (mode0));
38381 op0 = force_reg (mode0, op0);
38383 if (optimize || !target || !register_operand (target, tmode))
38384 target = gen_reg_rtx (tmode);
38386 ix86_expand_vector_extract (true, target, op0, elt);
38388 return target;
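/* Editor's sketch (illustrative, not from the original source): the
   element-extract builtins back intrinsics such as _mm_extract_epi16
   (SSE2, emmintrin.h):

     #include <emmintrin.h>

     int
     third_halfword (__m128i v)
     {
       return _mm_extract_epi16 (v, 2);   // __builtin_ia32_vec_ext_v8hi, elt 2
     }

   get_element_number above rejects selectors outside 0..7 for V8HI.  */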
38391 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38392 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38393 a language-level syntax for referencing vector elements. */
38395 static rtx
38396 ix86_expand_vec_set_builtin (tree exp)
38398 machine_mode tmode, mode1;
38399 tree arg0, arg1, arg2;
38400 int elt;
38401 rtx op0, op1, target;
38403 arg0 = CALL_EXPR_ARG (exp, 0);
38404 arg1 = CALL_EXPR_ARG (exp, 1);
38405 arg2 = CALL_EXPR_ARG (exp, 2);
38407 tmode = TYPE_MODE (TREE_TYPE (arg0));
38408 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38409 gcc_assert (VECTOR_MODE_P (tmode));
38411 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38412 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38413 elt = get_element_number (TREE_TYPE (arg0), arg2);
38415 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38416 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38418 op0 = force_reg (tmode, op0);
38419 op1 = force_reg (mode1, op1);
38421 /* OP0 is the source of these builtin functions and shouldn't be
38422      modified.  Create a copy, use it, and return it as the target.  */
38423 target = gen_reg_rtx (tmode);
38424 emit_move_insn (target, op0);
38425 ix86_expand_vector_set (true, target, op1, elt);
38427 return target;
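/* Editor's sketch (illustrative, not from the original source): the
   element-set builtins back intrinsics such as _mm_insert_epi16; note the
   source vector is copied first, so the builtin's first argument is left
   unmodified:

     #include <emmintrin.h>

     __m128i
     set_third_halfword (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 2);   // __builtin_ia32_vec_set_v8hi
     }
*/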
38430 /* Emit conditional move of SRC to DST with condition
38431 OP1 CODE OP2. */
38432 static void
38433 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38435 rtx t;
38437 if (TARGET_CMOVE)
38439 t = ix86_expand_compare (code, op1, op2);
38440 emit_insn (gen_rtx_SET (VOIDmode, dst,
38441 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38442 src, dst)));
38444 else
38446 rtx nomove = gen_label_rtx ();
38447 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38448 const0_rtx, GET_MODE (op1), 1, nomove);
38449 emit_move_insn (dst, src);
38450 emit_label (nomove);
38454 /* Choose the max of DST and SRC and put it in DST.  */
38455 static void
38456 ix86_emit_move_max (rtx dst, rtx src)
38458 ix86_emit_cmove (dst, src, LTU, dst, src);
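/* Editor's note (a rough sketch of the expected lowering, not part of the
   original source): on a CMOV-capable target, ix86_emit_cmove turns
   ix86_emit_move_max (dst, src) into roughly

       cmp   dst, src
       cmovb dst, src          ; unsigned dst = max (dst, src)

   while the fallback path emits a compare, a conditional jump around, and
   a plain move.  The MPX helpers below rely on this unsigned max: lower
   bounds are compared directly, and upper bounds are kept in one's
   complement form, so the unsigned max of the stored values selects the
   tighter real upper bound.  */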
38461 /* Expand an expression EXP that calls a built-in function,
38462 with result going to TARGET if that's convenient
38463 (and in mode MODE if that's convenient).
38464 SUBTARGET may be used as the target for computing one of EXP's operands.
38465 IGNORE is nonzero if the value is to be ignored. */
38467 static rtx
38468 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38469 machine_mode mode, int ignore)
38471 const struct builtin_description *d;
38472 size_t i;
38473 enum insn_code icode;
38474 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38475 tree arg0, arg1, arg2, arg3, arg4;
38476 rtx op0, op1, op2, op3, op4, pat, insn;
38477 machine_mode mode0, mode1, mode2, mode3, mode4;
38478 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38480 /* For CPU builtins that can be folded, fold first and expand the fold. */
38481 switch (fcode)
38483 case IX86_BUILTIN_CPU_INIT:
38485 /* Make it call __cpu_indicator_init in libgcc. */
38486 tree call_expr, fndecl, type;
38487 type = build_function_type_list (integer_type_node, NULL_TREE);
38488 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38489 call_expr = build_call_expr (fndecl, 0);
38490 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38492 case IX86_BUILTIN_CPU_IS:
38493 case IX86_BUILTIN_CPU_SUPPORTS:
38495 tree arg0 = CALL_EXPR_ARG (exp, 0);
38496 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38497 gcc_assert (fold_expr != NULL_TREE);
38498 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38502 /* Determine whether the builtin function is available under the current ISA.
38503 Originally the builtin was not created if it wasn't applicable to the
38504 current ISA based on the command line switches. With function specific
38505 options, we need to check in the context of the function making the call
38506 whether it is supported. */
38507 if (ix86_builtins_isa[fcode].isa
38508 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38510 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38511 NULL, (enum fpmath_unit) 0, false);
38513 if (!opts)
38514 error ("%qE needs unknown isa option", fndecl);
38515 else
38517 gcc_assert (opts != NULL);
38518 error ("%qE needs isa option %s", fndecl, opts);
38519 free (opts);
38521 return const0_rtx;
38524 switch (fcode)
38526 case IX86_BUILTIN_BNDMK:
38527 if (!target
38528 || GET_MODE (target) != BNDmode
38529 || !register_operand (target, BNDmode))
38530 target = gen_reg_rtx (BNDmode);
38532 arg0 = CALL_EXPR_ARG (exp, 0);
38533 arg1 = CALL_EXPR_ARG (exp, 1);
38535 op0 = expand_normal (arg0);
38536 op1 = expand_normal (arg1);
38538 if (!register_operand (op0, Pmode))
38539 op0 = ix86_zero_extend_to_Pmode (op0);
38540 if (!register_operand (op1, Pmode))
38541 op1 = ix86_zero_extend_to_Pmode (op1);
38543       /* Builtin arg1 is the size of the block, but instruction op1 should
38544 be (size - 1). */
38545 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38546 NULL_RTX, 1, OPTAB_DIRECT);
38548 emit_insn (BNDmode == BND64mode
38549 ? gen_bnd64_mk (target, op0, op1)
38550 : gen_bnd32_mk (target, op0, op1));
38551 return target;
38553 case IX86_BUILTIN_BNDSTX:
38554 arg0 = CALL_EXPR_ARG (exp, 0);
38555 arg1 = CALL_EXPR_ARG (exp, 1);
38556 arg2 = CALL_EXPR_ARG (exp, 2);
38558 op0 = expand_normal (arg0);
38559 op1 = expand_normal (arg1);
38560 op2 = expand_normal (arg2);
38562 if (!register_operand (op0, Pmode))
38563 op0 = ix86_zero_extend_to_Pmode (op0);
38564 if (!register_operand (op1, BNDmode))
38565 op1 = copy_to_mode_reg (BNDmode, op1);
38566 if (!register_operand (op2, Pmode))
38567 op2 = ix86_zero_extend_to_Pmode (op2);
38569 emit_insn (BNDmode == BND64mode
38570 ? gen_bnd64_stx (op2, op0, op1)
38571 : gen_bnd32_stx (op2, op0, op1));
38572 return 0;
38574 case IX86_BUILTIN_BNDLDX:
38575 if (!target
38576 || GET_MODE (target) != BNDmode
38577 || !register_operand (target, BNDmode))
38578 target = gen_reg_rtx (BNDmode);
38580 arg0 = CALL_EXPR_ARG (exp, 0);
38581 arg1 = CALL_EXPR_ARG (exp, 1);
38583 op0 = expand_normal (arg0);
38584 op1 = expand_normal (arg1);
38586 if (!register_operand (op0, Pmode))
38587 op0 = ix86_zero_extend_to_Pmode (op0);
38588 if (!register_operand (op1, Pmode))
38589 op1 = ix86_zero_extend_to_Pmode (op1);
38591 emit_insn (BNDmode == BND64mode
38592 ? gen_bnd64_ldx (target, op0, op1)
38593 : gen_bnd32_ldx (target, op0, op1));
38594 return target;
38596 case IX86_BUILTIN_BNDCL:
38597 arg0 = CALL_EXPR_ARG (exp, 0);
38598 arg1 = CALL_EXPR_ARG (exp, 1);
38600 op0 = expand_normal (arg0);
38601 op1 = expand_normal (arg1);
38603 if (!register_operand (op0, Pmode))
38604 op0 = ix86_zero_extend_to_Pmode (op0);
38605 if (!register_operand (op1, BNDmode))
38606 op1 = copy_to_mode_reg (BNDmode, op1);
38608 emit_insn (BNDmode == BND64mode
38609 ? gen_bnd64_cl (op1, op0)
38610 : gen_bnd32_cl (op1, op0));
38611 return 0;
38613 case IX86_BUILTIN_BNDCU:
38614 arg0 = CALL_EXPR_ARG (exp, 0);
38615 arg1 = CALL_EXPR_ARG (exp, 1);
38617 op0 = expand_normal (arg0);
38618 op1 = expand_normal (arg1);
38620 if (!register_operand (op0, Pmode))
38621 op0 = ix86_zero_extend_to_Pmode (op0);
38622 if (!register_operand (op1, BNDmode))
38623 op1 = copy_to_mode_reg (BNDmode, op1);
38625 emit_insn (BNDmode == BND64mode
38626 ? gen_bnd64_cu (op1, op0)
38627 : gen_bnd32_cu (op1, op0));
38628 return 0;
38630 case IX86_BUILTIN_BNDRET:
38631 arg0 = CALL_EXPR_ARG (exp, 0);
38632 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38633 target = chkp_get_rtl_bounds (arg0);
38635       /* If no bounds were specified for the returned value,
38636 	 then use INIT bounds.  This usually happens when
38637 some built-in function is expanded. */
38638 if (!target)
38640 rtx t1 = gen_reg_rtx (Pmode);
38641 rtx t2 = gen_reg_rtx (Pmode);
38642 target = gen_reg_rtx (BNDmode);
38643 emit_move_insn (t1, const0_rtx);
38644 emit_move_insn (t2, constm1_rtx);
38645 emit_insn (BNDmode == BND64mode
38646 ? gen_bnd64_mk (target, t1, t2)
38647 : gen_bnd32_mk (target, t1, t2));
38650 gcc_assert (target && REG_P (target));
38651 return target;
38653 case IX86_BUILTIN_BNDNARROW:
38655 rtx m1, m1h1, m1h2, lb, ub, t1;
38657 /* Return value and lb. */
38658 arg0 = CALL_EXPR_ARG (exp, 0);
38659 /* Bounds. */
38660 arg1 = CALL_EXPR_ARG (exp, 1);
38661 /* Size. */
38662 arg2 = CALL_EXPR_ARG (exp, 2);
38664 lb = expand_normal (arg0);
38665 op1 = expand_normal (arg1);
38666 op2 = expand_normal (arg2);
38668 	/* The size was passed, but we need to use (size - 1), as for bndmk.  */
38669 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38670 NULL_RTX, 1, OPTAB_DIRECT);
38672 /* Add LB to size and inverse to get UB. */
38673 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38674 op2, 1, OPTAB_DIRECT);
38675 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38677 if (!register_operand (lb, Pmode))
38678 lb = ix86_zero_extend_to_Pmode (lb);
38679 if (!register_operand (ub, Pmode))
38680 ub = ix86_zero_extend_to_Pmode (ub);
38682 /* We need to move bounds to memory before any computations. */
38683 if (MEM_P (op1))
38684 m1 = op1;
38685 else
38687 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38688 emit_move_insn (m1, op1);
38691 /* Generate mem expression to be used for access to LB and UB. */
38692 m1h1 = adjust_address (m1, Pmode, 0);
38693 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38695 t1 = gen_reg_rtx (Pmode);
38697 /* Compute LB. */
38698 emit_move_insn (t1, m1h1);
38699 ix86_emit_move_max (t1, lb);
38700 emit_move_insn (m1h1, t1);
38702 /* Compute UB. UB is stored in 1's complement form. Therefore
38703 we also use max here. */
38704 emit_move_insn (t1, m1h2);
38705 ix86_emit_move_max (t1, ub);
38706 emit_move_insn (m1h2, t1);
38708 op2 = gen_reg_rtx (BNDmode);
38709 emit_move_insn (op2, m1);
38711 return chkp_join_splitted_slot (lb, op2);
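/* Editor's worked example (illustrative numbers, not from the original
   source): narrowing to lb = 0x1000 with size = 0x100 computes
   op2 = size - 1 = 0xff, then op2 + lb = 0x10ff, and ub = ~0x10ff.  The
   stored lower bound becomes max (old_lb, 0x1000) and the stored upper
   bound max (old_ub_1c, ~0x10ff); because upper bounds live in one's
   complement, that max corresponds to the smaller (tighter) real upper
   bound.  */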
38714 case IX86_BUILTIN_BNDINT:
38716 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38718 if (!target
38719 || GET_MODE (target) != BNDmode
38720 || !register_operand (target, BNDmode))
38721 target = gen_reg_rtx (BNDmode);
38723 arg0 = CALL_EXPR_ARG (exp, 0);
38724 arg1 = CALL_EXPR_ARG (exp, 1);
38726 op0 = expand_normal (arg0);
38727 op1 = expand_normal (arg1);
38729 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38730 rh1 = adjust_address (res, Pmode, 0);
38731 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38733 /* Put first bounds to temporaries. */
38734 lb1 = gen_reg_rtx (Pmode);
38735 ub1 = gen_reg_rtx (Pmode);
38736 if (MEM_P (op0))
38738 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38739 emit_move_insn (ub1, adjust_address (op0, Pmode,
38740 GET_MODE_SIZE (Pmode)));
38742 else
38744 emit_move_insn (res, op0);
38745 emit_move_insn (lb1, rh1);
38746 emit_move_insn (ub1, rh2);
38749 /* Put second bounds to temporaries. */
38750 lb2 = gen_reg_rtx (Pmode);
38751 ub2 = gen_reg_rtx (Pmode);
38752 if (MEM_P (op1))
38754 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38755 emit_move_insn (ub2, adjust_address (op1, Pmode,
38756 GET_MODE_SIZE (Pmode)));
38758 else
38760 emit_move_insn (res, op1);
38761 emit_move_insn (lb2, rh1);
38762 emit_move_insn (ub2, rh2);
38765 /* Compute LB. */
38766 ix86_emit_move_max (lb1, lb2);
38767 emit_move_insn (rh1, lb1);
38769 /* Compute UB. UB is stored in 1's complement form. Therefore
38770 we also use max here. */
38771 ix86_emit_move_max (ub1, ub2);
38772 emit_move_insn (rh2, ub1);
38774 emit_move_insn (target, res);
38776 return target;
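/* Editor's worked example (illustrative numbers, not from the original
   source): intersecting [0x1000, 0x1fff] with [0x1800, 0x2fff] takes
   lb = max (0x1000, 0x1800) = 0x1800 and, because upper bounds are kept
   in one's complement, ub_1c = max (~0x1fff, ~0x2fff) = ~0x1fff, i.e. the
   real upper bound min (0x1fff, 0x2fff) = 0x1fff.  */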
38779 case IX86_BUILTIN_SIZEOF:
38781 tree name;
38782 rtx symbol;
38784 if (!target
38785 || GET_MODE (target) != Pmode
38786 || !register_operand (target, Pmode))
38787 target = gen_reg_rtx (Pmode);
38789 arg0 = CALL_EXPR_ARG (exp, 0);
38790 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38792 name = DECL_ASSEMBLER_NAME (arg0);
38793 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38795 emit_insn (Pmode == SImode
38796 ? gen_move_size_reloc_si (target, symbol)
38797 : gen_move_size_reloc_di (target, symbol));
38799 return target;
38802 case IX86_BUILTIN_BNDLOWER:
38804 rtx mem, hmem;
38806 if (!target
38807 || GET_MODE (target) != Pmode
38808 || !register_operand (target, Pmode))
38809 target = gen_reg_rtx (Pmode);
38811 arg0 = CALL_EXPR_ARG (exp, 0);
38812 op0 = expand_normal (arg0);
38814 /* We need to move bounds to memory first. */
38815 if (MEM_P (op0))
38816 mem = op0;
38817 else
38819 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38820 emit_move_insn (mem, op0);
38823 /* Generate mem expression to access LB and load it. */
38824 hmem = adjust_address (mem, Pmode, 0);
38825 emit_move_insn (target, hmem);
38827 return target;
38830 case IX86_BUILTIN_BNDUPPER:
38832 rtx mem, hmem, res;
38834 if (!target
38835 || GET_MODE (target) != Pmode
38836 || !register_operand (target, Pmode))
38837 target = gen_reg_rtx (Pmode);
38839 arg0 = CALL_EXPR_ARG (exp, 0);
38840 op0 = expand_normal (arg0);
38842 /* We need to move bounds to memory first. */
38843 if (MEM_P (op0))
38844 mem = op0;
38845 else
38847 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38848 emit_move_insn (mem, op0);
38851 /* Generate mem expression to access UB. */
38852 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38854 	/* We need to invert all bits of UB.  */
38855 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38857 if (res != target)
38858 emit_move_insn (target, res);
38860 return target;
38863 case IX86_BUILTIN_MASKMOVQ:
38864 case IX86_BUILTIN_MASKMOVDQU:
38865 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38866 ? CODE_FOR_mmx_maskmovq
38867 : CODE_FOR_sse2_maskmovdqu);
38868 /* Note the arg order is different from the operand order. */
38869 arg1 = CALL_EXPR_ARG (exp, 0);
38870 arg2 = CALL_EXPR_ARG (exp, 1);
38871 arg0 = CALL_EXPR_ARG (exp, 2);
38872 op0 = expand_normal (arg0);
38873 op1 = expand_normal (arg1);
38874 op2 = expand_normal (arg2);
38875 mode0 = insn_data[icode].operand[0].mode;
38876 mode1 = insn_data[icode].operand[1].mode;
38877 mode2 = insn_data[icode].operand[2].mode;
38879 op0 = ix86_zero_extend_to_Pmode (op0);
38880 op0 = gen_rtx_MEM (mode1, op0);
38882 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38883 op0 = copy_to_mode_reg (mode0, op0);
38884 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38885 op1 = copy_to_mode_reg (mode1, op1);
38886 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38887 op2 = copy_to_mode_reg (mode2, op2);
38888 pat = GEN_FCN (icode) (op0, op1, op2);
38889 if (! pat)
38890 return 0;
38891 emit_insn (pat);
38892 return 0;
38894 case IX86_BUILTIN_LDMXCSR:
38895 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38896 target = assign_386_stack_local (SImode, SLOT_TEMP);
38897 emit_move_insn (target, op0);
38898 emit_insn (gen_sse_ldmxcsr (target));
38899 return 0;
38901 case IX86_BUILTIN_STMXCSR:
38902 target = assign_386_stack_local (SImode, SLOT_TEMP);
38903 emit_insn (gen_sse_stmxcsr (target));
38904 return copy_to_mode_reg (SImode, target);
38906 case IX86_BUILTIN_CLFLUSH:
38907 arg0 = CALL_EXPR_ARG (exp, 0);
38908 op0 = expand_normal (arg0);
38909 icode = CODE_FOR_sse2_clflush;
38910 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38911 op0 = ix86_zero_extend_to_Pmode (op0);
38913 emit_insn (gen_sse2_clflush (op0));
38914 return 0;
38916 case IX86_BUILTIN_CLWB:
38917 arg0 = CALL_EXPR_ARG (exp, 0);
38918 op0 = expand_normal (arg0);
38919 icode = CODE_FOR_clwb;
38920 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38921 op0 = ix86_zero_extend_to_Pmode (op0);
38923 emit_insn (gen_clwb (op0));
38924 return 0;
38926 case IX86_BUILTIN_CLFLUSHOPT:
38927 arg0 = CALL_EXPR_ARG (exp, 0);
38928 op0 = expand_normal (arg0);
38929 icode = CODE_FOR_clflushopt;
38930 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38931 op0 = ix86_zero_extend_to_Pmode (op0);
38933 emit_insn (gen_clflushopt (op0));
38934 return 0;
38936 case IX86_BUILTIN_MONITOR:
38937 arg0 = CALL_EXPR_ARG (exp, 0);
38938 arg1 = CALL_EXPR_ARG (exp, 1);
38939 arg2 = CALL_EXPR_ARG (exp, 2);
38940 op0 = expand_normal (arg0);
38941 op1 = expand_normal (arg1);
38942 op2 = expand_normal (arg2);
38943 if (!REG_P (op0))
38944 op0 = ix86_zero_extend_to_Pmode (op0);
38945 if (!REG_P (op1))
38946 op1 = copy_to_mode_reg (SImode, op1);
38947 if (!REG_P (op2))
38948 op2 = copy_to_mode_reg (SImode, op2);
38949 emit_insn (ix86_gen_monitor (op0, op1, op2));
38950 return 0;
38952 case IX86_BUILTIN_MWAIT:
38953 arg0 = CALL_EXPR_ARG (exp, 0);
38954 arg1 = CALL_EXPR_ARG (exp, 1);
38955 op0 = expand_normal (arg0);
38956 op1 = expand_normal (arg1);
38957 if (!REG_P (op0))
38958 op0 = copy_to_mode_reg (SImode, op0);
38959 if (!REG_P (op1))
38960 op1 = copy_to_mode_reg (SImode, op1);
38961 emit_insn (gen_sse3_mwait (op0, op1));
38962 return 0;
38964 case IX86_BUILTIN_VEC_INIT_V2SI:
38965 case IX86_BUILTIN_VEC_INIT_V4HI:
38966 case IX86_BUILTIN_VEC_INIT_V8QI:
38967 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38969 case IX86_BUILTIN_VEC_EXT_V2DF:
38970 case IX86_BUILTIN_VEC_EXT_V2DI:
38971 case IX86_BUILTIN_VEC_EXT_V4SF:
38972 case IX86_BUILTIN_VEC_EXT_V4SI:
38973 case IX86_BUILTIN_VEC_EXT_V8HI:
38974 case IX86_BUILTIN_VEC_EXT_V2SI:
38975 case IX86_BUILTIN_VEC_EXT_V4HI:
38976 case IX86_BUILTIN_VEC_EXT_V16QI:
38977 return ix86_expand_vec_ext_builtin (exp, target);
38979 case IX86_BUILTIN_VEC_SET_V2DI:
38980 case IX86_BUILTIN_VEC_SET_V4SF:
38981 case IX86_BUILTIN_VEC_SET_V4SI:
38982 case IX86_BUILTIN_VEC_SET_V8HI:
38983 case IX86_BUILTIN_VEC_SET_V4HI:
38984 case IX86_BUILTIN_VEC_SET_V16QI:
38985 return ix86_expand_vec_set_builtin (exp);
38987 case IX86_BUILTIN_INFQ:
38988 case IX86_BUILTIN_HUGE_VALQ:
38990 REAL_VALUE_TYPE inf;
38991 rtx tmp;
38993 real_inf (&inf);
38994 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38996 tmp = validize_mem (force_const_mem (mode, tmp));
38998 if (target == 0)
38999 target = gen_reg_rtx (mode);
39001 emit_move_insn (target, tmp);
39002 return target;
39005 case IX86_BUILTIN_RDPMC:
39006 case IX86_BUILTIN_RDTSC:
39007 case IX86_BUILTIN_RDTSCP:
39009 op0 = gen_reg_rtx (DImode);
39010 op1 = gen_reg_rtx (DImode);
39012 if (fcode == IX86_BUILTIN_RDPMC)
39014 arg0 = CALL_EXPR_ARG (exp, 0);
39015 op2 = expand_normal (arg0);
39016 if (!register_operand (op2, SImode))
39017 op2 = copy_to_mode_reg (SImode, op2);
39019 insn = (TARGET_64BIT
39020 ? gen_rdpmc_rex64 (op0, op1, op2)
39021 : gen_rdpmc (op0, op2));
39022 emit_insn (insn);
39024 else if (fcode == IX86_BUILTIN_RDTSC)
39026 insn = (TARGET_64BIT
39027 ? gen_rdtsc_rex64 (op0, op1)
39028 : gen_rdtsc (op0));
39029 emit_insn (insn);
39031 else
39033 op2 = gen_reg_rtx (SImode);
39035 insn = (TARGET_64BIT
39036 ? gen_rdtscp_rex64 (op0, op1, op2)
39037 : gen_rdtscp (op0, op2));
39038 emit_insn (insn);
39040 arg0 = CALL_EXPR_ARG (exp, 0);
39041 op4 = expand_normal (arg0);
39042 if (!address_operand (op4, VOIDmode))
39044 op4 = convert_memory_address (Pmode, op4);
39045 op4 = copy_addr_to_reg (op4);
39047 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39050 if (target == 0)
39052 /* mode is VOIDmode if __builtin_rd* has been called
39053 	     without an lhs.  */
39054 if (mode == VOIDmode)
39055 return target;
39056 target = gen_reg_rtx (mode);
39059 if (TARGET_64BIT)
39061 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39062 op1, 1, OPTAB_DIRECT);
39063 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39064 op0, 1, OPTAB_DIRECT);
39067 emit_move_insn (target, op0);
39068 return target;
39070 case IX86_BUILTIN_FXSAVE:
39071 case IX86_BUILTIN_FXRSTOR:
39072 case IX86_BUILTIN_FXSAVE64:
39073 case IX86_BUILTIN_FXRSTOR64:
39074 case IX86_BUILTIN_FNSTENV:
39075 case IX86_BUILTIN_FLDENV:
39076 mode0 = BLKmode;
39077 switch (fcode)
39079 case IX86_BUILTIN_FXSAVE:
39080 icode = CODE_FOR_fxsave;
39081 break;
39082 case IX86_BUILTIN_FXRSTOR:
39083 icode = CODE_FOR_fxrstor;
39084 break;
39085 case IX86_BUILTIN_FXSAVE64:
39086 icode = CODE_FOR_fxsave64;
39087 break;
39088 case IX86_BUILTIN_FXRSTOR64:
39089 icode = CODE_FOR_fxrstor64;
39090 break;
39091 case IX86_BUILTIN_FNSTENV:
39092 icode = CODE_FOR_fnstenv;
39093 break;
39094 case IX86_BUILTIN_FLDENV:
39095 icode = CODE_FOR_fldenv;
39096 break;
39097 default:
39098 gcc_unreachable ();
39101 arg0 = CALL_EXPR_ARG (exp, 0);
39102 op0 = expand_normal (arg0);
39104 if (!address_operand (op0, VOIDmode))
39106 op0 = convert_memory_address (Pmode, op0);
39107 op0 = copy_addr_to_reg (op0);
39109 op0 = gen_rtx_MEM (mode0, op0);
39111 pat = GEN_FCN (icode) (op0);
39112 if (pat)
39113 emit_insn (pat);
39114 return 0;
39116 case IX86_BUILTIN_XSAVE:
39117 case IX86_BUILTIN_XRSTOR:
39118 case IX86_BUILTIN_XSAVE64:
39119 case IX86_BUILTIN_XRSTOR64:
39120 case IX86_BUILTIN_XSAVEOPT:
39121 case IX86_BUILTIN_XSAVEOPT64:
39122 case IX86_BUILTIN_XSAVES:
39123 case IX86_BUILTIN_XRSTORS:
39124 case IX86_BUILTIN_XSAVES64:
39125 case IX86_BUILTIN_XRSTORS64:
39126 case IX86_BUILTIN_XSAVEC:
39127 case IX86_BUILTIN_XSAVEC64:
39128 arg0 = CALL_EXPR_ARG (exp, 0);
39129 arg1 = CALL_EXPR_ARG (exp, 1);
39130 op0 = expand_normal (arg0);
39131 op1 = expand_normal (arg1);
39133 if (!address_operand (op0, VOIDmode))
39135 op0 = convert_memory_address (Pmode, op0);
39136 op0 = copy_addr_to_reg (op0);
39138 op0 = gen_rtx_MEM (BLKmode, op0);
39140 op1 = force_reg (DImode, op1);
39142 if (TARGET_64BIT)
39144 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39145 NULL, 1, OPTAB_DIRECT);
39146 switch (fcode)
39148 case IX86_BUILTIN_XSAVE:
39149 icode = CODE_FOR_xsave_rex64;
39150 break;
39151 case IX86_BUILTIN_XRSTOR:
39152 icode = CODE_FOR_xrstor_rex64;
39153 break;
39154 case IX86_BUILTIN_XSAVE64:
39155 icode = CODE_FOR_xsave64;
39156 break;
39157 case IX86_BUILTIN_XRSTOR64:
39158 icode = CODE_FOR_xrstor64;
39159 break;
39160 case IX86_BUILTIN_XSAVEOPT:
39161 icode = CODE_FOR_xsaveopt_rex64;
39162 break;
39163 case IX86_BUILTIN_XSAVEOPT64:
39164 icode = CODE_FOR_xsaveopt64;
39165 break;
39166 case IX86_BUILTIN_XSAVES:
39167 icode = CODE_FOR_xsaves_rex64;
39168 break;
39169 case IX86_BUILTIN_XRSTORS:
39170 icode = CODE_FOR_xrstors_rex64;
39171 break;
39172 case IX86_BUILTIN_XSAVES64:
39173 icode = CODE_FOR_xsaves64;
39174 break;
39175 case IX86_BUILTIN_XRSTORS64:
39176 icode = CODE_FOR_xrstors64;
39177 break;
39178 case IX86_BUILTIN_XSAVEC:
39179 icode = CODE_FOR_xsavec_rex64;
39180 break;
39181 case IX86_BUILTIN_XSAVEC64:
39182 icode = CODE_FOR_xsavec64;
39183 break;
39184 default:
39185 gcc_unreachable ();
39188 op2 = gen_lowpart (SImode, op2);
39189 op1 = gen_lowpart (SImode, op1);
39190 pat = GEN_FCN (icode) (op0, op1, op2);
39192 else
39194 switch (fcode)
39196 case IX86_BUILTIN_XSAVE:
39197 icode = CODE_FOR_xsave;
39198 break;
39199 case IX86_BUILTIN_XRSTOR:
39200 icode = CODE_FOR_xrstor;
39201 break;
39202 case IX86_BUILTIN_XSAVEOPT:
39203 icode = CODE_FOR_xsaveopt;
39204 break;
39205 case IX86_BUILTIN_XSAVES:
39206 icode = CODE_FOR_xsaves;
39207 break;
39208 case IX86_BUILTIN_XRSTORS:
39209 icode = CODE_FOR_xrstors;
39210 break;
39211 case IX86_BUILTIN_XSAVEC:
39212 icode = CODE_FOR_xsavec;
39213 break;
39214 default:
39215 gcc_unreachable ();
39217 pat = GEN_FCN (icode) (op0, op1);
39220 if (pat)
39221 emit_insn (pat);
39222 return 0;
39224 case IX86_BUILTIN_LLWPCB:
39225 arg0 = CALL_EXPR_ARG (exp, 0);
39226 op0 = expand_normal (arg0);
39227 icode = CODE_FOR_lwp_llwpcb;
39228 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39229 op0 = ix86_zero_extend_to_Pmode (op0);
39230 emit_insn (gen_lwp_llwpcb (op0));
39231 return 0;
39233 case IX86_BUILTIN_SLWPCB:
39234 icode = CODE_FOR_lwp_slwpcb;
39235 if (!target
39236 || !insn_data[icode].operand[0].predicate (target, Pmode))
39237 target = gen_reg_rtx (Pmode);
39238 emit_insn (gen_lwp_slwpcb (target));
39239 return target;
39241 case IX86_BUILTIN_BEXTRI32:
39242 case IX86_BUILTIN_BEXTRI64:
39243 arg0 = CALL_EXPR_ARG (exp, 0);
39244 arg1 = CALL_EXPR_ARG (exp, 1);
39245 op0 = expand_normal (arg0);
39246 op1 = expand_normal (arg1);
39247 icode = (fcode == IX86_BUILTIN_BEXTRI32
39248 ? CODE_FOR_tbm_bextri_si
39249 : CODE_FOR_tbm_bextri_di);
39250 if (!CONST_INT_P (op1))
39252 error ("last argument must be an immediate");
39253 return const0_rtx;
39255 else
39257 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39258 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39259 op1 = GEN_INT (length);
39260 op2 = GEN_INT (lsb_index);
39261 pat = GEN_FCN (icode) (target, op0, op1, op2);
39262 if (pat)
39263 emit_insn (pat);
39264 return target;
39267 case IX86_BUILTIN_RDRAND16_STEP:
39268 icode = CODE_FOR_rdrandhi_1;
39269 mode0 = HImode;
39270 goto rdrand_step;
39272 case IX86_BUILTIN_RDRAND32_STEP:
39273 icode = CODE_FOR_rdrandsi_1;
39274 mode0 = SImode;
39275 goto rdrand_step;
39277 case IX86_BUILTIN_RDRAND64_STEP:
39278 icode = CODE_FOR_rdranddi_1;
39279 mode0 = DImode;
39281 rdrand_step:
39282 op0 = gen_reg_rtx (mode0);
39283 emit_insn (GEN_FCN (icode) (op0));
39285 arg0 = CALL_EXPR_ARG (exp, 0);
39286 op1 = expand_normal (arg0);
39287 if (!address_operand (op1, VOIDmode))
39289 op1 = convert_memory_address (Pmode, op1);
39290 op1 = copy_addr_to_reg (op1);
39292 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39294 op1 = gen_reg_rtx (SImode);
39295 emit_move_insn (op1, CONST1_RTX (SImode));
39297 /* Emit SImode conditional move. */
39298 if (mode0 == HImode)
39300 op2 = gen_reg_rtx (SImode);
39301 emit_insn (gen_zero_extendhisi2 (op2, op0));
39303 else if (mode0 == SImode)
39304 op2 = op0;
39305 else
39306 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39308 if (target == 0
39309 || !register_operand (target, SImode))
39310 target = gen_reg_rtx (SImode);
39312 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39313 const0_rtx);
39314 emit_insn (gen_rtx_SET (VOIDmode, target,
39315 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39316 return target;
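/* Editor's sketch (illustrative, not from the original source): the
   *_STEP builtins implement the immintrin.h step intrinsics (compiled
   with -mrdrnd), e.g.

     #include <immintrin.h>

     int
     get_random (unsigned int *out)
     {
       return _rdrand32_step (out);   // 1 if the hardware delivered a value
     }

   The conditional move emitted above is what turns the CF result of
   rdrand into that success/failure return value.  */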
39318 case IX86_BUILTIN_RDSEED16_STEP:
39319 icode = CODE_FOR_rdseedhi_1;
39320 mode0 = HImode;
39321 goto rdseed_step;
39323 case IX86_BUILTIN_RDSEED32_STEP:
39324 icode = CODE_FOR_rdseedsi_1;
39325 mode0 = SImode;
39326 goto rdseed_step;
39328 case IX86_BUILTIN_RDSEED64_STEP:
39329 icode = CODE_FOR_rdseeddi_1;
39330 mode0 = DImode;
39332 rdseed_step:
39333 op0 = gen_reg_rtx (mode0);
39334 emit_insn (GEN_FCN (icode) (op0));
39336 arg0 = CALL_EXPR_ARG (exp, 0);
39337 op1 = expand_normal (arg0);
39338 if (!address_operand (op1, VOIDmode))
39340 op1 = convert_memory_address (Pmode, op1);
39341 op1 = copy_addr_to_reg (op1);
39343 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39345 op2 = gen_reg_rtx (QImode);
39347 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39348 const0_rtx);
39349 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39351 if (target == 0
39352 || !register_operand (target, SImode))
39353 target = gen_reg_rtx (SImode);
39355 emit_insn (gen_zero_extendqisi2 (target, op2));
39356 return target;
39358 case IX86_BUILTIN_SBB32:
39359 icode = CODE_FOR_subsi3_carry;
39360 mode0 = SImode;
39361 goto addcarryx;
39363 case IX86_BUILTIN_SBB64:
39364 icode = CODE_FOR_subdi3_carry;
39365 mode0 = DImode;
39366 goto addcarryx;
39368 case IX86_BUILTIN_ADDCARRYX32:
39369 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39370 mode0 = SImode;
39371 goto addcarryx;
39373 case IX86_BUILTIN_ADDCARRYX64:
39374 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39375 mode0 = DImode;
39377 addcarryx:
39378 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39379 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39380 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39381 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39383 op0 = gen_reg_rtx (QImode);
39385 /* Generate CF from input operand. */
39386 op1 = expand_normal (arg0);
39387 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39388 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39390       /* Generate an ADCX instruction to compute X + Y + CF.  */
39391 op2 = expand_normal (arg1);
39392 op3 = expand_normal (arg2);
39394 if (!REG_P (op2))
39395 op2 = copy_to_mode_reg (mode0, op2);
39396 if (!REG_P (op3))
39397 op3 = copy_to_mode_reg (mode0, op3);
39399 op0 = gen_reg_rtx (mode0);
39401 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39402 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39403 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39405 /* Store the result. */
39406 op4 = expand_normal (arg3);
39407 if (!address_operand (op4, VOIDmode))
39409 op4 = convert_memory_address (Pmode, op4);
39410 op4 = copy_addr_to_reg (op4);
39412 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39414 /* Return current CF value. */
39415 if (target == 0)
39416 target = gen_reg_rtx (QImode);
39418 PUT_MODE (pat, QImode);
39419 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39420 return target;
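/* Editor's sketch (illustrative, not from the original source): the
   ADDCARRYX builtins back the adxintrin.h chained-add intrinsics
   (compiled with -madx), e.g.

     #include <immintrin.h>

     unsigned char
     add64_chained (unsigned int a_lo, unsigned int a_hi,
                    unsigned int b_lo, unsigned int b_hi,
                    unsigned int out[2])
     {
       unsigned char c = _addcarryx_u32 (0, a_lo, b_lo, &out[0]);
       return _addcarryx_u32 (c, a_hi, b_hi, &out[1]);   // final carry
     }

   matching the expansion above: materialize CF from c_in, add with carry,
   store *sum_out, and return the new CF.  */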
39422 case IX86_BUILTIN_READ_FLAGS:
39423 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39425 if (optimize
39426 || target == NULL_RTX
39427 || !nonimmediate_operand (target, word_mode)
39428 || GET_MODE (target) != word_mode)
39429 target = gen_reg_rtx (word_mode);
39431 emit_insn (gen_pop (target));
39432 return target;
39434 case IX86_BUILTIN_WRITE_FLAGS:
39436 arg0 = CALL_EXPR_ARG (exp, 0);
39437 op0 = expand_normal (arg0);
39438 if (!general_no_elim_operand (op0, word_mode))
39439 op0 = copy_to_mode_reg (word_mode, op0);
39441 emit_insn (gen_push (op0));
39442 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39443 return 0;
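/* Editor's sketch (an assumption about the user-level wrappers, not from
   the original source): the READ/WRITE_FLAGS builtins are what
   ia32intrin.h's __readeflags and __writeeflags expand to; both go
   through a push/pop of the flags register as above, e.g.

     #include <x86intrin.h>

     void
     set_carry (void)
     {
       __writeeflags (__readeflags () | 1);   // bit 0 of EFLAGS is CF
     }
*/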
39445 case IX86_BUILTIN_KORTESTC16:
39446 icode = CODE_FOR_kortestchi;
39447 mode0 = HImode;
39448 mode1 = CCCmode;
39449 goto kortest;
39451 case IX86_BUILTIN_KORTESTZ16:
39452 icode = CODE_FOR_kortestzhi;
39453 mode0 = HImode;
39454 mode1 = CCZmode;
39456 kortest:
39457 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39458 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39459 op0 = expand_normal (arg0);
39460 op1 = expand_normal (arg1);
39462 op0 = copy_to_reg (op0);
39463 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39464 op1 = copy_to_reg (op1);
39465 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39467 target = gen_reg_rtx (QImode);
39468 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39470 /* Emit kortest. */
39471 emit_insn (GEN_FCN (icode) (op0, op1));
39472 /* And use setcc to return result from flags. */
39473 ix86_expand_setcc (target, EQ,
39474 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39475 return target;
39477 case IX86_BUILTIN_GATHERSIV2DF:
39478 icode = CODE_FOR_avx2_gathersiv2df;
39479 goto gather_gen;
39480 case IX86_BUILTIN_GATHERSIV4DF:
39481 icode = CODE_FOR_avx2_gathersiv4df;
39482 goto gather_gen;
39483 case IX86_BUILTIN_GATHERDIV2DF:
39484 icode = CODE_FOR_avx2_gatherdiv2df;
39485 goto gather_gen;
39486 case IX86_BUILTIN_GATHERDIV4DF:
39487 icode = CODE_FOR_avx2_gatherdiv4df;
39488 goto gather_gen;
39489 case IX86_BUILTIN_GATHERSIV4SF:
39490 icode = CODE_FOR_avx2_gathersiv4sf;
39491 goto gather_gen;
39492 case IX86_BUILTIN_GATHERSIV8SF:
39493 icode = CODE_FOR_avx2_gathersiv8sf;
39494 goto gather_gen;
39495 case IX86_BUILTIN_GATHERDIV4SF:
39496 icode = CODE_FOR_avx2_gatherdiv4sf;
39497 goto gather_gen;
39498 case IX86_BUILTIN_GATHERDIV8SF:
39499 icode = CODE_FOR_avx2_gatherdiv8sf;
39500 goto gather_gen;
39501 case IX86_BUILTIN_GATHERSIV2DI:
39502 icode = CODE_FOR_avx2_gathersiv2di;
39503 goto gather_gen;
39504 case IX86_BUILTIN_GATHERSIV4DI:
39505 icode = CODE_FOR_avx2_gathersiv4di;
39506 goto gather_gen;
39507 case IX86_BUILTIN_GATHERDIV2DI:
39508 icode = CODE_FOR_avx2_gatherdiv2di;
39509 goto gather_gen;
39510 case IX86_BUILTIN_GATHERDIV4DI:
39511 icode = CODE_FOR_avx2_gatherdiv4di;
39512 goto gather_gen;
39513 case IX86_BUILTIN_GATHERSIV4SI:
39514 icode = CODE_FOR_avx2_gathersiv4si;
39515 goto gather_gen;
39516 case IX86_BUILTIN_GATHERSIV8SI:
39517 icode = CODE_FOR_avx2_gathersiv8si;
39518 goto gather_gen;
39519 case IX86_BUILTIN_GATHERDIV4SI:
39520 icode = CODE_FOR_avx2_gatherdiv4si;
39521 goto gather_gen;
39522 case IX86_BUILTIN_GATHERDIV8SI:
39523 icode = CODE_FOR_avx2_gatherdiv8si;
39524 goto gather_gen;
39525 case IX86_BUILTIN_GATHERALTSIV4DF:
39526 icode = CODE_FOR_avx2_gathersiv4df;
39527 goto gather_gen;
39528 case IX86_BUILTIN_GATHERALTDIV8SF:
39529 icode = CODE_FOR_avx2_gatherdiv8sf;
39530 goto gather_gen;
39531 case IX86_BUILTIN_GATHERALTSIV4DI:
39532 icode = CODE_FOR_avx2_gathersiv4di;
39533 goto gather_gen;
39534 case IX86_BUILTIN_GATHERALTDIV8SI:
39535 icode = CODE_FOR_avx2_gatherdiv8si;
39536 goto gather_gen;
39537 case IX86_BUILTIN_GATHER3SIV16SF:
39538 icode = CODE_FOR_avx512f_gathersiv16sf;
39539 goto gather_gen;
39540 case IX86_BUILTIN_GATHER3SIV8DF:
39541 icode = CODE_FOR_avx512f_gathersiv8df;
39542 goto gather_gen;
39543 case IX86_BUILTIN_GATHER3DIV16SF:
39544 icode = CODE_FOR_avx512f_gatherdiv16sf;
39545 goto gather_gen;
39546 case IX86_BUILTIN_GATHER3DIV8DF:
39547 icode = CODE_FOR_avx512f_gatherdiv8df;
39548 goto gather_gen;
39549 case IX86_BUILTIN_GATHER3SIV16SI:
39550 icode = CODE_FOR_avx512f_gathersiv16si;
39551 goto gather_gen;
39552 case IX86_BUILTIN_GATHER3SIV8DI:
39553 icode = CODE_FOR_avx512f_gathersiv8di;
39554 goto gather_gen;
39555 case IX86_BUILTIN_GATHER3DIV16SI:
39556 icode = CODE_FOR_avx512f_gatherdiv16si;
39557 goto gather_gen;
39558 case IX86_BUILTIN_GATHER3DIV8DI:
39559 icode = CODE_FOR_avx512f_gatherdiv8di;
39560 goto gather_gen;
39561 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39562 icode = CODE_FOR_avx512f_gathersiv8df;
39563 goto gather_gen;
39564 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39565 icode = CODE_FOR_avx512f_gatherdiv16sf;
39566 goto gather_gen;
39567 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39568 icode = CODE_FOR_avx512f_gathersiv8di;
39569 goto gather_gen;
39570 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39571 icode = CODE_FOR_avx512f_gatherdiv16si;
39572 goto gather_gen;
39573 case IX86_BUILTIN_GATHER3SIV2DF:
39574 icode = CODE_FOR_avx512vl_gathersiv2df;
39575 goto gather_gen;
39576 case IX86_BUILTIN_GATHER3SIV4DF:
39577 icode = CODE_FOR_avx512vl_gathersiv4df;
39578 goto gather_gen;
39579 case IX86_BUILTIN_GATHER3DIV2DF:
39580 icode = CODE_FOR_avx512vl_gatherdiv2df;
39581 goto gather_gen;
39582 case IX86_BUILTIN_GATHER3DIV4DF:
39583 icode = CODE_FOR_avx512vl_gatherdiv4df;
39584 goto gather_gen;
39585 case IX86_BUILTIN_GATHER3SIV4SF:
39586 icode = CODE_FOR_avx512vl_gathersiv4sf;
39587 goto gather_gen;
39588 case IX86_BUILTIN_GATHER3SIV8SF:
39589 icode = CODE_FOR_avx512vl_gathersiv8sf;
39590 goto gather_gen;
39591 case IX86_BUILTIN_GATHER3DIV4SF:
39592 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39593 goto gather_gen;
39594 case IX86_BUILTIN_GATHER3DIV8SF:
39595 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39596 goto gather_gen;
39597 case IX86_BUILTIN_GATHER3SIV2DI:
39598 icode = CODE_FOR_avx512vl_gathersiv2di;
39599 goto gather_gen;
39600 case IX86_BUILTIN_GATHER3SIV4DI:
39601 icode = CODE_FOR_avx512vl_gathersiv4di;
39602 goto gather_gen;
39603 case IX86_BUILTIN_GATHER3DIV2DI:
39604 icode = CODE_FOR_avx512vl_gatherdiv2di;
39605 goto gather_gen;
39606 case IX86_BUILTIN_GATHER3DIV4DI:
39607 icode = CODE_FOR_avx512vl_gatherdiv4di;
39608 goto gather_gen;
39609 case IX86_BUILTIN_GATHER3SIV4SI:
39610 icode = CODE_FOR_avx512vl_gathersiv4si;
39611 goto gather_gen;
39612 case IX86_BUILTIN_GATHER3SIV8SI:
39613 icode = CODE_FOR_avx512vl_gathersiv8si;
39614 goto gather_gen;
39615 case IX86_BUILTIN_GATHER3DIV4SI:
39616 icode = CODE_FOR_avx512vl_gatherdiv4si;
39617 goto gather_gen;
39618 case IX86_BUILTIN_GATHER3DIV8SI:
39619 icode = CODE_FOR_avx512vl_gatherdiv8si;
39620 goto gather_gen;
39621 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39622 icode = CODE_FOR_avx512vl_gathersiv4df;
39623 goto gather_gen;
39624 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39625 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39626 goto gather_gen;
39627 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39628 icode = CODE_FOR_avx512vl_gathersiv4di;
39629 goto gather_gen;
39630 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39631 icode = CODE_FOR_avx512vl_gatherdiv8si;
39632 goto gather_gen;
39633 case IX86_BUILTIN_SCATTERSIV16SF:
39634 icode = CODE_FOR_avx512f_scattersiv16sf;
39635 goto scatter_gen;
39636 case IX86_BUILTIN_SCATTERSIV8DF:
39637 icode = CODE_FOR_avx512f_scattersiv8df;
39638 goto scatter_gen;
39639 case IX86_BUILTIN_SCATTERDIV16SF:
39640 icode = CODE_FOR_avx512f_scatterdiv16sf;
39641 goto scatter_gen;
39642 case IX86_BUILTIN_SCATTERDIV8DF:
39643 icode = CODE_FOR_avx512f_scatterdiv8df;
39644 goto scatter_gen;
39645 case IX86_BUILTIN_SCATTERSIV16SI:
39646 icode = CODE_FOR_avx512f_scattersiv16si;
39647 goto scatter_gen;
39648 case IX86_BUILTIN_SCATTERSIV8DI:
39649 icode = CODE_FOR_avx512f_scattersiv8di;
39650 goto scatter_gen;
39651 case IX86_BUILTIN_SCATTERDIV16SI:
39652 icode = CODE_FOR_avx512f_scatterdiv16si;
39653 goto scatter_gen;
39654 case IX86_BUILTIN_SCATTERDIV8DI:
39655 icode = CODE_FOR_avx512f_scatterdiv8di;
39656 goto scatter_gen;
39657 case IX86_BUILTIN_SCATTERSIV8SF:
39658 icode = CODE_FOR_avx512vl_scattersiv8sf;
39659 goto scatter_gen;
39660 case IX86_BUILTIN_SCATTERSIV4SF:
39661 icode = CODE_FOR_avx512vl_scattersiv4sf;
39662 goto scatter_gen;
39663 case IX86_BUILTIN_SCATTERSIV4DF:
39664 icode = CODE_FOR_avx512vl_scattersiv4df;
39665 goto scatter_gen;
39666 case IX86_BUILTIN_SCATTERSIV2DF:
39667 icode = CODE_FOR_avx512vl_scattersiv2df;
39668 goto scatter_gen;
39669 case IX86_BUILTIN_SCATTERDIV8SF:
39670 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39671 goto scatter_gen;
39672 case IX86_BUILTIN_SCATTERDIV4SF:
39673 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39674 goto scatter_gen;
39675 case IX86_BUILTIN_SCATTERDIV4DF:
39676 icode = CODE_FOR_avx512vl_scatterdiv4df;
39677 goto scatter_gen;
39678 case IX86_BUILTIN_SCATTERDIV2DF:
39679 icode = CODE_FOR_avx512vl_scatterdiv2df;
39680 goto scatter_gen;
39681 case IX86_BUILTIN_SCATTERSIV8SI:
39682 icode = CODE_FOR_avx512vl_scattersiv8si;
39683 goto scatter_gen;
39684 case IX86_BUILTIN_SCATTERSIV4SI:
39685 icode = CODE_FOR_avx512vl_scattersiv4si;
39686 goto scatter_gen;
39687 case IX86_BUILTIN_SCATTERSIV4DI:
39688 icode = CODE_FOR_avx512vl_scattersiv4di;
39689 goto scatter_gen;
39690 case IX86_BUILTIN_SCATTERSIV2DI:
39691 icode = CODE_FOR_avx512vl_scattersiv2di;
39692 goto scatter_gen;
39693 case IX86_BUILTIN_SCATTERDIV8SI:
39694 icode = CODE_FOR_avx512vl_scatterdiv8si;
39695 goto scatter_gen;
39696 case IX86_BUILTIN_SCATTERDIV4SI:
39697 icode = CODE_FOR_avx512vl_scatterdiv4si;
39698 goto scatter_gen;
39699 case IX86_BUILTIN_SCATTERDIV4DI:
39700 icode = CODE_FOR_avx512vl_scatterdiv4di;
39701 goto scatter_gen;
39702 case IX86_BUILTIN_SCATTERDIV2DI:
39703 icode = CODE_FOR_avx512vl_scatterdiv2di;
39704 goto scatter_gen;
39705 case IX86_BUILTIN_GATHERPFDPD:
39706 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39707 goto vec_prefetch_gen;
39708 case IX86_BUILTIN_GATHERPFDPS:
39709 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39710 goto vec_prefetch_gen;
39711 case IX86_BUILTIN_GATHERPFQPD:
39712 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39713 goto vec_prefetch_gen;
39714 case IX86_BUILTIN_GATHERPFQPS:
39715 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39716 goto vec_prefetch_gen;
39717 case IX86_BUILTIN_SCATTERPFDPD:
39718 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39719 goto vec_prefetch_gen;
39720 case IX86_BUILTIN_SCATTERPFDPS:
39721 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39722 goto vec_prefetch_gen;
39723 case IX86_BUILTIN_SCATTERPFQPD:
39724 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39725 goto vec_prefetch_gen;
39726 case IX86_BUILTIN_SCATTERPFQPS:
39727 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39728 goto vec_prefetch_gen;
39730 gather_gen:
39731 rtx half;
39732 rtx (*gen) (rtx, rtx);
39734 arg0 = CALL_EXPR_ARG (exp, 0);
39735 arg1 = CALL_EXPR_ARG (exp, 1);
39736 arg2 = CALL_EXPR_ARG (exp, 2);
39737 arg3 = CALL_EXPR_ARG (exp, 3);
39738 arg4 = CALL_EXPR_ARG (exp, 4);
39739 op0 = expand_normal (arg0);
39740 op1 = expand_normal (arg1);
39741 op2 = expand_normal (arg2);
39742 op3 = expand_normal (arg3);
39743 op4 = expand_normal (arg4);
39744 /* Note the arg order is different from the operand order. */
39745 mode0 = insn_data[icode].operand[1].mode;
39746 mode2 = insn_data[icode].operand[3].mode;
39747 mode3 = insn_data[icode].operand[4].mode;
39748 mode4 = insn_data[icode].operand[5].mode;
39750 if (target == NULL_RTX
39751 || GET_MODE (target) != insn_data[icode].operand[0].mode
39752 || !insn_data[icode].operand[0].predicate (target,
39753 GET_MODE (target)))
39754 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39755 else
39756 subtarget = target;
39758 switch (fcode)
39760 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39761 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39762 half = gen_reg_rtx (V8SImode);
39763 if (!nonimmediate_operand (op2, V16SImode))
39764 op2 = copy_to_mode_reg (V16SImode, op2);
39765 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39766 op2 = half;
39767 break;
39768 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39769 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39770 case IX86_BUILTIN_GATHERALTSIV4DF:
39771 case IX86_BUILTIN_GATHERALTSIV4DI:
39772 half = gen_reg_rtx (V4SImode);
39773 if (!nonimmediate_operand (op2, V8SImode))
39774 op2 = copy_to_mode_reg (V8SImode, op2);
39775 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39776 op2 = half;
39777 break;
39778 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39779 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39780 half = gen_reg_rtx (mode0);
39781 if (mode0 == V8SFmode)
39782 gen = gen_vec_extract_lo_v16sf;
39783 else
39784 gen = gen_vec_extract_lo_v16si;
39785 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39786 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39787 emit_insn (gen (half, op0));
39788 op0 = half;
39789 if (GET_MODE (op3) != VOIDmode)
39791 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39792 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39793 emit_insn (gen (half, op3));
39794 op3 = half;
39796 break;
39797 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39798 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39799 case IX86_BUILTIN_GATHERALTDIV8SF:
39800 case IX86_BUILTIN_GATHERALTDIV8SI:
39801 half = gen_reg_rtx (mode0);
39802 if (mode0 == V4SFmode)
39803 gen = gen_vec_extract_lo_v8sf;
39804 else
39805 gen = gen_vec_extract_lo_v8si;
39806 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39807 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39808 emit_insn (gen (half, op0));
39809 op0 = half;
39810 if (GET_MODE (op3) != VOIDmode)
39812 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39813 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39814 emit_insn (gen (half, op3));
39815 op3 = half;
39817 break;
39818 default:
39819 break;
39822       /* Force the memory operand to use only a base register here; we
39823 	 don't want to do this to the memory operands of other builtin
39824 	 functions.  */
39825 op1 = ix86_zero_extend_to_Pmode (op1);
39827 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39828 op0 = copy_to_mode_reg (mode0, op0);
39829 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39830 op1 = copy_to_mode_reg (Pmode, op1);
39831 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39832 op2 = copy_to_mode_reg (mode2, op2);
39833 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39835 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39836 op3 = copy_to_mode_reg (mode3, op3);
39838 else
39840 op3 = copy_to_reg (op3);
39841 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39843 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39845 error ("the last argument must be scale 1, 2, 4, 8");
39846 return const0_rtx;
39849 /* Optimize. If mask is known to have all high bits set,
39850 replace op0 with pc_rtx to signal that the instruction
39851 overwrites the whole destination and doesn't use its
39852 previous contents. */
39853 if (optimize)
39855 if (TREE_CODE (arg3) == INTEGER_CST)
39857 if (integer_all_onesp (arg3))
39858 op0 = pc_rtx;
39860 else if (TREE_CODE (arg3) == VECTOR_CST)
39862 unsigned int negative = 0;
39863 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39865 tree cst = VECTOR_CST_ELT (arg3, i);
39866 if (TREE_CODE (cst) == INTEGER_CST
39867 && tree_int_cst_sign_bit (cst))
39868 negative++;
39869 else if (TREE_CODE (cst) == REAL_CST
39870 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39871 negative++;
39873 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39874 op0 = pc_rtx;
39876 else if (TREE_CODE (arg3) == SSA_NAME
39877 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39879 /* Recognize also when mask is like:
39880 __v2df src = _mm_setzero_pd ();
39881 __v2df mask = _mm_cmpeq_pd (src, src);
39883 __v8sf src = _mm256_setzero_ps ();
39884 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39885 as that is a cheaper way to load all ones into
39886 a register than having to load a constant from
39887 memory. */
39888 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39889 if (is_gimple_call (def_stmt))
39891 tree fndecl = gimple_call_fndecl (def_stmt);
39892 if (fndecl
39893 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39894 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39896 case IX86_BUILTIN_CMPPD:
39897 case IX86_BUILTIN_CMPPS:
39898 case IX86_BUILTIN_CMPPD256:
39899 case IX86_BUILTIN_CMPPS256:
39900 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39901 break;
39902 /* FALLTHRU */
39903 case IX86_BUILTIN_CMPEQPD:
39904 case IX86_BUILTIN_CMPEQPS:
39905 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39906 && initializer_zerop (gimple_call_arg (def_stmt,
39907 1)))
39908 op0 = pc_rtx;
39909 break;
39910 default:
39911 break;
39917 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39918 if (! pat)
39919 return const0_rtx;
39920 emit_insn (pat);
39922 switch (fcode)
39924 case IX86_BUILTIN_GATHER3DIV16SF:
39925 if (target == NULL_RTX)
39926 target = gen_reg_rtx (V8SFmode);
39927 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39928 break;
39929 case IX86_BUILTIN_GATHER3DIV16SI:
39930 if (target == NULL_RTX)
39931 target = gen_reg_rtx (V8SImode);
39932 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39933 break;
39934 case IX86_BUILTIN_GATHER3DIV8SF:
39935 case IX86_BUILTIN_GATHERDIV8SF:
39936 if (target == NULL_RTX)
39937 target = gen_reg_rtx (V4SFmode);
39938 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39939 break;
39940 case IX86_BUILTIN_GATHER3DIV8SI:
39941 case IX86_BUILTIN_GATHERDIV8SI:
39942 if (target == NULL_RTX)
39943 target = gen_reg_rtx (V4SImode);
39944 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39945 break;
39946 default:
39947 target = subtarget;
39948 break;
39950 return target;
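/* Editor's sketch (illustrative, not from the original source): a plain
   AVX2 gather that reaches the expansion above, with the scale operand
   that must be 1, 2, 4 or 8 (compiled with -mavx2):

     #include <immintrin.h>

     __m256d
     gather4 (const double *base, __m128i idx)
     {
       return _mm256_i32gather_pd (base, idx, 8);   // scale = sizeof (double)
     }

   The ALT variants handled specially above are the forms whose index
   vector is wider than needed, so only its low half is extracted first.  */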
39952 scatter_gen:
39953 arg0 = CALL_EXPR_ARG (exp, 0);
39954 arg1 = CALL_EXPR_ARG (exp, 1);
39955 arg2 = CALL_EXPR_ARG (exp, 2);
39956 arg3 = CALL_EXPR_ARG (exp, 3);
39957 arg4 = CALL_EXPR_ARG (exp, 4);
39958 op0 = expand_normal (arg0);
39959 op1 = expand_normal (arg1);
39960 op2 = expand_normal (arg2);
39961 op3 = expand_normal (arg3);
39962 op4 = expand_normal (arg4);
39963 mode1 = insn_data[icode].operand[1].mode;
39964 mode2 = insn_data[icode].operand[2].mode;
39965 mode3 = insn_data[icode].operand[3].mode;
39966 mode4 = insn_data[icode].operand[4].mode;
39968       /* Force the memory operand to use only a base register here; we
39969 	 don't want to do this to the memory operands of other builtin
39970 	 functions.  */
39971 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39973 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39974 op0 = copy_to_mode_reg (Pmode, op0);
39976 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39978 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39979 op1 = copy_to_mode_reg (mode1, op1);
39981 else
39983 op1 = copy_to_reg (op1);
39984 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39987 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39988 op2 = copy_to_mode_reg (mode2, op2);
39990 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39991 op3 = copy_to_mode_reg (mode3, op3);
39993 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39995 error ("the last argument must be scale 1, 2, 4, 8");
39996 return const0_rtx;
39999 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40000 if (! pat)
40001 return const0_rtx;
40003 emit_insn (pat);
40004 return 0;
40006 vec_prefetch_gen:
40007 arg0 = CALL_EXPR_ARG (exp, 0);
40008 arg1 = CALL_EXPR_ARG (exp, 1);
40009 arg2 = CALL_EXPR_ARG (exp, 2);
40010 arg3 = CALL_EXPR_ARG (exp, 3);
40011 arg4 = CALL_EXPR_ARG (exp, 4);
40012 op0 = expand_normal (arg0);
40013 op1 = expand_normal (arg1);
40014 op2 = expand_normal (arg2);
40015 op3 = expand_normal (arg3);
40016 op4 = expand_normal (arg4);
40017 mode0 = insn_data[icode].operand[0].mode;
40018 mode1 = insn_data[icode].operand[1].mode;
40019 mode3 = insn_data[icode].operand[3].mode;
40020 mode4 = insn_data[icode].operand[4].mode;
40022 if (GET_MODE (op0) == mode0
40023 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40025 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40026 op0 = copy_to_mode_reg (mode0, op0);
40028 else if (op0 != constm1_rtx)
40030 op0 = copy_to_reg (op0);
40031 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40034 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40035 op1 = copy_to_mode_reg (mode1, op1);
40037       /* Force the memory operand to use only a base register here; we
40038 	 don't want to do this to the memory operands of other builtin
40039 	 functions.  */
40040 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40042 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40043 op2 = copy_to_mode_reg (Pmode, op2);
40045 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40047 	  error ("the fourth argument must be scale 1, 2, 4, 8");
40048 return const0_rtx;
40051 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40053 error ("incorrect hint operand");
40054 return const0_rtx;
40057 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40058 if (! pat)
40059 return const0_rtx;
40061 emit_insn (pat);
40063 return 0;
40065 case IX86_BUILTIN_XABORT:
40066 icode = CODE_FOR_xabort;
40067 arg0 = CALL_EXPR_ARG (exp, 0);
40068 op0 = expand_normal (arg0);
40069 mode0 = insn_data[icode].operand[0].mode;
40070 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40072 error ("the xabort's argument must be an 8-bit immediate");
40073 return const0_rtx;
40075 emit_insn (gen_xabort (op0));
40076 return 0;
40078 default:
40079 break;
40082 for (i = 0, d = bdesc_special_args;
40083 i < ARRAY_SIZE (bdesc_special_args);
40084 i++, d++)
40085 if (d->code == fcode)
40086 return ix86_expand_special_args_builtin (d, exp, target);
40088 for (i = 0, d = bdesc_args;
40089 i < ARRAY_SIZE (bdesc_args);
40090 i++, d++)
40091 if (d->code == fcode)
40092 switch (fcode)
40094 case IX86_BUILTIN_FABSQ:
40095 case IX86_BUILTIN_COPYSIGNQ:
40096 if (!TARGET_SSE)
40097 /* Emit a normal call if SSE isn't available. */
40098 return expand_call (exp, target, ignore);
40099 default:
40100 return ix86_expand_args_builtin (d, exp, target);
40103 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40104 if (d->code == fcode)
40105 return ix86_expand_sse_comi (d, exp, target);
40107 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40108 if (d->code == fcode)
40109 return ix86_expand_round_builtin (d, exp, target);
40111 for (i = 0, d = bdesc_pcmpestr;
40112 i < ARRAY_SIZE (bdesc_pcmpestr);
40113 i++, d++)
40114 if (d->code == fcode)
40115 return ix86_expand_sse_pcmpestr (d, exp, target);
40117 for (i = 0, d = bdesc_pcmpistr;
40118 i < ARRAY_SIZE (bdesc_pcmpistr);
40119 i++, d++)
40120 if (d->code == fcode)
40121 return ix86_expand_sse_pcmpistr (d, exp, target);
40123 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40124 if (d->code == fcode)
40125 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40126 (enum ix86_builtin_func_type)
40127 d->flag, d->comparison);
40129 gcc_unreachable ();
40132 /* This returns the target-specific builtin with code CODE if
40133 current_function_decl has visibility on this builtin, which is checked
40134 using isa flags. Returns NULL_TREE otherwise. */
40136 static tree ix86_get_builtin (enum ix86_builtins code)
40138 struct cl_target_option *opts;
40139 tree target_tree = NULL_TREE;
40141 /* Determine the isa flags of current_function_decl. */
40143 if (current_function_decl)
40144 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40146 if (target_tree == NULL)
40147 target_tree = target_option_default_node;
40149 opts = TREE_TARGET_OPTION (target_tree);
40151 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40152 return ix86_builtin_decl (code, true);
40153 else
40154 return NULL_TREE;
40157 /* Return the function decl for the target-specific builtin
40158 corresponding to the MPX builtin passed in FCODE. */
40159 static tree
40160 ix86_builtin_mpx_function (unsigned fcode)
40162 switch (fcode)
40164 case BUILT_IN_CHKP_BNDMK:
40165 return ix86_builtins[IX86_BUILTIN_BNDMK];
40167 case BUILT_IN_CHKP_BNDSTX:
40168 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40170 case BUILT_IN_CHKP_BNDLDX:
40171 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40173 case BUILT_IN_CHKP_BNDCL:
40174 return ix86_builtins[IX86_BUILTIN_BNDCL];
40176 case BUILT_IN_CHKP_BNDCU:
40177 return ix86_builtins[IX86_BUILTIN_BNDCU];
40179 case BUILT_IN_CHKP_BNDRET:
40180 return ix86_builtins[IX86_BUILTIN_BNDRET];
40182 case BUILT_IN_CHKP_INTERSECT:
40183 return ix86_builtins[IX86_BUILTIN_BNDINT];
40185 case BUILT_IN_CHKP_NARROW:
40186 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40188 case BUILT_IN_CHKP_SIZEOF:
40189 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40191 case BUILT_IN_CHKP_EXTRACT_LOWER:
40192 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40194 case BUILT_IN_CHKP_EXTRACT_UPPER:
40195 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40197 default:
40198 return NULL_TREE;
40201 gcc_unreachable ();
40204 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40206 Return an address to be used to load/store bounds for pointer
40207 passed in SLOT.
40209 SLOT_NO is an integer constant holding the number of a target-
40210 dependent special slot to be used in case SLOT is not a memory.
40212 SPECIAL_BASE is a pointer to be used as a base of fake address
40213 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40214 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40216 static rtx
40217 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40219 rtx addr = NULL;
40221 /* A NULL slot means we pass bounds for a pointer not passed to the
40222 function at all. A register slot means we pass the pointer in a
40223 register. In both these cases bounds are passed via the Bounds
40224 Table. Since we do not have an actual pointer stored in memory,
40225 we have to use fake addresses to access the Bounds Table. We
40226 start with (special_base - sizeof (void*)) and decrease this
40227 address by the pointer size to get addresses for other slots. */
40228 if (!slot || REG_P (slot))
40230 gcc_assert (CONST_INT_P (slot_no));
40231 addr = plus_constant (Pmode, special_base,
40232 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40234 /* If pointer is passed in a memory then its address is used to
40235 access Bounds Table. */
40236 else if (MEM_P (slot))
40238 addr = XEXP (slot, 0);
40239 if (!register_operand (addr, Pmode))
40240 addr = copy_addr_to_reg (addr);
40242 else
40243 gcc_unreachable ();
40245 return addr;
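/* For example, with 64-bit pointers (GET_MODE_SIZE (Pmode) == 8) special
   slot 0 is addressed at special_base - 8, slot 1 at special_base - 16,
   and so on. */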
40248 /* Expand pass uses this hook to load bounds for function parameter
40249 PTR passed in SLOT in case its bounds are not passed in a register.
40251 If SLOT is a memory, then bounds are loaded as for regular pointer
40252 loaded from memory. PTR may be NULL in case SLOT is a memory.
40253 In such case value of PTR (if required) may be loaded from SLOT.
40255 If SLOT is NULL or a register then SLOT_NO is an integer constant
40256 holding the number of the target-dependent special slot which should be
40257 used to obtain bounds.
40259 Return loaded bounds. */
40261 static rtx
40262 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40264 rtx reg = gen_reg_rtx (BNDmode);
40265 rtx addr;
40267 /* Get address to be used to access Bounds Table. Special slots start
40268 at the location of return address of the current function. */
40269 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40271 /* Load pointer value from a memory if we don't have it. */
40272 if (!ptr)
40274 gcc_assert (MEM_P (slot));
40275 ptr = copy_addr_to_reg (slot);
40278 emit_insn (BNDmode == BND64mode
40279 ? gen_bnd64_ldx (reg, addr, ptr)
40280 : gen_bnd32_ldx (reg, addr, ptr));
40282 return reg;
40285 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40286 passed in SLOT in case BOUNDS are not passed in a register.
40288 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40289 stored in memory. PTR may be NULL in case SLOT is a memory.
40290 In such case value of PTR (if required) may be loaded from SLOT.
40292 If SLOT is NULL or a register then SLOT_NO is an integer constant
40293 holding the number of the target-dependent special slot which should be
40294 used to store BOUNDS. */
40296 static void
40297 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40299 rtx addr;
40301 /* Get address to be used to access Bounds Table. Special slots start
40302 at the location of return address of a called function. */
40303 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40305 /* Load pointer value from a memory if we don't have it. */
40306 if (!ptr)
40308 gcc_assert (MEM_P (slot));
40309 ptr = copy_addr_to_reg (slot);
40312 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40313 if (!register_operand (bounds, BNDmode))
40314 bounds = copy_to_mode_reg (BNDmode, bounds);
40316 emit_insn (BNDmode == BND64mode
40317 ? gen_bnd64_stx (addr, ptr, bounds)
40318 : gen_bnd32_stx (addr, ptr, bounds));
40321 /* Load and return bounds returned by function in SLOT. */
40323 static rtx
40324 ix86_load_returned_bounds (rtx slot)
40326 rtx res;
40328 gcc_assert (REG_P (slot));
40329 res = gen_reg_rtx (BNDmode);
40330 emit_move_insn (res, slot);
40332 return res;
40335 /* Store BOUNDS returned by function into SLOT. */
40337 static void
40338 ix86_store_returned_bounds (rtx slot, rtx bounds)
40340 gcc_assert (REG_P (slot));
40341 emit_move_insn (slot, bounds);
40344 /* Returns a function decl for a vectorized version of the builtin function
40345 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40346 if it is not available. */
40348 static tree
40349 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40350 tree type_in)
40352 machine_mode in_mode, out_mode;
40353 int in_n, out_n;
40354 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40356 if (TREE_CODE (type_out) != VECTOR_TYPE
40357 || TREE_CODE (type_in) != VECTOR_TYPE
40358 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40359 return NULL_TREE;
40361 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40362 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40363 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40364 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40366 switch (fn)
40368 case BUILT_IN_SQRT:
40369 if (out_mode == DFmode && in_mode == DFmode)
40371 if (out_n == 2 && in_n == 2)
40372 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40373 else if (out_n == 4 && in_n == 4)
40374 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40375 else if (out_n == 8 && in_n == 8)
40376 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40378 break;
40380 case BUILT_IN_EXP2F:
40381 if (out_mode == SFmode && in_mode == SFmode)
40383 if (out_n == 16 && in_n == 16)
40384 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40386 break;
40388 case BUILT_IN_SQRTF:
40389 if (out_mode == SFmode && in_mode == SFmode)
40391 if (out_n == 4 && in_n == 4)
40392 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40393 else if (out_n == 8 && in_n == 8)
40394 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40395 else if (out_n == 16 && in_n == 16)
40396 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40398 break;
40400 case BUILT_IN_IFLOOR:
40401 case BUILT_IN_LFLOOR:
40402 case BUILT_IN_LLFLOOR:
40403 /* The round insn does not trap on denormals. */
40404 if (flag_trapping_math || !TARGET_ROUND)
40405 break;
40407 if (out_mode == SImode && in_mode == DFmode)
40409 if (out_n == 4 && in_n == 2)
40410 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40411 else if (out_n == 8 && in_n == 4)
40412 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40413 else if (out_n == 16 && in_n == 8)
40414 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40416 break;
40418 case BUILT_IN_IFLOORF:
40419 case BUILT_IN_LFLOORF:
40420 case BUILT_IN_LLFLOORF:
40421 /* The round insn does not trap on denormals. */
40422 if (flag_trapping_math || !TARGET_ROUND)
40423 break;
40425 if (out_mode == SImode && in_mode == SFmode)
40427 if (out_n == 4 && in_n == 4)
40428 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40429 else if (out_n == 8 && in_n == 8)
40430 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40432 break;
40434 case BUILT_IN_ICEIL:
40435 case BUILT_IN_LCEIL:
40436 case BUILT_IN_LLCEIL:
40437 /* The round insn does not trap on denormals. */
40438 if (flag_trapping_math || !TARGET_ROUND)
40439 break;
40441 if (out_mode == SImode && in_mode == DFmode)
40443 if (out_n == 4 && in_n == 2)
40444 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40445 else if (out_n == 8 && in_n == 4)
40446 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40447 else if (out_n == 16 && in_n == 8)
40448 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40450 break;
40452 case BUILT_IN_ICEILF:
40453 case BUILT_IN_LCEILF:
40454 case BUILT_IN_LLCEILF:
40455 /* The round insn does not trap on denormals. */
40456 if (flag_trapping_math || !TARGET_ROUND)
40457 break;
40459 if (out_mode == SImode && in_mode == SFmode)
40461 if (out_n == 4 && in_n == 4)
40462 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40463 else if (out_n == 8 && in_n == 8)
40464 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40466 break;
40468 case BUILT_IN_IRINT:
40469 case BUILT_IN_LRINT:
40470 case BUILT_IN_LLRINT:
40471 if (out_mode == SImode && in_mode == DFmode)
40473 if (out_n == 4 && in_n == 2)
40474 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40475 else if (out_n == 8 && in_n == 4)
40476 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40478 break;
40480 case BUILT_IN_IRINTF:
40481 case BUILT_IN_LRINTF:
40482 case BUILT_IN_LLRINTF:
40483 if (out_mode == SImode && in_mode == SFmode)
40485 if (out_n == 4 && in_n == 4)
40486 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40487 else if (out_n == 8 && in_n == 8)
40488 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40490 break;
40492 case BUILT_IN_IROUND:
40493 case BUILT_IN_LROUND:
40494 case BUILT_IN_LLROUND:
40495 /* The round insn does not trap on denormals. */
40496 if (flag_trapping_math || !TARGET_ROUND)
40497 break;
40499 if (out_mode == SImode && in_mode == DFmode)
40501 if (out_n == 4 && in_n == 2)
40502 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40503 else if (out_n == 8 && in_n == 4)
40504 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40505 else if (out_n == 16 && in_n == 8)
40506 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40508 break;
40510 case BUILT_IN_IROUNDF:
40511 case BUILT_IN_LROUNDF:
40512 case BUILT_IN_LLROUNDF:
40513 /* The round insn does not trap on denormals. */
40514 if (flag_trapping_math || !TARGET_ROUND)
40515 break;
40517 if (out_mode == SImode && in_mode == SFmode)
40519 if (out_n == 4 && in_n == 4)
40520 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40521 else if (out_n == 8 && in_n == 8)
40522 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40524 break;
40526 case BUILT_IN_COPYSIGN:
40527 if (out_mode == DFmode && in_mode == DFmode)
40529 if (out_n == 2 && in_n == 2)
40530 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40531 else if (out_n == 4 && in_n == 4)
40532 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40533 else if (out_n == 8 && in_n == 8)
40534 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40536 break;
40538 case BUILT_IN_COPYSIGNF:
40539 if (out_mode == SFmode && in_mode == SFmode)
40541 if (out_n == 4 && in_n == 4)
40542 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40543 else if (out_n == 8 && in_n == 8)
40544 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40545 else if (out_n == 16 && in_n == 16)
40546 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40548 break;
40550 case BUILT_IN_FLOOR:
40551 /* The round insn does not trap on denormals. */
40552 if (flag_trapping_math || !TARGET_ROUND)
40553 break;
40555 if (out_mode == DFmode && in_mode == DFmode)
40557 if (out_n == 2 && in_n == 2)
40558 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40559 else if (out_n == 4 && in_n == 4)
40560 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40562 break;
40564 case BUILT_IN_FLOORF:
40565 /* The round insn does not trap on denormals. */
40566 if (flag_trapping_math || !TARGET_ROUND)
40567 break;
40569 if (out_mode == SFmode && in_mode == SFmode)
40571 if (out_n == 4 && in_n == 4)
40572 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40573 else if (out_n == 8 && in_n == 8)
40574 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40576 break;
40578 case BUILT_IN_CEIL:
40579 /* The round insn does not trap on denormals. */
40580 if (flag_trapping_math || !TARGET_ROUND)
40581 break;
40583 if (out_mode == DFmode && in_mode == DFmode)
40585 if (out_n == 2 && in_n == 2)
40586 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40587 else if (out_n == 4 && in_n == 4)
40588 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40590 break;
40592 case BUILT_IN_CEILF:
40593 /* The round insn does not trap on denormals. */
40594 if (flag_trapping_math || !TARGET_ROUND)
40595 break;
40597 if (out_mode == SFmode && in_mode == SFmode)
40599 if (out_n == 4 && in_n == 4)
40600 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40601 else if (out_n == 8 && in_n == 8)
40602 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40604 break;
40606 case BUILT_IN_TRUNC:
40607 /* The round insn does not trap on denormals. */
40608 if (flag_trapping_math || !TARGET_ROUND)
40609 break;
40611 if (out_mode == DFmode && in_mode == DFmode)
40613 if (out_n == 2 && in_n == 2)
40614 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40615 else if (out_n == 4 && in_n == 4)
40616 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40618 break;
40620 case BUILT_IN_TRUNCF:
40621 /* The round insn does not trap on denormals. */
40622 if (flag_trapping_math || !TARGET_ROUND)
40623 break;
40625 if (out_mode == SFmode && in_mode == SFmode)
40627 if (out_n == 4 && in_n == 4)
40628 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40629 else if (out_n == 8 && in_n == 8)
40630 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40632 break;
40634 case BUILT_IN_RINT:
40635 /* The round insn does not trap on denormals. */
40636 if (flag_trapping_math || !TARGET_ROUND)
40637 break;
40639 if (out_mode == DFmode && in_mode == DFmode)
40641 if (out_n == 2 && in_n == 2)
40642 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40643 else if (out_n == 4 && in_n == 4)
40644 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40646 break;
40648 case BUILT_IN_RINTF:
40649 /* The round insn does not trap on denormals. */
40650 if (flag_trapping_math || !TARGET_ROUND)
40651 break;
40653 if (out_mode == SFmode && in_mode == SFmode)
40655 if (out_n == 4 && in_n == 4)
40656 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40657 else if (out_n == 8 && in_n == 8)
40658 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40660 break;
40662 case BUILT_IN_ROUND:
40663 /* The round insn does not trap on denormals. */
40664 if (flag_trapping_math || !TARGET_ROUND)
40665 break;
40667 if (out_mode == DFmode && in_mode == DFmode)
40669 if (out_n == 2 && in_n == 2)
40670 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40671 else if (out_n == 4 && in_n == 4)
40672 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40674 break;
40676 case BUILT_IN_ROUNDF:
40677 /* The round insn does not trap on denormals. */
40678 if (flag_trapping_math || !TARGET_ROUND)
40679 break;
40681 if (out_mode == SFmode && in_mode == SFmode)
40683 if (out_n == 4 && in_n == 4)
40684 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40685 else if (out_n == 8 && in_n == 8)
40686 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40688 break;
40690 case BUILT_IN_FMA:
40691 if (out_mode == DFmode && in_mode == DFmode)
40693 if (out_n == 2 && in_n == 2)
40694 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40695 if (out_n == 4 && in_n == 4)
40696 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40698 break;
40700 case BUILT_IN_FMAF:
40701 if (out_mode == SFmode && in_mode == SFmode)
40703 if (out_n == 4 && in_n == 4)
40704 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40705 if (out_n == 8 && in_n == 8)
40706 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40708 break;
40710 default:
40711 break;
40714 /* Dispatch to a handler for a vectorization library. */
40715 if (ix86_veclib_handler)
40716 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40717 type_in);
40719 return NULL_TREE;
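/* As an illustration of the mapping above: vectorizing a call to sqrt with
   V2DF input and output types yields IX86_BUILTIN_SQRTPD, provided the
   required ISA (SSE2) is enabled in the flags checked by ix86_get_builtin. */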
40722 /* Handler for an SVML-style interface to
40723 a library with vectorized intrinsics. */
40725 static tree
40726 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40728 char name[20];
40729 tree fntype, new_fndecl, args;
40730 unsigned arity;
40731 const char *bname;
40732 machine_mode el_mode, in_mode;
40733 int n, in_n;
40735 /* The SVML is suitable for unsafe math only. */
40736 if (!flag_unsafe_math_optimizations)
40737 return NULL_TREE;
40739 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40740 n = TYPE_VECTOR_SUBPARTS (type_out);
40741 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40742 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40743 if (el_mode != in_mode
40744 || n != in_n)
40745 return NULL_TREE;
40747 switch (fn)
40749 case BUILT_IN_EXP:
40750 case BUILT_IN_LOG:
40751 case BUILT_IN_LOG10:
40752 case BUILT_IN_POW:
40753 case BUILT_IN_TANH:
40754 case BUILT_IN_TAN:
40755 case BUILT_IN_ATAN:
40756 case BUILT_IN_ATAN2:
40757 case BUILT_IN_ATANH:
40758 case BUILT_IN_CBRT:
40759 case BUILT_IN_SINH:
40760 case BUILT_IN_SIN:
40761 case BUILT_IN_ASINH:
40762 case BUILT_IN_ASIN:
40763 case BUILT_IN_COSH:
40764 case BUILT_IN_COS:
40765 case BUILT_IN_ACOSH:
40766 case BUILT_IN_ACOS:
40767 if (el_mode != DFmode || n != 2)
40768 return NULL_TREE;
40769 break;
40771 case BUILT_IN_EXPF:
40772 case BUILT_IN_LOGF:
40773 case BUILT_IN_LOG10F:
40774 case BUILT_IN_POWF:
40775 case BUILT_IN_TANHF:
40776 case BUILT_IN_TANF:
40777 case BUILT_IN_ATANF:
40778 case BUILT_IN_ATAN2F:
40779 case BUILT_IN_ATANHF:
40780 case BUILT_IN_CBRTF:
40781 case BUILT_IN_SINHF:
40782 case BUILT_IN_SINF:
40783 case BUILT_IN_ASINHF:
40784 case BUILT_IN_ASINF:
40785 case BUILT_IN_COSHF:
40786 case BUILT_IN_COSF:
40787 case BUILT_IN_ACOSHF:
40788 case BUILT_IN_ACOSF:
40789 if (el_mode != SFmode || n != 4)
40790 return NULL_TREE;
40791 break;
40793 default:
40794 return NULL_TREE;
40797 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40799 if (fn == BUILT_IN_LOGF)
40800 strcpy (name, "vmlsLn4");
40801 else if (fn == BUILT_IN_LOG)
40802 strcpy (name, "vmldLn2");
40803 else if (n == 4)
40805 sprintf (name, "vmls%s", bname+10);
40806 name[strlen (name)-1] = '4';
40808 else
40809 sprintf (name, "vmld%s2", bname+10);
40811 /* Convert to uppercase. */
40812 name[4] &= ~0x20;
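/* For example, BUILT_IN_SINF ("__builtin_sinf", n == 4) first gives
   "vmlssinf", then "vmlssin4" once the last character is replaced, and
   finally "vmlsSin4" after name[4] is converted to uppercase. */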
40814 arity = 0;
40815 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40816 args;
40817 args = TREE_CHAIN (args))
40818 arity++;
40820 if (arity == 1)
40821 fntype = build_function_type_list (type_out, type_in, NULL);
40822 else
40823 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40825 /* Build a function declaration for the vectorized function. */
40826 new_fndecl = build_decl (BUILTINS_LOCATION,
40827 FUNCTION_DECL, get_identifier (name), fntype);
40828 TREE_PUBLIC (new_fndecl) = 1;
40829 DECL_EXTERNAL (new_fndecl) = 1;
40830 DECL_IS_NOVOPS (new_fndecl) = 1;
40831 TREE_READONLY (new_fndecl) = 1;
40833 return new_fndecl;
40836 /* Handler for an ACML-style interface to
40837 a library with vectorized intrinsics. */
40839 static tree
40840 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40842 char name[20] = "__vr.._";
40843 tree fntype, new_fndecl, args;
40844 unsigned arity;
40845 const char *bname;
40846 machine_mode el_mode, in_mode;
40847 int n, in_n;
40849 /* The ACML is 64-bit only and suitable for unsafe math only, as
40850 it does not correctly support parts of IEEE with the required
40851 precision, such as denormals. */
40852 if (!TARGET_64BIT
40853 || !flag_unsafe_math_optimizations)
40854 return NULL_TREE;
40856 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40857 n = TYPE_VECTOR_SUBPARTS (type_out);
40858 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40859 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40860 if (el_mode != in_mode
40861 || n != in_n)
40862 return NULL_TREE;
40864 switch (fn)
40866 case BUILT_IN_SIN:
40867 case BUILT_IN_COS:
40868 case BUILT_IN_EXP:
40869 case BUILT_IN_LOG:
40870 case BUILT_IN_LOG2:
40871 case BUILT_IN_LOG10:
40872 name[4] = 'd';
40873 name[5] = '2';
40874 if (el_mode != DFmode
40875 || n != 2)
40876 return NULL_TREE;
40877 break;
40879 case BUILT_IN_SINF:
40880 case BUILT_IN_COSF:
40881 case BUILT_IN_EXPF:
40882 case BUILT_IN_POWF:
40883 case BUILT_IN_LOGF:
40884 case BUILT_IN_LOG2F:
40885 case BUILT_IN_LOG10F:
40886 name[4] = 's';
40887 name[5] = '4';
40888 if (el_mode != SFmode
40889 || n != 4)
40890 return NULL_TREE;
40891 break;
40893 default:
40894 return NULL_TREE;
40897 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40898 sprintf (name + 7, "%s", bname+10);
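/* For example, BUILT_IN_SINF yields "__vrs4_sinf" and BUILT_IN_SIN
   yields "__vrd2_sin". */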
40900 arity = 0;
40901 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40902 args;
40903 args = TREE_CHAIN (args))
40904 arity++;
40906 if (arity == 1)
40907 fntype = build_function_type_list (type_out, type_in, NULL);
40908 else
40909 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40911 /* Build a function declaration for the vectorized function. */
40912 new_fndecl = build_decl (BUILTINS_LOCATION,
40913 FUNCTION_DECL, get_identifier (name), fntype);
40914 TREE_PUBLIC (new_fndecl) = 1;
40915 DECL_EXTERNAL (new_fndecl) = 1;
40916 DECL_IS_NOVOPS (new_fndecl) = 1;
40917 TREE_READONLY (new_fndecl) = 1;
40919 return new_fndecl;
40922 /* Returns a decl of a function that implements gather load with
40923 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40924 Return NULL_TREE if it is not available. */
40926 static tree
40927 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40928 const_tree index_type, int scale)
40930 bool si;
40931 enum ix86_builtins code;
40933 if (! TARGET_AVX2)
40934 return NULL_TREE;
40936 if ((TREE_CODE (index_type) != INTEGER_TYPE
40937 && !POINTER_TYPE_P (index_type))
40938 || (TYPE_MODE (index_type) != SImode
40939 && TYPE_MODE (index_type) != DImode))
40940 return NULL_TREE;
40942 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40943 return NULL_TREE;
40945 /* v*gather* insn sign extends index to pointer mode. */
40946 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40947 && TYPE_UNSIGNED (index_type))
40948 return NULL_TREE;
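/* The scale must be 1, 2, 4 or 8: positive, at most 8 and a power of two
   (the (scale & (scale - 1)) test below rejects non-powers of two). */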
40950 if (scale <= 0
40951 || scale > 8
40952 || (scale & (scale - 1)) != 0)
40953 return NULL_TREE;
40955 si = TYPE_MODE (index_type) == SImode;
40956 switch (TYPE_MODE (mem_vectype))
40958 case V2DFmode:
40959 if (TARGET_AVX512VL)
40960 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40961 else
40962 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40963 break;
40964 case V4DFmode:
40965 if (TARGET_AVX512VL)
40966 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40967 else
40968 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40969 break;
40970 case V2DImode:
40971 if (TARGET_AVX512VL)
40972 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40973 else
40974 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40975 break;
40976 case V4DImode:
40977 if (TARGET_AVX512VL)
40978 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40979 else
40980 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40981 break;
40982 case V4SFmode:
40983 if (TARGET_AVX512VL)
40984 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40985 else
40986 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40987 break;
40988 case V8SFmode:
40989 if (TARGET_AVX512VL)
40990 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40991 else
40992 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40993 break;
40994 case V4SImode:
40995 if (TARGET_AVX512VL)
40996 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40997 else
40998 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40999 break;
41000 case V8SImode:
41001 if (TARGET_AVX512VL)
41002 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41003 else
41004 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41005 break;
41006 case V8DFmode:
41007 if (TARGET_AVX512F)
41008 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41009 else
41010 return NULL_TREE;
41011 break;
41012 case V8DImode:
41013 if (TARGET_AVX512F)
41014 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41015 else
41016 return NULL_TREE;
41017 break;
41018 case V16SFmode:
41019 if (TARGET_AVX512F)
41020 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41021 else
41022 return NULL_TREE;
41023 break;
41024 case V16SImode:
41025 if (TARGET_AVX512F)
41026 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41027 else
41028 return NULL_TREE;
41029 break;
41030 default:
41031 return NULL_TREE;
41034 return ix86_get_builtin (code);
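/* As an illustration of the table above: with AVX2 but without AVX512VL,
   a V2DF gather indexed by SImode values maps to IX86_BUILTIN_GATHERSIV2DF. */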
41037 /* Returns a decl for a target-specific builtin that implements
41038 the reciprocal of the function, or NULL_TREE if not available. */
41040 static tree
41041 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41043 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41044 && flag_finite_math_only && !flag_trapping_math
41045 && flag_unsafe_math_optimizations))
41046 return NULL_TREE;
41048 if (md_fn)
41049 /* Machine dependent builtins. */
41050 switch (fn)
41052 /* Vectorized version of sqrt to rsqrt conversion. */
41053 case IX86_BUILTIN_SQRTPS_NR:
41054 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41056 case IX86_BUILTIN_SQRTPS_NR256:
41057 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41059 default:
41060 return NULL_TREE;
41062 else
41063 /* Normal builtins. */
41064 switch (fn)
41066 /* Sqrt to rsqrt conversion. */
41067 case BUILT_IN_SQRTF:
41068 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41070 default:
41071 return NULL_TREE;
41075 /* Helper for avx_vpermilps256_operand et al. This is also used by
41076 the expansion functions to turn the parallel back into a mask.
41077 The return value is 0 for no match and the imm8+1 for a match. */
41080 avx_vpermilp_parallel (rtx par, machine_mode mode)
41082 unsigned i, nelt = GET_MODE_NUNITS (mode);
41083 unsigned mask = 0;
41084 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41086 if (XVECLEN (par, 0) != (int) nelt)
41087 return 0;
41089 /* Validate that all of the elements are constants, and not totally
41090 out of range. Copy the data into an integral array to make the
41091 subsequent checks easier. */
41092 for (i = 0; i < nelt; ++i)
41094 rtx er = XVECEXP (par, 0, i);
41095 unsigned HOST_WIDE_INT ei;
41097 if (!CONST_INT_P (er))
41098 return 0;
41099 ei = INTVAL (er);
41100 if (ei >= nelt)
41101 return 0;
41102 ipar[i] = ei;
41105 switch (mode)
41107 case V8DFmode:
41108 /* In the 512-bit DFmode case, we can only move elements within
41109 a 128-bit lane. First fill the second part of the mask,
41110 then fallthru. */
41111 for (i = 4; i < 6; ++i)
41113 if (ipar[i] < 4 || ipar[i] >= 6)
41114 return 0;
41115 mask |= (ipar[i] - 4) << i;
41117 for (i = 6; i < 8; ++i)
41119 if (ipar[i] < 6)
41120 return 0;
41121 mask |= (ipar[i] - 6) << i;
41123 /* FALLTHRU */
41125 case V4DFmode:
41126 /* In the 256-bit DFmode case, we can only move elements within
41127 a 128-bit lane. */
41128 for (i = 0; i < 2; ++i)
41130 if (ipar[i] >= 2)
41131 return 0;
41132 mask |= ipar[i] << i;
41134 for (i = 2; i < 4; ++i)
41136 if (ipar[i] < 2)
41137 return 0;
41138 mask |= (ipar[i] - 2) << i;
41140 break;
41142 case V16SFmode:
41143 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41144 must mirror the permutation in the lower 256 bits. */
41145 for (i = 0; i < 8; ++i)
41146 if (ipar[i] + 8 != ipar[i + 8])
41147 return 0;
41148 /* FALLTHRU */
41150 case V8SFmode:
41151 /* In the 256-bit SFmode case, we have full freedom of
41152 movement within the low 128-bit lane, but the high 128-bit
41153 lane must mirror the exact same pattern. */
41154 for (i = 0; i < 4; ++i)
41155 if (ipar[i] + 4 != ipar[i + 4])
41156 return 0;
41157 nelt = 4;
41158 /* FALLTHRU */
41160 case V2DFmode:
41161 case V4SFmode:
41162 /* In the 128-bit case, we have full freedom in the placement of
41163 the elements from the source operand. */
41164 for (i = 0; i < nelt; ++i)
41165 mask |= ipar[i] << (i * (nelt / 2));
41166 break;
41168 default:
41169 gcc_unreachable ();
41172 /* Make sure success has a non-zero value by adding one. */
41173 return mask + 1;
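/* Worked example: for V4SF a parallel of (1 0 3 2) gives
   mask = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6 = 0xb1, so the function
   returns 0xb2, i.e. the imm8 value plus one. */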
41176 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41177 the expansion functions to turn the parallel back into a mask.
41178 The return value is 0 for no match and the imm8+1 for a match. */
41181 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41183 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41184 unsigned mask = 0;
41185 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41187 if (XVECLEN (par, 0) != (int) nelt)
41188 return 0;
41190 /* Validate that all of the elements are constants, and not totally
41191 out of range. Copy the data into an integral array to make the
41192 subsequent checks easier. */
41193 for (i = 0; i < nelt; ++i)
41195 rtx er = XVECEXP (par, 0, i);
41196 unsigned HOST_WIDE_INT ei;
41198 if (!CONST_INT_P (er))
41199 return 0;
41200 ei = INTVAL (er);
41201 if (ei >= 2 * nelt)
41202 return 0;
41203 ipar[i] = ei;
41206 /* Validate that each half of the permute consists of consecutive elements. */
41207 for (i = 0; i < nelt2 - 1; ++i)
41208 if (ipar[i] + 1 != ipar[i + 1])
41209 return 0;
41210 for (i = nelt2; i < nelt - 1; ++i)
41211 if (ipar[i] + 1 != ipar[i + 1])
41212 return 0;
41214 /* Reconstruct the mask. */
41215 for (i = 0; i < 2; ++i)
41217 unsigned e = ipar[i * nelt2];
41218 if (e % nelt2)
41219 return 0;
41220 e /= nelt2;
41221 mask |= e << (i * 4);
41224 /* Make sure success has a non-zero value by adding one. */
41225 return mask + 1;
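/* Worked example: for V8SF a parallel of (4 5 6 7 8 9 10 11) selects the
   high half of the first operand and the low half of the second; the
   reconstructed mask is 0x21 and the function returns 0x22. */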
41228 /* Return a register priority for hard reg REGNO. */
41229 static int
41230 ix86_register_priority (int hard_regno)
41232 /* ebp and r13 as the base always want a displacement, and r12 as the
41233 base always wants an index. So discourage their usage in an
41234 address. */
41235 if (hard_regno == R12_REG || hard_regno == R13_REG)
41236 return 0;
41237 if (hard_regno == BP_REG)
41238 return 1;
41239 /* New x86-64 int registers result in bigger code size. Discourage
41240 them. */
41241 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41242 return 2;
41243 /* New x86-64 SSE registers result in bigger code size. Discourage
41244 them. */
41245 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41246 return 2;
41247 /* Usage of AX register results in smaller code. Prefer it. */
41248 if (hard_regno == 0)
41249 return 4;
41250 return 3;
41253 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41255 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41256 QImode must go into class Q_REGS.
41257 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
41258 movdf to do mem-to-mem moves through integer regs. */
41260 static reg_class_t
41261 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41263 machine_mode mode = GET_MODE (x);
41265 /* We're only allowed to return a subclass of CLASS. Many of the
41266 following checks fail for NO_REGS, so eliminate that early. */
41267 if (regclass == NO_REGS)
41268 return NO_REGS;
41270 /* All classes can load zeros. */
41271 if (x == CONST0_RTX (mode))
41272 return regclass;
41274 /* Force constants into memory if we are loading a (nonzero) constant into
41275 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41276 instructions to load from a constant. */
41277 if (CONSTANT_P (x)
41278 && (MAYBE_MMX_CLASS_P (regclass)
41279 || MAYBE_SSE_CLASS_P (regclass)
41280 || MAYBE_MASK_CLASS_P (regclass)))
41281 return NO_REGS;
41283 /* Prefer SSE regs only, if we can use them for math. */
41284 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41285 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41287 /* Floating-point constants need more complex checks. */
41288 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41290 /* General regs can load everything. */
41291 if (reg_class_subset_p (regclass, GENERAL_REGS))
41292 return regclass;
41294 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41295 zero above. We only want to wind up preferring 80387 registers if
41296 we plan on doing computation with them. */
41297 if (TARGET_80387
41298 && standard_80387_constant_p (x) > 0)
41300 /* Limit class to non-sse. */
41301 if (regclass == FLOAT_SSE_REGS)
41302 return FLOAT_REGS;
41303 if (regclass == FP_TOP_SSE_REGS)
41304 return FP_TOP_REG;
41305 if (regclass == FP_SECOND_SSE_REGS)
41306 return FP_SECOND_REG;
41307 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41308 return regclass;
41311 return NO_REGS;
41314 /* Generally when we see PLUS here, it's the function invariant
41315 (plus soft-fp const_int), which can only be computed into general
41316 regs. */
41317 if (GET_CODE (x) == PLUS)
41318 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41320 /* QImode constants are easy to load, but non-constant QImode data
41321 must go into Q_REGS. */
41322 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41324 if (reg_class_subset_p (regclass, Q_REGS))
41325 return regclass;
41326 if (reg_class_subset_p (Q_REGS, regclass))
41327 return Q_REGS;
41328 return NO_REGS;
41331 return regclass;
41334 /* Discourage putting floating-point values in SSE registers unless
41335 SSE math is being used, and likewise for the 387 registers. */
41336 static reg_class_t
41337 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41339 machine_mode mode = GET_MODE (x);
41341 /* Restrict the output reload class to the register bank that we are doing
41342 math on. Rather than returning a class that is not a subset of CLASS,
41343 reject this alternative: if reload cannot do this, it will still use its choice. */
41344 mode = GET_MODE (x);
41345 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41346 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41348 if (X87_FLOAT_MODE_P (mode))
41350 if (regclass == FP_TOP_SSE_REGS)
41351 return FP_TOP_REG;
41352 else if (regclass == FP_SECOND_SSE_REGS)
41353 return FP_SECOND_REG;
41354 else
41355 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41358 return regclass;
41361 static reg_class_t
41362 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41363 machine_mode mode, secondary_reload_info *sri)
41365 /* Double-word spills from general registers to non-offsettable memory
41366 references (zero-extended addresses) require special handling. */
41367 if (TARGET_64BIT
41368 && MEM_P (x)
41369 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41370 && INTEGER_CLASS_P (rclass)
41371 && !offsettable_memref_p (x))
41373 sri->icode = (in_p
41374 ? CODE_FOR_reload_noff_load
41375 : CODE_FOR_reload_noff_store);
41376 /* Add the cost of moving address to a temporary. */
41377 sri->extra_cost = 1;
41379 return NO_REGS;
41382 /* QImode spills from non-QI registers require an
41383 intermediate register on 32-bit targets. */
41384 if (mode == QImode
41385 && (MAYBE_MASK_CLASS_P (rclass)
41386 || (!TARGET_64BIT && !in_p
41387 && INTEGER_CLASS_P (rclass)
41388 && MAYBE_NON_Q_CLASS_P (rclass))))
41390 int regno;
41392 if (REG_P (x))
41393 regno = REGNO (x);
41394 else
41395 regno = -1;
41397 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41398 regno = true_regnum (x);
41400 /* Return Q_REGS if the operand is in memory. */
41401 if (regno == -1)
41402 return Q_REGS;
41405 /* This condition handles the corner case where an expression involving
41406 pointers gets vectorized. We're trying to use the address of a
41407 stack slot as a vector initializer.
41409 (set (reg:V2DI 74 [ vect_cst_.2 ])
41410 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41412 Eventually frame gets turned into sp+offset like this:
41414 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41415 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41416 (const_int 392 [0x188]))))
41418 That later gets turned into:
41420 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41421 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41422 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41424 We'll have the following reload recorded:
41426 Reload 0: reload_in (DI) =
41427 (plus:DI (reg/f:DI 7 sp)
41428 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41429 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41430 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41431 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41432 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41433 reload_reg_rtx: (reg:V2DI 22 xmm1)
41435 This isn't going to work, since SSE instructions can't handle scalar
41436 additions. Returning GENERAL_REGS forces the addition into an integer
41437 register, and reload can handle subsequent reloads without problems. */
41439 if (in_p && GET_CODE (x) == PLUS
41440 && SSE_CLASS_P (rclass)
41441 && SCALAR_INT_MODE_P (mode))
41442 return GENERAL_REGS;
41444 return NO_REGS;
41447 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41449 static bool
41450 ix86_class_likely_spilled_p (reg_class_t rclass)
41452 switch (rclass)
41454 case AREG:
41455 case DREG:
41456 case CREG:
41457 case BREG:
41458 case AD_REGS:
41459 case SIREG:
41460 case DIREG:
41461 case SSE_FIRST_REG:
41462 case FP_TOP_REG:
41463 case FP_SECOND_REG:
41464 case BND_REGS:
41465 return true;
41467 default:
41468 break;
41471 return false;
41474 /* If we are copying between general and FP registers, we need a memory
41475 location. The same is true for SSE and MMX registers.
41477 To optimize register_move_cost performance, allow inline variant.
41479 The macro can't work reliably when one of the CLASSES is a class containing
41480 registers from multiple units (SSE, MMX, integer). We avoid this by never
41481 combining those units in a single alternative in the machine description.
41482 Ensure that this constraint holds to avoid unexpected surprises.
41484 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41485 enforce these sanity checks. */
41487 static inline bool
41488 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41489 machine_mode mode, int strict)
41491 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41492 return false;
41493 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41494 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41495 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41496 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41497 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41498 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41500 gcc_assert (!strict || lra_in_progress);
41501 return true;
41504 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41505 return true;
41507 /* Between mask and general, we have moves no larger than word size. */
41508 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41509 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41510 return true;
41512 /* ??? This is a lie. We do have moves between mmx/general, and for
41513 mmx/sse2. But by saying we need secondary memory we discourage the
41514 register allocator from using the mmx registers unless needed. */
41515 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41516 return true;
41518 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41520 /* SSE1 doesn't have any direct moves from other classes. */
41521 if (!TARGET_SSE2)
41522 return true;
41524 /* If the target says that inter-unit moves are more expensive
41525 than moving through memory, then don't generate them. */
41526 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41527 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41528 return true;
41530 /* Between SSE and general, we have moves no larger than word size. */
41531 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41532 return true;
41535 return false;
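/* For example, a TImode move between SSE_REGS and GENERAL_REGS always needs
   a secondary memory location: either SSE2 or the inter-unit moves are
   unavailable, or the mode is wider than a word. */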
41538 bool
41539 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41540 machine_mode mode, int strict)
41542 return inline_secondary_memory_needed (class1, class2, mode, strict);
41545 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41547 On the 80386, this is the size of MODE in words,
41548 except in the FP regs, where a single reg is always enough. */
41550 static unsigned char
41551 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41553 if (MAYBE_INTEGER_CLASS_P (rclass))
41555 if (mode == XFmode)
41556 return (TARGET_64BIT ? 2 : 3);
41557 else if (mode == XCmode)
41558 return (TARGET_64BIT ? 4 : 6);
41559 else
41560 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41562 else
41564 if (COMPLEX_MODE_P (mode))
41565 return 2;
41566 else
41567 return 1;
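/* For example, with -m32 an integer-class DImode value needs two registers
   and XFmode needs three, while any scalar mode held in the FP or SSE
   registers needs just one (two for complex modes). */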
41571 /* Return true if the registers in CLASS cannot represent the change from
41572 modes FROM to TO. */
41574 bool
41575 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41576 enum reg_class regclass)
41578 if (from == to)
41579 return false;
41581 /* x87 registers can't do subreg at all, as all values are reformatted
41582 to extended precision. */
41583 if (MAYBE_FLOAT_CLASS_P (regclass))
41584 return true;
41586 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41588 /* Vector registers do not support QI or HImode loads. If we don't
41589 disallow a change to these modes, reload will assume it's ok to
41590 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41591 the vec_dupv4hi pattern. */
41592 if (GET_MODE_SIZE (from) < 4)
41593 return true;
41596 return false;
41599 /* Return the cost of moving data of mode M between a
41600 register and memory. A value of 2 is the default; this cost is
41601 relative to those in `REGISTER_MOVE_COST'.
41603 This function is used extensively by register_move_cost, which is used to
41604 build tables at startup. Make it inline in this case.
41605 When IN is 2, return the maximum of the in and out move costs.
41607 If moving between registers and memory is more expensive than
41608 between two registers, you should define this macro to express the
41609 relative cost.
41611 Also model the increased cost of moving QImode registers in non
41612 Q_REGS classes. */
41614 static inline int
41615 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41616 int in)
41618 int cost;
41619 if (FLOAT_CLASS_P (regclass))
41621 int index;
41622 switch (mode)
41624 case SFmode:
41625 index = 0;
41626 break;
41627 case DFmode:
41628 index = 1;
41629 break;
41630 case XFmode:
41631 index = 2;
41632 break;
41633 default:
41634 return 100;
41636 if (in == 2)
41637 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41638 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41640 if (SSE_CLASS_P (regclass))
41642 int index;
41643 switch (GET_MODE_SIZE (mode))
41645 case 4:
41646 index = 0;
41647 break;
41648 case 8:
41649 index = 1;
41650 break;
41651 case 16:
41652 index = 2;
41653 break;
41654 default:
41655 return 100;
41657 if (in == 2)
41658 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41659 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41661 if (MMX_CLASS_P (regclass))
41663 int index;
41664 switch (GET_MODE_SIZE (mode))
41666 case 4:
41667 index = 0;
41668 break;
41669 case 8:
41670 index = 1;
41671 break;
41672 default:
41673 return 100;
41675 if (in == 2)
41676 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41677 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41679 switch (GET_MODE_SIZE (mode))
41681 case 1:
41682 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41684 if (!in)
41685 return ix86_cost->int_store[0];
41686 if (TARGET_PARTIAL_REG_DEPENDENCY
41687 && optimize_function_for_speed_p (cfun))
41688 cost = ix86_cost->movzbl_load;
41689 else
41690 cost = ix86_cost->int_load[0];
41691 if (in == 2)
41692 return MAX (cost, ix86_cost->int_store[0]);
41693 return cost;
41695 else
41697 if (in == 2)
41698 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41699 if (in)
41700 return ix86_cost->movzbl_load;
41701 else
41702 return ix86_cost->int_store[0] + 4;
41704 break;
41705 case 2:
41706 if (in == 2)
41707 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41708 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41709 default:
41710 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41711 if (mode == TFmode)
41712 mode = XFmode;
41713 if (in == 2)
41714 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41715 else if (in)
41716 cost = ix86_cost->int_load[2];
41717 else
41718 cost = ix86_cost->int_store[2];
41719 return (cost * (((int) GET_MODE_SIZE (mode)
41720 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41724 static int
41725 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41726 bool in)
41728 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41732 /* Return the cost of moving data from a register in class CLASS1 to
41733 one in class CLASS2.
41735 It is not required that the cost always equal 2 when FROM is the same as TO;
41736 on some machines it is expensive to move between registers if they are not
41737 general registers. */
41739 static int
41740 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41741 reg_class_t class2_i)
41743 enum reg_class class1 = (enum reg_class) class1_i;
41744 enum reg_class class2 = (enum reg_class) class2_i;
41746 /* In case we require secondary memory, compute cost of the store followed
41747 by load. In order to avoid bad register allocation choices, we need
41748 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41750 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41752 int cost = 1;
41754 cost += inline_memory_move_cost (mode, class1, 2);
41755 cost += inline_memory_move_cost (mode, class2, 2);
41757 /* In case of copying from a general purpose register we may emit multiple
41758 stores followed by a single load, causing a memory size mismatch stall.
41759 Count this as an arbitrarily high cost of 20. */
41760 if (targetm.class_max_nregs (class1, mode)
41761 > targetm.class_max_nregs (class2, mode))
41762 cost += 20;
41764 /* In the case of FP/MMX moves, the registers actually overlap, and we
41765 have to switch modes in order to treat them differently. */
41766 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41767 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41768 cost += 20;
41770 return cost;
41773 /* Moves between SSE/MMX and integer unit are expensive. */
41774 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41775 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41777 /* ??? By keeping the returned value relatively high, we limit the number
41778 of moves between integer and MMX/SSE registers for all targets.
41779 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
41780 where integer modes in MMX/SSE registers are not tieable
41781 because of missing QImode and HImode moves to, from or between
41782 MMX/SSE registers. */
41783 return MAX (8, ix86_cost->mmxsse_to_integer);
41785 if (MAYBE_FLOAT_CLASS_P (class1))
41786 return ix86_cost->fp_move;
41787 if (MAYBE_SSE_CLASS_P (class1))
41788 return ix86_cost->sse_move;
41789 if (MAYBE_MMX_CLASS_P (class1))
41790 return ix86_cost->mmx_move;
41791 return 2;
41794 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41795 MODE. */
41797 bool
41798 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41800 /* The flags register, and only the flags register, can hold CCmode values. */
41801 if (CC_REGNO_P (regno))
41802 return GET_MODE_CLASS (mode) == MODE_CC;
41803 if (GET_MODE_CLASS (mode) == MODE_CC
41804 || GET_MODE_CLASS (mode) == MODE_RANDOM
41805 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41806 return false;
41807 if (STACK_REGNO_P (regno))
41808 return VALID_FP_MODE_P (mode);
41809 if (MASK_REGNO_P (regno))
41810 return (VALID_MASK_REG_MODE (mode)
41811 || (TARGET_AVX512BW
41812 && VALID_MASK_AVX512BW_MODE (mode)));
41813 if (BND_REGNO_P (regno))
41814 return VALID_BND_REG_MODE (mode);
41815 if (SSE_REGNO_P (regno))
41817 /* We implement the move patterns for all vector modes into and
41818 out of SSE registers, even when no operation instructions
41819 are available. */
41821 /* For AVX-512 we allow, regardless of regno:
41822 - XI mode
41823 - any of 512-bit wide vector mode
41824 - any scalar mode. */
41825 if (TARGET_AVX512F
41826 && (mode == XImode
41827 || VALID_AVX512F_REG_MODE (mode)
41828 || VALID_AVX512F_SCALAR_MODE (mode)))
41829 return true;
41831 /* TODO check for QI/HI scalars. */
41832 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41833 if (TARGET_AVX512VL
41834 && (mode == OImode
41835 || mode == TImode
41836 || VALID_AVX256_REG_MODE (mode)
41837 || VALID_AVX512VL_128_REG_MODE (mode)))
41838 return true;
41840 /* xmm16-xmm31 are only available for AVX-512. */
41841 if (EXT_REX_SSE_REGNO_P (regno))
41842 return false;
41844 /* OImode and AVX modes are available only when AVX is enabled. */
41845 return ((TARGET_AVX
41846 && VALID_AVX256_REG_OR_OI_MODE (mode))
41847 || VALID_SSE_REG_MODE (mode)
41848 || VALID_SSE2_REG_MODE (mode)
41849 || VALID_MMX_REG_MODE (mode)
41850 || VALID_MMX_REG_MODE_3DNOW (mode));
41852 if (MMX_REGNO_P (regno))
41854 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41855 so if the register is available at all, then we can move data of
41856 the given mode into or out of it. */
41857 return (VALID_MMX_REG_MODE (mode)
41858 || VALID_MMX_REG_MODE_3DNOW (mode));
41861 if (mode == QImode)
41863 /* Take care for QImode values - they can be in non-QI regs,
41864 but then they do cause partial register stalls. */
41865 if (ANY_QI_REGNO_P (regno))
41866 return true;
41867 if (!TARGET_PARTIAL_REG_STALL)
41868 return true;
41869 /* LRA checks if the hard register is OK for the given mode.
41870 QImode values can live in non-QI regs, so we allow all
41871 registers here. */
41872 if (lra_in_progress)
41873 return true;
41874 return !can_create_pseudo_p ();
41876 /* We handle both integers and floats in the general purpose registers. */
41877 else if (VALID_INT_MODE_P (mode))
41878 return true;
41879 else if (VALID_FP_MODE_P (mode))
41880 return true;
41881 else if (VALID_DFP_MODE_P (mode))
41882 return true;
41883 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41884 on to use that value in smaller contexts, this can easily force a
41885 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41886 supporting DImode, allow it. */
41887 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41888 return true;
41890 return false;
41893 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41894 tieable integer mode. */
41896 static bool
41897 ix86_tieable_integer_mode_p (machine_mode mode)
41899 switch (mode)
41901 case HImode:
41902 case SImode:
41903 return true;
41905 case QImode:
41906 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41908 case DImode:
41909 return TARGET_64BIT;
41911 default:
41912 return false;
41916 /* Return true if MODE1 is accessible in a register that can hold MODE2
41917 without copying. That is, all register classes that can hold MODE2
41918 can also hold MODE1. */
41920 bool
41921 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41923 if (mode1 == mode2)
41924 return true;
41926 if (ix86_tieable_integer_mode_p (mode1)
41927 && ix86_tieable_integer_mode_p (mode2))
41928 return true;
41930 /* MODE2 being XFmode implies fp stack or general regs, which means we
41931 can tie any smaller floating point modes to it. Note that we do not
41932 tie this with TFmode. */
41933 if (mode2 == XFmode)
41934 return mode1 == SFmode || mode1 == DFmode;
41936 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41937 that we can tie it with SFmode. */
41938 if (mode2 == DFmode)
41939 return mode1 == SFmode;
41941 /* If MODE2 is only appropriate for an SSE register, then tie with
41942 any other mode acceptable to SSE registers. */
41943 if (GET_MODE_SIZE (mode2) == 32
41944 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41945 return (GET_MODE_SIZE (mode1) == 32
41946 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41947 if (GET_MODE_SIZE (mode2) == 16
41948 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41949 return (GET_MODE_SIZE (mode1) == 16
41950 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41952 /* If MODE2 is appropriate for an MMX register, then tie
41953 with any other mode acceptable to MMX registers. */
41954 if (GET_MODE_SIZE (mode2) == 8
41955 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41956 return (GET_MODE_SIZE (mode1) == 8
41957 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41959 return false;
41962 /* Return the cost of moving between two registers of mode MODE. */
41964 static int
41965 ix86_set_reg_reg_cost (machine_mode mode)
41967 unsigned int units = UNITS_PER_WORD;
41969 switch (GET_MODE_CLASS (mode))
41971 default:
41972 break;
41974 case MODE_CC:
41975 units = GET_MODE_SIZE (CCmode);
41976 break;
41978 case MODE_FLOAT:
41979 if ((TARGET_SSE && mode == TFmode)
41980 || (TARGET_80387 && mode == XFmode)
41981 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41982 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41983 units = GET_MODE_SIZE (mode);
41984 break;
41986 case MODE_COMPLEX_FLOAT:
41987 if ((TARGET_SSE && mode == TCmode)
41988 || (TARGET_80387 && mode == XCmode)
41989 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41990 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41991 units = GET_MODE_SIZE (mode);
41992 break;
41994 case MODE_VECTOR_INT:
41995 case MODE_VECTOR_FLOAT:
41996 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41997 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41998 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41999 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42000 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42001 units = GET_MODE_SIZE (mode);
42004 /* Return the cost of moving between two registers of mode MODE,
42005 assuming that the move will be in pieces of at most UNITS bytes. */
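/* Illustrative figures (assuming 64-bit UNITS_PER_WORD == 8): moving a
32-byte V8SFmode value with TARGET_AVX enabled gives units == 32, i.e.
COSTS_N_INSNS (1); without AVX the move is costed in word-sized pieces,
i.e. COSTS_N_INSNS (4). */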
42006 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42009 /* Compute a (partial) cost for rtx X. Return true if the complete
42010 cost has been computed, and false if subexpressions should be
42011 scanned. In either case, *TOTAL contains the cost result. */
42013 static bool
42014 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42015 bool speed)
42017 rtx mask;
42018 enum rtx_code code = (enum rtx_code) code_i;
42019 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42020 machine_mode mode = GET_MODE (x);
42021 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42023 switch (code)
42025 case SET:
42026 if (register_operand (SET_DEST (x), VOIDmode)
42027 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42029 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42030 return true;
42032 return false;
42034 case CONST_INT:
42035 case CONST:
42036 case LABEL_REF:
42037 case SYMBOL_REF:
42038 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42039 *total = 3;
42040 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42041 *total = 2;
42042 else if (flag_pic && SYMBOLIC_CONST (x)
42043 && !(TARGET_64BIT
42044 && (GET_CODE (x) == LABEL_REF
42045 || (GET_CODE (x) == SYMBOL_REF
42046 && SYMBOL_REF_LOCAL_P (x)))))
42047 *total = 1;
42048 else
42049 *total = 0;
42050 return true;
42052 case CONST_DOUBLE:
42053 if (mode == VOIDmode)
42055 *total = 0;
42056 return true;
42058 switch (standard_80387_constant_p (x))
42060 case 1: /* 0.0 */
42061 *total = 1;
42062 return true;
42063 default: /* Other constants */
42064 *total = 2;
42065 return true;
42066 case 0:
42067 case -1:
42068 break;
42070 if (SSE_FLOAT_MODE_P (mode))
42072 case CONST_VECTOR:
42073 switch (standard_sse_constant_p (x))
42075 case 0:
42076 break;
42077 case 1: /* 0: xor eliminates false dependency */
42078 *total = 0;
42079 return true;
42080 default: /* -1: cmp contains false dependency */
42081 *total = 1;
42082 return true;
42085 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42086 it'll probably end up. Add a penalty for size. */
42087 *total = (COSTS_N_INSNS (1)
42088 + (flag_pic != 0 && !TARGET_64BIT)
42089 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42090 return true;
42092 case ZERO_EXTEND:
42093 /* The zero extension is often completely free on x86_64, so make
42094 it as cheap as possible. */
42095 if (TARGET_64BIT && mode == DImode
42096 && GET_MODE (XEXP (x, 0)) == SImode)
42097 *total = 1;
42098 else if (TARGET_ZERO_EXTEND_WITH_AND)
42099 *total = cost->add;
42100 else
42101 *total = cost->movzx;
42102 return false;
42104 case SIGN_EXTEND:
42105 *total = cost->movsx;
42106 return false;
42108 case ASHIFT:
42109 if (SCALAR_INT_MODE_P (mode)
42110 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42111 && CONST_INT_P (XEXP (x, 1)))
42113 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42114 if (value == 1)
42116 *total = cost->add;
42117 return false;
42119 if ((value == 2 || value == 3)
42120 && cost->lea <= cost->shift_const)
42122 *total = cost->lea;
42123 return false;
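/* Illustrative: a shift such as "x << 3" can be synthesized as
"lea (,%rax,8), %rdx", so when an LEA is no more expensive than a
constant shift we charge cost->lea here. */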
42126 /* FALLTHRU */
42128 case ROTATE:
42129 case ASHIFTRT:
42130 case LSHIFTRT:
42131 case ROTATERT:
42132 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42134 /* ??? Should be SSE vector operation cost. */
42135 /* At least for published AMD latencies, this really is the same
42136 as the latency for a simple fpu operation like fabs. */
42137 /* V*QImode is emulated with 1-11 insns. */
42138 if (mode == V16QImode || mode == V32QImode)
42140 int count = 11;
42141 if (TARGET_XOP && mode == V16QImode)
42143 /* For XOP we use vpshab, which requires a broadcast of the
42144 value to the variable shift insn. For constants this
42145 means a V16QImode constant in memory; even when we can perform the
42146 shift with one insn, set the cost to prefer paddb. */
42147 if (CONSTANT_P (XEXP (x, 1)))
42149 *total = (cost->fabs
42150 + rtx_cost (XEXP (x, 0), code, 0, speed)
42151 + (speed ? 2 : COSTS_N_BYTES (16)));
42152 return true;
42154 count = 3;
42156 else if (TARGET_SSSE3)
42157 count = 7;
42158 *total = cost->fabs * count;
42160 else
42161 *total = cost->fabs;
42163 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42165 if (CONST_INT_P (XEXP (x, 1)))
42167 if (INTVAL (XEXP (x, 1)) > 32)
42168 *total = cost->shift_const + COSTS_N_INSNS (2);
42169 else
42170 *total = cost->shift_const * 2;
42172 else
42174 if (GET_CODE (XEXP (x, 1)) == AND)
42175 *total = cost->shift_var * 2;
42176 else
42177 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42180 else
42182 if (CONST_INT_P (XEXP (x, 1)))
42183 *total = cost->shift_const;
42184 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42185 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42187 /* Return the cost after shift-and truncation. */
42188 *total = cost->shift_var;
42189 return true;
42191 else
42192 *total = cost->shift_var;
42194 return false;
42196 case FMA:
42198 rtx sub;
42200 gcc_assert (FLOAT_MODE_P (mode));
42201 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42203 /* ??? SSE scalar/vector cost should be used here. */
42204 /* ??? Bald assumption that fma has the same cost as fmul. */
42205 *total = cost->fmul;
42206 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42208 /* Negation in op0 or op2 is free: FMS, FNMA, FNMS. */
42209 sub = XEXP (x, 0);
42210 if (GET_CODE (sub) == NEG)
42211 sub = XEXP (sub, 0);
42212 *total += rtx_cost (sub, FMA, 0, speed);
42214 sub = XEXP (x, 2);
42215 if (GET_CODE (sub) == NEG)
42216 sub = XEXP (sub, 0);
42217 *total += rtx_cost (sub, FMA, 2, speed);
42218 return true;
42221 case MULT:
42222 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42224 /* ??? SSE scalar cost should be used here. */
42225 *total = cost->fmul;
42226 return false;
42228 else if (X87_FLOAT_MODE_P (mode))
42230 *total = cost->fmul;
42231 return false;
42233 else if (FLOAT_MODE_P (mode))
42235 /* ??? SSE vector cost should be used here. */
42236 *total = cost->fmul;
42237 return false;
42239 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42241 /* V*QImode is emulated with 7-13 insns. */
42242 if (mode == V16QImode || mode == V32QImode)
42244 int extra = 11;
42245 if (TARGET_XOP && mode == V16QImode)
42246 extra = 5;
42247 else if (TARGET_SSSE3)
42248 extra = 6;
42249 *total = cost->fmul * 2 + cost->fabs * extra;
42251 /* V*DImode is emulated with 5-8 insns. */
42252 else if (mode == V2DImode || mode == V4DImode)
42254 if (TARGET_XOP && mode == V2DImode)
42255 *total = cost->fmul * 2 + cost->fabs * 3;
42256 else
42257 *total = cost->fmul * 3 + cost->fabs * 5;
42259 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42260 insns, including two PMULUDQ. */
42261 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42262 *total = cost->fmul * 2 + cost->fabs * 5;
42263 else
42264 *total = cost->fmul;
42265 return false;
42267 else
42269 rtx op0 = XEXP (x, 0);
42270 rtx op1 = XEXP (x, 1);
42271 int nbits;
42272 if (CONST_INT_P (XEXP (x, 1)))
42274 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42275 for (nbits = 0; value != 0; value &= value - 1)
42276 nbits++;
42278 else
42279 /* This is arbitrary. */
42280 nbits = 7;
42282 /* Compute costs correctly for widening multiplication. */
42283 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42284 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42285 == GET_MODE_SIZE (mode))
42287 int is_mulwiden = 0;
42288 machine_mode inner_mode = GET_MODE (op0);
42290 if (GET_CODE (op0) == GET_CODE (op1))
42291 is_mulwiden = 1, op1 = XEXP (op1, 0);
42292 else if (CONST_INT_P (op1))
42294 if (GET_CODE (op0) == SIGN_EXTEND)
42295 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42296 == INTVAL (op1);
42297 else
42298 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42301 if (is_mulwiden)
42302 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42305 *total = (cost->mult_init[MODE_INDEX (mode)]
42306 + nbits * cost->mult_bit
42307 + rtx_cost (op0, outer_code, opno, speed)
42308 + rtx_cost (op1, outer_code, opno, speed));
42310 return true;
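/* Illustrative: a DImode MULT whose operands are SIGN_EXTENDs of SImode
values is treated as a widening multiply above; MODE is narrowed to SImode
so the cost reflects the single widening IMUL the hardware performs rather
than a full double-word multiply. */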
42313 case DIV:
42314 case UDIV:
42315 case MOD:
42316 case UMOD:
42317 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42318 /* ??? SSE cost should be used here. */
42319 *total = cost->fdiv;
42320 else if (X87_FLOAT_MODE_P (mode))
42321 *total = cost->fdiv;
42322 else if (FLOAT_MODE_P (mode))
42323 /* ??? SSE vector cost should be used here. */
42324 *total = cost->fdiv;
42325 else
42326 *total = cost->divide[MODE_INDEX (mode)];
42327 return false;
42329 case PLUS:
42330 if (GET_MODE_CLASS (mode) == MODE_INT
42331 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42333 if (GET_CODE (XEXP (x, 0)) == PLUS
42334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42335 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42336 && CONSTANT_P (XEXP (x, 1)))
42338 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42339 if (val == 2 || val == 4 || val == 8)
42341 *total = cost->lea;
42342 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42343 outer_code, opno, speed);
42344 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42345 outer_code, opno, speed);
42346 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42347 return true;
42350 else if (GET_CODE (XEXP (x, 0)) == MULT
42351 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42353 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42354 if (val == 2 || val == 4 || val == 8)
42356 *total = cost->lea;
42357 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42358 outer_code, opno, speed);
42359 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42360 return true;
42363 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42365 *total = cost->lea;
42366 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42367 outer_code, opno, speed);
42368 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42369 outer_code, opno, speed);
42370 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42371 return true;
42374 /* FALLTHRU */
42376 case MINUS:
42377 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42379 /* ??? SSE cost should be used here. */
42380 *total = cost->fadd;
42381 return false;
42383 else if (X87_FLOAT_MODE_P (mode))
42385 *total = cost->fadd;
42386 return false;
42388 else if (FLOAT_MODE_P (mode))
42390 /* ??? SSE vector cost should be used here. */
42391 *total = cost->fadd;
42392 return false;
42394 /* FALLTHRU */
42396 case AND:
42397 case IOR:
42398 case XOR:
42399 if (GET_MODE_CLASS (mode) == MODE_INT
42400 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42402 *total = (cost->add * 2
42403 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42404 << (GET_MODE (XEXP (x, 0)) != DImode))
42405 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42406 << (GET_MODE (XEXP (x, 1)) != DImode)));
42407 return true;
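/* Illustrative: on a 32-bit target a DImode IOR is carried out as two
SImode ORs on the low and high halves, hence the cost->add * 2 base
cost above. */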
42409 /* FALLTHRU */
42411 case NEG:
42412 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42414 /* ??? SSE cost should be used here. */
42415 *total = cost->fchs;
42416 return false;
42418 else if (X87_FLOAT_MODE_P (mode))
42420 *total = cost->fchs;
42421 return false;
42423 else if (FLOAT_MODE_P (mode))
42425 /* ??? SSE vector cost should be used here. */
42426 *total = cost->fchs;
42427 return false;
42429 /* FALLTHRU */
42431 case NOT:
42432 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42434 /* ??? Should be SSE vector operation cost. */
42435 /* At least for published AMD latencies, this really is the same
42436 as the latency for a simple fpu operation like fabs. */
42437 *total = cost->fabs;
42439 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42440 *total = cost->add * 2;
42441 else
42442 *total = cost->add;
42443 return false;
42445 case COMPARE:
42446 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42447 && XEXP (XEXP (x, 0), 1) == const1_rtx
42448 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42449 && XEXP (x, 1) == const0_rtx)
42451 /* This kind of construct is implemented using test[bwl].
42452 Treat it as if we had an AND. */
42453 *total = (cost->add
42454 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42455 + rtx_cost (const1_rtx, outer_code, opno, speed));
42456 return true;
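/* Illustrative: a single-bit test such as "if (x & (1 << 5))" typically
reaches this ZERO_EXTRACT form and is emitted as one test instruction,
e.g. "testl $32, %eax". */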
42458 return false;
42460 case FLOAT_EXTEND:
42461 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42462 *total = 0;
42463 return false;
42465 case ABS:
42466 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42467 /* ??? SSE cost should be used here. */
42468 *total = cost->fabs;
42469 else if (X87_FLOAT_MODE_P (mode))
42470 *total = cost->fabs;
42471 else if (FLOAT_MODE_P (mode))
42472 /* ??? SSE vector cost should be used here. */
42473 *total = cost->fabs;
42474 return false;
42476 case SQRT:
42477 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42478 /* ??? SSE cost should be used here. */
42479 *total = cost->fsqrt;
42480 else if (X87_FLOAT_MODE_P (mode))
42481 *total = cost->fsqrt;
42482 else if (FLOAT_MODE_P (mode))
42483 /* ??? SSE vector cost should be used here. */
42484 *total = cost->fsqrt;
42485 return false;
42487 case UNSPEC:
42488 if (XINT (x, 1) == UNSPEC_TP)
42489 *total = 0;
42490 return false;
42492 case VEC_SELECT:
42493 case VEC_CONCAT:
42494 case VEC_DUPLICATE:
42495 /* ??? Assume all of these vector manipulation patterns are
42496 recognizable, in which case they all pretty much have the
42497 same cost. */
42498 *total = cost->fabs;
42499 return true;
42500 case VEC_MERGE:
42501 mask = XEXP (x, 2);
42502 /* This is a masked instruction; assume the same cost
42503 as the nonmasked variant. */
42504 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42505 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42506 else
42507 *total = cost->fabs;
42508 return true;
42510 default:
42511 return false;
42515 #if TARGET_MACHO
42517 static int current_machopic_label_num;
42519 /* Given a symbol name and its associated stub, write out the
42520 definition of the stub. */
42522 void
42523 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42525 unsigned int length;
42526 char *binder_name, *symbol_name, lazy_ptr_name[32];
42527 int label = ++current_machopic_label_num;
42529 /* For 64-bit we shouldn't get here. */
42530 gcc_assert (!TARGET_64BIT);
42532 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42533 symb = targetm.strip_name_encoding (symb);
42535 length = strlen (stub);
42536 binder_name = XALLOCAVEC (char, length + 32);
42537 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42539 length = strlen (symb);
42540 symbol_name = XALLOCAVEC (char, length + 32);
42541 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42543 sprintf (lazy_ptr_name, "L%d$lz", label);
42545 if (MACHOPIC_ATT_STUB)
42546 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42547 else if (MACHOPIC_PURE)
42548 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42549 else
42550 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42552 fprintf (file, "%s:\n", stub);
42553 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42555 if (MACHOPIC_ATT_STUB)
42557 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42559 else if (MACHOPIC_PURE)
42561 /* PIC stub. */
42562 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42563 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42564 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42565 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42566 label, lazy_ptr_name, label);
42567 fprintf (file, "\tjmp\t*%%ecx\n");
42569 else
42570 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42572 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42573 it needs no stub-binding-helper. */
42574 if (MACHOPIC_ATT_STUB)
42575 return;
42577 fprintf (file, "%s:\n", binder_name);
42579 if (MACHOPIC_PURE)
42581 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42582 fprintf (file, "\tpushl\t%%ecx\n");
42584 else
42585 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42587 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42589 /* N.B. Keep the correspondence of these
42590 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42591 old-pic/new-pic/non-pic stubs; altering this will break
42592 compatibility with existing dylibs. */
42593 if (MACHOPIC_PURE)
42595 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42596 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42598 else
42599 /* 16-byte -mdynamic-no-pic stub. */
42600 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42602 fprintf (file, "%s:\n", lazy_ptr_name);
42603 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42604 fprintf (file, ASM_LONG "%s\n", binder_name);
42606 #endif /* TARGET_MACHO */
42608 /* Order the registers for register allocator. */
42610 void
42611 x86_order_regs_for_local_alloc (void)
42613 int pos = 0;
42614 int i;
42616 /* First allocate the local general purpose registers. */
42617 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42618 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42619 reg_alloc_order [pos++] = i;
42621 /* Global general purpose registers. */
42622 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42623 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42624 reg_alloc_order [pos++] = i;
42626 /* x87 registers come first in case we are doing FP math
42627 using them. */
42628 if (!TARGET_SSE_MATH)
42629 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42630 reg_alloc_order [pos++] = i;
42632 /* SSE registers. */
42633 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42634 reg_alloc_order [pos++] = i;
42635 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42636 reg_alloc_order [pos++] = i;
42638 /* Extended REX SSE registers. */
42639 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42640 reg_alloc_order [pos++] = i;
42642 /* Mask registers. */
42643 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42644 reg_alloc_order [pos++] = i;
42646 /* MPX bound registers. */
42647 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42648 reg_alloc_order [pos++] = i;
42650 /* x87 registers. */
42651 if (TARGET_SSE_MATH)
42652 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42653 reg_alloc_order [pos++] = i;
42655 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42656 reg_alloc_order [pos++] = i;
42658 /* Initialize the rest of the array, as we do not allocate some registers
42659 at all. */
42660 while (pos < FIRST_PSEUDO_REGISTER)
42661 reg_alloc_order [pos++] = 0;
42664 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42665 in struct attribute_spec.handler. */
42666 static tree
42667 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42668 tree args,
42669 int,
42670 bool *no_add_attrs)
42672 if (TREE_CODE (*node) != FUNCTION_TYPE
42673 && TREE_CODE (*node) != METHOD_TYPE
42674 && TREE_CODE (*node) != FIELD_DECL
42675 && TREE_CODE (*node) != TYPE_DECL)
42677 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42678 name);
42679 *no_add_attrs = true;
42680 return NULL_TREE;
42682 if (TARGET_64BIT)
42684 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42685 name);
42686 *no_add_attrs = true;
42687 return NULL_TREE;
42689 if (is_attribute_p ("callee_pop_aggregate_return", name))
42691 tree cst;
42693 cst = TREE_VALUE (args);
42694 if (TREE_CODE (cst) != INTEGER_CST)
42696 warning (OPT_Wattributes,
42697 "%qE attribute requires an integer constant argument",
42698 name);
42699 *no_add_attrs = true;
42701 else if (compare_tree_int (cst, 0) != 0
42702 && compare_tree_int (cst, 1) != 0)
42704 warning (OPT_Wattributes,
42705 "argument to %qE attribute is neither zero, nor one",
42706 name);
42707 *no_add_attrs = true;
42710 return NULL_TREE;
42713 return NULL_TREE;
42716 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42717 struct attribute_spec.handler. */
42718 static tree
42719 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42720 bool *no_add_attrs)
42722 if (TREE_CODE (*node) != FUNCTION_TYPE
42723 && TREE_CODE (*node) != METHOD_TYPE
42724 && TREE_CODE (*node) != FIELD_DECL
42725 && TREE_CODE (*node) != TYPE_DECL)
42727 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42728 name);
42729 *no_add_attrs = true;
42730 return NULL_TREE;
42733 /* Can combine regparm with all attributes but fastcall. */
42734 if (is_attribute_p ("ms_abi", name))
42736 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42738 error ("ms_abi and sysv_abi attributes are not compatible");
42741 return NULL_TREE;
42743 else if (is_attribute_p ("sysv_abi", name))
42745 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42747 error ("ms_abi and sysv_abi attributes are not compatible");
42750 return NULL_TREE;
42753 return NULL_TREE;
42756 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42757 struct attribute_spec.handler. */
42758 static tree
42759 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42760 bool *no_add_attrs)
42762 tree *type = NULL;
42763 if (DECL_P (*node))
42765 if (TREE_CODE (*node) == TYPE_DECL)
42766 type = &TREE_TYPE (*node);
42768 else
42769 type = node;
42771 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42773 warning (OPT_Wattributes, "%qE attribute ignored",
42774 name);
42775 *no_add_attrs = true;
42778 else if ((is_attribute_p ("ms_struct", name)
42779 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42780 || ((is_attribute_p ("gcc_struct", name)
42781 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42783 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42784 name);
42785 *no_add_attrs = true;
42788 return NULL_TREE;
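/* Handle an attribute that is only valid on function declarations;
arguments as in struct attribute_spec.handler. */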
42791 static tree
42792 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42793 bool *no_add_attrs)
42795 if (TREE_CODE (*node) != FUNCTION_DECL)
42797 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42798 name);
42799 *no_add_attrs = true;
42801 return NULL_TREE;
42804 static bool
42805 ix86_ms_bitfield_layout_p (const_tree record_type)
42807 return ((TARGET_MS_BITFIELD_LAYOUT
42808 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42809 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42812 /* Returns an expression indicating where the this parameter is
42813 located on entry to the FUNCTION. */
42815 static rtx
42816 x86_this_parameter (tree function)
42818 tree type = TREE_TYPE (function);
42819 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42820 int nregs;
42822 if (TARGET_64BIT)
42824 const int *parm_regs;
42826 if (ix86_function_type_abi (type) == MS_ABI)
42827 parm_regs = x86_64_ms_abi_int_parameter_registers;
42828 else
42829 parm_regs = x86_64_int_parameter_registers;
42830 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42833 nregs = ix86_function_regparm (type, function);
42835 if (nregs > 0 && !stdarg_p (type))
42837 int regno;
42838 unsigned int ccvt = ix86_get_callcvt (type);
42840 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42841 regno = aggr ? DX_REG : CX_REG;
42842 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42844 regno = CX_REG;
42845 if (aggr)
42846 return gen_rtx_MEM (SImode,
42847 plus_constant (Pmode, stack_pointer_rtx, 4));
42849 else
42851 regno = AX_REG;
42852 if (aggr)
42854 regno = DX_REG;
42855 if (nregs == 1)
42856 return gen_rtx_MEM (SImode,
42857 plus_constant (Pmode,
42858 stack_pointer_rtx, 4));
42861 return gen_rtx_REG (SImode, regno);
42864 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42865 aggr ? 8 : 4));
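/* Illustrative summary: in the remaining 32-bit cases `this' is passed on
the stack and is found at 4(%esp) on entry, or at 8(%esp) when a hidden
aggregate-return pointer is passed first. */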
42868 /* Determine whether x86_output_mi_thunk can succeed. */
42870 static bool
42871 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42872 const_tree function)
42874 /* 64-bit can handle anything. */
42875 if (TARGET_64BIT)
42876 return true;
42878 /* For 32-bit, everything's fine if we have one free register. */
42879 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42880 return true;
42882 /* Need a free register for vcall_offset. */
42883 if (vcall_offset)
42884 return false;
42886 /* Need a free register for GOT references. */
42887 if (flag_pic && !targetm.binds_local_p (function))
42888 return false;
42890 /* Otherwise ok. */
42891 return true;
42894 /* Output the assembler code for a thunk function. THUNK_DECL is the
42895 declaration for the thunk function itself, FUNCTION is the decl for
42896 the target function. DELTA is an immediate constant offset to be
42897 added to THIS. If VCALL_OFFSET is nonzero, the word at
42898 *(*this + vcall_offset) should be added to THIS. */
42900 static void
42901 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42902 HOST_WIDE_INT vcall_offset, tree function)
42904 rtx this_param = x86_this_parameter (function);
42905 rtx this_reg, tmp, fnaddr;
42906 unsigned int tmp_regno;
42907 rtx_insn *insn;
42909 if (TARGET_64BIT)
42910 tmp_regno = R10_REG;
42911 else
42913 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42914 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42915 tmp_regno = AX_REG;
42916 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42917 tmp_regno = DX_REG;
42918 else
42919 tmp_regno = CX_REG;
42922 emit_note (NOTE_INSN_PROLOGUE_END);
42924 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42925 pull it in now and let DELTA benefit. */
42926 if (REG_P (this_param))
42927 this_reg = this_param;
42928 else if (vcall_offset)
42930 /* Put the this parameter into %eax. */
42931 this_reg = gen_rtx_REG (Pmode, AX_REG);
42932 emit_move_insn (this_reg, this_param);
42934 else
42935 this_reg = NULL_RTX;
42937 /* Adjust the this parameter by a fixed constant. */
42938 if (delta)
42940 rtx delta_rtx = GEN_INT (delta);
42941 rtx delta_dst = this_reg ? this_reg : this_param;
42943 if (TARGET_64BIT)
42945 if (!x86_64_general_operand (delta_rtx, Pmode))
42947 tmp = gen_rtx_REG (Pmode, tmp_regno);
42948 emit_move_insn (tmp, delta_rtx);
42949 delta_rtx = tmp;
42953 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42956 /* Adjust the this parameter by a value stored in the vtable. */
42957 if (vcall_offset)
42959 rtx vcall_addr, vcall_mem, this_mem;
42961 tmp = gen_rtx_REG (Pmode, tmp_regno);
42963 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42964 if (Pmode != ptr_mode)
42965 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42966 emit_move_insn (tmp, this_mem);
42968 /* Adjust the this parameter. */
42969 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42970 if (TARGET_64BIT
42971 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42973 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42974 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42975 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42978 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42979 if (Pmode != ptr_mode)
42980 emit_insn (gen_addsi_1_zext (this_reg,
42981 gen_rtx_REG (ptr_mode,
42982 REGNO (this_reg)),
42983 vcall_mem));
42984 else
42985 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42988 /* If necessary, drop THIS back to its stack slot. */
42989 if (this_reg && this_reg != this_param)
42990 emit_move_insn (this_param, this_reg);
42992 fnaddr = XEXP (DECL_RTL (function), 0);
42993 if (TARGET_64BIT)
42995 if (!flag_pic || targetm.binds_local_p (function)
42996 || TARGET_PECOFF)
42998 else
43000 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43001 tmp = gen_rtx_CONST (Pmode, tmp);
43002 fnaddr = gen_const_mem (Pmode, tmp);
43005 else
43007 if (!flag_pic || targetm.binds_local_p (function))
43009 #if TARGET_MACHO
43010 else if (TARGET_MACHO)
43012 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43013 fnaddr = XEXP (fnaddr, 0);
43015 #endif /* TARGET_MACHO */
43016 else
43018 tmp = gen_rtx_REG (Pmode, CX_REG);
43019 output_set_got (tmp, NULL_RTX);
43021 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43022 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43023 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43024 fnaddr = gen_const_mem (Pmode, fnaddr);
43028 /* Our sibling call patterns do not allow memories, because we have no
43029 predicate that can distinguish between frame and non-frame memory.
43030 For our purposes here, we can get away with (ab)using a jump pattern,
43031 because we're going to do no optimization. */
43032 if (MEM_P (fnaddr))
43034 if (sibcall_insn_operand (fnaddr, word_mode))
43036 fnaddr = XEXP (DECL_RTL (function), 0);
43037 tmp = gen_rtx_MEM (QImode, fnaddr);
43038 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43039 tmp = emit_call_insn (tmp);
43040 SIBLING_CALL_P (tmp) = 1;
43042 else
43043 emit_jump_insn (gen_indirect_jump (fnaddr));
43045 else
43047 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43049 // CM_LARGE_PIC always uses pseudo PIC register which is
43050 // uninitialized. Since FUNCTION is local and calling it
43051 // doesn't go through PLT, we use scratch register %r11 as
43052 // PIC register and initialize it here.
43053 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43054 ix86_init_large_pic_reg (tmp_regno);
43055 fnaddr = legitimize_pic_address (fnaddr,
43056 gen_rtx_REG (Pmode, tmp_regno));
43059 if (!sibcall_insn_operand (fnaddr, word_mode))
43061 tmp = gen_rtx_REG (word_mode, tmp_regno);
43062 if (GET_MODE (fnaddr) != word_mode)
43063 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43064 emit_move_insn (tmp, fnaddr);
43065 fnaddr = tmp;
43068 tmp = gen_rtx_MEM (QImode, fnaddr);
43069 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43070 tmp = emit_call_insn (tmp);
43071 SIBLING_CALL_P (tmp) = 1;
43073 emit_barrier ();
43075 /* Emit just enough of rest_of_compilation to get the insns emitted.
43076 Note that use_thunk calls assemble_start_function et al. */
43077 insn = get_insns ();
43078 shorten_branches (insn);
43079 final_start_function (insn, file, 1);
43080 final (insn, file, 1);
43081 final_end_function ();
43084 static void
43085 x86_file_start (void)
43087 default_file_start ();
43088 if (TARGET_16BIT)
43089 fputs ("\t.code16gcc\n", asm_out_file);
43090 #if TARGET_MACHO
43091 darwin_file_start ();
43092 #endif
43093 if (X86_FILE_START_VERSION_DIRECTIVE)
43094 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43095 if (X86_FILE_START_FLTUSED)
43096 fputs ("\t.global\t__fltused\n", asm_out_file);
43097 if (ix86_asm_dialect == ASM_INTEL)
43098 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
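/* Return the alignment to use for FIELD, given the alignment COMPUTED so
far: on 32-bit targets without -malign-double, double, complex-double and
integer fields are capped at 32-bit alignment. */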
43101 int
43102 x86_field_alignment (tree field, int computed)
43104 machine_mode mode;
43105 tree type = TREE_TYPE (field);
43107 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43108 return computed;
43109 mode = TYPE_MODE (strip_array_types (type));
43110 if (mode == DFmode || mode == DCmode
43111 || GET_MODE_CLASS (mode) == MODE_INT
43112 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43113 return MIN (32, computed);
43114 return computed;
43117 /* Print call to TARGET to FILE. */
43119 static void
43120 x86_print_call_or_nop (FILE *file, const char *target)
43122 if (flag_nop_mcount)
43123 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43124 else
43125 fprintf (file, "1:\tcall\t%s\n", target);
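/* Note: both alternatives above are 5 bytes long, so the nop form emitted
for -mnop-mcount occupies exactly the space a real call would, which keeps
the recorded site patchable into a call later without resizing the code. */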
43128 /* Output assembler code to FILE to increment profiler label # LABELNO
43129 for profiling a function entry. */
43130 void
43131 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43133 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43134 : MCOUNT_NAME);
43135 if (TARGET_64BIT)
43137 #ifndef NO_PROFILE_COUNTERS
43138 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43139 #endif
43141 if (!TARGET_PECOFF && flag_pic)
43142 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43143 else
43144 x86_print_call_or_nop (file, mcount_name);
43146 else if (flag_pic)
43148 #ifndef NO_PROFILE_COUNTERS
43149 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43150 LPREFIX, labelno);
43151 #endif
43152 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43154 else
43156 #ifndef NO_PROFILE_COUNTERS
43157 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43158 LPREFIX, labelno);
43159 #endif
43160 x86_print_call_or_nop (file, mcount_name);
43163 if (flag_record_mcount)
43165 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43166 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43167 fprintf (file, "\t.previous\n");
43171 /* We don't have exact information about the insn sizes, but we may assume
43172 quite safely that we are informed about all 1 byte insns and memory
43173 address sizes. This is enough to eliminate unnecessary padding in
43174 99% of cases. */
43176 static int
43177 min_insn_size (rtx_insn *insn)
43179 int l = 0, len;
43181 if (!INSN_P (insn) || !active_insn_p (insn))
43182 return 0;
43184 /* Discard alignments we've emitted and jump instructions. */
43185 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43186 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43187 return 0;
43189 /* Important case - calls are always 5 bytes.
43190 It is common to have many calls in a row. */
43191 if (CALL_P (insn)
43192 && symbolic_reference_mentioned_p (PATTERN (insn))
43193 && !SIBLING_CALL_P (insn))
43194 return 5;
43195 len = get_attr_length (insn);
43196 if (len <= 1)
43197 return 1;
43199 /* For normal instructions we rely on get_attr_length being exact,
43200 with a few exceptions. */
43201 if (!JUMP_P (insn))
43203 enum attr_type type = get_attr_type (insn);
43205 switch (type)
43207 case TYPE_MULTI:
43208 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43209 || asm_noperands (PATTERN (insn)) >= 0)
43210 return 0;
43211 break;
43212 case TYPE_OTHER:
43213 case TYPE_FCMP:
43214 break;
43215 default:
43216 /* Otherwise trust get_attr_length. */
43217 return len;
43220 l = get_attr_length_address (insn);
43221 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43222 l = 4;
43224 if (l)
43225 return 1+l;
43226 else
43227 return 2;
43230 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43232 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43233 16 byte window. */
43235 static void
43236 ix86_avoid_jump_mispredicts (void)
43238 rtx_insn *insn, *start = get_insns ();
43239 int nbytes = 0, njumps = 0;
43240 bool isjump = false;
43242 /* Look for all minimal intervals of instructions containing 4 jumps.
43243 The intervals are bounded by START and INSN. NBYTES is the total
43244 size of instructions in the interval including INSN and not including
43245 START. When NBYTES is smaller than 16 bytes, it is possible
43246 that the end of START and the end of INSN land in the same 16 byte page.
43248 The smallest offset in the page INSN can start is the case where START
43249 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43250 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43252 Don't consider asm goto as a jump: while it can contain a jump, it doesn't
43253 have to; control transfer to the label(s) can be performed through other
43254 means, and we also estimate the minimum length of all asm stmts as 0. */
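/* Illustrative: four 2-byte conditional jumps in a row span only 8 bytes
and could all land in one 16 byte window; the loop below detects this and
pads so that at most three of them share a window. */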
43255 for (insn = start; insn; insn = NEXT_INSN (insn))
43257 int min_size;
43259 if (LABEL_P (insn))
43261 int align = label_to_alignment (insn);
43262 int max_skip = label_to_max_skip (insn);
43264 if (max_skip > 15)
43265 max_skip = 15;
43266 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43267 already in the current 16 byte page, because otherwise
43268 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43269 bytes to reach 16 byte boundary. */
43270 if (align <= 0
43271 || (align <= 3 && max_skip != (1 << align) - 1))
43272 max_skip = 0;
43273 if (dump_file)
43274 fprintf (dump_file, "Label %i with max_skip %i\n",
43275 INSN_UID (insn), max_skip);
43276 if (max_skip)
43278 while (nbytes + max_skip >= 16)
43280 start = NEXT_INSN (start);
43281 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43282 || CALL_P (start))
43283 njumps--, isjump = true;
43284 else
43285 isjump = false;
43286 nbytes -= min_insn_size (start);
43289 continue;
43292 min_size = min_insn_size (insn);
43293 nbytes += min_size;
43294 if (dump_file)
43295 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43296 INSN_UID (insn), min_size);
43297 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43298 || CALL_P (insn))
43299 njumps++;
43300 else
43301 continue;
43303 while (njumps > 3)
43305 start = NEXT_INSN (start);
43306 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43307 || CALL_P (start))
43308 njumps--, isjump = true;
43309 else
43310 isjump = false;
43311 nbytes -= min_insn_size (start);
43313 gcc_assert (njumps >= 0);
43314 if (dump_file)
43315 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43316 INSN_UID (start), INSN_UID (insn), nbytes);
43318 if (njumps == 3 && isjump && nbytes < 16)
43320 int padsize = 15 - nbytes + min_insn_size (insn);
43322 if (dump_file)
43323 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43324 INSN_UID (insn), padsize);
43325 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43329 #endif
43331 /* AMD Athlon works faster
43332 when RET is not the destination of a conditional jump or directly preceded
43333 by another jump instruction. We avoid the penalty by inserting a NOP just
43334 before the RET instructions in such cases. */
43335 static void
43336 ix86_pad_returns (void)
43338 edge e;
43339 edge_iterator ei;
43341 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43343 basic_block bb = e->src;
43344 rtx_insn *ret = BB_END (bb);
43345 rtx_insn *prev;
43346 bool replace = false;
43348 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43349 || optimize_bb_for_size_p (bb))
43350 continue;
43351 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43352 if (active_insn_p (prev) || LABEL_P (prev))
43353 break;
43354 if (prev && LABEL_P (prev))
43356 edge e;
43357 edge_iterator ei;
43359 FOR_EACH_EDGE (e, ei, bb->preds)
43360 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43361 && !(e->flags & EDGE_FALLTHRU))
43363 replace = true;
43364 break;
43367 if (!replace)
43369 prev = prev_active_insn (ret);
43370 if (prev
43371 && ((JUMP_P (prev) && any_condjump_p (prev))
43372 || CALL_P (prev)))
43373 replace = true;
43374 /* Empty functions get a branch mispredict even when
43375 the jump destination is not visible to us. */
43376 if (!prev && !optimize_function_for_size_p (cfun))
43377 replace = true;
43379 if (replace)
43381 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43382 delete_insn (ret);
43387 /* Count the minimum number of instructions in BB. Return 4 if the
43388 number of instructions >= 4. */
43390 static int
43391 ix86_count_insn_bb (basic_block bb)
43393 rtx_insn *insn;
43394 int insn_count = 0;
43396 /* Count number of instructions in this block. Return 4 if the number
43397 of instructions >= 4. */
43398 FOR_BB_INSNS (bb, insn)
43400 /* This only happens in exit blocks. */
43401 if (JUMP_P (insn)
43402 && ANY_RETURN_P (PATTERN (insn)))
43403 break;
43405 if (NONDEBUG_INSN_P (insn)
43406 && GET_CODE (PATTERN (insn)) != USE
43407 && GET_CODE (PATTERN (insn)) != CLOBBER)
43409 insn_count++;
43410 if (insn_count >= 4)
43411 return insn_count;
43415 return insn_count;
43419 /* Count the minimum number of instructions in code path in BB.
43420 Return 4 if the number of instructions >= 4. */
43422 static int
43423 ix86_count_insn (basic_block bb)
43425 edge e;
43426 edge_iterator ei;
43427 int min_prev_count;
43429 /* Only bother counting instructions along paths with no
43430 more than 2 basic blocks between entry and exit. Given
43431 that BB has an edge to exit, determine if a predecessor
43432 of BB has an edge from entry. If so, compute the number
43433 of instructions in the predecessor block. If there
43434 happen to be multiple such blocks, compute the minimum. */
43435 min_prev_count = 4;
43436 FOR_EACH_EDGE (e, ei, bb->preds)
43438 edge prev_e;
43439 edge_iterator prev_ei;
43441 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43443 min_prev_count = 0;
43444 break;
43446 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43448 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43450 int count = ix86_count_insn_bb (e->src);
43451 if (count < min_prev_count)
43452 min_prev_count = count;
43453 break;
43458 if (min_prev_count < 4)
43459 min_prev_count += ix86_count_insn_bb (bb);
43461 return min_prev_count;
43464 /* Pad short function to 4 instructions. */
43466 static void
43467 ix86_pad_short_function (void)
43469 edge e;
43470 edge_iterator ei;
43472 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43474 rtx_insn *ret = BB_END (e->src);
43475 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43477 int insn_count = ix86_count_insn (e->src);
43479 /* Pad short function. */
43480 if (insn_count < 4)
43482 rtx_insn *insn = ret;
43484 /* Find epilogue. */
43485 while (insn
43486 && (!NOTE_P (insn)
43487 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43488 insn = PREV_INSN (insn);
43490 if (!insn)
43491 insn = ret;
43493 /* Two NOPs count as one instruction. */
43494 insn_count = 2 * (4 - insn_count);
43495 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
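/* Example: a function body with only 2 countable insns receives
2 * (4 - 2) = 4 NOPs just before the epilogue, bringing it up to the
4-instruction minimum under the two-NOPs-per-instruction rule above. */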
43501 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43502 the epilogue, the Windows system unwinder will apply epilogue logic and
43503 produce incorrect offsets. This can be avoided by adding a nop between
43504 the last insn that can throw and the first insn of the epilogue. */
43506 static void
43507 ix86_seh_fixup_eh_fallthru (void)
43509 edge e;
43510 edge_iterator ei;
43512 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43514 rtx_insn *insn, *next;
43516 /* Find the beginning of the epilogue. */
43517 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43518 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43519 break;
43520 if (insn == NULL)
43521 continue;
43523 /* We only care about preceding insns that can throw. */
43524 insn = prev_active_insn (insn);
43525 if (insn == NULL || !can_throw_internal (insn))
43526 continue;
43528 /* Do not separate calls from their debug information. */
43529 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43530 if (NOTE_P (next)
43531 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43532 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43533 insn = next;
43534 else
43535 break;
43537 emit_insn_after (gen_nops (const1_rtx), insn);
43541 /* Implement machine specific optimizations. We implement padding of returns
43542 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
43543 static void
43544 ix86_reorg (void)
43546 /* We are freeing block_for_insn in the toplev to keep compatibility
43547 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43548 compute_bb_for_insn ();
43550 if (TARGET_SEH && current_function_has_exception_handlers ())
43551 ix86_seh_fixup_eh_fallthru ();
43553 if (optimize && optimize_function_for_speed_p (cfun))
43555 if (TARGET_PAD_SHORT_FUNCTION)
43556 ix86_pad_short_function ();
43557 else if (TARGET_PAD_RETURNS)
43558 ix86_pad_returns ();
43559 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43560 if (TARGET_FOUR_JUMP_LIMIT)
43561 ix86_avoid_jump_mispredicts ();
43562 #endif
43566 /* Return nonzero when a QImode register that must be represented via a REX
43567 prefix is used. */
43568 bool
43569 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43571 int i;
43572 extract_insn_cached (insn);
43573 for (i = 0; i < recog_data.n_operands; i++)
43574 if (GENERAL_REG_P (recog_data.operand[i])
43575 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43576 return true;
43577 return false;
43580 /* Return true when INSN mentions a register that must be encoded using a
43581 REX prefix. */
43582 bool
43583 x86_extended_reg_mentioned_p (rtx insn)
43585 subrtx_iterator::array_type array;
43586 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43588 const_rtx x = *iter;
43589 if (REG_P (x)
43590 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43591 return true;
43593 return false;
43596 /* If profitable, negate (without causing overflow) integer constant
43597 of mode MODE at location LOC. Return true in this case. */
43598 bool
43599 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43601 HOST_WIDE_INT val;
43603 if (!CONST_INT_P (*loc))
43604 return false;
43606 switch (mode)
43608 case DImode:
43609 /* DImode x86_64 constants must fit in 32 bits. */
43610 gcc_assert (x86_64_immediate_operand (*loc, mode));
43612 mode = SImode;
43613 break;
43615 case SImode:
43616 case HImode:
43617 case QImode:
43618 break;
43620 default:
43621 gcc_unreachable ();
43624 /* Avoid overflows. */
43625 if (mode_signbit_p (mode, *loc))
43626 return false;
43628 val = INTVAL (*loc);
43630 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43631 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
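/* Illustrative: "x + (-4)" is emitted as the prettier "subl $4, %eax", and
"x + 128" becomes "subl $-128, %eax", since -128 fits in a sign-extended
8-bit immediate while +128 does not. */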
43632 if ((val < 0 && val != -128)
43633 || val == 128)
43635 *loc = GEN_INT (-val);
43636 return true;
43639 return false;
43642 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43643 optabs would emit if we didn't have TFmode patterns. */
43645 void
43646 x86_emit_floatuns (rtx operands[2])
43648 rtx_code_label *neglab, *donelab;
43649 rtx i0, i1, f0, in, out;
43650 machine_mode mode, inmode;
43652 inmode = GET_MODE (operands[1]);
43653 gcc_assert (inmode == SImode || inmode == DImode);
43655 out = operands[0];
43656 in = force_reg (inmode, operands[1]);
43657 mode = GET_MODE (out);
43658 neglab = gen_label_rtx ();
43659 donelab = gen_label_rtx ();
43660 f0 = gen_reg_rtx (mode);
43662 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43664 expand_float (out, in, 0);
43666 emit_jump_insn (gen_jump (donelab));
43667 emit_barrier ();
43669 emit_label (neglab);
43671 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43672 1, OPTAB_DIRECT);
43673 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43674 1, OPTAB_DIRECT);
43675 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43677 expand_float (f0, i0, 0);
43679 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43681 emit_label (donelab);
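/* In C-like pseudocode the negative branch above computes
out = (FP) ((in >> 1) | (in & 1)) * 2;
halving keeps the value in signed range for the conversion, and OR-ing the
dropped low bit back in preserves correct rounding before the final
doubling. */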
43684 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43685 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43686 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43687 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43689 /* Get a vector mode of the same size as the original but with elements
43690 twice as wide. This is only guaranteed to apply to integral vectors. */
43692 static inline machine_mode
43693 get_mode_wider_vector (machine_mode o)
43695 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43696 machine_mode n = GET_MODE_WIDER_MODE (o);
43697 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43698 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43699 return n;
43702 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43703 fill TARGET with VAL via vec_duplicate. */
43705 static bool
43706 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43708 bool ok;
43709 rtx_insn *insn;
43710 rtx dup;
43712 /* First attempt to recognize VAL as-is. */
43713 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43714 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43715 if (recog_memoized (insn) < 0)
43717 rtx_insn *seq;
43718 /* If that fails, force VAL into a register. */
43720 start_sequence ();
43721 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43722 seq = get_insns ();
43723 end_sequence ();
43724 if (seq)
43725 emit_insn_before (seq, insn);
43727 ok = recog_memoized (insn) >= 0;
43728 gcc_assert (ok);
43730 return true;
43733 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43734 with all elements equal to VAR. Return true if successful. */
43736 static bool
43737 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43738 rtx target, rtx val)
43740 bool ok;
43742 switch (mode)
43744 case V2SImode:
43745 case V2SFmode:
43746 if (!mmx_ok)
43747 return false;
43748 /* FALLTHRU */
43750 case V4DFmode:
43751 case V4DImode:
43752 case V8SFmode:
43753 case V8SImode:
43754 case V2DFmode:
43755 case V2DImode:
43756 case V4SFmode:
43757 case V4SImode:
43758 case V16SImode:
43759 case V8DImode:
43760 case V16SFmode:
43761 case V8DFmode:
43762 return ix86_vector_duplicate_value (mode, target, val);
43764 case V4HImode:
43765 if (!mmx_ok)
43766 return false;
43767 if (TARGET_SSE || TARGET_3DNOW_A)
43769 rtx x;
43771 val = gen_lowpart (SImode, val);
43772 x = gen_rtx_TRUNCATE (HImode, val);
43773 x = gen_rtx_VEC_DUPLICATE (mode, x);
43774 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43775 return true;
43777 goto widen;
43779 case V8QImode:
43780 if (!mmx_ok)
43781 return false;
43782 goto widen;
43784 case V8HImode:
43785 if (TARGET_AVX2)
43786 return ix86_vector_duplicate_value (mode, target, val);
43788 if (TARGET_SSE2)
43790 struct expand_vec_perm_d dperm;
43791 rtx tmp1, tmp2;
43793 permute:
43794 memset (&dperm, 0, sizeof (dperm));
43795 dperm.target = target;
43796 dperm.vmode = mode;
43797 dperm.nelt = GET_MODE_NUNITS (mode);
43798 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43799 dperm.one_operand_p = true;
43801 /* Extend to SImode using a paradoxical SUBREG. */
43802 tmp1 = gen_reg_rtx (SImode);
43803 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43805 /* Insert the SImode value as low element of a V4SImode vector. */
43806 tmp2 = gen_reg_rtx (V4SImode);
43807 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43808 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43810 ok = (expand_vec_perm_1 (&dperm)
43811 || expand_vec_perm_broadcast_1 (&dperm));
43812 gcc_assert (ok);
43813 return ok;
43815 goto widen;
43817 case V16QImode:
43818 if (TARGET_AVX2)
43819 return ix86_vector_duplicate_value (mode, target, val);
43821 if (TARGET_SSE2)
43822 goto permute;
43823 goto widen;
43825 widen:
43826 /* Replicate the value once into the next wider mode and recurse. */
43828 machine_mode smode, wsmode, wvmode;
43829 rtx x;
43831 smode = GET_MODE_INNER (mode);
43832 wvmode = get_mode_wider_vector (mode);
43833 wsmode = GET_MODE_INNER (wvmode);
43835 val = convert_modes (wsmode, smode, val, true);
43836 x = expand_simple_binop (wsmode, ASHIFT, val,
43837 GEN_INT (GET_MODE_BITSIZE (smode)),
43838 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43839 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43841 x = gen_reg_rtx (wvmode);
43842 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43843 gcc_assert (ok);
43844 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43845 return ok;
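/* Illustrative: to broadcast a QImode value B this path first forms the
HImode value (B << 8) | B and recurses on the vector mode with half as
many, twice-as-wide elements, until a directly supported mode is reached. */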
43848 case V16HImode:
43849 case V32QImode:
43850 if (TARGET_AVX2)
43851 return ix86_vector_duplicate_value (mode, target, val);
43852 else
43854 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43855 rtx x = gen_reg_rtx (hvmode);
43857 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43858 gcc_assert (ok);
43860 x = gen_rtx_VEC_CONCAT (mode, x, x);
43861 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43863 return true;
43865 case V64QImode:
43866 case V32HImode:
43867 if (TARGET_AVX512BW)
43868 return ix86_vector_duplicate_value (mode, target, val);
43869 else
43871 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43872 rtx x = gen_reg_rtx (hvmode);
43874 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43875 gcc_assert (ok);
43877 x = gen_rtx_VEC_CONCAT (mode, x, x);
43878 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43880 return true;
43882 default:
43883 return false;
43887 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43888 whose ONE_VAR element is VAR, and other elements are zero. Return true
43889 if successful. */
43891 static bool
43892 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43893 rtx target, rtx var, int one_var)
43895 machine_mode vsimode;
43896 rtx new_target;
43897 rtx x, tmp;
43898 bool use_vector_set = false;
43900 switch (mode)
43902 case V2DImode:
43903 /* For SSE4.1, we normally use vector set. But if the second
43904 element is zero and inter-unit moves are OK, we use movq
43905 instead. */
43906 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43907 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43908 && one_var == 0));
43909 break;
43910 case V16QImode:
43911 case V4SImode:
43912 case V4SFmode:
43913 use_vector_set = TARGET_SSE4_1;
43914 break;
43915 case V8HImode:
43916 use_vector_set = TARGET_SSE2;
43917 break;
43918 case V4HImode:
43919 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43920 break;
43921 case V32QImode:
43922 case V16HImode:
43923 case V8SImode:
43924 case V8SFmode:
43925 case V4DFmode:
43926 use_vector_set = TARGET_AVX;
43927 break;
43928 case V4DImode:
43929 /* Use ix86_expand_vector_set in 64bit mode only. */
43930 use_vector_set = TARGET_AVX && TARGET_64BIT;
43931 break;
43932 default:
43933 break;
43936 if (use_vector_set)
43938 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43939 var = force_reg (GET_MODE_INNER (mode), var);
43940 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43941 return true;
43944 switch (mode)
43946 case V2SFmode:
43947 case V2SImode:
43948 if (!mmx_ok)
43949 return false;
43950 /* FALLTHRU */
43952 case V2DFmode:
43953 case V2DImode:
43954 if (one_var != 0)
43955 return false;
43956 var = force_reg (GET_MODE_INNER (mode), var);
43957 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43958 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43959 return true;
43961 case V4SFmode:
43962 case V4SImode:
43963 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43964 new_target = gen_reg_rtx (mode);
43965 else
43966 new_target = target;
43967 var = force_reg (GET_MODE_INNER (mode), var);
43968 x = gen_rtx_VEC_DUPLICATE (mode, var);
43969 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43970 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43971 if (one_var != 0)
43973 /* We need to shuffle the value to the correct position, so
43974 create a new pseudo to store the intermediate result. */
43976 /* With SSE2, we can use the integer shuffle insns. */
43977 if (mode != V4SFmode && TARGET_SSE2)
43979 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43980 const1_rtx,
43981 GEN_INT (one_var == 1 ? 0 : 1),
43982 GEN_INT (one_var == 2 ? 0 : 1),
43983 GEN_INT (one_var == 3 ? 0 : 1)));
43984 if (target != new_target)
43985 emit_move_insn (target, new_target);
43986 return true;
43989 /* Otherwise convert the intermediate result to V4SFmode and
43990 use the SSE1 shuffle instructions. */
43991 if (mode != V4SFmode)
43993 tmp = gen_reg_rtx (V4SFmode);
43994 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43996 else
43997 tmp = new_target;
43999 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44000 const1_rtx,
44001 GEN_INT (one_var == 1 ? 0 : 1),
44002 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44003 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44005 if (mode != V4SFmode)
44006 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44007 else if (tmp != target)
44008 emit_move_insn (target, tmp);
44010 else if (target != new_target)
44011 emit_move_insn (target, new_target);
44012 return true;
44014 case V8HImode:
44015 case V16QImode:
44016 vsimode = V4SImode;
44017 goto widen;
44018 case V4HImode:
44019 case V8QImode:
44020 if (!mmx_ok)
44021 return false;
44022 vsimode = V2SImode;
44023 goto widen;
44024 widen:
44025 if (one_var != 0)
44026 return false;
44028 /* Zero extend the variable element to SImode and recurse. */
44029 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44031 x = gen_reg_rtx (vsimode);
44032 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44033 var, one_var))
44034 gcc_unreachable ();
44036 emit_move_insn (target, gen_lowpart (mode, x));
44037 return true;
44039 default:
44040 return false;
44044 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44045 consisting of the values in VALS. It is known that all elements
44046 except ONE_VAR are constants. Return true if successful. */
44048 static bool
44049 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44050 rtx target, rtx vals, int one_var)
44052 rtx var = XVECEXP (vals, 0, one_var);
44053 machine_mode wmode;
44054 rtx const_vec, x;
44056 const_vec = copy_rtx (vals);
44057 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44058 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44060 switch (mode)
44062 case V2DFmode:
44063 case V2DImode:
44064 case V2SFmode:
44065 case V2SImode:
44066 /* For the two element vectors, it's just as easy to use
44067 the general case. */
44068 return false;
44070 case V4DImode:
44071 /* Use ix86_expand_vector_set in 64bit mode only. */
44072 if (!TARGET_64BIT)
44073 return false;
44074 case V4DFmode:
44075 case V8SFmode:
44076 case V8SImode:
44077 case V16HImode:
44078 case V32QImode:
44079 case V4SFmode:
44080 case V4SImode:
44081 case V8HImode:
44082 case V4HImode:
44083 break;
44085 case V16QImode:
44086 if (TARGET_SSE4_1)
44087 break;
44088 wmode = V8HImode;
44089 goto widen;
44090 case V8QImode:
44091 wmode = V4HImode;
44092 goto widen;
44093 widen:
44094 /* There's no way to set one QImode entry easily. Combine
44095 the variable value with its adjacent constant value, and
44096 promote to an HImode set. */
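/* For example, with V16QImode and ONE_VAR == 5 the variable byte is
   combined with the constant in element 4: the pair becomes an HImode
   value with the variable byte in bits 8-15, which is then stored into
   element ONE_VAR >> 1 == 2 of the V8HImode image of the vector.  */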
44097 x = XVECEXP (vals, 0, one_var ^ 1);
44098 if (one_var & 1)
44100 var = convert_modes (HImode, QImode, var, true);
44101 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44102 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44103 x = GEN_INT (INTVAL (x) & 0xff);
44105 else
44107 var = convert_modes (HImode, QImode, var, true);
44108 x = gen_int_mode (INTVAL (x) << 8, HImode);
44110 if (x != const0_rtx)
44111 var = expand_simple_binop (HImode, IOR, var, x, var,
44112 1, OPTAB_LIB_WIDEN);
44114 x = gen_reg_rtx (wmode);
44115 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44116 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44118 emit_move_insn (target, gen_lowpart (mode, x));
44119 return true;
44121 default:
44122 return false;
44125 emit_move_insn (target, const_vec);
44126 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44127 return true;
44130 /* A subroutine of ix86_expand_vector_init_general. Use vector
44131 concatenate to handle the most general case: all values variable,
44132 and none identical. */
44134 static void
44135 ix86_expand_vector_init_concat (machine_mode mode,
44136 rtx target, rtx *ops, int n)
44138 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44139 rtx first[16], second[8], third[4];
44140 rtvec v;
44141 int i, j;
44143 switch (n)
44145 case 2:
44146 switch (mode)
44148 case V16SImode:
44149 cmode = V8SImode;
44150 break;
44151 case V16SFmode:
44152 cmode = V8SFmode;
44153 break;
44154 case V8DImode:
44155 cmode = V4DImode;
44156 break;
44157 case V8DFmode:
44158 cmode = V4DFmode;
44159 break;
44160 case V8SImode:
44161 cmode = V4SImode;
44162 break;
44163 case V8SFmode:
44164 cmode = V4SFmode;
44165 break;
44166 case V4DImode:
44167 cmode = V2DImode;
44168 break;
44169 case V4DFmode:
44170 cmode = V2DFmode;
44171 break;
44172 case V4SImode:
44173 cmode = V2SImode;
44174 break;
44175 case V4SFmode:
44176 cmode = V2SFmode;
44177 break;
44178 case V2DImode:
44179 cmode = DImode;
44180 break;
44181 case V2SImode:
44182 cmode = SImode;
44183 break;
44184 case V2DFmode:
44185 cmode = DFmode;
44186 break;
44187 case V2SFmode:
44188 cmode = SFmode;
44189 break;
44190 default:
44191 gcc_unreachable ();
44194 if (!register_operand (ops[1], cmode))
44195 ops[1] = force_reg (cmode, ops[1]);
44196 if (!register_operand (ops[0], cmode))
44197 ops[0] = force_reg (cmode, ops[0]);
44198 emit_insn (gen_rtx_SET (VOIDmode, target,
44199 gen_rtx_VEC_CONCAT (mode, ops[0],
44200 ops[1])));
44201 break;
44203 case 4:
44204 switch (mode)
44206 case V4DImode:
44207 cmode = V2DImode;
44208 break;
44209 case V4DFmode:
44210 cmode = V2DFmode;
44211 break;
44212 case V4SImode:
44213 cmode = V2SImode;
44214 break;
44215 case V4SFmode:
44216 cmode = V2SFmode;
44217 break;
44218 default:
44219 gcc_unreachable ();
44221 goto half;
44223 case 8:
44224 switch (mode)
44226 case V8DImode:
44227 cmode = V2DImode;
44228 hmode = V4DImode;
44229 break;
44230 case V8DFmode:
44231 cmode = V2DFmode;
44232 hmode = V4DFmode;
44233 break;
44234 case V8SImode:
44235 cmode = V2SImode;
44236 hmode = V4SImode;
44237 break;
44238 case V8SFmode:
44239 cmode = V2SFmode;
44240 hmode = V4SFmode;
44241 break;
44242 default:
44243 gcc_unreachable ();
44245 goto half;
44247 case 16:
44248 switch (mode)
44250 case V16SImode:
44251 cmode = V2SImode;
44252 hmode = V4SImode;
44253 gmode = V8SImode;
44254 break;
44255 case V16SFmode:
44256 cmode = V2SFmode;
44257 hmode = V4SFmode;
44258 gmode = V8SFmode;
44259 break;
44260 default:
44261 gcc_unreachable ();
44263 goto half;
44265 half:
44266 /* FIXME: We process inputs backward to help RA. PR 36222. */
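/* For example, a V8SFmode initialization with eight variable elements
   builds four V2SFmode pairs in FIRST[], combines them into two
   V4SFmode halves in SECOND[], and finally emits a single V8SFmode
   VEC_CONCAT of those halves.  */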
44267 i = n - 1;
44268 j = (n >> 1) - 1;
44269 for (; i > 0; i -= 2, j--)
44271 first[j] = gen_reg_rtx (cmode);
44272 v = gen_rtvec (2, ops[i - 1], ops[i]);
44273 ix86_expand_vector_init (false, first[j],
44274 gen_rtx_PARALLEL (cmode, v));
44277 n >>= 1;
44278 if (n > 4)
44280 gcc_assert (hmode != VOIDmode);
44281 gcc_assert (gmode != VOIDmode);
44282 for (i = j = 0; i < n; i += 2, j++)
44284 second[j] = gen_reg_rtx (hmode);
44285 ix86_expand_vector_init_concat (hmode, second [j],
44286 &first [i], 2);
44288 n >>= 1;
44289 for (i = j = 0; i < n; i += 2, j++)
44291 third[j] = gen_reg_rtx (gmode);
44292 ix86_expand_vector_init_concat (gmode, third[j],
44293 &second[i], 2);
44295 n >>= 1;
44296 ix86_expand_vector_init_concat (mode, target, third, n);
44298 else if (n > 2)
44300 gcc_assert (hmode != VOIDmode);
44301 for (i = j = 0; i < n; i += 2, j++)
44303 second[j] = gen_reg_rtx (hmode);
44304 ix86_expand_vector_init_concat (hmode, second [j],
44305 &first [i], 2);
44307 n >>= 1;
44308 ix86_expand_vector_init_concat (mode, target, second, n);
44310 else
44311 ix86_expand_vector_init_concat (mode, target, first, n);
44312 break;
44314 default:
44315 gcc_unreachable ();
44319 /* A subroutine of ix86_expand_vector_init_general. Use vector
44320 interleave to handle the most general case: all values variable,
44321 and none identical. */
44323 static void
44324 ix86_expand_vector_init_interleave (machine_mode mode,
44325 rtx target, rtx *ops, int n)
44327 machine_mode first_imode, second_imode, third_imode, inner_mode;
44328 int i, j;
44329 rtx op0, op1;
44330 rtx (*gen_load_even) (rtx, rtx, rtx);
44331 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44332 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44334 switch (mode)
44336 case V8HImode:
44337 gen_load_even = gen_vec_setv8hi;
44338 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44339 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44340 inner_mode = HImode;
44341 first_imode = V4SImode;
44342 second_imode = V2DImode;
44343 third_imode = VOIDmode;
44344 break;
44345 case V16QImode:
44346 gen_load_even = gen_vec_setv16qi;
44347 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44348 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44349 inner_mode = QImode;
44350 first_imode = V8HImode;
44351 second_imode = V4SImode;
44352 third_imode = V2DImode;
44353 break;
44354 default:
44355 gcc_unreachable ();
44358 for (i = 0; i < n; i++)
44360 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44361 op0 = gen_reg_rtx (SImode);
44362 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44364 /* Insert the SImode value as low element of V4SImode vector. */
44365 op1 = gen_reg_rtx (V4SImode);
44366 op0 = gen_rtx_VEC_MERGE (V4SImode,
44367 gen_rtx_VEC_DUPLICATE (V4SImode,
44368 op0),
44369 CONST0_RTX (V4SImode),
44370 const1_rtx);
44371 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44373 /* Cast the V4SImode vector back to a vector in the original mode. */
44374 op0 = gen_reg_rtx (mode);
44375 emit_move_insn (op0, gen_lowpart (mode, op1));
44377 /* Load even elements into the second position. */
44378 emit_insn (gen_load_even (op0,
44379 force_reg (inner_mode,
44380 ops [i + i + 1]),
44381 const1_rtx));
44383 /* Cast vector to FIRST_IMODE vector. */
44384 ops[i] = gen_reg_rtx (first_imode);
44385 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44388 /* Interleave low FIRST_IMODE vectors. */
44389 for (i = j = 0; i < n; i += 2, j++)
44391 op0 = gen_reg_rtx (first_imode);
44392 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44394 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44395 ops[j] = gen_reg_rtx (second_imode);
44396 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44399 /* Interleave low SECOND_IMODE vectors. */
44400 switch (second_imode)
44402 case V4SImode:
44403 for (i = j = 0; i < n / 2; i += 2, j++)
44405 op0 = gen_reg_rtx (second_imode);
44406 emit_insn (gen_interleave_second_low (op0, ops[i],
44407 ops[i + 1]));
44409 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44410 vector. */
44411 ops[j] = gen_reg_rtx (third_imode);
44412 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44414 second_imode = V2DImode;
44415 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44416 /* FALLTHRU */
44418 case V2DImode:
44419 op0 = gen_reg_rtx (second_imode);
44420 emit_insn (gen_interleave_second_low (op0, ops[0],
44421 ops[1]));
44423 /* Cast the SECOND_IMODE vector back to a vector in the original
44424 mode. */
44425 emit_insn (gen_rtx_SET (VOIDmode, target,
44426 gen_lowpart (mode, op0)));
44427 break;
44429 default:
44430 gcc_unreachable ();
44434 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44435 all values variable, and none identical. */
44437 static void
44438 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44439 rtx target, rtx vals)
44441 rtx ops[64], op0, op1, op2, op3, op4, op5;
44442 machine_mode half_mode = VOIDmode;
44443 machine_mode quarter_mode = VOIDmode;
44444 int n, i;
44446 switch (mode)
44448 case V2SFmode:
44449 case V2SImode:
44450 if (!mmx_ok && !TARGET_SSE)
44451 break;
44452 /* FALLTHRU */
44454 case V16SImode:
44455 case V16SFmode:
44456 case V8DFmode:
44457 case V8DImode:
44458 case V8SFmode:
44459 case V8SImode:
44460 case V4DFmode:
44461 case V4DImode:
44462 case V4SFmode:
44463 case V4SImode:
44464 case V2DFmode:
44465 case V2DImode:
44466 n = GET_MODE_NUNITS (mode);
44467 for (i = 0; i < n; i++)
44468 ops[i] = XVECEXP (vals, 0, i);
44469 ix86_expand_vector_init_concat (mode, target, ops, n);
44470 return;
44472 case V32QImode:
44473 half_mode = V16QImode;
44474 goto half;
44476 case V16HImode:
44477 half_mode = V8HImode;
44478 goto half;
44480 half:
44481 n = GET_MODE_NUNITS (mode);
44482 for (i = 0; i < n; i++)
44483 ops[i] = XVECEXP (vals, 0, i);
44484 op0 = gen_reg_rtx (half_mode);
44485 op1 = gen_reg_rtx (half_mode);
44486 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44487 n >> 2);
44488 ix86_expand_vector_init_interleave (half_mode, op1,
44489 &ops [n >> 1], n >> 2);
44490 emit_insn (gen_rtx_SET (VOIDmode, target,
44491 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44492 return;
44494 case V64QImode:
44495 quarter_mode = V16QImode;
44496 half_mode = V32QImode;
44497 goto quarter;
44499 case V32HImode:
44500 quarter_mode = V8HImode;
44501 half_mode = V16HImode;
44502 goto quarter;
44504 quarter:
44505 n = GET_MODE_NUNITS (mode);
44506 for (i = 0; i < n; i++)
44507 ops[i] = XVECEXP (vals, 0, i);
44508 op0 = gen_reg_rtx (quarter_mode);
44509 op1 = gen_reg_rtx (quarter_mode);
44510 op2 = gen_reg_rtx (quarter_mode);
44511 op3 = gen_reg_rtx (quarter_mode);
44512 op4 = gen_reg_rtx (half_mode);
44513 op5 = gen_reg_rtx (half_mode);
44514 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44515 n >> 3);
44516 ix86_expand_vector_init_interleave (quarter_mode, op1,
44517 &ops [n >> 2], n >> 3);
44518 ix86_expand_vector_init_interleave (quarter_mode, op2,
44519 &ops [n >> 1], n >> 3);
44520 ix86_expand_vector_init_interleave (quarter_mode, op3,
44521 &ops [(n >> 1) | (n >> 2)], n >> 3);
44522 emit_insn (gen_rtx_SET (VOIDmode, op4,
44523 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44524 emit_insn (gen_rtx_SET (VOIDmode, op5,
44525 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44526 emit_insn (gen_rtx_SET (VOIDmode, target,
44527 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44528 return;
44530 case V16QImode:
44531 if (!TARGET_SSE4_1)
44532 break;
44533 /* FALLTHRU */
44535 case V8HImode:
44536 if (!TARGET_SSE2)
44537 break;
44539 /* Don't use ix86_expand_vector_init_interleave if we can't
44540 move from GPR to SSE register directly. */
44541 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44542 break;
44544 n = GET_MODE_NUNITS (mode);
44545 for (i = 0; i < n; i++)
44546 ops[i] = XVECEXP (vals, 0, i);
44547 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44548 return;
44550 case V4HImode:
44551 case V8QImode:
44552 break;
44554 default:
44555 gcc_unreachable ();
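/* Any mode that breaks out of the switch above is handled generically:
   pack the elements into word_mode integers, with the lowest-indexed
   element in the least significant bits of each word, and then build
   the vector from those words.  */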
44559 int i, j, n_elts, n_words, n_elt_per_word;
44560 machine_mode inner_mode;
44561 rtx words[4], shift;
44563 inner_mode = GET_MODE_INNER (mode);
44564 n_elts = GET_MODE_NUNITS (mode);
44565 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44566 n_elt_per_word = n_elts / n_words;
44567 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44569 for (i = 0; i < n_words; ++i)
44571 rtx word = NULL_RTX;
44573 for (j = 0; j < n_elt_per_word; ++j)
44575 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44576 elt = convert_modes (word_mode, inner_mode, elt, true);
44578 if (j == 0)
44579 word = elt;
44580 else
44582 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44583 word, 1, OPTAB_LIB_WIDEN);
44584 word = expand_simple_binop (word_mode, IOR, word, elt,
44585 word, 1, OPTAB_LIB_WIDEN);
44589 words[i] = word;
44592 if (n_words == 1)
44593 emit_move_insn (target, gen_lowpart (mode, words[0]));
44594 else if (n_words == 2)
44596 rtx tmp = gen_reg_rtx (mode);
44597 emit_clobber (tmp);
44598 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44599 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44600 emit_move_insn (target, tmp);
44602 else if (n_words == 4)
44604 rtx tmp = gen_reg_rtx (V4SImode);
44605 gcc_assert (word_mode == SImode);
44606 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44607 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44608 emit_move_insn (target, gen_lowpart (mode, tmp));
44610 else
44611 gcc_unreachable ();
44615 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44616 instructions unless MMX_OK is true. */
44618 void
44619 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44621 machine_mode mode = GET_MODE (target);
44622 machine_mode inner_mode = GET_MODE_INNER (mode);
44623 int n_elts = GET_MODE_NUNITS (mode);
44624 int n_var = 0, one_var = -1;
44625 bool all_same = true, all_const_zero = true;
44626 int i;
44627 rtx x;
44629 for (i = 0; i < n_elts; ++i)
44631 x = XVECEXP (vals, 0, i);
44632 if (!(CONST_INT_P (x)
44633 || GET_CODE (x) == CONST_DOUBLE
44634 || GET_CODE (x) == CONST_FIXED))
44635 n_var++, one_var = i;
44636 else if (x != CONST0_RTX (inner_mode))
44637 all_const_zero = false;
44638 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44639 all_same = false;
44642 /* Constants are best loaded from the constant pool. */
44643 if (n_var == 0)
44645 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44646 return;
44649 /* If all values are identical, broadcast the value. */
44650 if (all_same
44651 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44652 XVECEXP (vals, 0, 0)))
44653 return;
44655 /* Values where only one field is non-constant are best loaded from
44656 the pool and overwritten via move later. */
44657 if (n_var == 1)
44659 if (all_const_zero
44660 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44661 XVECEXP (vals, 0, one_var),
44662 one_var))
44663 return;
44665 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44666 return;
44669 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44672 void
44673 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44675 machine_mode mode = GET_MODE (target);
44676 machine_mode inner_mode = GET_MODE_INNER (mode);
44677 machine_mode half_mode;
44678 bool use_vec_merge = false;
44679 rtx tmp;
44680 static rtx (*gen_extract[6][2]) (rtx, rtx)
44682 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44683 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44684 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44685 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44686 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44687 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44689 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44691 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44692 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44693 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44694 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44695 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44696 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44698 int i, j, n;
44700 switch (mode)
44702 case V2SFmode:
44703 case V2SImode:
44704 if (mmx_ok)
44706 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44707 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44708 if (elt == 0)
44709 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44710 else
44711 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44712 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44713 return;
44715 break;
44717 case V2DImode:
44718 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44719 if (use_vec_merge)
44720 break;
44722 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44723 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44724 if (elt == 0)
44725 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44726 else
44727 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44728 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44729 return;
44731 case V2DFmode:
44733 rtx op0, op1;
44735 /* For the two element vectors, we implement a VEC_CONCAT with
44736 the extraction of the other element. */
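/* E.g. setting element 1 of a V2DFmode TARGET emits
   (set target (vec_concat:V2DF (vec_select:DF target [0]) val)).  */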
44738 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44739 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44741 if (elt == 0)
44742 op0 = val, op1 = tmp;
44743 else
44744 op0 = tmp, op1 = val;
44746 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44747 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44749 return;
44751 case V4SFmode:
44752 use_vec_merge = TARGET_SSE4_1;
44753 if (use_vec_merge)
44754 break;
44756 switch (elt)
44758 case 0:
44759 use_vec_merge = true;
44760 break;
44762 case 1:
44763 /* tmp = target = A B C D */
44764 tmp = copy_to_reg (target);
44765 /* target = A A B B */
44766 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44767 /* target = X A B B */
44768 ix86_expand_vector_set (false, target, val, 0);
44769 /* target = A X C D */
44770 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44771 const1_rtx, const0_rtx,
44772 GEN_INT (2+4), GEN_INT (3+4)));
44773 return;
44775 case 2:
44776 /* tmp = target = A B C D */
44777 tmp = copy_to_reg (target);
44778 /* tmp = X B C D */
44779 ix86_expand_vector_set (false, tmp, val, 0);
44780 /* target = A B X D */
44781 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44782 const0_rtx, const1_rtx,
44783 GEN_INT (0+4), GEN_INT (3+4)));
44784 return;
44786 case 3:
44787 /* tmp = target = A B C D */
44788 tmp = copy_to_reg (target);
44789 /* tmp = X B C D */
44790 ix86_expand_vector_set (false, tmp, val, 0);
44791 /* target = A B C X */
44792 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44793 const0_rtx, const1_rtx,
44794 GEN_INT (2+4), GEN_INT (0+4)));
44795 return;
44797 default:
44798 gcc_unreachable ();
44800 break;
44802 case V4SImode:
44803 use_vec_merge = TARGET_SSE4_1;
44804 if (use_vec_merge)
44805 break;
44807 /* Element 0 handled by vec_merge below. */
44808 if (elt == 0)
44810 use_vec_merge = true;
44811 break;
44814 if (TARGET_SSE2)
44816 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44817 store into element 0, then shuffle them back. */
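/* E.g. for ELT == 2 the permutation is { 2, 1, 0, 3 }: the first
   pshufd brings element ELT to position 0, the store overwrites it,
   and repeating the same (self-inverse) permutation puts everything
   back in place.  */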
44819 rtx order[4];
44821 order[0] = GEN_INT (elt);
44822 order[1] = const1_rtx;
44823 order[2] = const2_rtx;
44824 order[3] = GEN_INT (3);
44825 order[elt] = const0_rtx;
44827 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44828 order[1], order[2], order[3]));
44830 ix86_expand_vector_set (false, target, val, 0);
44832 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44833 order[1], order[2], order[3]));
44835 else
44837 /* For SSE1, we have to reuse the V4SF code. */
44838 rtx t = gen_reg_rtx (V4SFmode);
44839 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44840 emit_move_insn (target, gen_lowpart (mode, t));
44842 return;
44844 case V8HImode:
44845 use_vec_merge = TARGET_SSE2;
44846 break;
44847 case V4HImode:
44848 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44849 break;
44851 case V16QImode:
44852 use_vec_merge = TARGET_SSE4_1;
44853 break;
44855 case V8QImode:
44856 break;
44858 case V32QImode:
44859 half_mode = V16QImode;
44860 j = 0;
44861 n = 16;
44862 goto half;
44864 case V16HImode:
44865 half_mode = V8HImode;
44866 j = 1;
44867 n = 8;
44868 goto half;
44870 case V8SImode:
44871 half_mode = V4SImode;
44872 j = 2;
44873 n = 4;
44874 goto half;
44876 case V4DImode:
44877 half_mode = V2DImode;
44878 j = 3;
44879 n = 2;
44880 goto half;
44882 case V8SFmode:
44883 half_mode = V4SFmode;
44884 j = 4;
44885 n = 4;
44886 goto half;
44888 case V4DFmode:
44889 half_mode = V2DFmode;
44890 j = 5;
44891 n = 2;
44892 goto half;
44894 half:
44895 /* Compute offset. */
44896 i = elt / n;
44897 elt %= n;
44899 gcc_assert (i <= 1);
44901 /* Extract the half. */
44902 tmp = gen_reg_rtx (half_mode);
44903 emit_insn (gen_extract[j][i] (tmp, target));
44905 /* Put val in tmp at elt. */
44906 ix86_expand_vector_set (false, tmp, val, elt);
44908 /* Put it back. */
44909 emit_insn (gen_insert[j][i] (target, target, tmp));
44910 return;
44912 case V8DFmode:
44913 if (TARGET_AVX512F)
44915 tmp = gen_reg_rtx (mode);
44916 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44917 gen_rtx_VEC_DUPLICATE (mode, val)));
44918 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44919 force_reg (QImode, GEN_INT (1 << elt))));
44920 return;
44922 else
44923 break;
44924 case V8DImode:
44925 if (TARGET_AVX512F)
44927 tmp = gen_reg_rtx (mode);
44928 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44929 gen_rtx_VEC_DUPLICATE (mode, val)));
44930 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44931 force_reg (QImode, GEN_INT (1 << elt))));
44932 return;
44934 else
44935 break;
44936 case V16SFmode:
44937 if (TARGET_AVX512F)
44939 tmp = gen_reg_rtx (mode);
44940 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44941 gen_rtx_VEC_DUPLICATE (mode, val)));
44942 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44943 force_reg (HImode, GEN_INT (1 << elt))));
44944 return;
44946 else
44947 break;
44948 case V16SImode:
44949 if (TARGET_AVX512F)
44951 tmp = gen_reg_rtx (mode);
44952 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44953 gen_rtx_VEC_DUPLICATE (mode, val)));
44954 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44955 force_reg (HImode, GEN_INT (1 << elt))));
44956 return;
44958 else
44959 break;
44960 case V32HImode:
44961 if (TARGET_AVX512F && TARGET_AVX512BW)
44963 tmp = gen_reg_rtx (mode);
44964 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44965 gen_rtx_VEC_DUPLICATE (mode, val)));
44966 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44967 force_reg (SImode, GEN_INT (1 << elt))));
44968 return;
44970 else
44971 break;
44972 case V64QImode:
44973 if (TARGET_AVX512F && TARGET_AVX512BW)
44975 tmp = gen_reg_rtx (mode);
44976 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44977 gen_rtx_VEC_DUPLICATE (mode, val)));
44978 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44979 force_reg (DImode, GEN_INT (1 << elt))));
44980 return;
44982 else
44983 break;
44985 default:
44986 break;
44989 if (use_vec_merge)
44991 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44992 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44993 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44995 else
44997 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44999 emit_move_insn (mem, target);
45001 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45002 emit_move_insn (tmp, val);
45004 emit_move_insn (target, mem);
45008 void
45009 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45011 machine_mode mode = GET_MODE (vec);
45012 machine_mode inner_mode = GET_MODE_INNER (mode);
45013 bool use_vec_extr = false;
45014 rtx tmp;
45016 switch (mode)
45018 case V2SImode:
45019 case V2SFmode:
45020 if (!mmx_ok)
45021 break;
45022 /* FALLTHRU */
45024 case V2DFmode:
45025 case V2DImode:
45026 use_vec_extr = true;
45027 break;
45029 case V4SFmode:
45030 use_vec_extr = TARGET_SSE4_1;
45031 if (use_vec_extr)
45032 break;
45034 switch (elt)
45036 case 0:
45037 tmp = vec;
45038 break;
45040 case 1:
45041 case 3:
45042 tmp = gen_reg_rtx (mode);
45043 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45044 GEN_INT (elt), GEN_INT (elt),
45045 GEN_INT (elt+4), GEN_INT (elt+4)));
45046 break;
45048 case 2:
45049 tmp = gen_reg_rtx (mode);
45050 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45051 break;
45053 default:
45054 gcc_unreachable ();
45056 vec = tmp;
45057 use_vec_extr = true;
45058 elt = 0;
45059 break;
45061 case V4SImode:
45062 use_vec_extr = TARGET_SSE4_1;
45063 if (use_vec_extr)
45064 break;
45066 if (TARGET_SSE2)
45068 switch (elt)
45070 case 0:
45071 tmp = vec;
45072 break;
45074 case 1:
45075 case 3:
45076 tmp = gen_reg_rtx (mode);
45077 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45078 GEN_INT (elt), GEN_INT (elt),
45079 GEN_INT (elt), GEN_INT (elt)));
45080 break;
45082 case 2:
45083 tmp = gen_reg_rtx (mode);
45084 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45085 break;
45087 default:
45088 gcc_unreachable ();
45090 vec = tmp;
45091 use_vec_extr = true;
45092 elt = 0;
45094 else
45096 /* For SSE1, we have to reuse the V4SF code. */
45097 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45098 gen_lowpart (V4SFmode, vec), elt);
45099 return;
45101 break;
45103 case V8HImode:
45104 use_vec_extr = TARGET_SSE2;
45105 break;
45106 case V4HImode:
45107 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45108 break;
45110 case V16QImode:
45111 use_vec_extr = TARGET_SSE4_1;
45112 break;
45114 case V8SFmode:
45115 if (TARGET_AVX)
45117 tmp = gen_reg_rtx (V4SFmode);
45118 if (elt < 4)
45119 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45120 else
45121 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45122 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45123 return;
45125 break;
45127 case V4DFmode:
45128 if (TARGET_AVX)
45130 tmp = gen_reg_rtx (V2DFmode);
45131 if (elt < 2)
45132 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45133 else
45134 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45135 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45136 return;
45138 break;
45140 case V32QImode:
45141 if (TARGET_AVX)
45143 tmp = gen_reg_rtx (V16QImode);
45144 if (elt < 16)
45145 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45146 else
45147 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45148 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45149 return;
45151 break;
45153 case V16HImode:
45154 if (TARGET_AVX)
45156 tmp = gen_reg_rtx (V8HImode);
45157 if (elt < 8)
45158 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45159 else
45160 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45161 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45162 return;
45164 break;
45166 case V8SImode:
45167 if (TARGET_AVX)
45169 tmp = gen_reg_rtx (V4SImode);
45170 if (elt < 4)
45171 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45172 else
45173 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45174 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45175 return;
45177 break;
45179 case V4DImode:
45180 if (TARGET_AVX)
45182 tmp = gen_reg_rtx (V2DImode);
45183 if (elt < 2)
45184 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45185 else
45186 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45187 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45188 return;
45190 break;
45192 case V32HImode:
45193 if (TARGET_AVX512BW)
45195 tmp = gen_reg_rtx (V16HImode);
45196 if (elt < 16)
45197 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45198 else
45199 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45200 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45201 return;
45203 break;
45205 case V64QImode:
45206 if (TARGET_AVX512BW)
45208 tmp = gen_reg_rtx (V32QImode);
45209 if (elt < 32)
45210 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45211 else
45212 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45213 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45214 return;
45216 break;
45218 case V16SFmode:
45219 tmp = gen_reg_rtx (V8SFmode);
45220 if (elt < 8)
45221 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45222 else
45223 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45224 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45225 return;
45227 case V8DFmode:
45228 tmp = gen_reg_rtx (V4DFmode);
45229 if (elt < 4)
45230 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45231 else
45232 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45233 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45234 return;
45236 case V16SImode:
45237 tmp = gen_reg_rtx (V8SImode);
45238 if (elt < 8)
45239 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45240 else
45241 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45242 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45243 return;
45245 case V8DImode:
45246 tmp = gen_reg_rtx (V4DImode);
45247 if (elt < 4)
45248 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45249 else
45250 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45251 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45252 return;
45254 case V8QImode:
45255 /* ??? Could extract the appropriate HImode element and shift. */
45256 default:
45257 break;
45260 if (use_vec_extr)
45262 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45263 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45265 /* Let the rtl optimizers know about the zero extension performed. */
45266 if (inner_mode == QImode || inner_mode == HImode)
45268 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45269 target = gen_lowpart (SImode, target);
45272 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45274 else
45276 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45278 emit_move_insn (mem, vec);
45280 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45281 emit_move_insn (target, tmp);
45285 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45286 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45287 The upper bits of DEST are undefined, though they shouldn't cause
45288 exceptions (some bits from src or all zeros are ok). */
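/* For example, reducing a V4SFmode vector takes two calls: I == 128
   moves elements 2-3 of SRC into elements 0-1 of DEST, and I == 64
   moves element 1 into element 0; after each call the caller combines
   DEST with the previous vector, leaving the result in element 0.  */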
45290 static void
45291 emit_reduc_half (rtx dest, rtx src, int i)
45293 rtx tem, d = dest;
45294 switch (GET_MODE (src))
45296 case V4SFmode:
45297 if (i == 128)
45298 tem = gen_sse_movhlps (dest, src, src);
45299 else
45300 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45301 GEN_INT (1 + 4), GEN_INT (1 + 4));
45302 break;
45303 case V2DFmode:
45304 tem = gen_vec_interleave_highv2df (dest, src, src);
45305 break;
45306 case V16QImode:
45307 case V8HImode:
45308 case V4SImode:
45309 case V2DImode:
45310 d = gen_reg_rtx (V1TImode);
45311 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45312 GEN_INT (i / 2));
45313 break;
45314 case V8SFmode:
45315 if (i == 256)
45316 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45317 else
45318 tem = gen_avx_shufps256 (dest, src, src,
45319 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45320 break;
45321 case V4DFmode:
45322 if (i == 256)
45323 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45324 else
45325 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45326 break;
45327 case V32QImode:
45328 case V16HImode:
45329 case V8SImode:
45330 case V4DImode:
45331 if (i == 256)
45333 if (GET_MODE (dest) != V4DImode)
45334 d = gen_reg_rtx (V4DImode);
45335 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45336 gen_lowpart (V4DImode, src),
45337 const1_rtx);
45339 else
45341 d = gen_reg_rtx (V2TImode);
45342 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45343 GEN_INT (i / 2));
45345 break;
45346 case V64QImode:
45347 case V32HImode:
45348 case V16SImode:
45349 case V16SFmode:
45350 case V8DImode:
45351 case V8DFmode:
45352 if (i > 128)
45353 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45354 gen_lowpart (V16SImode, src),
45355 gen_lowpart (V16SImode, src),
45356 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45357 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45358 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45359 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45360 GEN_INT (0xC), GEN_INT (0xD),
45361 GEN_INT (0xE), GEN_INT (0xF),
45362 GEN_INT (0x10), GEN_INT (0x11),
45363 GEN_INT (0x12), GEN_INT (0x13),
45364 GEN_INT (0x14), GEN_INT (0x15),
45365 GEN_INT (0x16), GEN_INT (0x17));
45366 else
45367 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45368 gen_lowpart (V16SImode, src),
45369 GEN_INT (i == 128 ? 0x2 : 0x1),
45370 GEN_INT (0x3),
45371 GEN_INT (0x3),
45372 GEN_INT (0x3),
45373 GEN_INT (i == 128 ? 0x6 : 0x5),
45374 GEN_INT (0x7),
45375 GEN_INT (0x7),
45376 GEN_INT (0x7),
45377 GEN_INT (i == 128 ? 0xA : 0x9),
45378 GEN_INT (0xB),
45379 GEN_INT (0xB),
45380 GEN_INT (0xB),
45381 GEN_INT (i == 128 ? 0xE : 0xD),
45382 GEN_INT (0xF),
45383 GEN_INT (0xF),
45384 GEN_INT (0xF));
45385 break;
45386 default:
45387 gcc_unreachable ();
45389 emit_insn (tem);
45390 if (d != dest)
45391 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45394 /* Expand a vector reduction. FN is the binary pattern to reduce;
45395 DEST is the destination; IN is the input vector. */
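/* The loop below halves the reduction width on each iteration, e.g.
   for a V8SFmode input it emits steps at 256, 128 and 64 bits, so the
   last application of FN leaves the fully reduced value in element 0
   of DEST.  */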
45397 void
45398 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45400 rtx half, dst, vec = in;
45401 machine_mode mode = GET_MODE (in);
45402 int i;
45404 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45405 if (TARGET_SSE4_1
45406 && mode == V8HImode
45407 && fn == gen_uminv8hi3)
45409 emit_insn (gen_sse4_1_phminposuw (dest, in));
45410 return;
45413 for (i = GET_MODE_BITSIZE (mode);
45414 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45415 i >>= 1)
45417 half = gen_reg_rtx (mode);
45418 emit_reduc_half (half, vec, i);
45419 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45420 dst = dest;
45421 else
45422 dst = gen_reg_rtx (mode);
45423 emit_insn (fn (dst, half, vec));
45424 vec = dst;
45428 /* Target hook for scalar_mode_supported_p. */
45429 static bool
45430 ix86_scalar_mode_supported_p (machine_mode mode)
45432 if (DECIMAL_FLOAT_MODE_P (mode))
45433 return default_decimal_float_supported_p ();
45434 else if (mode == TFmode)
45435 return true;
45436 else
45437 return default_scalar_mode_supported_p (mode);
45440 /* Implements target hook vector_mode_supported_p. */
45441 static bool
45442 ix86_vector_mode_supported_p (machine_mode mode)
45444 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45445 return true;
45446 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45447 return true;
45448 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45449 return true;
45450 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45451 return true;
45452 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45453 return true;
45454 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45455 return true;
45456 return false;
45459 /* Implement target hook libgcc_floating_mode_supported_p. */
45460 static bool
45461 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45463 switch (mode)
45465 case SFmode:
45466 case DFmode:
45467 case XFmode:
45468 return true;
45470 case TFmode:
45471 #ifdef IX86_NO_LIBGCC_TFMODE
45472 return false;
45473 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45474 return TARGET_LONG_DOUBLE_128;
45475 #else
45476 return true;
45477 #endif
45479 default:
45480 return false;
45484 /* Target hook for c_mode_for_suffix. */
45485 static machine_mode
45486 ix86_c_mode_for_suffix (char suffix)
45488 if (suffix == 'q')
45489 return TFmode;
45490 if (suffix == 'w')
45491 return XFmode;
45493 return VOIDmode;
45496 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45498 We do this in the new i386 backend to maintain source compatibility
45499 with the old cc0-based compiler. */
45501 static tree
45502 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45504 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45505 clobbers);
45506 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45507 clobbers);
45508 return clobbers;
45511 /* Implements target vector targetm.asm.encode_section_info. */
45513 static void ATTRIBUTE_UNUSED
45514 ix86_encode_section_info (tree decl, rtx rtl, int first)
45516 default_encode_section_info (decl, rtl, first);
45518 if (ix86_in_large_data_p (decl))
45519 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45522 /* Worker function for REVERSE_CONDITION. */
45524 enum rtx_code
45525 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45527 return (mode != CCFPmode && mode != CCFPUmode
45528 ? reverse_condition (code)
45529 : reverse_condition_maybe_unordered (code));
45532 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45533 to OPERANDS[0]. */
45535 const char *
45536 output_387_reg_move (rtx insn, rtx *operands)
45538 if (REG_P (operands[0]))
45540 if (REG_P (operands[1])
45541 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45543 if (REGNO (operands[0]) == FIRST_STACK_REG)
45544 return output_387_ffreep (operands, 0);
45545 return "fstp\t%y0";
45547 if (STACK_TOP_P (operands[0]))
45548 return "fld%Z1\t%y1";
45549 return "fst\t%y0";
45551 else if (MEM_P (operands[0]))
45553 gcc_assert (REG_P (operands[1]));
45554 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45555 return "fstp%Z0\t%y0";
45556 else
45558 /* There is no non-popping store to memory for XFmode.
45559 So if we need one, follow the store with a load. */
45560 if (GET_MODE (operands[0]) == XFmode)
45561 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45562 else
45563 return "fst%Z0\t%y0";
45566 else
45567 gcc_unreachable();
45570 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
45571 the FP status register is set. */
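/* C2 is bit 2 of the upper byte of the FPU status word (mask 0x04).
   With SAHF that byte is copied into EFLAGS, where C2 lands in PF, so
   the jump tests the UNORDERED condition; without SAHF the bit is
   tested directly via a TEST with 0x04.  */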
45573 void
45574 ix86_emit_fp_unordered_jump (rtx label)
45576 rtx reg = gen_reg_rtx (HImode);
45577 rtx temp;
45579 emit_insn (gen_x86_fnstsw_1 (reg));
45581 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45583 emit_insn (gen_x86_sahf_1 (reg));
45585 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45586 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45588 else
45590 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45592 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45593 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45596 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45597 gen_rtx_LABEL_REF (VOIDmode, label),
45598 pc_rtx);
45599 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45601 emit_jump_insn (temp);
45602 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45605 /* Output code to perform a log1p XFmode calculation. */
45607 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45609 rtx_code_label *label1 = gen_label_rtx ();
45610 rtx_code_label *label2 = gen_label_rtx ();
45612 rtx tmp = gen_reg_rtx (XFmode);
45613 rtx tmp2 = gen_reg_rtx (XFmode);
45614 rtx test;
45616 emit_insn (gen_absxf2 (tmp, op1));
45617 test = gen_rtx_GE (VOIDmode, tmp,
45618 CONST_DOUBLE_FROM_REAL_VALUE (
45619 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45620 XFmode));
45621 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45623 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45624 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45625 emit_jump (label2);
45627 emit_label (label1);
45628 emit_move_insn (tmp, CONST1_RTX (XFmode));
45629 emit_insn (gen_addxf3 (tmp, op1, tmp));
45630 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45631 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45633 emit_label (label2);
45636 /* Emit code for round calculation. */
45637 void ix86_emit_i387_round (rtx op0, rtx op1)
45639 machine_mode inmode = GET_MODE (op1);
45640 machine_mode outmode = GET_MODE (op0);
45641 rtx e1, e2, res, tmp, tmp1, half;
45642 rtx scratch = gen_reg_rtx (HImode);
45643 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45644 rtx_code_label *jump_label = gen_label_rtx ();
45645 rtx insn;
45646 rtx (*gen_abs) (rtx, rtx);
45647 rtx (*gen_neg) (rtx, rtx);
45649 switch (inmode)
45651 case SFmode:
45652 gen_abs = gen_abssf2;
45653 break;
45654 case DFmode:
45655 gen_abs = gen_absdf2;
45656 break;
45657 case XFmode:
45658 gen_abs = gen_absxf2;
45659 break;
45660 default:
45661 gcc_unreachable ();
45664 switch (outmode)
45666 case SFmode:
45667 gen_neg = gen_negsf2;
45668 break;
45669 case DFmode:
45670 gen_neg = gen_negdf2;
45671 break;
45672 case XFmode:
45673 gen_neg = gen_negxf2;
45674 break;
45675 case HImode:
45676 gen_neg = gen_neghi2;
45677 break;
45678 case SImode:
45679 gen_neg = gen_negsi2;
45680 break;
45681 case DImode:
45682 gen_neg = gen_negdi2;
45683 break;
45684 default:
45685 gcc_unreachable ();
45688 e1 = gen_reg_rtx (inmode);
45689 e2 = gen_reg_rtx (inmode);
45690 res = gen_reg_rtx (outmode);
45692 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45694 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
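/* E.g. for op1 == -1.3: the fxam result records the sign, e1 = 1.3,
   e2 = 1.8, floor (e2) = 1, and the conditional negation at the end
   produces -1.  */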
45696 /* scratch = fxam(op1) */
45697 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45698 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45699 UNSPEC_FXAM)));
45700 /* e1 = fabs(op1) */
45701 emit_insn (gen_abs (e1, op1));
45703 /* e2 = e1 + 0.5 */
45704 half = force_reg (inmode, half);
45705 emit_insn (gen_rtx_SET (VOIDmode, e2,
45706 gen_rtx_PLUS (inmode, e1, half)));
45708 /* res = floor(e2) */
45709 if (inmode != XFmode)
45711 tmp1 = gen_reg_rtx (XFmode);
45713 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45714 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45716 else
45717 tmp1 = e2;
45719 switch (outmode)
45721 case SFmode:
45722 case DFmode:
45724 rtx tmp0 = gen_reg_rtx (XFmode);
45726 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45728 emit_insn (gen_rtx_SET (VOIDmode, res,
45729 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45730 UNSPEC_TRUNC_NOOP)));
45732 break;
45733 case XFmode:
45734 emit_insn (gen_frndintxf2_floor (res, tmp1));
45735 break;
45736 case HImode:
45737 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45738 break;
45739 case SImode:
45740 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45741 break;
45742 case DImode:
45743 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45744 break;
45745 default:
45746 gcc_unreachable ();
45749 /* flags = signbit(a) */
45750 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45752 /* if (flags) then res = -res */
45753 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45754 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45755 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45756 pc_rtx);
45757 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45758 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45759 JUMP_LABEL (insn) = jump_label;
45761 emit_insn (gen_neg (res, res));
45763 emit_label (jump_label);
45764 LABEL_NUSES (jump_label) = 1;
45766 emit_move_insn (op0, res);
45769 /* Output code to perform a Newton-Raphson approximation of a single precision
45770 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45772 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45774 rtx x0, x1, e0, e1;
45776 x0 = gen_reg_rtx (mode);
45777 e0 = gen_reg_rtx (mode);
45778 e1 = gen_reg_rtx (mode);
45779 x1 = gen_reg_rtx (mode);
45781 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
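/* This is one Newton-Raphson step for f(x) = 1/x - b: an estimate x0
   of 1/b is refined as x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0.
   The hardware reciprocal estimate is accurate to roughly 12 bits, and
   one step roughly doubles the number of correct bits.  */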
45783 b = force_reg (mode, b);
45785 /* x0 = rcp(b) estimate */
45786 if (mode == V16SFmode || mode == V8DFmode)
45787 emit_insn (gen_rtx_SET (VOIDmode, x0,
45788 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45789 UNSPEC_RCP14)));
45790 else
45791 emit_insn (gen_rtx_SET (VOIDmode, x0,
45792 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45793 UNSPEC_RCP)));
45795 /* e0 = x0 * b */
45796 emit_insn (gen_rtx_SET (VOIDmode, e0,
45797 gen_rtx_MULT (mode, x0, b)));
45799 /* e0 = x0 * e0 */
45800 emit_insn (gen_rtx_SET (VOIDmode, e0,
45801 gen_rtx_MULT (mode, x0, e0)));
45803 /* e1 = x0 + x0 */
45804 emit_insn (gen_rtx_SET (VOIDmode, e1,
45805 gen_rtx_PLUS (mode, x0, x0)));
45807 /* x1 = e1 - e0 */
45808 emit_insn (gen_rtx_SET (VOIDmode, x1,
45809 gen_rtx_MINUS (mode, e1, e0)));
45811 /* res = a * x1 */
45812 emit_insn (gen_rtx_SET (VOIDmode, res,
45813 gen_rtx_MULT (mode, a, x1)));
45816 /* Output code to perform a Newton-Raphson approximation of a
45817 single precision floating point [reciprocal] square root. */
45819 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45820 bool recip)
45822 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45823 REAL_VALUE_TYPE r;
45824 int unspec;
45826 x0 = gen_reg_rtx (mode);
45827 e0 = gen_reg_rtx (mode);
45828 e1 = gen_reg_rtx (mode);
45829 e2 = gen_reg_rtx (mode);
45830 e3 = gen_reg_rtx (mode);
45832 real_from_integer (&r, VOIDmode, -3, SIGNED);
45833 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45835 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45836 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45837 unspec = UNSPEC_RSQRT;
45839 if (VECTOR_MODE_P (mode))
45841 mthree = ix86_build_const_vector (mode, true, mthree);
45842 mhalf = ix86_build_const_vector (mode, true, mhalf);
45843 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45844 if (GET_MODE_SIZE (mode) == 64)
45845 unspec = UNSPEC_RSQRT14;
45848 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45849 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
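/* This is the Newton-Raphson step for f(x) = 1/x**2 - a: an estimate
   x0 of 1/sqrt(a) is refined as x1 = x0 * (3 - a * x0 * x0) / 2, which
   the MTHREE and MHALF constants rewrite as -0.5 * x0 * (a * x0 * x0 - 3);
   sqrt(a) is then a * x1, obtained by using a * x0 in place of x0 in
   the final multiply.  */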
45851 a = force_reg (mode, a);
45853 /* x0 = rsqrt(a) estimate */
45854 emit_insn (gen_rtx_SET (VOIDmode, x0,
45855 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45856 unspec)));
45858 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN (0 * Inf) in sqrt(0.0). */
45859 if (!recip)
45861 rtx zero, mask;
45863 zero = gen_reg_rtx (mode);
45864 mask = gen_reg_rtx (mode);
45866 zero = force_reg (mode, CONST0_RTX(mode));
45868 /* Handle masked compare. */
45869 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45871 mask = gen_reg_rtx (HImode);
45872 /* Imm value 0x4 corresponds to not-equal comparison. */
45873 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45874 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45876 else
45878 emit_insn (gen_rtx_SET (VOIDmode, mask,
45879 gen_rtx_NE (mode, zero, a)));
45881 emit_insn (gen_rtx_SET (VOIDmode, x0,
45882 gen_rtx_AND (mode, x0, mask)));
45886 /* e0 = x0 * a */
45887 emit_insn (gen_rtx_SET (VOIDmode, e0,
45888 gen_rtx_MULT (mode, x0, a)));
45889 /* e1 = e0 * x0 */
45890 emit_insn (gen_rtx_SET (VOIDmode, e1,
45891 gen_rtx_MULT (mode, e0, x0)));
45893 /* e2 = e1 - 3. */
45894 mthree = force_reg (mode, mthree);
45895 emit_insn (gen_rtx_SET (VOIDmode, e2,
45896 gen_rtx_PLUS (mode, e1, mthree)));
45898 mhalf = force_reg (mode, mhalf);
45899 if (recip)
45900 /* e3 = -.5 * x0 */
45901 emit_insn (gen_rtx_SET (VOIDmode, e3,
45902 gen_rtx_MULT (mode, x0, mhalf)));
45903 else
45904 /* e3 = -.5 * e0 */
45905 emit_insn (gen_rtx_SET (VOIDmode, e3,
45906 gen_rtx_MULT (mode, e0, mhalf)));
45907 /* ret = e2 * e3 */
45908 emit_insn (gen_rtx_SET (VOIDmode, res,
45909 gen_rtx_MULT (mode, e2, e3)));
45912 #ifdef TARGET_SOLARIS
45913 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45915 static void
45916 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45917 tree decl)
45919 /* With Binutils 2.15, the "@unwind" marker must be specified on
45920 every occurrence of the ".eh_frame" section, not just the first
45921 one. */
45922 if (TARGET_64BIT
45923 && strcmp (name, ".eh_frame") == 0)
45925 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45926 flags & SECTION_WRITE ? "aw" : "a");
45927 return;
45930 #ifndef USE_GAS
45931 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45933 solaris_elf_asm_comdat_section (name, flags, decl);
45934 return;
45936 #endif
45938 default_elf_asm_named_section (name, flags, decl);
45940 #endif /* TARGET_SOLARIS */
45942 /* Return the mangling of TYPE if it is an extended fundamental type. */
45944 static const char *
45945 ix86_mangle_type (const_tree type)
45947 type = TYPE_MAIN_VARIANT (type);
45949 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45950 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45951 return NULL;
45953 switch (TYPE_MODE (type))
45955 case TFmode:
45956 /* __float128 is "g". */
45957 return "g";
45958 case XFmode:
45959 /* "long double" or __float80 is "e". */
45960 return "e";
45961 default:
45962 return NULL;
45966 /* For 32-bit code we can save PIC register setup by using
45967 __stack_chk_fail_local hidden function instead of calling
45968 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45969 register, so it is better to call __stack_chk_fail directly. */
45971 static tree ATTRIBUTE_UNUSED
45972 ix86_stack_protect_fail (void)
45974 return TARGET_64BIT
45975 ? default_external_stack_protect_fail ()
45976 : default_hidden_stack_protect_fail ();
45979 /* Select a format to encode pointers in exception handling data. CODE
45980 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45981 true if the symbol may be affected by dynamic relocations.
45983 ??? All x86 object file formats are capable of representing this.
45984 After all, the relocation needed is the same as for the call insn.
45985 Whether or not a particular assembler allows us to enter such, I
45986 guess we'll have to see. */
45987 int
45988 asm_preferred_eh_data_format (int code, int global)
45990 if (flag_pic)
45992 int type = DW_EH_PE_sdata8;
45993 if (!TARGET_64BIT
45994 || ix86_cmodel == CM_SMALL_PIC
45995 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45996 type = DW_EH_PE_sdata4;
45997 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45999 if (ix86_cmodel == CM_SMALL
46000 || (ix86_cmodel == CM_MEDIUM && code))
46001 return DW_EH_PE_udata4;
46002 return DW_EH_PE_absptr;
46005 /* Expand copysign from SIGN to the positive value ABS_VALUE
46006 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46007 the sign-bit. */
46008 static void
46009 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46011 machine_mode mode = GET_MODE (sign);
46012 rtx sgn = gen_reg_rtx (mode);
46013 if (mask == NULL_RTX)
46015 machine_mode vmode;
46017 if (mode == SFmode)
46018 vmode = V4SFmode;
46019 else if (mode == DFmode)
46020 vmode = V2DFmode;
46021 else
46022 vmode = mode;
46024 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46025 if (!VECTOR_MODE_P (mode))
46027 /* We need to generate a scalar mode mask in this case. */
46028 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46029 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46030 mask = gen_reg_rtx (mode);
46031 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46034 else
46035 mask = gen_rtx_NOT (mode, mask);
46036 emit_insn (gen_rtx_SET (VOIDmode, sgn,
46037 gen_rtx_AND (mode, mask, sign)));
46038 emit_insn (gen_rtx_SET (VOIDmode, result,
46039 gen_rtx_IOR (mode, abs_value, sgn)));
46042 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46043 mask for masking out the sign-bit is stored in *SMASK, if that is
46044 non-null. */
46045 static rtx
46046 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46048 machine_mode vmode, mode = GET_MODE (op0);
46049 rtx xa, mask;
46051 xa = gen_reg_rtx (mode);
46052 if (mode == SFmode)
46053 vmode = V4SFmode;
46054 else if (mode == DFmode)
46055 vmode = V2DFmode;
46056 else
46057 vmode = mode;
46058 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46059 if (!VECTOR_MODE_P (mode))
46061 /* We need to generate a scalar mode mask in this case. */
46062 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46063 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46064 mask = gen_reg_rtx (mode);
46065 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46067 emit_insn (gen_rtx_SET (VOIDmode, xa,
46068 gen_rtx_AND (mode, op0, mask)));
46070 if (smask)
46071 *smask = mask;
46073 return xa;
46076 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46077 swapping the operands if SWAP_OPERANDS is true. The expanded
46078 code is a forward jump to a newly created label in case the
46079 comparison is true. The generated label rtx is returned. */
46080 static rtx_code_label *
46081 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46082 bool swap_operands)
46084 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46085 rtx_code_label *label;
46086 rtx tmp;
46088 if (swap_operands)
46089 std::swap (op0, op1);
46091 label = gen_label_rtx ();
46092 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46093 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46094 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46095 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46096 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46097 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46098 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46099 JUMP_LABEL (tmp) = label;
46101 return label;
46104 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46105 using comparison code CODE. Operands are swapped for the comparison if
46106 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46107 static rtx
46108 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46109 bool swap_operands)
46111 rtx (*insn)(rtx, rtx, rtx, rtx);
46112 machine_mode mode = GET_MODE (op0);
46113 rtx mask = gen_reg_rtx (mode);
46115 if (swap_operands)
46116 std::swap (op0, op1);
46118 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46120 emit_insn (insn (mask, op0, op1,
46121 gen_rtx_fmt_ee (code, mode, op0, op1)));
46122 return mask;
46125 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46126 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
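/* Adding and then subtracting this constant rounds a smaller-magnitude
   value to an integer in the current rounding mode: at 2**52 (2**23
   for SFmode) the spacing between adjacent representable values
   reaches 1.0, so the intermediate sum cannot carry fractional bits,
   and the subtraction is exact.  */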
46127 static rtx
46128 ix86_gen_TWO52 (machine_mode mode)
46130 REAL_VALUE_TYPE TWO52r;
46131 rtx TWO52;
46133 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46134 TWO52 = const_double_from_real_value (TWO52r, mode);
46135 TWO52 = force_reg (mode, TWO52);
46137 return TWO52;
46140 /* Expand SSE sequence for computing lround from OP1 storing
46141 into OP0. */
46142 void
46143 ix86_expand_lround (rtx op0, rtx op1)
46145 /* C code for the stuff we're doing below:
46146 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46147 return (long)tmp;
46149 machine_mode mode = GET_MODE (op1);
46150 const struct real_format *fmt;
46151 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46152 rtx adj;
46154 /* load nextafter (0.5, 0.0) */
46155 fmt = REAL_MODE_FORMAT (mode);
46156 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46157 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
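/* Using the value just below 0.5 rather than 0.5 itself keeps inputs
   that are slightly below a halfway point from being rounded up by the
   addition: e.g. for op1 just under 0.5, op1 + 0.5 rounds to 1.0 and
   would truncate to 1, while op1 + pred_half stays below 1.0 and
   truncates to 0.  */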
46159 /* adj = copysign (0.5, op1) */
46160 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46161 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46163 /* adj = op1 + adj */
46164 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46166 /* op0 = (imode)adj */
46167 expand_fix (op0, adj, 0);
46170 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46171 into OPERAND0. */
46172 void
46173 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46175 /* C code for the stuff we're doing below (for do_floor):
46176 xi = (long)op1;
46177 xi -= (double)xi > op1 ? 1 : 0;
46178 return xi;
46180 machine_mode fmode = GET_MODE (op1);
46181 machine_mode imode = GET_MODE (op0);
46182 rtx ireg, freg, tmp;
46183 rtx_code_label *label;
46185 /* reg = (long)op1 */
46186 ireg = gen_reg_rtx (imode);
46187 expand_fix (ireg, op1, 0);
46189 /* freg = (double)reg */
46190 freg = gen_reg_rtx (fmode);
46191 expand_float (freg, ireg, 0);
46193 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46194 label = ix86_expand_sse_compare_and_jump (UNLE,
46195 freg, op1, !do_floor);
46196 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46197 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46198 emit_move_insn (ireg, tmp);
46200 emit_label (label);
46201 LABEL_NUSES (label) = 1;
46203 emit_move_insn (op0, ireg);
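/* Worked example of the floor case above: for op1 = -1.5 the fix
   conversion truncates toward zero, so ireg = -1 and freg = -1.0;
   since -1.0 > -1.5 the compare does not branch past the adjustment
   and ireg becomes -2, which is floor (-1.5).  For the ceil case the
   comparison operands are swapped and 1 is added instead.  */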
46206 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46207 result in OPERAND0. */
46208 void
46209 ix86_expand_rint (rtx operand0, rtx operand1)
46211 /* C code for the stuff we're doing below:
46212 xa = fabs (operand1);
46213 if (!isless (xa, 2**52))
46214 return operand1;
46215 xa = xa + 2**52 - 2**52;
46216 return copysign (xa, operand1);
46218 machine_mode mode = GET_MODE (operand0);
46219 rtx res, xa, TWO52, mask;
46220 rtx_code_label *label;
46222 res = gen_reg_rtx (mode);
46223 emit_move_insn (res, operand1);
46225 /* xa = abs (operand1) */
46226 xa = ix86_expand_sse_fabs (res, &mask);
46228 /* if (!isless (xa, TWO52)) goto label; */
46229 TWO52 = ix86_gen_TWO52 (mode);
46230 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46232 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46233 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46235 ix86_sse_copysign_to_positive (res, xa, res, mask);
46237 emit_label (label);
46238 LABEL_NUSES (label) = 1;
46240 emit_move_insn (operand0, res);
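/* Sketch of the data flow above for operand1 = 2.5 under the default
   round-to-nearest-even mode: xa = 2.5, xa + TWO52 rounds to
   4503599627370498.0 (the even neighbour), subtracting TWO52 leaves
   2.0, and copying the sign back gives rint (2.5) = 2.0.  Inputs with
   |x| >= 2**52 are already integral and take the early exit.  */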
46243 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46244 into OPERAND0, without the fix/float round trip used by ix86_expand_floorceil. */
46245 void
46246 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46248 /* C code for the stuff we expand below.
46249 double xa = fabs (x), x2;
46250 if (!isless (xa, TWO52))
46251 return x;
46252 xa = xa + TWO52 - TWO52;
46253 x2 = copysign (xa, x);
46254 Compensate. Floor:
46255 if (x2 > x)
46256 x2 -= 1;
46257 Compensate. Ceil:
46258 if (x2 < x)
46259 x2 -= -1;
46260 return x2;
46262 machine_mode mode = GET_MODE (operand0);
46263 rtx xa, TWO52, tmp, one, res, mask;
46264 rtx_code_label *label;
46266 TWO52 = ix86_gen_TWO52 (mode);
46268 /* Temporary for holding the result, initialized to the input
46269 operand to ease control flow. */
46270 res = gen_reg_rtx (mode);
46271 emit_move_insn (res, operand1);
46273 /* xa = abs (operand1) */
46274 xa = ix86_expand_sse_fabs (res, &mask);
46276 /* if (!isless (xa, TWO52)) goto label; */
46277 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46279 /* xa = xa + TWO52 - TWO52; */
46280 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46281 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46283 /* xa = copysign (xa, operand1) */
46284 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46286 /* generate 1.0 or -1.0 */
46287 one = force_reg (mode,
46288 const_double_from_real_value (do_floor
46289 ? dconst1 : dconstm1, mode));
46291 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46292 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46293 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46294 gen_rtx_AND (mode, one, tmp)));
46295 /* We always need to subtract here to preserve signed zero. */
46296 tmp = expand_simple_binop (mode, MINUS,
46297 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46298 emit_move_insn (res, tmp);
46300 emit_label (label);
46301 LABEL_NUSES (label) = 1;
46303 emit_move_insn (operand0, res);
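/* Worked example for floor (2.7): the TWO52 trick rounds xa to 3.0,
   the UNGT mask is all-one bits because 3.0 > 2.7, ANDing it with 1.0
   keeps 1.0, and 3.0 - 1.0 = 2.0 is the result.  For ceil the mask is
   computed with swapped operands and ANDed with -1.0, so subtracting
   it adds 1 instead.  */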
46306 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46307 into OPERAND0. */
46308 void
46309 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46311 /* C code for the stuff we expand below.
46312 double xa = fabs (x), x2;
46313 if (!isless (xa, TWO52))
46314 return x;
46315 x2 = (double)(long)x;
46316 Compensate. Floor:
46317 if (x2 > x)
46318 x2 -= 1;
46319 Compensate. Ceil:
46320 if (x2 < x)
46321 x2 += 1;
46322 if (HONOR_SIGNED_ZEROS (mode))
46323 return copysign (x2, x);
46324 return x2;
46326 machine_mode mode = GET_MODE (operand0);
46327 rtx xa, xi, TWO52, tmp, one, res, mask;
46328 rtx_code_label *label;
46330 TWO52 = ix86_gen_TWO52 (mode);
46332 /* Temporary for holding the result, initialized to the input
46333 operand to ease control flow. */
46334 res = gen_reg_rtx (mode);
46335 emit_move_insn (res, operand1);
46337 /* xa = abs (operand1) */
46338 xa = ix86_expand_sse_fabs (res, &mask);
46340 /* if (!isless (xa, TWO52)) goto label; */
46341 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46343 /* xa = (double)(long)x */
46344 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46345 expand_fix (xi, res, 0);
46346 expand_float (xa, xi, 0);
46348 /* generate 1.0 */
46349 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46351 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46352 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46353 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46354 gen_rtx_AND (mode, one, tmp)));
46355 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46356 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46357 emit_move_insn (res, tmp);
46359 if (HONOR_SIGNED_ZEROS (mode))
46360 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46362 emit_label (label);
46363 LABEL_NUSES (label) = 1;
46365 emit_move_insn (operand0, res);
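/* The HONOR_SIGNED_ZEROS copysign above matters for inputs like -0.0:
   the fix/float round trip turns -0.0 into +0.0, the compensation step
   leaves it unchanged because +0.0 and -0.0 compare equal, so the sign
   has to be copied back from operand1 for floor (-0.0) to be -0.0.  */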
46368 /* Expand SSE sequence for computing round from OPERAND1 storing
46369 into OPERAND0, without relying on DImode truncation via cvttsd2siq,
46370 which is only available on 64bit targets. */
46371 void
46372 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46374 /* C code for the stuff we expand below.
46375 double xa = fabs (x), xa2, x2;
46376 if (!isless (xa, TWO52))
46377 return x;
46378 Using the absolute value and copying back sign makes
46379 -0.0 -> -0.0 correct.
46380 xa2 = xa + TWO52 - TWO52;
46381 Compensate.
46382 dxa = xa2 - xa;
46383 if (dxa <= -0.5)
46384 xa2 += 1;
46385 else if (dxa > 0.5)
46386 xa2 -= 1;
46387 x2 = copysign (xa2, x);
46388 return x2;
46390 machine_mode mode = GET_MODE (operand0);
46391 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46392 rtx_code_label *label;
46394 TWO52 = ix86_gen_TWO52 (mode);
46396 /* Temporary for holding the result, initialized to the input
46397 operand to ease control flow. */
46398 res = gen_reg_rtx (mode);
46399 emit_move_insn (res, operand1);
46401 /* xa = abs (operand1) */
46402 xa = ix86_expand_sse_fabs (res, &mask);
46404 /* if (!isless (xa, TWO52)) goto label; */
46405 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46407 /* xa2 = xa + TWO52 - TWO52; */
46408 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46409 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46411 /* dxa = xa2 - xa; */
46412 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46414 /* generate 0.5, 1.0 and -0.5 */
46415 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46416 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46417 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46418 0, OPTAB_DIRECT);
46420 /* Compensate. */
46421 tmp = gen_reg_rtx (mode);
46422 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46423 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46424 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46425 gen_rtx_AND (mode, one, tmp)));
46426 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46427 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46428 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46429 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46430 gen_rtx_AND (mode, one, tmp)));
46431 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46433 /* res = copysign (xa2, operand1) */
46434 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46436 emit_label (label);
46437 LABEL_NUSES (label) = 1;
46439 emit_move_insn (operand0, res);
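/* A worked example of the compensation above for x = 2.5 under
   round-to-nearest-even: xa = 2.5, xa2 = 2.0 (the TWO52 trick rounds
   to the even neighbour), dxa = -0.5; dxa > 0.5 is false so nothing is
   subtracted, dxa <= -0.5 holds so 1.0 is added, giving 3.0, and the
   final copysign produces round (2.5) = 3.0, i.e. halfway cases are
   rounded away from zero as round () requires.  */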
46442 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46443 into OPERAND0. */
46444 void
46445 ix86_expand_trunc (rtx operand0, rtx operand1)
46447 /* C code for SSE variant we expand below.
46448 double xa = fabs (x), x2;
46449 if (!isless (xa, TWO52))
46450 return x;
46451 x2 = (double)(long)x;
46452 if (HONOR_SIGNED_ZEROS (mode))
46453 return copysign (x2, x);
46454 return x2;
46456 machine_mode mode = GET_MODE (operand0);
46457 rtx xa, xi, TWO52, res, mask;
46458 rtx_code_label *label;
46460 TWO52 = ix86_gen_TWO52 (mode);
46462 /* Temporary for holding the result, initialized to the input
46463 operand to ease control flow. */
46464 res = gen_reg_rtx (mode);
46465 emit_move_insn (res, operand1);
46467 /* xa = abs (operand1) */
46468 xa = ix86_expand_sse_fabs (res, &mask);
46470 /* if (!isless (xa, TWO52)) goto label; */
46471 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46473 /* x = (double)(long)x */
46474 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46475 expand_fix (xi, res, 0);
46476 expand_float (res, xi, 0);
46478 if (HONOR_SIGNED_ZEROS (mode))
46479 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46481 emit_label (label);
46482 LABEL_NUSES (label) = 1;
46484 emit_move_insn (operand0, res);
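/* No compensation step is needed here: the fix conversion already
   truncates toward zero, so e.g. 2.7 becomes 2 and -2.7 becomes -2,
   which is exactly trunc ().  Only the sign of a -0.0 input has to be
   restored when signed zeros are honored.  */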
46487 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46488 into OPERAND0, without the fix/float round trip used by ix86_expand_trunc. */
46489 void
46490 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46492 machine_mode mode = GET_MODE (operand0);
46493 rtx xa, mask, TWO52, one, res, smask, tmp;
46494 rtx_code_label *label;
46496 /* C code for SSE variant we expand below.
46497 double xa = fabs (x), x2;
46498 if (!isless (xa, TWO52))
46499 return x;
46500 xa2 = xa + TWO52 - TWO52;
46501 Compensate:
46502 if (xa2 > xa)
46503 xa2 -= 1.0;
46504 x2 = copysign (xa2, x);
46505 return x2;
46508 TWO52 = ix86_gen_TWO52 (mode);
46510 /* Temporary for holding the result, initialized to the input
46511 operand to ease control flow. */
46512 res = gen_reg_rtx (mode);
46513 emit_move_insn (res, operand1);
46515 /* xa = abs (operand1) */
46516 xa = ix86_expand_sse_fabs (res, &smask);
46518 /* if (!isless (xa, TWO52)) goto label; */
46519 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46521 /* res = xa + TWO52 - TWO52; */
46522 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46523 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46524 emit_move_insn (res, tmp);
46526 /* generate 1.0 */
46527 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46529 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46530 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46531 emit_insn (gen_rtx_SET (VOIDmode, mask,
46532 gen_rtx_AND (mode, mask, one)));
46533 tmp = expand_simple_binop (mode, MINUS,
46534 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46535 emit_move_insn (res, tmp);
46537 /* res = copysign (res, operand1) */
46538 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46540 emit_label (label);
46541 LABEL_NUSES (label) = 1;
46543 emit_move_insn (operand0, res);
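/* Example of the sequence above for operand1 = -2.7: xa = 2.7, the
   TWO52 trick rounds it to 3.0, 3.0 > 2.7 so the masked 1.0 is
   subtracted giving 2.0, and copysign restores the sign for a final
   result of -2.0 = trunc (-2.7).  Since no DImode fix conversion is
   involved, this variant also handles DFmode on 32-bit targets.  */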
46546 /* Expand SSE sequence for computing round from OPERAND1 storing
46547 into OPERAND0. */
46548 void
46549 ix86_expand_round (rtx operand0, rtx operand1)
46551 /* C code for the stuff we're doing below:
46552 double xa = fabs (x);
46553 if (!isless (xa, TWO52))
46554 return x;
46555 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46556 return copysign (xa, x);
46558 machine_mode mode = GET_MODE (operand0);
46559 rtx res, TWO52, xa, xi, half, mask;
46560 rtx_code_label *label;
46561 const struct real_format *fmt;
46562 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46564 /* Temporary for holding the result, initialized to the input
46565 operand to ease control flow. */
46566 res = gen_reg_rtx (mode);
46567 emit_move_insn (res, operand1);
46569 TWO52 = ix86_gen_TWO52 (mode);
46570 xa = ix86_expand_sse_fabs (res, &mask);
46571 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46573 /* load nextafter (0.5, 0.0) */
46574 fmt = REAL_MODE_FORMAT (mode);
46575 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46576 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46578 /* xa = xa + 0.5 */
46579 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46580 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46582 /* xa = (double)(int64_t)xa */
46583 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46584 expand_fix (xi, xa, 0);
46585 expand_float (xa, xi, 0);
46587 /* res = copysign (xa, operand1) */
46588 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46590 emit_label (label);
46591 LABEL_NUSES (label) = 1;
46593 emit_move_insn (operand0, res);
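/* The nextafter (0.5, 0.0) constant is what keeps this sequence
   correct for inputs just below one half: for x = 0.49999999999999994
   adding the predecessor of 0.5 still yields a value below 1.0, which
   truncates to 0 as round () requires, whereas adding a full 0.5 would
   round up to exactly 1.0 in the addition and give 1.  */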
46596 /* Expand SSE sequence for computing round
46597 from OP1 storing into OP0 using sse4 round insn. */
46598 void
46599 ix86_expand_round_sse4 (rtx op0, rtx op1)
46601 machine_mode mode = GET_MODE (op0);
46602 rtx e1, e2, res, half;
46603 const struct real_format *fmt;
46604 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46605 rtx (*gen_copysign) (rtx, rtx, rtx);
46606 rtx (*gen_round) (rtx, rtx, rtx);
46608 switch (mode)
46610 case SFmode:
46611 gen_copysign = gen_copysignsf3;
46612 gen_round = gen_sse4_1_roundsf2;
46613 break;
46614 case DFmode:
46615 gen_copysign = gen_copysigndf3;
46616 gen_round = gen_sse4_1_rounddf2;
46617 break;
46618 default:
46619 gcc_unreachable ();
46622 /* round (a) = trunc (a + copysign (0.5, a)) */
46624 /* load nextafter (0.5, 0.0) */
46625 fmt = REAL_MODE_FORMAT (mode);
46626 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46627 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46628 half = const_double_from_real_value (pred_half, mode);
46630 /* e1 = copysign (0.5, op1) */
46631 e1 = gen_reg_rtx (mode);
46632 emit_insn (gen_copysign (e1, half, op1));
46634 /* e2 = op1 + e1 */
46635 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46637 /* res = trunc (e2) */
46638 res = gen_reg_rtx (mode);
46639 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46641 emit_move_insn (op0, res);
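/* For example round (2.5): e1 = 0.49999999999999994, the addition
   e2 = 2.5 + e1 rounds to exactly 3.0, and truncating that with the
   SSE4.1 round instruction yields 3.0, i.e. halfway cases go away from
   zero.  The same sequence gives -3.0 for round (-2.5).  */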
46645 /* Table of valid machine attributes. */
46646 static const struct attribute_spec ix86_attribute_table[] =
46648 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46649 affects_type_identity } */
46650 /* Stdcall attribute says callee is responsible for popping arguments
46651 if they are not variable. */
46652 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46653 true },
46654 /* Fastcall attribute says callee is responsible for popping arguments
46655 if they are not variable. */
46656 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46657 true },
46658 /* Thiscall attribute says callee is responsible for popping arguments
46659 if they are not variable. */
46660 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46661 true },
46662 /* Cdecl attribute says the callee is a normal C declaration. */
46663 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46664 true },
46665 /* Regparm attribute specifies how many integer arguments are to be
46666 passed in registers. */
46667 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46668 true },
46669 /* Sseregparm attribute says we are using x86_64 calling conventions
46670 for FP arguments. */
46671 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46672 true },
46673 /* The transactional memory builtins are implicitly regparm or fastcall
46674 depending on the ABI. Override the generic do-nothing attribute that
46675 these builtins were declared with. */
46676 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46677 true },
46678 /* force_align_arg_pointer says this function realigns the stack at entry. */
46679 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46680 false, true, true, ix86_handle_cconv_attribute, false },
46681 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46682 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46683 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46684 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46685 false },
46686 #endif
46687 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46688 false },
46689 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46690 false },
46691 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46692 SUBTARGET_ATTRIBUTE_TABLE,
46693 #endif
46694 /* ms_abi and sysv_abi calling convention function attributes. */
46695 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46696 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46697 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46698 false },
46699 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46700 ix86_handle_callee_pop_aggregate_return, true },
46701 /* End element. */
46702 { NULL, 0, 0, false, false, false, NULL, false }
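/* For reference, these attributes appear in user code with the usual
   __attribute__ syntax, e.g.

     int __attribute__ ((fastcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);
     struct s { char c; int i; } __attribute__ ((ms_struct));

   and each row above routes an attribute to its handler together with
   its argument-count and decl/type placement requirements.  */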
46705 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46706 static int
46707 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46708 tree vectype, int)
46710 unsigned elements;
46712 switch (type_of_cost)
46714 case scalar_stmt:
46715 return ix86_cost->scalar_stmt_cost;
46717 case scalar_load:
46718 return ix86_cost->scalar_load_cost;
46720 case scalar_store:
46721 return ix86_cost->scalar_store_cost;
46723 case vector_stmt:
46724 return ix86_cost->vec_stmt_cost;
46726 case vector_load:
46727 return ix86_cost->vec_align_load_cost;
46729 case vector_store:
46730 return ix86_cost->vec_store_cost;
46732 case vec_to_scalar:
46733 return ix86_cost->vec_to_scalar_cost;
46735 case scalar_to_vec:
46736 return ix86_cost->scalar_to_vec_cost;
46738 case unaligned_load:
46739 case unaligned_store:
46740 return ix86_cost->vec_unalign_load_cost;
46742 case cond_branch_taken:
46743 return ix86_cost->cond_taken_branch_cost;
46745 case cond_branch_not_taken:
46746 return ix86_cost->cond_not_taken_branch_cost;
46748 case vec_perm:
46749 case vec_promote_demote:
46750 return ix86_cost->vec_stmt_cost;
46752 case vec_construct:
46753 elements = TYPE_VECTOR_SUBPARTS (vectype);
46754 return elements / 2 + 1;
46756 default:
46757 gcc_unreachable ();
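/* As an example of the vec_construct formula above, building a V4SF
   vector element by element is costed at 4 / 2 + 1 = 3 and a V16QI
   construction at 16 / 2 + 1 = 9, while the remaining cases simply
   forward the per-statement costs of the active ix86_cost table.  */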
46761 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46762 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46763 insn every time. */
46765 static GTY(()) rtx_insn *vselect_insn;
46767 /* Initialize vselect_insn. */
46769 static void
46770 init_vselect_insn (void)
46772 unsigned i;
46773 rtx x;
46775 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46776 for (i = 0; i < MAX_VECT_LEN; ++i)
46777 XVECEXP (x, 0, i) = const0_rtx;
46778 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46779 const0_rtx), x);
46780 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46781 start_sequence ();
46782 vselect_insn = emit_insn (x);
46783 end_sequence ();
46786 /* Construct (set target (vec_select op0 (parallel perm))) and
46787 return true if that's a valid instruction in the active ISA. */
46789 static bool
46790 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46791 unsigned nelt, bool testing_p)
46793 unsigned int i;
46794 rtx x, save_vconcat;
46795 int icode;
46797 if (vselect_insn == NULL_RTX)
46798 init_vselect_insn ();
46800 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46801 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46802 for (i = 0; i < nelt; ++i)
46803 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46804 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46805 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46806 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46807 SET_DEST (PATTERN (vselect_insn)) = target;
46808 icode = recog_memoized (vselect_insn);
46810 if (icode >= 0 && !testing_p)
46811 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46813 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46814 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46815 INSN_CODE (vselect_insn) = -1;
46817 return icode >= 0;
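/* For instance, with a V4SF target and PERM = { 2, 3, 0, 1 } the
   pattern assembled above is
     (set target (vec_select:V4SF op0 (parallel [2 3 0 1])))
   and recog_memoized decides whether some insn in the active ISA
   matches it; the cached insn is then restored to its neutral state so
   the next query can reuse it.  */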
46820 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46822 static bool
46823 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46824 const unsigned char *perm, unsigned nelt,
46825 bool testing_p)
46827 machine_mode v2mode;
46828 rtx x;
46829 bool ok;
46831 if (vselect_insn == NULL_RTX)
46832 init_vselect_insn ();
46834 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46835 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46836 PUT_MODE (x, v2mode);
46837 XEXP (x, 0) = op0;
46838 XEXP (x, 1) = op1;
46839 ok = expand_vselect (target, x, perm, nelt, testing_p);
46840 XEXP (x, 0) = const0_rtx;
46841 XEXP (x, 1) = const0_rtx;
46842 return ok;
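/* A concrete case: interleaving the low halves of two V4SF operands
   (unpcklps style) is expressed here with v2mode = V8SF and
   PERM = { 0, 4, 1, 5 }, so the vec_select picks a0, b0, a1, b1 from
   the V8SF vec_concat of op0 and op1.  */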
46845 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46846 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46848 static bool
46849 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46851 machine_mode vmode = d->vmode;
46852 unsigned i, mask, nelt = d->nelt;
46853 rtx target, op0, op1, x;
46854 rtx rperm[32], vperm;
46856 if (d->one_operand_p)
46857 return false;
46858 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46859 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46861 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46863 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46865 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46867 else
46868 return false;
46870 /* This is a blend, not a permute. Elements must stay in their
46871 respective lanes. */
46872 for (i = 0; i < nelt; ++i)
46874 unsigned e = d->perm[i];
46875 if (!(e == i || e == i + nelt))
46876 return false;
46879 if (d->testing_p)
46880 return true;
46882 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46883 decision should be extracted elsewhere, so that we only try that
46884 sequence once all budget==3 options have been tried. */
46885 target = d->target;
46886 op0 = d->op0;
46887 op1 = d->op1;
46888 mask = 0;
46890 switch (vmode)
46892 case V8DFmode:
46893 case V16SFmode:
46894 case V4DFmode:
46895 case V8SFmode:
46896 case V2DFmode:
46897 case V4SFmode:
46898 case V8HImode:
46899 case V8SImode:
46900 case V32HImode:
46901 case V64QImode:
46902 case V16SImode:
46903 case V8DImode:
46904 for (i = 0; i < nelt; ++i)
46905 mask |= (d->perm[i] >= nelt) << i;
46906 break;
46908 case V2DImode:
46909 for (i = 0; i < 2; ++i)
46910 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46911 vmode = V8HImode;
46912 goto do_subreg;
46914 case V4SImode:
46915 for (i = 0; i < 4; ++i)
46916 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46917 vmode = V8HImode;
46918 goto do_subreg;
46920 case V16QImode:
46921 /* See if bytes move in pairs so we can use pblendw with
46922 an immediate argument, rather than pblendvb with a vector
46923 argument. */
46924 for (i = 0; i < 16; i += 2)
46925 if (d->perm[i] + 1 != d->perm[i + 1])
46927 use_pblendvb:
46928 for (i = 0; i < nelt; ++i)
46929 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46931 finish_pblendvb:
46932 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46933 vperm = force_reg (vmode, vperm);
46935 if (GET_MODE_SIZE (vmode) == 16)
46936 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46937 else
46938 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46939 if (target != d->target)
46940 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46941 return true;
46944 for (i = 0; i < 8; ++i)
46945 mask |= (d->perm[i * 2] >= 16) << i;
46946 vmode = V8HImode;
46947 /* FALLTHRU */
46949 do_subreg:
46950 target = gen_reg_rtx (vmode);
46951 op0 = gen_lowpart (vmode, op0);
46952 op1 = gen_lowpart (vmode, op1);
46953 break;
46955 case V32QImode:
46956 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46957 for (i = 0; i < 32; i += 2)
46958 if (d->perm[i] + 1 != d->perm[i + 1])
46959 goto use_pblendvb;
46960 /* See if bytes move in quadruplets. If yes, vpblendd
46961 with immediate can be used. */
46962 for (i = 0; i < 32; i += 4)
46963 if (d->perm[i] + 2 != d->perm[i + 2])
46964 break;
46965 if (i < 32)
46967 /* See if bytes move the same in both lanes. If yes,
46968 vpblendw with immediate can be used. */
46969 for (i = 0; i < 16; i += 2)
46970 if (d->perm[i] + 16 != d->perm[i + 16])
46971 goto use_pblendvb;
46973 /* Use vpblendw. */
46974 for (i = 0; i < 16; ++i)
46975 mask |= (d->perm[i * 2] >= 32) << i;
46976 vmode = V16HImode;
46977 goto do_subreg;
46980 /* Use vpblendd. */
46981 for (i = 0; i < 8; ++i)
46982 mask |= (d->perm[i * 4] >= 32) << i;
46983 vmode = V8SImode;
46984 goto do_subreg;
46986 case V16HImode:
46987 /* See if words move in pairs. If yes, vpblendd can be used. */
46988 for (i = 0; i < 16; i += 2)
46989 if (d->perm[i] + 1 != d->perm[i + 1])
46990 break;
46991 if (i < 16)
46993 /* See if words move the same in both lanes. If not,
46994 vpblendvb must be used. */
46995 for (i = 0; i < 8; i++)
46996 if (d->perm[i] + 8 != d->perm[i + 8])
46998 /* Use vpblendvb. */
46999 for (i = 0; i < 32; ++i)
47000 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47002 vmode = V32QImode;
47003 nelt = 32;
47004 target = gen_reg_rtx (vmode);
47005 op0 = gen_lowpart (vmode, op0);
47006 op1 = gen_lowpart (vmode, op1);
47007 goto finish_pblendvb;
47010 /* Use vpblendw. */
47011 for (i = 0; i < 16; ++i)
47012 mask |= (d->perm[i] >= 16) << i;
47013 break;
47016 /* Use vpblendd. */
47017 for (i = 0; i < 8; ++i)
47018 mask |= (d->perm[i * 2] >= 16) << i;
47019 vmode = V8SImode;
47020 goto do_subreg;
47022 case V4DImode:
47023 /* Use vpblendd. */
47024 for (i = 0; i < 4; ++i)
47025 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47026 vmode = V8SImode;
47027 goto do_subreg;
47029 default:
47030 gcc_unreachable ();
47033 /* This matches five different patterns with the different modes. */
47034 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
47035 x = gen_rtx_SET (VOIDmode, target, x);
47036 emit_insn (x);
47037 if (target != d->target)
47038 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47040 return true;
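/* Example of the mask computation above: a V4SF blend with
   d->perm = { 0, 5, 2, 7 } takes elements 1 and 3 from op1, so the
   loop sets bits 1 and 3, giving the vec_merge immediate 0xa; an
   SSE4.1 target implements that as blendps with immediate 10.  Modes
   without a blend at their element width (e.g. V4SI, V2DI) are first
   recast as V8HI via the do_subreg path.  */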
47043 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47044 in terms of the variable form of vpermilps.
47046 Note that we will have already failed the immediate form of vpermilps,
47047 which requires that the high and low part shuffle be identical; the
47048 variable form doesn't require that. */
47050 static bool
47051 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47053 rtx rperm[8], vperm;
47054 unsigned i;
47056 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47057 return false;
47059 /* We can only permute within the 128-bit lane. */
47060 for (i = 0; i < 8; ++i)
47062 unsigned e = d->perm[i];
47063 if (i < 4 ? e >= 4 : e < 4)
47064 return false;
47067 if (d->testing_p)
47068 return true;
47070 for (i = 0; i < 8; ++i)
47072 unsigned e = d->perm[i];
47074 /* Within each 128-bit lane, the elements of op0 are numbered
47075 from 0 and the elements of op1 are numbered from 4. */
47076 if (e >= 8 + 4)
47077 e -= 8;
47078 else if (e >= 4)
47079 e -= 4;
47081 rperm[i] = GEN_INT (e);
47084 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47085 vperm = force_reg (V8SImode, vperm);
47086 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47088 return true;
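/* For example the V8SF permutation { 3, 2, 1, 0, 7, 6, 5, 4 } stays
   inside each 128-bit lane and is accepted, becoming a variable
   vpermilps with per-lane indices 3 2 1 0, while
   { 4, 5, 6, 7, 0, 1, 2, 3 } crosses the lane boundary and is rejected
   by the check above.  */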
47091 /* Return true if permutation D can be performed as VMODE permutation
47092 instead. */
47094 static bool
47095 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47097 unsigned int i, j, chunk;
47099 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47100 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47101 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47102 return false;
47104 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47105 return true;
47107 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47108 for (i = 0; i < d->nelt; i += chunk)
47109 if (d->perm[i] & (chunk - 1))
47110 return false;
47111 else
47112 for (j = 1; j < chunk; ++j)
47113 if (d->perm[i] + j != d->perm[i + j])
47114 return false;
47116 return true;
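/* Example: the V16QI permutation { 4 5 6 7  0 1 2 3  12 13 14 15
   8 9 10 11 } moves bytes in aligned groups of four, so it is valid as
   the V4SI permutation { 1, 0, 3, 2 }; any permutation that splits one
   of those groups fails the chunk checks above.  */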
47119 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47120 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47122 static bool
47123 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47125 unsigned i, nelt, eltsz, mask;
47126 unsigned char perm[64];
47127 machine_mode vmode = V16QImode;
47128 rtx rperm[64], vperm, target, op0, op1;
47130 nelt = d->nelt;
47132 if (!d->one_operand_p)
47134 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47136 if (TARGET_AVX2
47137 && valid_perm_using_mode_p (V2TImode, d))
47139 if (d->testing_p)
47140 return true;
47142 /* Use vperm2i128 insn. The pattern uses
47143 V4DImode instead of V2TImode. */
47144 target = d->target;
47145 if (d->vmode != V4DImode)
47146 target = gen_reg_rtx (V4DImode);
47147 op0 = gen_lowpart (V4DImode, d->op0);
47148 op1 = gen_lowpart (V4DImode, d->op1);
47149 rperm[0]
47150 = GEN_INT ((d->perm[0] / (nelt / 2))
47151 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47152 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47153 if (target != d->target)
47154 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47155 return true;
47157 return false;
47160 else
47162 if (GET_MODE_SIZE (d->vmode) == 16)
47164 if (!TARGET_SSSE3)
47165 return false;
47167 else if (GET_MODE_SIZE (d->vmode) == 32)
47169 if (!TARGET_AVX2)
47170 return false;
47172 /* V4DImode should be already handled through
47173 expand_vselect by vpermq instruction. */
47174 gcc_assert (d->vmode != V4DImode);
47176 vmode = V32QImode;
47177 if (d->vmode == V8SImode
47178 || d->vmode == V16HImode
47179 || d->vmode == V32QImode)
47181 /* First see if vpermq can be used for
47182 V8SImode/V16HImode/V32QImode. */
47183 if (valid_perm_using_mode_p (V4DImode, d))
47185 for (i = 0; i < 4; i++)
47186 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47187 if (d->testing_p)
47188 return true;
47189 target = gen_reg_rtx (V4DImode);
47190 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47191 perm, 4, false))
47193 emit_move_insn (d->target,
47194 gen_lowpart (d->vmode, target));
47195 return true;
47197 return false;
47200 /* Next see if vpermd can be used. */
47201 if (valid_perm_using_mode_p (V8SImode, d))
47202 vmode = V8SImode;
47204 /* Or if vpermps can be used. */
47205 else if (d->vmode == V8SFmode)
47206 vmode = V8SImode;
47208 if (vmode == V32QImode)
47210 /* vpshufb only works intra lanes; it is not
47211 possible to shuffle bytes in between the lanes. */
47212 for (i = 0; i < nelt; ++i)
47213 if ((d->perm[i] ^ i) & (nelt / 2))
47214 return false;
47217 else if (GET_MODE_SIZE (d->vmode) == 64)
47219 if (!TARGET_AVX512BW)
47220 return false;
47222 /* If vpermq didn't work, vpshufb won't work either. */
47223 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47224 return false;
47226 vmode = V64QImode;
47227 if (d->vmode == V16SImode
47228 || d->vmode == V32HImode
47229 || d->vmode == V64QImode)
47231 /* First see if vpermq can be used for
47232 V16SImode/V32HImode/V64QImode. */
47233 if (valid_perm_using_mode_p (V8DImode, d))
47235 for (i = 0; i < 8; i++)
47236 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47237 if (d->testing_p)
47238 return true;
47239 target = gen_reg_rtx (V8DImode);
47240 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47241 perm, 8, false))
47243 emit_move_insn (d->target,
47244 gen_lowpart (d->vmode, target));
47245 return true;
47247 return false;
47250 /* Next see if vpermd can be used. */
47251 if (valid_perm_using_mode_p (V16SImode, d))
47252 vmode = V16SImode;
47254 /* Or if vpermps can be used. */
47255 else if (d->vmode == V16SFmode)
47256 vmode = V16SImode;
47257 if (vmode == V64QImode)
47259 /* vpshufb only works intra lanes; it is not
47260 possible to shuffle bytes in between the lanes. */
47261 for (i = 0; i < nelt; ++i)
47262 if ((d->perm[i] ^ i) & (nelt / 4))
47263 return false;
47266 else
47267 return false;
47270 if (d->testing_p)
47271 return true;
47273 if (vmode == V8SImode)
47274 for (i = 0; i < 8; ++i)
47275 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47276 else if (vmode == V16SImode)
47277 for (i = 0; i < 16; ++i)
47278 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47279 else
47281 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47282 if (!d->one_operand_p)
47283 mask = 2 * nelt - 1;
47284 else if (vmode == V16QImode)
47285 mask = nelt - 1;
47286 else if (vmode == V64QImode)
47287 mask = nelt / 4 - 1;
47288 else
47289 mask = nelt / 2 - 1;
47291 for (i = 0; i < nelt; ++i)
47293 unsigned j, e = d->perm[i] & mask;
47294 for (j = 0; j < eltsz; ++j)
47295 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47299 vperm = gen_rtx_CONST_VECTOR (vmode,
47300 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47301 vperm = force_reg (vmode, vperm);
47303 target = d->target;
47304 if (d->vmode != vmode)
47305 target = gen_reg_rtx (vmode);
47306 op0 = gen_lowpart (vmode, d->op0);
47307 if (d->one_operand_p)
47309 if (vmode == V16QImode)
47310 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47311 else if (vmode == V32QImode)
47312 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47313 else if (vmode == V64QImode)
47314 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47315 else if (vmode == V8SFmode)
47316 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47317 else if (vmode == V8SImode)
47318 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47319 else if (vmode == V16SFmode)
47320 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47321 else if (vmode == V16SImode)
47322 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47323 else
47324 gcc_unreachable ();
47326 else
47328 op1 = gen_lowpart (vmode, d->op1);
47329 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47331 if (target != d->target)
47332 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47334 return true;
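/* In the simplest one-operand V16QI case the control vector built
   above is just d->perm with each index reduced modulo 16; e.g.
   { 15, 14, ..., 1, 0 } reverses the bytes with a single pshufb.  The
   wider V32QI/V64QI cases additionally have to respect the intra-lane
   restriction checked earlier.  */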
47337 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47338 in a single instruction. */
47340 static bool
47341 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47343 unsigned i, nelt = d->nelt;
47344 unsigned char perm2[MAX_VECT_LEN];
47346 /* Check plain VEC_SELECT first, because AVX has instructions that could
47347 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47348 input where SEL+CONCAT may not. */
47349 if (d->one_operand_p)
47351 int mask = nelt - 1;
47352 bool identity_perm = true;
47353 bool broadcast_perm = true;
47355 for (i = 0; i < nelt; i++)
47357 perm2[i] = d->perm[i] & mask;
47358 if (perm2[i] != i)
47359 identity_perm = false;
47360 if (perm2[i])
47361 broadcast_perm = false;
47364 if (identity_perm)
47366 if (!d->testing_p)
47367 emit_move_insn (d->target, d->op0);
47368 return true;
47370 else if (broadcast_perm && TARGET_AVX2)
47372 /* Use vpbroadcast{b,w,d}. */
47373 rtx (*gen) (rtx, rtx) = NULL;
47374 switch (d->vmode)
47376 case V64QImode:
47377 if (TARGET_AVX512BW)
47378 gen = gen_avx512bw_vec_dupv64qi_1;
47379 break;
47380 case V32QImode:
47381 gen = gen_avx2_pbroadcastv32qi_1;
47382 break;
47383 case V32HImode:
47384 if (TARGET_AVX512BW)
47385 gen = gen_avx512bw_vec_dupv32hi_1;
47386 break;
47387 case V16HImode:
47388 gen = gen_avx2_pbroadcastv16hi_1;
47389 break;
47390 case V16SImode:
47391 if (TARGET_AVX512F)
47392 gen = gen_avx512f_vec_dupv16si_1;
47393 break;
47394 case V8SImode:
47395 gen = gen_avx2_pbroadcastv8si_1;
47396 break;
47397 case V16QImode:
47398 gen = gen_avx2_pbroadcastv16qi;
47399 break;
47400 case V8HImode:
47401 gen = gen_avx2_pbroadcastv8hi;
47402 break;
47403 case V16SFmode:
47404 if (TARGET_AVX512F)
47405 gen = gen_avx512f_vec_dupv16sf_1;
47406 break;
47407 case V8SFmode:
47408 gen = gen_avx2_vec_dupv8sf_1;
47409 break;
47410 case V8DFmode:
47411 if (TARGET_AVX512F)
47412 gen = gen_avx512f_vec_dupv8df_1;
47413 break;
47414 case V8DImode:
47415 if (TARGET_AVX512F)
47416 gen = gen_avx512f_vec_dupv8di_1;
47417 break;
47418 /* For other modes prefer other shuffles this function creates. */
47419 default: break;
47421 if (gen != NULL)
47423 if (!d->testing_p)
47424 emit_insn (gen (d->target, d->op0));
47425 return true;
47429 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47430 return true;
47432 /* There are plenty of patterns in sse.md that are written for
47433 SEL+CONCAT and are not replicated for a single op. Perhaps
47434 that should be changed, to avoid the nastiness here. */
47436 /* Recognize interleave style patterns, which means incrementing
47437 every other permutation operand. */
47438 for (i = 0; i < nelt; i += 2)
47440 perm2[i] = d->perm[i] & mask;
47441 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47443 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47444 d->testing_p))
47445 return true;
47447 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47448 if (nelt >= 4)
47450 for (i = 0; i < nelt; i += 4)
47452 perm2[i + 0] = d->perm[i + 0] & mask;
47453 perm2[i + 1] = d->perm[i + 1] & mask;
47454 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47455 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47458 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47459 d->testing_p))
47460 return true;
47464 /* Finally, try the fully general two operand permute. */
47465 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47466 d->testing_p))
47467 return true;
47469 /* Recognize interleave style patterns with reversed operands. */
47470 if (!d->one_operand_p)
47472 for (i = 0; i < nelt; ++i)
47474 unsigned e = d->perm[i];
47475 if (e >= nelt)
47476 e -= nelt;
47477 else
47478 e += nelt;
47479 perm2[i] = e;
47482 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47483 d->testing_p))
47484 return true;
47487 /* Try the SSE4.1 blend variable merge instructions. */
47488 if (expand_vec_perm_blend (d))
47489 return true;
47491 /* Try one of the AVX vpermil variable permutations. */
47492 if (expand_vec_perm_vpermil (d))
47493 return true;
47495 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47496 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47497 if (expand_vec_perm_pshufb (d))
47498 return true;
47500 /* Try the AVX2 vpalignr instruction. */
47501 if (expand_vec_perm_palignr (d, true))
47502 return true;
47504 /* Try the AVX512F vpermi2 instructions. */
47505 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47506 return true;
47508 return false;
47511 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47512 in terms of a pair of pshuflw + pshufhw instructions. */
47514 static bool
47515 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47517 unsigned char perm2[MAX_VECT_LEN];
47518 unsigned i;
47519 bool ok;
47521 if (d->vmode != V8HImode || !d->one_operand_p)
47522 return false;
47524 /* The two permutations only operate in 64-bit lanes. */
47525 for (i = 0; i < 4; ++i)
47526 if (d->perm[i] >= 4)
47527 return false;
47528 for (i = 4; i < 8; ++i)
47529 if (d->perm[i] < 4)
47530 return false;
47532 if (d->testing_p)
47533 return true;
47535 /* Emit the pshuflw. */
47536 memcpy (perm2, d->perm, 4);
47537 for (i = 4; i < 8; ++i)
47538 perm2[i] = i;
47539 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47540 gcc_assert (ok);
47542 /* Emit the pshufhw. */
47543 memcpy (perm2 + 4, d->perm + 4, 4);
47544 for (i = 0; i < 4; ++i)
47545 perm2[i] = i;
47546 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47547 gcc_assert (ok);
47549 return true;
47552 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47553 the permutation using the SSSE3 palignr instruction. This succeeds
47554 when all of the elements in PERM fit within one vector and we merely
47555 need to shift them down so that a single vector permutation has a
47556 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47557 the vpalignr instruction itself can perform the requested permutation. */
47559 static bool
47560 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47562 unsigned i, nelt = d->nelt;
47563 unsigned min, max, minswap, maxswap;
47564 bool in_order, ok, swap = false;
47565 rtx shift, target;
47566 struct expand_vec_perm_d dcopy;
47568 /* Even with AVX, palignr only operates on 128-bit vectors,
47569 in AVX2 palignr operates on both 128-bit lanes. */
47570 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47571 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47572 return false;
47574 min = 2 * nelt;
47575 max = 0;
47576 minswap = 2 * nelt;
47577 maxswap = 0;
47578 for (i = 0; i < nelt; ++i)
47580 unsigned e = d->perm[i];
47581 unsigned eswap = d->perm[i] ^ nelt;
47582 if (GET_MODE_SIZE (d->vmode) == 32)
47584 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47585 eswap = e ^ (nelt / 2);
47587 if (e < min)
47588 min = e;
47589 if (e > max)
47590 max = e;
47591 if (eswap < minswap)
47592 minswap = eswap;
47593 if (eswap > maxswap)
47594 maxswap = eswap;
47596 if (min == 0
47597 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47599 if (d->one_operand_p
47600 || minswap == 0
47601 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47602 ? nelt / 2 : nelt))
47603 return false;
47604 swap = true;
47605 min = minswap;
47606 max = maxswap;
47609 /* Given that we have SSSE3, we know we'll be able to implement the
47610 single operand permutation after the palignr with pshufb for
47611 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47612 first. */
47613 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47614 return true;
47616 dcopy = *d;
47617 if (swap)
47619 dcopy.op0 = d->op1;
47620 dcopy.op1 = d->op0;
47621 for (i = 0; i < nelt; ++i)
47622 dcopy.perm[i] ^= nelt;
47625 in_order = true;
47626 for (i = 0; i < nelt; ++i)
47628 unsigned e = dcopy.perm[i];
47629 if (GET_MODE_SIZE (d->vmode) == 32
47630 && e >= nelt
47631 && (e & (nelt / 2 - 1)) < min)
47632 e = e - min - (nelt / 2);
47633 else
47634 e = e - min;
47635 if (e != i)
47636 in_order = false;
47637 dcopy.perm[i] = e;
47639 dcopy.one_operand_p = true;
47641 if (single_insn_only_p && !in_order)
47642 return false;
47644 /* For AVX2, test whether we can permute the result in one instruction. */
47645 if (d->testing_p)
47647 if (in_order)
47648 return true;
47649 dcopy.op1 = dcopy.op0;
47650 return expand_vec_perm_1 (&dcopy);
47653 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47654 if (GET_MODE_SIZE (d->vmode) == 16)
47656 target = gen_reg_rtx (TImode);
47657 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47658 gen_lowpart (TImode, dcopy.op0), shift));
47660 else
47662 target = gen_reg_rtx (V2TImode);
47663 emit_insn (gen_avx2_palignrv2ti (target,
47664 gen_lowpart (V2TImode, dcopy.op1),
47665 gen_lowpart (V2TImode, dcopy.op0),
47666 shift));
47669 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47671 /* Test for the degenerate case where the alignment by itself
47672 produces the desired permutation. */
47673 if (in_order)
47675 emit_move_insn (d->target, dcopy.op0);
47676 return true;
47679 ok = expand_vec_perm_1 (&dcopy);
47680 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47682 return ok;
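/* Rough example for the 128-bit case: the two-operand V16QI
   permutation { 5, 6, ..., 20 } has min = 5 and max = 20, so the
   palignr above shifts the op1:op0 concatenation down by 5 bytes; the
   shifted result is already in order, so the degenerate-case move is
   all that remains.  */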
47685 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47686 the permutation using the SSE4_1 pblendv instruction. Potentially
47687 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
47689 static bool
47690 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47692 unsigned i, which, nelt = d->nelt;
47693 struct expand_vec_perm_d dcopy, dcopy1;
47694 machine_mode vmode = d->vmode;
47695 bool ok;
47697 /* Use the same checks as in expand_vec_perm_blend. */
47698 if (d->one_operand_p)
47699 return false;
47700 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47702 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47704 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47706 else
47707 return false;
47709 /* Figure out which permutation elements do not stay in their
47710 respective lanes. */
47711 for (i = 0, which = 0; i < nelt; ++i)
47713 unsigned e = d->perm[i];
47714 if (e != i)
47715 which |= (e < nelt ? 1 : 2);
47717 /* We can pblend the part where elements do not stay in their
47718 respective lanes only when these elements all come from the same
47719 half of the permutation, i.e. all from op0 or all from op1.
47720 {0 1 8 3 4 5 9 7} is ok: the out-of-place elements 8 and 9 are
47721 both >= 8, so both come from op1.
47722 {0 1 8 3 4 5 2 7} is not ok: the out-of-place elements are 2 and 8,
47723 and 8 >= 8 but 2 is not. */
47724 if (which != 1 && which != 2)
47725 return false;
47726 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47727 return true;
47729 /* First we apply one operand permutation to the part where
47730 elements stay not in their respective lanes. */
47731 dcopy = *d;
47732 if (which == 2)
47733 dcopy.op0 = dcopy.op1 = d->op1;
47734 else
47735 dcopy.op0 = dcopy.op1 = d->op0;
47736 if (!d->testing_p)
47737 dcopy.target = gen_reg_rtx (vmode);
47738 dcopy.one_operand_p = true;
47740 for (i = 0; i < nelt; ++i)
47741 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47743 ok = expand_vec_perm_1 (&dcopy);
47744 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47745 return false;
47746 else
47747 gcc_assert (ok);
47748 if (d->testing_p)
47749 return true;
47751 /* Next we put permuted elements into their positions. */
47752 dcopy1 = *d;
47753 if (which == 2)
47754 dcopy1.op1 = dcopy.target;
47755 else
47756 dcopy1.op0 = dcopy.target;
47758 for (i = 0; i < nelt; ++i)
47759 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47761 ok = expand_vec_perm_blend (&dcopy1);
47762 gcc_assert (ok);
47764 return true;
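/* Using the first example from the comment above, { 0 1 8 3 4 5 9 7 }
   is handled by first permuting op1 alone so that its elements 0 and 1
   land in positions 2 and 6, and then blending that intermediate with
   op0 under the per-element mask { 0 0 1 0 0 0 1 0 }.  */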
47767 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47769 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47770 a two vector permutation into a single vector permutation by using
47771 an interleave operation to merge the vectors. */
47773 static bool
47774 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47776 struct expand_vec_perm_d dremap, dfinal;
47777 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47778 unsigned HOST_WIDE_INT contents;
47779 unsigned char remap[2 * MAX_VECT_LEN];
47780 rtx_insn *seq;
47781 bool ok, same_halves = false;
47783 if (GET_MODE_SIZE (d->vmode) == 16)
47785 if (d->one_operand_p)
47786 return false;
47788 else if (GET_MODE_SIZE (d->vmode) == 32)
47790 if (!TARGET_AVX)
47791 return false;
47792 /* For 32-byte modes allow even d->one_operand_p.
47793 The lack of cross-lane shuffling in some instructions
47794 might prevent a single insn shuffle. */
47795 dfinal = *d;
47796 dfinal.testing_p = true;
47797 /* If expand_vec_perm_interleave3 can expand this into
47798 a 3 insn sequence, give up and let it be expanded as
47799 a 3 insn sequence. While that is one insn longer,
47800 it doesn't need a memory operand, and in the common
47801 case where the interleave low and interleave high
47802 permutations with the same operands are adjacent, it
47803 needs only 4 insns for both after CSE. */
47804 if (expand_vec_perm_interleave3 (&dfinal))
47805 return false;
47807 else
47808 return false;
47810 /* Examine from whence the elements come. */
47811 contents = 0;
47812 for (i = 0; i < nelt; ++i)
47813 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47815 memset (remap, 0xff, sizeof (remap));
47816 dremap = *d;
47818 if (GET_MODE_SIZE (d->vmode) == 16)
47820 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47822 /* Split the two input vectors into 4 halves. */
47823 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47824 h2 = h1 << nelt2;
47825 h3 = h2 << nelt2;
47826 h4 = h3 << nelt2;
47828 /* If the elements come from the low halves, use interleave low; similarly
47829 for interleave high. If the elements are from mis-matched halves, we
47830 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47831 if ((contents & (h1 | h3)) == contents)
47833 /* punpckl* */
47834 for (i = 0; i < nelt2; ++i)
47836 remap[i] = i * 2;
47837 remap[i + nelt] = i * 2 + 1;
47838 dremap.perm[i * 2] = i;
47839 dremap.perm[i * 2 + 1] = i + nelt;
47841 if (!TARGET_SSE2 && d->vmode == V4SImode)
47842 dremap.vmode = V4SFmode;
47844 else if ((contents & (h2 | h4)) == contents)
47846 /* punpckh* */
47847 for (i = 0; i < nelt2; ++i)
47849 remap[i + nelt2] = i * 2;
47850 remap[i + nelt + nelt2] = i * 2 + 1;
47851 dremap.perm[i * 2] = i + nelt2;
47852 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47854 if (!TARGET_SSE2 && d->vmode == V4SImode)
47855 dremap.vmode = V4SFmode;
47857 else if ((contents & (h1 | h4)) == contents)
47859 /* shufps */
47860 for (i = 0; i < nelt2; ++i)
47862 remap[i] = i;
47863 remap[i + nelt + nelt2] = i + nelt2;
47864 dremap.perm[i] = i;
47865 dremap.perm[i + nelt2] = i + nelt + nelt2;
47867 if (nelt != 4)
47869 /* shufpd */
47870 dremap.vmode = V2DImode;
47871 dremap.nelt = 2;
47872 dremap.perm[0] = 0;
47873 dremap.perm[1] = 3;
47876 else if ((contents & (h2 | h3)) == contents)
47878 /* shufps */
47879 for (i = 0; i < nelt2; ++i)
47881 remap[i + nelt2] = i;
47882 remap[i + nelt] = i + nelt2;
47883 dremap.perm[i] = i + nelt2;
47884 dremap.perm[i + nelt2] = i + nelt;
47886 if (nelt != 4)
47888 /* shufpd */
47889 dremap.vmode = V2DImode;
47890 dremap.nelt = 2;
47891 dremap.perm[0] = 1;
47892 dremap.perm[1] = 2;
47895 else
47896 return false;
47898 else
47900 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47901 unsigned HOST_WIDE_INT q[8];
47902 unsigned int nonzero_halves[4];
47904 /* Split the two input vectors into 8 quarters. */
47905 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47906 for (i = 1; i < 8; ++i)
47907 q[i] = q[0] << (nelt4 * i);
47908 for (i = 0; i < 4; ++i)
47909 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47911 nonzero_halves[nzcnt] = i;
47912 ++nzcnt;
47915 if (nzcnt == 1)
47917 gcc_assert (d->one_operand_p);
47918 nonzero_halves[1] = nonzero_halves[0];
47919 same_halves = true;
47921 else if (d->one_operand_p)
47923 gcc_assert (nonzero_halves[0] == 0);
47924 gcc_assert (nonzero_halves[1] == 1);
47927 if (nzcnt <= 2)
47929 if (d->perm[0] / nelt2 == nonzero_halves[1])
47931 /* Attempt to increase the likelihood that dfinal
47932 shuffle will be intra-lane. */
47933 char tmph = nonzero_halves[0];
47934 nonzero_halves[0] = nonzero_halves[1];
47935 nonzero_halves[1] = tmph;
47938 /* vperm2f128 or vperm2i128. */
47939 for (i = 0; i < nelt2; ++i)
47941 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47942 remap[i + nonzero_halves[0] * nelt2] = i;
47943 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47944 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47947 if (d->vmode != V8SFmode
47948 && d->vmode != V4DFmode
47949 && d->vmode != V8SImode)
47951 dremap.vmode = V8SImode;
47952 dremap.nelt = 8;
47953 for (i = 0; i < 4; ++i)
47955 dremap.perm[i] = i + nonzero_halves[0] * 4;
47956 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47960 else if (d->one_operand_p)
47961 return false;
47962 else if (TARGET_AVX2
47963 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47965 /* vpunpckl* */
47966 for (i = 0; i < nelt4; ++i)
47968 remap[i] = i * 2;
47969 remap[i + nelt] = i * 2 + 1;
47970 remap[i + nelt2] = i * 2 + nelt2;
47971 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47972 dremap.perm[i * 2] = i;
47973 dremap.perm[i * 2 + 1] = i + nelt;
47974 dremap.perm[i * 2 + nelt2] = i + nelt2;
47975 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47978 else if (TARGET_AVX2
47979 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47981 /* vpunpckh* */
47982 for (i = 0; i < nelt4; ++i)
47984 remap[i + nelt4] = i * 2;
47985 remap[i + nelt + nelt4] = i * 2 + 1;
47986 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47987 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47988 dremap.perm[i * 2] = i + nelt4;
47989 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47990 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47991 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47994 else
47995 return false;
47998 /* Use the remapping array set up above to move the elements from their
47999 swizzled locations into their final destinations. */
48000 dfinal = *d;
48001 for (i = 0; i < nelt; ++i)
48003 unsigned e = remap[d->perm[i]];
48004 gcc_assert (e < nelt);
48005 /* If same_halves is true, both halves of the remapped vector are the
48006 same. Avoid cross-lane accesses if possible. */
48007 if (same_halves && i >= nelt2)
48009 gcc_assert (e < nelt2);
48010 dfinal.perm[i] = e + nelt2;
48012 else
48013 dfinal.perm[i] = e;
48015 if (!d->testing_p)
48017 dremap.target = gen_reg_rtx (dremap.vmode);
48018 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48020 dfinal.op1 = dfinal.op0;
48021 dfinal.one_operand_p = true;
48023 /* Test if the final remap can be done with a single insn. For V4SFmode or
48024 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48025 start_sequence ();
48026 ok = expand_vec_perm_1 (&dfinal);
48027 seq = get_insns ();
48028 end_sequence ();
48030 if (!ok)
48031 return false;
48033 if (d->testing_p)
48034 return true;
48036 if (dremap.vmode != dfinal.vmode)
48038 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48039 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48042 ok = expand_vec_perm_1 (&dremap);
48043 gcc_assert (ok);
48045 emit_insn (seq);
48046 return true;
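/* Sketch of the remapping above for V8HI with
   d->perm = { 0, 8, 1, 9, 3, 11, 2, 10 }: every element comes from the
   low halves, so dremap becomes the punpcklwd interleave
   { 0, 8, 1, 9, 2, 10, 3, 11 } and dfinal reduces to the one-operand
   shuffle { 0, 1, 2, 3, 6, 7, 4, 5 }, which pshufhw can finish.  */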
48049 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48050 a single vector cross-lane permutation into vpermq followed
48051 by any of the single insn permutations. */
48053 static bool
48054 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48056 struct expand_vec_perm_d dremap, dfinal;
48057 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48058 unsigned contents[2];
48059 bool ok;
48061 if (!(TARGET_AVX2
48062 && (d->vmode == V32QImode || d->vmode == V16HImode)
48063 && d->one_operand_p))
48064 return false;
48066 contents[0] = 0;
48067 contents[1] = 0;
48068 for (i = 0; i < nelt2; ++i)
48070 contents[0] |= 1u << (d->perm[i] / nelt4);
48071 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48074 for (i = 0; i < 2; ++i)
48076 unsigned int cnt = 0;
48077 for (j = 0; j < 4; ++j)
48078 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48079 return false;
48082 if (d->testing_p)
48083 return true;
48085 dremap = *d;
48086 dremap.vmode = V4DImode;
48087 dremap.nelt = 4;
48088 dremap.target = gen_reg_rtx (V4DImode);
48089 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48090 dremap.op1 = dremap.op0;
48091 dremap.one_operand_p = true;
48092 for (i = 0; i < 2; ++i)
48094 unsigned int cnt = 0;
48095 for (j = 0; j < 4; ++j)
48096 if ((contents[i] & (1u << j)) != 0)
48097 dremap.perm[2 * i + cnt++] = j;
48098 for (; cnt < 2; ++cnt)
48099 dremap.perm[2 * i + cnt] = 0;
48102 dfinal = *d;
48103 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48104 dfinal.op1 = dfinal.op0;
48105 dfinal.one_operand_p = true;
48106 for (i = 0, j = 0; i < nelt; ++i)
48108 if (i == nelt2)
48109 j = 2;
48110 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48111 if ((d->perm[i] / nelt4) == dremap.perm[j])
48113 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48114 dfinal.perm[i] |= nelt4;
48115 else
48116 gcc_unreachable ();
48119 ok = expand_vec_perm_1 (&dremap);
48120 gcc_assert (ok);
48122 ok = expand_vec_perm_1 (&dfinal);
48123 gcc_assert (ok);
48125 return true;
48128 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48129 a vector permutation using two instructions, vperm2f128 resp.
48130 vperm2i128 followed by any single in-lane permutation. */
48132 static bool
48133 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48135 struct expand_vec_perm_d dfirst, dsecond;
48136 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48137 bool ok;
48139 if (!TARGET_AVX
48140 || GET_MODE_SIZE (d->vmode) != 32
48141 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48142 return false;
48144 dsecond = *d;
48145 dsecond.one_operand_p = false;
48146 dsecond.testing_p = true;
48148 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48149 immediate. For perm < 16 the second permutation uses
48150 d->op0 as first operand; for perm >= 16 it uses d->op1
48151 as first operand. The second operand is the result of
48152 vperm2[fi]128. */
48153 for (perm = 0; perm < 32; perm++)
48155 /* Ignore permutations which do not move anything cross-lane. */
48156 if (perm < 16)
48158 /* The second shuffle for e.g. V4DFmode has
48159 0123 and ABCD operands.
48160 Ignore AB23, as 23 is already in the second lane
48161 of the first operand. */
48162 if ((perm & 0xc) == (1 << 2)) continue;
48163 /* And 01CD, as 01 is in the first lane of the first
48164 operand. */
48165 if ((perm & 3) == 0) continue;
48166 /* And 4567, as then the vperm2[fi]128 doesn't change
48167 anything on the original 4567 second operand. */
48168 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48170 else
48172 /* The second shuffle for e.g. V4DFmode has
48173 4567 and ABCD operands.
48174 Ignore AB67, as 67 is already in the second lane
48175 of the first operand. */
48176 if ((perm & 0xc) == (3 << 2)) continue;
48177 /* And 45CD, as 45 is in the first lane of the first
48178 operand. */
48179 if ((perm & 3) == 2) continue;
48180 /* And 0123, as then the vperm2[fi]128 doesn't change
48181 anything on the original 0123 first operand. */
48182 if ((perm & 0xf) == (1 << 2)) continue;
48185 for (i = 0; i < nelt; i++)
48187 j = d->perm[i] / nelt2;
48188 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48189 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48190 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48191 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48192 else
48193 break;
48196 if (i == nelt)
48198 start_sequence ();
48199 ok = expand_vec_perm_1 (&dsecond);
48200 end_sequence ();
48202 else
48203 ok = false;
48205 if (ok)
48207 if (d->testing_p)
48208 return true;
48210 /* Found a usable second shuffle. dfirst will be
48211 vperm2f128 on d->op0 and d->op1. */
48212 dsecond.testing_p = false;
48213 dfirst = *d;
48214 dfirst.target = gen_reg_rtx (d->vmode);
48215 for (i = 0; i < nelt; i++)
48216 dfirst.perm[i] = (i & (nelt2 - 1))
48217 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48219 canonicalize_perm (&dfirst);
48220 ok = expand_vec_perm_1 (&dfirst);
48221 gcc_assert (ok);
48223 /* And dsecond is some single insn shuffle, taking
48224 d->op0 and result of vperm2f128 (if perm < 16) or
48225 d->op1 and result of vperm2f128 (otherwise). */
48226 if (perm >= 16)
48227 dsecond.op0 = dsecond.op1;
48228 dsecond.op1 = dfirst.target;
48230 ok = expand_vec_perm_1 (&dsecond);
48231 gcc_assert (ok);
48233 return true;
48236 /* For one operand, the only useful vperm2f128 permutation is 0x01
48237 aka lanes swap. */
48238 if (d->one_operand_p)
48239 return false;
48242 return false;
48245 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48246 a two vector permutation using 2 intra-lane interleave insns
48247 and cross-lane shuffle for 32-byte vectors. */
48249 static bool
48250 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48252 unsigned i, nelt;
48253 rtx (*gen) (rtx, rtx, rtx);
48255 if (d->one_operand_p)
48256 return false;
48257 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48259 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48261 else
48262 return false;
48264 nelt = d->nelt;
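/* Accept only interleaves of consecutive elements of the two operands,
   starting either at element 0 (low interleave) or at nelt / 2 (high
   interleave).  */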
48265 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48266 return false;
48267 for (i = 0; i < nelt; i += 2)
48268 if (d->perm[i] != d->perm[0] + i / 2
48269 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48270 return false;
48272 if (d->testing_p)
48273 return true;
48275 switch (d->vmode)
48277 case V32QImode:
48278 if (d->perm[0])
48279 gen = gen_vec_interleave_highv32qi;
48280 else
48281 gen = gen_vec_interleave_lowv32qi;
48282 break;
48283 case V16HImode:
48284 if (d->perm[0])
48285 gen = gen_vec_interleave_highv16hi;
48286 else
48287 gen = gen_vec_interleave_lowv16hi;
48288 break;
48289 case V8SImode:
48290 if (d->perm[0])
48291 gen = gen_vec_interleave_highv8si;
48292 else
48293 gen = gen_vec_interleave_lowv8si;
48294 break;
48295 case V4DImode:
48296 if (d->perm[0])
48297 gen = gen_vec_interleave_highv4di;
48298 else
48299 gen = gen_vec_interleave_lowv4di;
48300 break;
48301 case V8SFmode:
48302 if (d->perm[0])
48303 gen = gen_vec_interleave_highv8sf;
48304 else
48305 gen = gen_vec_interleave_lowv8sf;
48306 break;
48307 case V4DFmode:
48308 if (d->perm[0])
48309 gen = gen_vec_interleave_highv4df;
48310 else
48311 gen = gen_vec_interleave_lowv4df;
48312 break;
48313 default:
48314 gcc_unreachable ();
48317 emit_insn (gen (d->target, d->op0, d->op1));
48318 return true;
48321 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48322 a single vector permutation using a single intra-lane vector
48323 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48324 the non-swapped and swapped vectors together. */
48326 static bool
48327 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48329 struct expand_vec_perm_d dfirst, dsecond;
48330 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48331 rtx_insn *seq;
48332 bool ok;
48333 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48335 if (!TARGET_AVX
48336 || TARGET_AVX2
48337 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48338 || !d->one_operand_p)
48339 return false;
48341 dfirst = *d;
48342 for (i = 0; i < nelt; i++)
48343 dfirst.perm[i] = 0xff;
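/* Build an in-lane permutation DFIRST: an element whose source lies in
   the other lane is stored at the lane-swapped position, and MSK marks
   it so the blend takes it from the lane-swapped copy created below.  */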
48344 for (i = 0, msk = 0; i < nelt; i++)
48346 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48347 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48348 return false;
48349 dfirst.perm[j] = d->perm[i];
48350 if (j != i)
48351 msk |= (1 << i);
48353 for (i = 0; i < nelt; i++)
48354 if (dfirst.perm[i] == 0xff)
48355 dfirst.perm[i] = i;
48357 if (!d->testing_p)
48358 dfirst.target = gen_reg_rtx (dfirst.vmode);
48360 start_sequence ();
48361 ok = expand_vec_perm_1 (&dfirst);
48362 seq = get_insns ();
48363 end_sequence ();
48365 if (!ok)
48366 return false;
48368 if (d->testing_p)
48369 return true;
48371 emit_insn (seq);
48373 dsecond = *d;
48374 dsecond.op0 = dfirst.target;
48375 dsecond.op1 = dfirst.target;
48376 dsecond.one_operand_p = true;
48377 dsecond.target = gen_reg_rtx (dsecond.vmode);
48378 for (i = 0; i < nelt; i++)
48379 dsecond.perm[i] = i ^ nelt2;
48381 ok = expand_vec_perm_1 (&dsecond);
48382 gcc_assert (ok);
48384 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48385 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48386 return true;
48389 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48390 permutation using two vperm2f128, followed by a vshufpd insn blending
48391 the two vectors together. */
48393 static bool
48394 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48396 struct expand_vec_perm_d dfirst, dsecond, dthird;
48397 bool ok;
48399 if (!TARGET_AVX || (d->vmode != V4DFmode))
48400 return false;
48402 if (d->testing_p)
48403 return true;
48405 dfirst = *d;
48406 dsecond = *d;
48407 dthird = *d;
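/* DFIRST gathers the 128-bit pairs containing d->perm[0] and d->perm[2],
   DSECOND those containing d->perm[1] and d->perm[3]; DTHIRD then uses a
   vshufpd-style shuffle to pick the requested element out of each pair.  */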
48409 dfirst.perm[0] = (d->perm[0] & ~1);
48410 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48411 dfirst.perm[2] = (d->perm[2] & ~1);
48412 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48413 dsecond.perm[0] = (d->perm[1] & ~1);
48414 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48415 dsecond.perm[2] = (d->perm[3] & ~1);
48416 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48417 dthird.perm[0] = (d->perm[0] % 2);
48418 dthird.perm[1] = (d->perm[1] % 2) + 4;
48419 dthird.perm[2] = (d->perm[2] % 2) + 2;
48420 dthird.perm[3] = (d->perm[3] % 2) + 6;
48422 dfirst.target = gen_reg_rtx (dfirst.vmode);
48423 dsecond.target = gen_reg_rtx (dsecond.vmode);
48424 dthird.op0 = dfirst.target;
48425 dthird.op1 = dsecond.target;
48426 dthird.one_operand_p = false;
48428 canonicalize_perm (&dfirst);
48429 canonicalize_perm (&dsecond);
48431 ok = expand_vec_perm_1 (&dfirst)
48432 && expand_vec_perm_1 (&dsecond)
48433 && expand_vec_perm_1 (&dthird);
48435 gcc_assert (ok);
48437 return true;
48440 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48441 permutation with two pshufb insns and an ior. We should have already
48442 failed all two instruction sequences. */
48444 static bool
48445 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48447 rtx rperm[2][16], vperm, l, h, op, m128;
48448 unsigned int i, nelt, eltsz;
48450 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48451 return false;
48452 gcc_assert (!d->one_operand_p);
48454 if (d->testing_p)
48455 return true;
48457 nelt = d->nelt;
48458 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48460 /* Generate two permutation masks. If the required element is within
48461 the given vector it is shuffled into the proper lane. If the required
48462 element is in the other vector, force a zero into the lane by setting
48463 bit 7 in the permutation mask. */
48464 m128 = GEN_INT (-128);
48465 for (i = 0; i < nelt; ++i)
48467 unsigned j, e = d->perm[i];
48468 unsigned which = (e >= nelt);
48469 if (e >= nelt)
48470 e -= nelt;
48472 for (j = 0; j < eltsz; ++j)
48474 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48475 rperm[1-which][i*eltsz + j] = m128;
48479 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48480 vperm = force_reg (V16QImode, vperm);
48482 l = gen_reg_rtx (V16QImode);
48483 op = gen_lowpart (V16QImode, d->op0);
48484 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48486 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48487 vperm = force_reg (V16QImode, vperm);
48489 h = gen_reg_rtx (V16QImode);
48490 op = gen_lowpart (V16QImode, d->op1);
48491 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48493 op = d->target;
48494 if (d->vmode != V16QImode)
48495 op = gen_reg_rtx (V16QImode);
48496 emit_insn (gen_iorv16qi3 (op, l, h));
48497 if (op != d->target)
48498 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48500 return true;
48503 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48504 with two vpshufb insns, vpermq and vpor. We should have already failed
48505 all two or three instruction sequences. */
48507 static bool
48508 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48510 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48511 unsigned int i, nelt, eltsz;
48513 if (!TARGET_AVX2
48514 || !d->one_operand_p
48515 || (d->vmode != V32QImode && d->vmode != V16HImode))
48516 return false;
48518 if (d->testing_p)
48519 return true;
48521 nelt = d->nelt;
48522 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48524 /* Generate two permutation masks. If the required element is within
48525 the same lane, it is shuffled in. If the required element is from the
48526 other lane, force a zero by setting bit 7 in the permutation mask.
48527 In the other mask the entries are non-negative when the element is
48528 requested from the other lane; that element is also moved to the other lane,
48529 so that the result of vpshufb can have the two V2TImode halves
48530 swapped. */
48531 m128 = GEN_INT (-128);
48532 for (i = 0; i < nelt; ++i)
48534 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48535 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48537 for (j = 0; j < eltsz; ++j)
48539 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48540 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48544 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48545 vperm = force_reg (V32QImode, vperm);
48547 h = gen_reg_rtx (V32QImode);
48548 op = gen_lowpart (V32QImode, d->op0);
48549 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48551 /* Swap the 128-bit lanes of h into hp. */
48552 hp = gen_reg_rtx (V4DImode);
48553 op = gen_lowpart (V4DImode, h);
48554 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48555 const1_rtx));
48557 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48558 vperm = force_reg (V32QImode, vperm);
48560 l = gen_reg_rtx (V32QImode);
48561 op = gen_lowpart (V32QImode, d->op0);
48562 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48564 op = d->target;
48565 if (d->vmode != V32QImode)
48566 op = gen_reg_rtx (V32QImode);
48567 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48568 if (op != d->target)
48569 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48571 return true;
48574 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48575 and extract-odd permutations of two V32QImode or V16HImode operands
48576 with two vpshufb insns, vpor and vpermq. We should have already
48577 failed all two or three instruction sequences. */
48579 static bool
48580 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48582 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48583 unsigned int i, nelt, eltsz;
48585 if (!TARGET_AVX2
48586 || d->one_operand_p
48587 || (d->vmode != V32QImode && d->vmode != V16HImode))
48588 return false;
48590 for (i = 0; i < d->nelt; ++i)
48591 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48592 return false;
48594 if (d->testing_p)
48595 return true;
48597 nelt = d->nelt;
48598 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48600 /* Generate two permutation masks. In the first permutation mask
48601 the first quarter will contain indexes for the first half
48602 of the op0, the second quarter will contain bit 7 set, third quarter
48603 will contain indexes for the second half of the op0 and the
48604 last quarter bit 7 set. In the second permutation mask
48605 the first quarter will contain bit 7 set, the second quarter
48606 indexes for the first half of the op1, the third quarter bit 7 set
48607 and last quarter indexes for the second half of the op1.
48608 I.e. the first mask e.g. for V32QImode extract even will be:
48609 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48610 (all values masked with 0xf except for -128) and second mask
48611 for extract even will be
48612 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48613 m128 = GEN_INT (-128);
48614 for (i = 0; i < nelt; ++i)
48616 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48617 unsigned which = d->perm[i] >= nelt;
48618 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48620 for (j = 0; j < eltsz; ++j)
48622 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48623 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48627 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48628 vperm = force_reg (V32QImode, vperm);
48630 l = gen_reg_rtx (V32QImode);
48631 op = gen_lowpart (V32QImode, d->op0);
48632 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48634 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48635 vperm = force_reg (V32QImode, vperm);
48637 h = gen_reg_rtx (V32QImode);
48638 op = gen_lowpart (V32QImode, d->op1);
48639 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48641 ior = gen_reg_rtx (V32QImode);
48642 emit_insn (gen_iorv32qi3 (ior, l, h));
48644 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48645 op = gen_reg_rtx (V4DImode);
48646 ior = gen_lowpart (V4DImode, ior);
48647 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48648 const1_rtx, GEN_INT (3)));
48649 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48651 return true;
48654 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48655 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48656 with two "and" and "pack" or two "shift" and "pack" insns. We should
48657 have already failed all two instruction sequences. */
48659 static bool
48660 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48662 rtx op, dop0, dop1, t, rperm[16];
48663 unsigned i, odd, c, s, nelt = d->nelt;
48664 bool end_perm = false;
48665 machine_mode half_mode;
48666 rtx (*gen_and) (rtx, rtx, rtx);
48667 rtx (*gen_pack) (rtx, rtx, rtx);
48668 rtx (*gen_shift) (rtx, rtx, rtx);
48670 if (d->one_operand_p)
48671 return false;
48673 switch (d->vmode)
48675 case V8HImode:
48676 /* Required for "pack". */
48677 if (!TARGET_SSE4_1)
48678 return false;
48679 c = 0xffff;
48680 s = 16;
48681 half_mode = V4SImode;
48682 gen_and = gen_andv4si3;
48683 gen_pack = gen_sse4_1_packusdw;
48684 gen_shift = gen_lshrv4si3;
48685 break;
48686 case V16QImode:
48687 /* No check as all instructions are SSE2. */
48688 c = 0xff;
48689 s = 8;
48690 half_mode = V8HImode;
48691 gen_and = gen_andv8hi3;
48692 gen_pack = gen_sse2_packuswb;
48693 gen_shift = gen_lshrv8hi3;
48694 break;
48695 case V16HImode:
48696 if (!TARGET_AVX2)
48697 return false;
48698 c = 0xffff;
48699 s = 16;
48700 half_mode = V8SImode;
48701 gen_and = gen_andv8si3;
48702 gen_pack = gen_avx2_packusdw;
48703 gen_shift = gen_lshrv8si3;
48704 end_perm = true;
48705 break;
48706 case V32QImode:
48707 if (!TARGET_AVX2)
48708 return false;
48709 c = 0xff;
48710 s = 8;
48711 half_mode = V16HImode;
48712 gen_and = gen_andv16hi3;
48713 gen_pack = gen_avx2_packuswb;
48714 gen_shift = gen_lshrv16hi3;
48715 end_perm = true;
48716 break;
48717 default:
48718 /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach more
48719 profitable than general shuffles. */
48720 return false;
48723 /* Check that permutation is even or odd. */
48724 odd = d->perm[0];
48725 if (odd > 1)
48726 return false;
48728 for (i = 1; i < nelt; ++i)
48729 if (d->perm[i] != 2 * i + odd)
48730 return false;
48732 if (d->testing_p)
48733 return true;
48735 dop0 = gen_reg_rtx (half_mode);
48736 dop1 = gen_reg_rtx (half_mode);
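/* For the even permutation mask away the high half of each wide element;
   for the odd permutation shift it down into the low half.  Packing the
   two intermediate vectors then yields the requested elements.  */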
48737 if (odd == 0)
48739 for (i = 0; i < nelt / 2; i++)
48740 rperm[i] = GEN_INT (c);
48741 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48742 t = force_reg (half_mode, t);
48743 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48744 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48746 else
48748 emit_insn (gen_shift (dop0,
48749 gen_lowpart (half_mode, d->op0),
48750 GEN_INT (s)));
48751 emit_insn (gen_shift (dop1,
48752 gen_lowpart (half_mode, d->op1),
48753 GEN_INT (s)));
48755 /* For the AVX2 256-bit cases we need to permute the pack result. */
48756 if (TARGET_AVX2 && end_perm)
48758 op = gen_reg_rtx (d->vmode);
48759 t = gen_reg_rtx (V4DImode);
48760 emit_insn (gen_pack (op, dop0, dop1));
48761 emit_insn (gen_avx2_permv4di_1 (t,
48762 gen_lowpart (V4DImode, op),
48763 const0_rtx,
48764 const2_rtx,
48765 const1_rtx,
48766 GEN_INT (3)));
48767 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48769 else
48770 emit_insn (gen_pack (d->target, dop0, dop1));
48772 return true;
48775 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48776 and extract-odd permutations. */
48778 static bool
48779 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48781 rtx t1, t2, t3, t4, t5;
48783 switch (d->vmode)
48785 case V4DFmode:
48786 if (d->testing_p)
48787 break;
48788 t1 = gen_reg_rtx (V4DFmode);
48789 t2 = gen_reg_rtx (V4DFmode);
48791 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48792 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48793 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48795 /* Now an unpck[lh]pd will produce the result required. */
48796 if (odd)
48797 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48798 else
48799 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48800 emit_insn (t3);
48801 break;
48803 case V8SFmode:
48805 int mask = odd ? 0xdd : 0x88;
48807 if (d->testing_p)
48808 break;
48809 t1 = gen_reg_rtx (V8SFmode);
48810 t2 = gen_reg_rtx (V8SFmode);
48811 t3 = gen_reg_rtx (V8SFmode);
48813 /* Shuffle within the 128-bit lanes to produce:
48814 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48815 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48816 GEN_INT (mask)));
48818 /* Shuffle the lanes around to produce:
48819 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48820 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48821 GEN_INT (0x3)));
48823 /* Shuffle within the 128-bit lanes to produce:
48824 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48825 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48827 /* Shuffle within the 128-bit lanes to produce:
48828 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48829 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48831 /* Shuffle the lanes around to produce:
48832 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48833 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48834 GEN_INT (0x20)));
48836 break;
48838 case V2DFmode:
48839 case V4SFmode:
48840 case V2DImode:
48841 case V4SImode:
48842 /* These are always directly implementable by expand_vec_perm_1. */
48843 gcc_unreachable ();
48845 case V8HImode:
48846 if (TARGET_SSE4_1)
48847 return expand_vec_perm_even_odd_pack (d);
48848 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48849 return expand_vec_perm_pshufb2 (d);
48850 else
48852 if (d->testing_p)
48853 break;
48854 /* We need 2*log2(N)-1 operations to achieve odd/even
48855 with interleave. */
48856 t1 = gen_reg_rtx (V8HImode);
48857 t2 = gen_reg_rtx (V8HImode);
48858 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48859 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48860 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48861 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48862 if (odd)
48863 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48864 else
48865 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48866 emit_insn (t3);
48868 break;
48870 case V16QImode:
48871 return expand_vec_perm_even_odd_pack (d);
48873 case V16HImode:
48874 case V32QImode:
48875 return expand_vec_perm_even_odd_pack (d);
48877 case V4DImode:
48878 if (!TARGET_AVX2)
48880 struct expand_vec_perm_d d_copy = *d;
48881 d_copy.vmode = V4DFmode;
48882 if (d->testing_p)
48883 d_copy.target = gen_lowpart (V4DFmode, d->target);
48884 else
48885 d_copy.target = gen_reg_rtx (V4DFmode);
48886 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48887 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48888 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48890 if (!d->testing_p)
48891 emit_move_insn (d->target,
48892 gen_lowpart (V4DImode, d_copy.target));
48893 return true;
48895 return false;
48898 if (d->testing_p)
48899 break;
48901 t1 = gen_reg_rtx (V4DImode);
48902 t2 = gen_reg_rtx (V4DImode);
48904 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48905 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48906 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48908 /* Now a vpunpck[lh]qdq will produce the result required. */
48909 if (odd)
48910 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48911 else
48912 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48913 emit_insn (t3);
48914 break;
48916 case V8SImode:
48917 if (!TARGET_AVX2)
48919 struct expand_vec_perm_d d_copy = *d;
48920 d_copy.vmode = V8SFmode;
48921 if (d->testing_p)
48922 d_copy.target = gen_lowpart (V8SFmode, d->target);
48923 else
48924 d_copy.target = gen_reg_rtx (V8SFmode);
48925 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48926 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48927 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48929 if (!d->testing_p)
48930 emit_move_insn (d->target,
48931 gen_lowpart (V8SImode, d_copy.target));
48932 return true;
48934 return false;
48937 if (d->testing_p)
48938 break;
48940 t1 = gen_reg_rtx (V8SImode);
48941 t2 = gen_reg_rtx (V8SImode);
48942 t3 = gen_reg_rtx (V4DImode);
48943 t4 = gen_reg_rtx (V4DImode);
48944 t5 = gen_reg_rtx (V4DImode);
48946 /* Shuffle the lanes around into
48947 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48948 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48949 gen_lowpart (V4DImode, d->op1),
48950 GEN_INT (0x20)));
48951 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48952 gen_lowpart (V4DImode, d->op1),
48953 GEN_INT (0x31)));
48955 /* Swap the 2nd and 3rd position in each lane into
48956 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48957 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48958 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48959 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48960 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48962 /* Now a vpunpck[lh]qdq will produce
48963 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48964 if (odd)
48965 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48966 gen_lowpart (V4DImode, t2));
48967 else
48968 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48969 gen_lowpart (V4DImode, t2));
48970 emit_insn (t3);
48971 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48972 break;
48974 default:
48975 gcc_unreachable ();
48978 return true;
48981 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48982 extract-even and extract-odd permutations. */
48984 static bool
48985 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48987 unsigned i, odd, nelt = d->nelt;
48989 odd = d->perm[0];
48990 if (odd != 0 && odd != 1)
48991 return false;
48993 for (i = 1; i < nelt; ++i)
48994 if (d->perm[i] != 2 * i + odd)
48995 return false;
48997 return expand_vec_perm_even_odd_1 (d, odd);
49000 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49001 permutations. We assume that expand_vec_perm_1 has already failed. */
49003 static bool
49004 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49006 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49007 machine_mode vmode = d->vmode;
49008 unsigned char perm2[4];
49009 rtx op0 = d->op0, dest;
49010 bool ok;
49012 switch (vmode)
49014 case V4DFmode:
49015 case V8SFmode:
49016 /* These are special-cased in sse.md so that we can optionally
49017 use the vbroadcast instruction. They expand to two insns
49018 if the input happens to be in a register. */
49019 gcc_unreachable ();
49021 case V2DFmode:
49022 case V2DImode:
49023 case V4SFmode:
49024 case V4SImode:
49025 /* These are always implementable using standard shuffle patterns. */
49026 gcc_unreachable ();
49028 case V8HImode:
49029 case V16QImode:
49030 /* These can be implemented via interleave. We save one insn by
49031 stopping once we have promoted to V4SImode and then use pshufd. */
49032 if (d->testing_p)
49033 return true;
49036 rtx dest;
49037 rtx (*gen) (rtx, rtx, rtx)
49038 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49039 : gen_vec_interleave_lowv8hi;
49041 if (elt >= nelt2)
49043 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49044 : gen_vec_interleave_highv8hi;
49045 elt -= nelt2;
49047 nelt2 /= 2;
49049 dest = gen_reg_rtx (vmode);
49050 emit_insn (gen (dest, op0, op0));
49051 vmode = get_mode_wider_vector (vmode);
49052 op0 = gen_lowpart (vmode, dest);
49054 while (vmode != V4SImode);
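/* The wanted value now sits in SImode element ELT of OP0; broadcast it
   to all four elements with a single pshufd.  */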
49056 memset (perm2, elt, 4);
49057 dest = gen_reg_rtx (V4SImode);
49058 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49059 gcc_assert (ok);
49060 if (!d->testing_p)
49061 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49062 return true;
49064 case V64QImode:
49065 case V32QImode:
49066 case V16HImode:
49067 case V8SImode:
49068 case V4DImode:
49069 /* For AVX2 broadcasts of the first element vpbroadcast* or
49070 vpermq should be used by expand_vec_perm_1. */
49071 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49072 return false;
49074 default:
49075 gcc_unreachable ();
49079 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49080 broadcast permutations. */
49082 static bool
49083 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49085 unsigned i, elt, nelt = d->nelt;
49087 if (!d->one_operand_p)
49088 return false;
49090 elt = d->perm[0];
49091 for (i = 1; i < nelt; ++i)
49092 if (d->perm[i] != elt)
49093 return false;
49095 return expand_vec_perm_broadcast_1 (d);
49098 /* Implement arbitrary permutations of two V64QImode operands
49099 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49100 static bool
49101 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49103 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49104 return false;
49106 if (d->testing_p)
49107 return true;
49109 struct expand_vec_perm_d ds[2];
49110 rtx rperm[128], vperm, target0, target1;
49111 unsigned int i, nelt;
49112 machine_mode vmode;
49114 nelt = d->nelt;
49115 vmode = V64QImode;
49117 for (i = 0; i < 2; i++)
49119 ds[i] = *d;
49120 ds[i].vmode = V32HImode;
49121 ds[i].nelt = 32;
49122 ds[i].target = gen_reg_rtx (V32HImode);
49123 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49124 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49127 /* Prepare permutations such that the first one takes care of
49128 putting the even bytes into the right positions or one position
49129 higher (ds[0]) and the second one takes care of putting the odd
49130 bytes into the right positions or one position lower
49131 (ds[1]). */
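/* Each vpermi2w leaves the wanted byte either at its final position or
   in the adjacent byte of the same word; the vpshufb masks built in
   RPERM pick the correct byte, zero the positions handled by the other
   half, and the final vpor merges the two results.  */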
49133 for (i = 0; i < nelt; i++)
49135 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49136 if (i & 1)
49138 rperm[i] = constm1_rtx;
49139 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49141 else
49143 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49144 rperm[i + 64] = constm1_rtx;
49148 bool ok = expand_vec_perm_1 (&ds[0]);
49149 gcc_assert (ok);
49150 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49152 ok = expand_vec_perm_1 (&ds[1]);
49153 gcc_assert (ok);
49154 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49156 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49157 vperm = force_reg (vmode, vperm);
49158 target0 = gen_reg_rtx (V64QImode);
49159 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49161 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49162 vperm = force_reg (vmode, vperm);
49163 target1 = gen_reg_rtx (V64QImode);
49164 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49166 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49167 return true;
49170 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49171 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49172 all the shorter instruction sequences. */
49174 static bool
49175 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49177 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49178 unsigned int i, nelt, eltsz;
49179 bool used[4];
49181 if (!TARGET_AVX2
49182 || d->one_operand_p
49183 || (d->vmode != V32QImode && d->vmode != V16HImode))
49184 return false;
49186 if (d->testing_p)
49187 return true;
49189 nelt = d->nelt;
49190 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49192 /* Generate 4 permutation masks. If the required element is within
49193 the same lane, it is shuffled in. If the required element is from the
49194 other lane, force a zero by setting bit 7 in the permutation mask.
49195 In the other mask the entries are non-negative when the element is
49196 requested from the other lane; that element is also moved to the other lane,
49197 so that the result of vpshufb can have the two V2TImode halves
49198 swapped. */
49199 m128 = GEN_INT (-128);
49200 for (i = 0; i < 32; ++i)
49202 rperm[0][i] = m128;
49203 rperm[1][i] = m128;
49204 rperm[2][i] = m128;
49205 rperm[3][i] = m128;
49207 used[0] = false;
49208 used[1] = false;
49209 used[2] = false;
49210 used[3] = false;
49211 for (i = 0; i < nelt; ++i)
49213 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49214 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49215 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49217 for (j = 0; j < eltsz; ++j)
49218 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49219 used[which] = true;
49222 for (i = 0; i < 2; ++i)
49224 if (!used[2 * i + 1])
49226 h[i] = NULL_RTX;
49227 continue;
49229 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49230 gen_rtvec_v (32, rperm[2 * i + 1]));
49231 vperm = force_reg (V32QImode, vperm);
49232 h[i] = gen_reg_rtx (V32QImode);
49233 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49234 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49237 /* Swap the 128-bit lanes of h[X]. */
49238 for (i = 0; i < 2; ++i)
49240 if (h[i] == NULL_RTX)
49241 continue;
49242 op = gen_reg_rtx (V4DImode);
49243 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49244 const2_rtx, GEN_INT (3), const0_rtx,
49245 const1_rtx));
49246 h[i] = gen_lowpart (V32QImode, op);
49249 for (i = 0; i < 2; ++i)
49251 if (!used[2 * i])
49253 l[i] = NULL_RTX;
49254 continue;
49256 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49257 vperm = force_reg (V32QImode, vperm);
49258 l[i] = gen_reg_rtx (V32QImode);
49259 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49260 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49263 for (i = 0; i < 2; ++i)
49265 if (h[i] && l[i])
49267 op = gen_reg_rtx (V32QImode);
49268 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49269 l[i] = op;
49271 else if (h[i])
49272 l[i] = h[i];
49275 gcc_assert (l[0] && l[1]);
49276 op = d->target;
49277 if (d->vmode != V32QImode)
49278 op = gen_reg_rtx (V32QImode);
49279 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49280 if (op != d->target)
49281 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49282 return true;
49285 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49286 With all of the interface bits taken care of, perform the expansion
49287 in D and return true on success. */
49289 static bool
49290 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49292 /* Try a single instruction expansion. */
49293 if (expand_vec_perm_1 (d))
49294 return true;
49296 /* Try sequences of two instructions. */
49298 if (expand_vec_perm_pshuflw_pshufhw (d))
49299 return true;
49301 if (expand_vec_perm_palignr (d, false))
49302 return true;
49304 if (expand_vec_perm_interleave2 (d))
49305 return true;
49307 if (expand_vec_perm_broadcast (d))
49308 return true;
49310 if (expand_vec_perm_vpermq_perm_1 (d))
49311 return true;
49313 if (expand_vec_perm_vperm2f128 (d))
49314 return true;
49316 if (expand_vec_perm_pblendv (d))
49317 return true;
49319 /* Try sequences of three instructions. */
49321 if (expand_vec_perm_even_odd_pack (d))
49322 return true;
49324 if (expand_vec_perm_2vperm2f128_vshuf (d))
49325 return true;
49327 if (expand_vec_perm_pshufb2 (d))
49328 return true;
49330 if (expand_vec_perm_interleave3 (d))
49331 return true;
49333 if (expand_vec_perm_vperm2f128_vblend (d))
49334 return true;
49336 /* Try sequences of four instructions. */
49338 if (expand_vec_perm_vpshufb2_vpermq (d))
49339 return true;
49341 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49342 return true;
49344 if (expand_vec_perm_vpermi2_vpshub2 (d))
49345 return true;
49347 /* ??? Look for narrow permutations whose element orderings would
49348 allow the promotion to a wider mode. */
49350 /* ??? Look for sequences of interleave or a wider permute that place
49351 the data into the correct lanes for a half-vector shuffle like
49352 pshuf[lh]w or vpermilps. */
49354 /* ??? Look for sequences of interleave that produce the desired results.
49355 The combinatorics of punpck[lh] get pretty ugly... */
49357 if (expand_vec_perm_even_odd (d))
49358 return true;
49360 /* Even longer sequences. */
49361 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49362 return true;
49364 return false;
49367 /* If a permutation only uses one operand, make it clear. Returns true
49368 if the permutation references both operands. */
49370 static bool
49371 canonicalize_perm (struct expand_vec_perm_d *d)
49373 int i, which, nelt = d->nelt;
49375 for (i = which = 0; i < nelt; ++i)
49376 which |= (d->perm[i] < nelt ? 1 : 2);
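/* WHICH is a mask of the operands referenced: bit 0 for the first
   operand, bit 1 for the second.  */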
49378 d->one_operand_p = true;
49379 switch (which)
49381 default:
49382 gcc_unreachable();
49384 case 3:
49385 if (!rtx_equal_p (d->op0, d->op1))
49387 d->one_operand_p = false;
49388 break;
49390 /* The elements of PERM do not suggest that only the first operand
49391 is used, but both operands are identical. Allow easier matching
49392 of the permutation by folding the permutation into the single
49393 input vector. */
49394 /* FALLTHRU */
49396 case 2:
49397 for (i = 0; i < nelt; ++i)
49398 d->perm[i] &= nelt - 1;
49399 d->op0 = d->op1;
49400 break;
49402 case 1:
49403 d->op1 = d->op0;
49404 break;
49407 return (which == 3);
49410 bool
49411 ix86_expand_vec_perm_const (rtx operands[4])
49413 struct expand_vec_perm_d d;
49414 unsigned char perm[MAX_VECT_LEN];
49415 int i, nelt;
49416 bool two_args;
49417 rtx sel;
49419 d.target = operands[0];
49420 d.op0 = operands[1];
49421 d.op1 = operands[2];
49422 sel = operands[3];
49424 d.vmode = GET_MODE (d.target);
49425 gcc_assert (VECTOR_MODE_P (d.vmode));
49426 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49427 d.testing_p = false;
49429 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49430 gcc_assert (XVECLEN (sel, 0) == nelt);
49431 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49433 for (i = 0; i < nelt; ++i)
49435 rtx e = XVECEXP (sel, 0, i);
49436 int ei = INTVAL (e) & (2 * nelt - 1);
49437 d.perm[i] = ei;
49438 perm[i] = ei;
49441 two_args = canonicalize_perm (&d);
49443 if (ix86_expand_vec_perm_const_1 (&d))
49444 return true;
49446 /* If the selector says both arguments are needed, but the operands are the
49447 same, the above tried to expand with one_operand_p and flattened selector.
49448 If that didn't work, retry without one_operand_p; we succeeded with that
49449 during testing. */
49450 if (two_args && d.one_operand_p)
49452 d.one_operand_p = false;
49453 memcpy (d.perm, perm, sizeof (perm));
49454 return ix86_expand_vec_perm_const_1 (&d);
49457 return false;
49460 /* Implement targetm.vectorize.vec_perm_const_ok. */
49462 static bool
49463 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49464 const unsigned char *sel)
49466 struct expand_vec_perm_d d;
49467 unsigned int i, nelt, which;
49468 bool ret;
49470 d.vmode = vmode;
49471 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49472 d.testing_p = true;
49474 /* Given sufficient ISA support we can just return true here
49475 for selected vector modes. */
49476 switch (d.vmode)
49478 case V16SFmode:
49479 case V16SImode:
49480 case V8DImode:
49481 case V8DFmode:
49482 if (TARGET_AVX512F)
49483 /* All implementable with a single vpermi2 insn. */
49484 return true;
49485 break;
49486 case V32HImode:
49487 if (TARGET_AVX512BW)
49488 /* All implementable with a single vpermi2 insn. */
49489 return true;
49490 break;
49491 case V64QImode:
49492 if (TARGET_AVX512BW)
49493 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49494 return true;
49495 break;
49496 case V8SImode:
49497 case V8SFmode:
49498 case V4DFmode:
49499 case V4DImode:
49500 if (TARGET_AVX512VL)
49501 /* All implementable with a single vpermi2 insn. */
49502 return true;
49503 break;
49504 case V16HImode:
49505 if (TARGET_AVX2)
49506 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49507 return true;
49508 break;
49509 case V32QImode:
49510 if (TARGET_AVX2)
49511 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49512 return true;
49513 break;
49514 case V4SImode:
49515 case V4SFmode:
49516 case V8HImode:
49517 case V16QImode:
49518 /* All implementable with a single vpperm insn. */
49519 if (TARGET_XOP)
49520 return true;
49521 /* All implementable with 2 pshufb + 1 ior. */
49522 if (TARGET_SSSE3)
49523 return true;
49524 break;
49525 case V2DImode:
49526 case V2DFmode:
49527 /* All implementable with shufpd or unpck[lh]pd. */
49528 return true;
49529 default:
49530 return false;
49533 /* Extract the values from the vector CST into the permutation
49534 array in D. */
49535 memcpy (d.perm, sel, nelt);
49536 for (i = which = 0; i < nelt; ++i)
49538 unsigned char e = d.perm[i];
49539 gcc_assert (e < 2 * nelt);
49540 which |= (e < nelt ? 1 : 2);
49543 /* For all elements from second vector, fold the elements to first. */
49544 if (which == 2)
49545 for (i = 0; i < nelt; ++i)
49546 d.perm[i] -= nelt;
49548 /* Check whether the mask can be applied to the vector type. */
49549 d.one_operand_p = (which != 3);
49551 /* Implementable with shufps or pshufd. */
49552 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49553 return true;
49555 /* Otherwise we have to go through the motions and see if we can
49556 figure out how to generate the requested permutation. */
49557 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49558 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49559 if (!d.one_operand_p)
49560 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
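/* Expand into a scratch sequence on fake registers; we only care
   whether the expansion succeeds, the insns are thrown away.  */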
49562 start_sequence ();
49563 ret = ix86_expand_vec_perm_const_1 (&d);
49564 end_sequence ();
49566 return ret;
49569 void
49570 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49572 struct expand_vec_perm_d d;
49573 unsigned i, nelt;
49575 d.target = targ;
49576 d.op0 = op0;
49577 d.op1 = op1;
49578 d.vmode = GET_MODE (targ);
49579 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49580 d.one_operand_p = false;
49581 d.testing_p = false;
49583 for (i = 0; i < nelt; ++i)
49584 d.perm[i] = i * 2 + odd;
49586 /* We'll either be able to implement the permutation directly... */
49587 if (expand_vec_perm_1 (&d))
49588 return;
49590 /* ... or we use the special-case patterns. */
49591 expand_vec_perm_even_odd_1 (&d, odd);
49594 static void
49595 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49597 struct expand_vec_perm_d d;
49598 unsigned i, nelt, base;
49599 bool ok;
49601 d.target = targ;
49602 d.op0 = op0;
49603 d.op1 = op1;
49604 d.vmode = GET_MODE (targ);
49605 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49606 d.one_operand_p = false;
49607 d.testing_p = false;
49609 base = high_p ? nelt / 2 : 0;
49610 for (i = 0; i < nelt / 2; ++i)
49612 d.perm[i * 2] = i + base;
49613 d.perm[i * 2 + 1] = i + base + nelt;
49616 /* Note that for AVX this isn't one instruction. */
49617 ok = ix86_expand_vec_perm_const_1 (&d);
49618 gcc_assert (ok);
49622 /* Expand a vector operation CODE for a V*QImode in terms of the
49623 same operation on V*HImode. */
49625 void
49626 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49628 machine_mode qimode = GET_MODE (dest);
49629 machine_mode himode;
49630 rtx (*gen_il) (rtx, rtx, rtx);
49631 rtx (*gen_ih) (rtx, rtx, rtx);
49632 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49633 struct expand_vec_perm_d d;
49634 bool ok, full_interleave;
49635 bool uns_p = false;
49636 int i;
49638 switch (qimode)
49640 case V16QImode:
49641 himode = V8HImode;
49642 gen_il = gen_vec_interleave_lowv16qi;
49643 gen_ih = gen_vec_interleave_highv16qi;
49644 break;
49645 case V32QImode:
49646 himode = V16HImode;
49647 gen_il = gen_avx2_interleave_lowv32qi;
49648 gen_ih = gen_avx2_interleave_highv32qi;
49649 break;
49650 case V64QImode:
49651 himode = V32HImode;
49652 gen_il = gen_avx512bw_interleave_lowv64qi;
49653 gen_ih = gen_avx512bw_interleave_highv64qi;
49654 break;
49655 default:
49656 gcc_unreachable ();
49659 op2_l = op2_h = op2;
49660 switch (code)
49662 case MULT:
49663 /* Unpack data such that we've got a source byte in each low byte of
49664 each word. We don't care what goes into the high byte of each word.
49665 Rather than trying to get zero in there, most convenient is to let
49666 it be a copy of the low byte. */
49667 op2_l = gen_reg_rtx (qimode);
49668 op2_h = gen_reg_rtx (qimode);
49669 emit_insn (gen_il (op2_l, op2, op2));
49670 emit_insn (gen_ih (op2_h, op2, op2));
49671 /* FALLTHRU */
49673 op1_l = gen_reg_rtx (qimode);
49674 op1_h = gen_reg_rtx (qimode);
49675 emit_insn (gen_il (op1_l, op1, op1));
49676 emit_insn (gen_ih (op1_h, op1, op1));
49677 full_interleave = qimode == V16QImode;
49678 break;
49680 case ASHIFT:
49681 case LSHIFTRT:
49682 uns_p = true;
49683 /* FALLTHRU */
49684 case ASHIFTRT:
49685 op1_l = gen_reg_rtx (himode);
49686 op1_h = gen_reg_rtx (himode);
49687 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49688 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49689 full_interleave = true;
49690 break;
49691 default:
49692 gcc_unreachable ();
49695 /* Perform the operation. */
49696 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49697 1, OPTAB_DIRECT);
49698 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49699 1, OPTAB_DIRECT);
49700 gcc_assert (res_l && res_h);
49702 /* Merge the data back into the right place. */
49703 d.target = dest;
49704 d.op0 = gen_lowpart (qimode, res_l);
49705 d.op1 = gen_lowpart (qimode, res_h);
49706 d.vmode = qimode;
49707 d.nelt = GET_MODE_NUNITS (qimode);
49708 d.one_operand_p = false;
49709 d.testing_p = false;
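/* The loops below fill all 64 entries of d.perm; only the first d.nelt
   entries are actually consulted.  */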
49711 if (full_interleave)
49713 /* For SSE2, we used a full interleave, so the desired
49714 results are in the even elements. */
49715 for (i = 0; i < 64; ++i)
49716 d.perm[i] = i * 2;
49718 else
49720 /* For AVX, the interleave used above was not cross-lane. So the
49721 extraction is evens but with the second and third quarters swapped.
49722 Happily, that is even one insn shorter than even extraction. */
49723 for (i = 0; i < 64; ++i)
49724 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49727 ok = ix86_expand_vec_perm_const_1 (&d);
49728 gcc_assert (ok);
49730 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49731 gen_rtx_fmt_ee (code, qimode, op1, op2));
49734 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49735 if op is CONST_VECTOR with all odd elements equal to their
49736 preceding element. */
49738 static bool
49739 const_vector_equal_evenodd_p (rtx op)
49741 machine_mode mode = GET_MODE (op);
49742 int i, nunits = GET_MODE_NUNITS (mode);
49743 if (GET_CODE (op) != CONST_VECTOR
49744 || nunits != CONST_VECTOR_NUNITS (op))
49745 return false;
49746 for (i = 0; i < nunits; i += 2)
49747 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49748 return false;
49749 return true;
49752 void
49753 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49754 bool uns_p, bool odd_p)
49756 machine_mode mode = GET_MODE (op1);
49757 machine_mode wmode = GET_MODE (dest);
49758 rtx x;
49759 rtx orig_op1 = op1, orig_op2 = op2;
49761 if (!nonimmediate_operand (op1, mode))
49762 op1 = force_reg (mode, op1);
49763 if (!nonimmediate_operand (op2, mode))
49764 op2 = force_reg (mode, op2);
49766 /* We only play even/odd games with vectors of SImode. */
49767 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49769 /* If we're looking for the odd results, shift those members down to
49770 the even slots. For some cpus this is faster than a PSHUFD. */
49771 if (odd_p)
49773 /* For XOP use vpmacsdqh, but only for smult, as it is only
49774 signed. */
49775 if (TARGET_XOP && mode == V4SImode && !uns_p)
49777 x = force_reg (wmode, CONST0_RTX (wmode));
49778 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49779 return;
49782 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49783 if (!const_vector_equal_evenodd_p (orig_op1))
49784 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49785 x, NULL, 1, OPTAB_DIRECT);
49786 if (!const_vector_equal_evenodd_p (orig_op2))
49787 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49788 x, NULL, 1, OPTAB_DIRECT);
49789 op1 = gen_lowpart (mode, op1);
49790 op2 = gen_lowpart (mode, op2);
49793 if (mode == V16SImode)
49795 if (uns_p)
49796 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49797 else
49798 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49800 else if (mode == V8SImode)
49802 if (uns_p)
49803 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49804 else
49805 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49807 else if (uns_p)
49808 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49809 else if (TARGET_SSE4_1)
49810 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49811 else
49813 rtx s1, s2, t0, t1, t2;
49815 /* The easiest way to implement this without PMULDQ is to go through
49816 the motions as if we were performing a full 64-bit multiply, except
49817 that we need to do less shuffling of the elements. */
49819 /* Compute the sign-extension, aka highparts, of the two operands. */
49820 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49821 op1, pc_rtx, pc_rtx);
49822 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49823 op2, pc_rtx, pc_rtx);
49825 /* Multiply LO(A) * HI(B), and vice-versa. */
49826 t1 = gen_reg_rtx (wmode);
49827 t2 = gen_reg_rtx (wmode);
49828 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49829 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49831 /* Multiply LO(A) * LO(B). */
49832 t0 = gen_reg_rtx (wmode);
49833 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49835 /* Combine and shift the highparts into place. */
49836 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49837 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49838 1, OPTAB_DIRECT);
49840 /* Combine high and low parts. */
49841 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49842 return;
49844 emit_insn (x);
49847 void
49848 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49849 bool uns_p, bool high_p)
49851 machine_mode wmode = GET_MODE (dest);
49852 machine_mode mode = GET_MODE (op1);
49853 rtx t1, t2, t3, t4, mask;
49855 switch (mode)
49857 case V4SImode:
49858 t1 = gen_reg_rtx (mode);
49859 t2 = gen_reg_rtx (mode);
49860 if (TARGET_XOP && !uns_p)
49862 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49863 shuffle the elements once so that all elements are in the right
49864 place for immediate use: { A C B D }. */
49865 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49866 const1_rtx, GEN_INT (3)));
49867 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49868 const1_rtx, GEN_INT (3)));
49870 else
49872 /* Put the elements into place for the multiply. */
49873 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49874 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49875 high_p = false;
49877 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49878 break;
49880 case V8SImode:
49881 /* Shuffle the elements between the lanes. After this we
49882 have { A B E F | C D G H } for each operand. */
49883 t1 = gen_reg_rtx (V4DImode);
49884 t2 = gen_reg_rtx (V4DImode);
49885 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49886 const0_rtx, const2_rtx,
49887 const1_rtx, GEN_INT (3)));
49888 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49889 const0_rtx, const2_rtx,
49890 const1_rtx, GEN_INT (3)));
49892 /* Shuffle the elements within the lanes. After this we
49893 have { A A B B | C C D D } or { E E F F | G G H H }. */
49894 t3 = gen_reg_rtx (V8SImode);
49895 t4 = gen_reg_rtx (V8SImode);
49896 mask = GEN_INT (high_p
49897 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49898 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49899 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49900 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49902 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49903 break;
49905 case V8HImode:
49906 case V16HImode:
49907 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49908 uns_p, OPTAB_DIRECT);
49909 t2 = expand_binop (mode,
49910 uns_p ? umul_highpart_optab : smul_highpart_optab,
49911 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49912 gcc_assert (t1 && t2);
49914 t3 = gen_reg_rtx (mode);
49915 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49916 emit_move_insn (dest, gen_lowpart (wmode, t3));
49917 break;
49919 case V16QImode:
49920 case V32QImode:
49921 case V32HImode:
49922 case V16SImode:
49923 case V64QImode:
49924 t1 = gen_reg_rtx (wmode);
49925 t2 = gen_reg_rtx (wmode);
49926 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49927 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49929 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49930 break;
49932 default:
49933 gcc_unreachable ();
49937 void
49938 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49940 rtx res_1, res_2, res_3, res_4;
49942 res_1 = gen_reg_rtx (V4SImode);
49943 res_2 = gen_reg_rtx (V4SImode);
49944 res_3 = gen_reg_rtx (V2DImode);
49945 res_4 = gen_reg_rtx (V2DImode);
49946 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49947 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49949 /* Move the results in element 2 down to element 1; we don't care
49950 what goes in elements 2 and 3. Then we can merge the parts
49951 back together with an interleave.
49953 Note that two other sequences were tried:
49954 (1) Use interleaves at the start instead of psrldq, which allows
49955 us to use a single shufps to merge things back at the end.
49956 (2) Use shufps here to combine the two vectors, then pshufd to
49957 put the elements in the correct order.
49958 In both cases the cost of the reformatting stall was too high
49959 and the overall sequence slower. */
49961 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49962 const0_rtx, const2_rtx,
49963 const0_rtx, const0_rtx));
49964 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49965 const0_rtx, const2_rtx,
49966 const0_rtx, const0_rtx));
49967 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49969 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49972 void
49973 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49975 machine_mode mode = GET_MODE (op0);
49976 rtx t1, t2, t3, t4, t5, t6;
49978 if (TARGET_AVX512DQ && mode == V8DImode)
49979 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49980 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49981 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49982 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49983 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49984 else if (TARGET_XOP && mode == V2DImode)
49986 /* op1: A,B,C,D, op2: E,F,G,H */
49987 op1 = gen_lowpart (V4SImode, op1);
49988 op2 = gen_lowpart (V4SImode, op2);
49990 t1 = gen_reg_rtx (V4SImode);
49991 t2 = gen_reg_rtx (V4SImode);
49992 t3 = gen_reg_rtx (V2DImode);
49993 t4 = gen_reg_rtx (V2DImode);
49995 /* t1: B,A,D,C */
49996 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49997 GEN_INT (1),
49998 GEN_INT (0),
49999 GEN_INT (3),
50000 GEN_INT (2)));
50002 /* t2: (B*E),(A*F),(D*G),(C*H) */
50003 emit_insn (gen_mulv4si3 (t2, t1, op2));
50005 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50006 emit_insn (gen_xop_phadddq (t3, t2));
50008 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50009 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50011 /* Multiply the lower parts and add everything together. */
50012 t5 = gen_reg_rtx (V2DImode);
50013 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50014 gen_lowpart (V4SImode, op1),
50015 gen_lowpart (V4SImode, op2)));
50016 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50019 else
50021 machine_mode nmode;
50022 rtx (*umul) (rtx, rtx, rtx);
50024 if (mode == V2DImode)
50026 umul = gen_vec_widen_umult_even_v4si;
50027 nmode = V4SImode;
50029 else if (mode == V4DImode)
50031 umul = gen_vec_widen_umult_even_v8si;
50032 nmode = V8SImode;
50034 else if (mode == V8DImode)
50036 umul = gen_vec_widen_umult_even_v16si;
50037 nmode = V16SImode;
50039 else
50040 gcc_unreachable ();
50043 /* Multiply low parts. */
50044 t1 = gen_reg_rtx (mode);
50045 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50047 /* Shift input vectors right 32 bits so we can multiply high parts. */
50048 t6 = GEN_INT (32);
50049 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50050 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50052 /* Multiply high parts by low parts. */
50053 t4 = gen_reg_rtx (mode);
50054 t5 = gen_reg_rtx (mode);
50055 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50056 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50058 /* Combine and shift the highparts back. */
50059 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50060 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50062 /* Combine high and low parts. */
50063 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50066 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50067 gen_rtx_MULT (mode, op1, op2));
50070 /* Return 1 if control transfer instruction INSN
50071 should be encoded with the bnd prefix.
50072 If INSN is NULL then return 1 when control
50073 transfer instructions should be prefixed with
50074 bnd by default for the current function. */
50076 bool
50077 ix86_bnd_prefixed_insn_p (rtx insn)
50079 /* For call insns check special flag. */
50080 if (insn && CALL_P (insn))
50082 rtx call = get_call_rtx_from (insn);
50083 if (call)
50084 return CALL_EXPR_WITH_BOUNDS_P (call);
50087 /* All other insns are prefixed only if the function is instrumented. */
50088 return chkp_function_instrumented_p (current_function_decl);
50091 /* Calculate integer abs() using only SSE2 instructions. */
50093 void
50094 ix86_expand_sse2_abs (rtx target, rtx input)
50096 machine_mode mode = GET_MODE (target);
50097 rtx tmp0, tmp1, x;
50099 switch (mode)
50101 /* For 32-bit signed integer X, the best way to calculate the absolute
50102 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50103 case V4SImode:
50104 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50105 GEN_INT (GET_MODE_BITSIZE
50106 (GET_MODE_INNER (mode)) - 1),
50107 NULL, 0, OPTAB_DIRECT);
50108 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50109 NULL, 0, OPTAB_DIRECT);
50110 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50111 target, 0, OPTAB_DIRECT);
50112 break;
50114 /* For 16-bit signed integer X, the best way to calculate the absolute
50115 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50116 case V8HImode:
50117 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50119 x = expand_simple_binop (mode, SMAX, tmp0, input,
50120 target, 0, OPTAB_DIRECT);
50121 break;
50123 /* For 8-bit signed integer X, the best way to calculate the absolute
50124 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50125 as SSE2 provides the PMINUB insn. */
50126 case V16QImode:
50127 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50129 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50130 target, 0, OPTAB_DIRECT);
50131 break;
50133 default:
50134 gcc_unreachable ();
50137 if (x != target)
50138 emit_move_insn (target, x);
50141 /* Expand an insert into a vector register through pinsr insn.
50142 Return true if successful. */
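/* Editorial example (hypothetical operand values, not from the original
   source): inserting a 16-bit value at bit position 32 of a 128-bit vector
   gives size = 16 and pos = 32, so srcmode is HImode, dstmode is V8HImode,
   pos /= size leaves 2, and the emitted pinsrw receives GEN_INT (1 << 2)
   as the vec_merge mask selecting element 2.  */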
50144 bool
50145 ix86_expand_pinsr (rtx *operands)
50147 rtx dst = operands[0];
50148 rtx src = operands[3];
50150 unsigned int size = INTVAL (operands[1]);
50151 unsigned int pos = INTVAL (operands[2]);
50153 if (GET_CODE (dst) == SUBREG)
50155 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50156 dst = SUBREG_REG (dst);
50159 if (GET_CODE (src) == SUBREG)
50160 src = SUBREG_REG (src);
50162 switch (GET_MODE (dst))
50164 case V16QImode:
50165 case V8HImode:
50166 case V4SImode:
50167 case V2DImode:
50169 machine_mode srcmode, dstmode;
50170 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50172 srcmode = mode_for_size (size, MODE_INT, 0);
50174 switch (srcmode)
50176 case QImode:
50177 if (!TARGET_SSE4_1)
50178 return false;
50179 dstmode = V16QImode;
50180 pinsr = gen_sse4_1_pinsrb;
50181 break;
50183 case HImode:
50184 if (!TARGET_SSE2)
50185 return false;
50186 dstmode = V8HImode;
50187 pinsr = gen_sse2_pinsrw;
50188 break;
50190 case SImode:
50191 if (!TARGET_SSE4_1)
50192 return false;
50193 dstmode = V4SImode;
50194 pinsr = gen_sse4_1_pinsrd;
50195 break;
50197 case DImode:
50198 gcc_assert (TARGET_64BIT);
50199 if (!TARGET_SSE4_1)
50200 return false;
50201 dstmode = V2DImode;
50202 pinsr = gen_sse4_1_pinsrq;
50203 break;
50205 default:
50206 return false;
50209 rtx d = dst;
50210 if (GET_MODE (dst) != dstmode)
50211 d = gen_reg_rtx (dstmode);
50212 src = gen_lowpart (srcmode, src);
50214 pos /= size;
50216 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50217 GEN_INT (1 << pos)));
50218 if (d != dst)
50219 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50220 return true;
50223 default:
50224 return false;
50228 /* This function returns the calling-ABI-specific va_list type node.
50229 It returns the FNDECL specific va_list type. */
50231 static tree
50232 ix86_fn_abi_va_list (tree fndecl)
50234 if (!TARGET_64BIT)
50235 return va_list_type_node;
50236 gcc_assert (fndecl != NULL_TREE);
50238 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50239 return ms_va_list_type_node;
50240 else
50241 return sysv_va_list_type_node;
50244 /* Returns the canonical va_list type specified by TYPE. If there
50245 is no valid TYPE provided, it returns NULL_TREE. */
50247 static tree
50248 ix86_canonical_va_list_type (tree type)
50250 tree wtype, htype;
50252 /* Resolve references and pointers to va_list type. */
50253 if (TREE_CODE (type) == MEM_REF)
50254 type = TREE_TYPE (type);
50255 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
50256 type = TREE_TYPE (type);
50257 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50258 type = TREE_TYPE (type);
50260 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50262 wtype = va_list_type_node;
50263 gcc_assert (wtype != NULL_TREE);
50264 htype = type;
50265 if (TREE_CODE (wtype) == ARRAY_TYPE)
50267 /* If va_list is an array type, the argument may have decayed
50268 to a pointer type, e.g. by being passed to another function.
50269 In that case, unwrap both types so that we can compare the
50270 underlying records. */
50271 if (TREE_CODE (htype) == ARRAY_TYPE
50272 || POINTER_TYPE_P (htype))
50274 wtype = TREE_TYPE (wtype);
50275 htype = TREE_TYPE (htype);
50278 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50279 return va_list_type_node;
50280 wtype = sysv_va_list_type_node;
50281 gcc_assert (wtype != NULL_TREE);
50282 htype = type;
50283 if (TREE_CODE (wtype) == ARRAY_TYPE)
50285 /* If va_list is an array type, the argument may have decayed
50286 to a pointer type, e.g. by being passed to another function.
50287 In that case, unwrap both types so that we can compare the
50288 underlying records. */
50289 if (TREE_CODE (htype) == ARRAY_TYPE
50290 || POINTER_TYPE_P (htype))
50292 wtype = TREE_TYPE (wtype);
50293 htype = TREE_TYPE (htype);
50296 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50297 return sysv_va_list_type_node;
50298 wtype = ms_va_list_type_node;
50299 gcc_assert (wtype != NULL_TREE);
50300 htype = type;
50301 if (TREE_CODE (wtype) == ARRAY_TYPE)
50303 /* If va_list is an array type, the argument may have decayed
50304 to a pointer type, e.g. by being passed to another function.
50305 In that case, unwrap both types so that we can compare the
50306 underlying records. */
50307 if (TREE_CODE (htype) == ARRAY_TYPE
50308 || POINTER_TYPE_P (htype))
50310 wtype = TREE_TYPE (wtype);
50311 htype = TREE_TYPE (htype);
50314 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50315 return ms_va_list_type_node;
50316 return NULL_TREE;
50318 return std_canonical_va_list_type (type);
50321 /* Iterate through the target-specific builtin types for va_list.
50322 IDX denotes the iterator, *PTREE is set to the result type of
50323 the va_list builtin, and *PNAME to its internal type.
50324 Returns zero if there is no element for this index, otherwise
50325 IDX should be increased upon the next call.
50326 Note, do not iterate a base builtin's name like __builtin_va_list.
50327 Used from c_common_nodes_and_builtins. */
50329 static int
50330 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50332 if (TARGET_64BIT)
50334 switch (idx)
50336 default:
50337 break;
50339 case 0:
50340 *ptree = ms_va_list_type_node;
50341 *pname = "__builtin_ms_va_list";
50342 return 1;
50344 case 1:
50345 *ptree = sysv_va_list_type_node;
50346 *pname = "__builtin_sysv_va_list";
50347 return 1;
50351 return 0;
50354 #undef TARGET_SCHED_DISPATCH
50355 #define TARGET_SCHED_DISPATCH has_dispatch
50356 #undef TARGET_SCHED_DISPATCH_DO
50357 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50358 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50359 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50360 #undef TARGET_SCHED_REORDER
50361 #define TARGET_SCHED_REORDER ix86_sched_reorder
50362 #undef TARGET_SCHED_ADJUST_PRIORITY
50363 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50364 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50365 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50366 ix86_dependencies_evaluation_hook
50368 /* The size of the dispatch window is the total number of bytes of
50369 object code allowed in a window. */
50370 #define DISPATCH_WINDOW_SIZE 16
50372 /* Number of dispatch windows considered for scheduling. */
50373 #define MAX_DISPATCH_WINDOWS 3
50375 /* Maximum number of instructions in a window. */
50376 #define MAX_INSN 4
50378 /* Maximum number of immediate operands in a window. */
50379 #define MAX_IMM 4
50381 /* Maximum number of immediate bits allowed in a window. */
50382 #define MAX_IMM_SIZE 128
50384 /* Maximum number of 32 bit immediates allowed in a window. */
50385 #define MAX_IMM_32 4
50387 /* Maximum number of 64 bit immediates allowed in a window. */
50388 #define MAX_IMM_64 2
50390 /* Maximum total of loads or prefetches allowed in a window. */
50391 #define MAX_LOAD 2
50393 /* Maximum total of stores allowed in a window. */
50394 #define MAX_STORE 1
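/* Editorial example derived from the limits above: one 16-byte window may
   hold at most MAX_INSN (4) instructions, at most MAX_LOAD (2) loads and
   MAX_STORE (1) store, and immediates totalling MAX_IMM_SIZE (128) bits,
   e.g. two 64-bit immediates or four 32-bit ones.  */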
50396 #undef BIG
50397 #define BIG 100
50400 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50401 enum dispatch_group {
50402 disp_no_group = 0,
50403 disp_load,
50404 disp_store,
50405 disp_load_store,
50406 disp_prefetch,
50407 disp_imm,
50408 disp_imm_32,
50409 disp_imm_64,
50410 disp_branch,
50411 disp_cmp,
50412 disp_jcc,
50413 disp_last
50416 /* Number of allowable groups in a dispatch window. It is an array
50417 indexed by the dispatch_group enum. 100 is used as a big number,
50418 because the number of these kinds of operations does not have any
50419 effect in a dispatch window, but we need them for other reasons in
50420 the table. */
50421 static unsigned int num_allowable_groups[disp_last] = {
50422 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50425 char group_name[disp_last + 1][16] = {
50426 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50427 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50428 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50431 /* Instruction path. */
50432 enum insn_path {
50433 no_path = 0,
50434 path_single, /* Single micro op. */
50435 path_double, /* Double micro op. */
50436 path_multi, /* Instructions with more than 2 micro ops. */
50437 last_path
50440 /* sched_insn_info defines a window to the instructions scheduled in
50441 the basic block. It contains a pointer to the insn_info table and
50442 the instruction scheduled.
50444 Windows are allocated for each basic block and are linked
50445 together. */
50446 typedef struct sched_insn_info_s {
50447 rtx insn;
50448 enum dispatch_group group;
50449 enum insn_path path;
50450 int byte_len;
50451 int imm_bytes;
50452 } sched_insn_info;
50454 /* Linked list of dispatch windows. This is a two way list of
50455 dispatch windows of a basic block. It contains information about
50456 the number of uops in the window and the total number of
50457 instructions and of bytes in the object code for this dispatch
50458 window. */
50459 typedef struct dispatch_windows_s {
50460 int num_insn; /* Number of insn in the window. */
50461 int num_uops; /* Number of uops in the window. */
50462 int window_size; /* Number of bytes in the window. */
50463 int window_num; /* Window number, either 0 or 1. */
50464 int num_imm; /* Number of immediates in an insn. */
50465 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50466 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50467 int imm_size; /* Total immediates in the window. */
50468 int num_loads; /* Total memory loads in the window. */
50469 int num_stores; /* Total memory stores in the window. */
50470 int violation; /* Violation exists in window. */
50471 sched_insn_info *window; /* Pointer to the window. */
50472 struct dispatch_windows_s *next;
50473 struct dispatch_windows_s *prev;
50474 } dispatch_windows;
50476 /* Immediate values used in an insn. */
50477 typedef struct imm_info_s
50479 int imm;
50480 int imm32;
50481 int imm64;
50482 } imm_info;
50484 static dispatch_windows *dispatch_window_list;
50485 static dispatch_windows *dispatch_window_list1;
50487 /* Get dispatch group of insn. */
50489 static enum dispatch_group
50490 get_mem_group (rtx_insn *insn)
50492 enum attr_memory memory;
50494 if (INSN_CODE (insn) < 0)
50495 return disp_no_group;
50496 memory = get_attr_memory (insn);
50497 if (memory == MEMORY_STORE)
50498 return disp_store;
50500 if (memory == MEMORY_LOAD)
50501 return disp_load;
50503 if (memory == MEMORY_BOTH)
50504 return disp_load_store;
50506 return disp_no_group;
50509 /* Return true if insn is a compare instruction. */
50511 static bool
50512 is_cmp (rtx_insn *insn)
50514 enum attr_type type;
50516 type = get_attr_type (insn);
50517 return (type == TYPE_TEST
50518 || type == TYPE_ICMP
50519 || type == TYPE_FCMP
50520 || GET_CODE (PATTERN (insn)) == COMPARE);
50523 /* Return true if a dispatch violation was encountered. */
50525 static bool
50526 dispatch_violation (void)
50528 if (dispatch_window_list->next)
50529 return dispatch_window_list->next->violation;
50530 return dispatch_window_list->violation;
50533 /* Return true if insn is a branch instruction. */
50535 static bool
50536 is_branch (rtx insn)
50538 return (CALL_P (insn) || JUMP_P (insn));
50541 /* Return true if insn is a prefetch instruction. */
50543 static bool
50544 is_prefetch (rtx insn)
50546 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50549 /* This function initializes a dispatch window and the list container holding a
50550 pointer to the window. */
50552 static void
50553 init_window (int window_num)
50555 int i;
50556 dispatch_windows *new_list;
50558 if (window_num == 0)
50559 new_list = dispatch_window_list;
50560 else
50561 new_list = dispatch_window_list1;
50563 new_list->num_insn = 0;
50564 new_list->num_uops = 0;
50565 new_list->window_size = 0;
50566 new_list->next = NULL;
50567 new_list->prev = NULL;
50568 new_list->window_num = window_num;
50569 new_list->num_imm = 0;
50570 new_list->num_imm_32 = 0;
50571 new_list->num_imm_64 = 0;
50572 new_list->imm_size = 0;
50573 new_list->num_loads = 0;
50574 new_list->num_stores = 0;
50575 new_list->violation = false;
50577 for (i = 0; i < MAX_INSN; i++)
50579 new_list->window[i].insn = NULL;
50580 new_list->window[i].group = disp_no_group;
50581 new_list->window[i].path = no_path;
50582 new_list->window[i].byte_len = 0;
50583 new_list->window[i].imm_bytes = 0;
50585 return;
50588 /* This function allocates and initializes a dispatch window and the
50589 list container holding a pointer to the window. */
50591 static dispatch_windows *
50592 allocate_window (void)
50594 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50595 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50597 return new_list;
50600 /* This routine initializes the dispatch scheduling information. It
50601 initiates building dispatch scheduler tables and constructs the
50602 first dispatch window. */
50604 static void
50605 init_dispatch_sched (void)
50607 /* Allocate a dispatch list and a window. */
50608 dispatch_window_list = allocate_window ();
50609 dispatch_window_list1 = allocate_window ();
50610 init_window (0);
50611 init_window (1);
50614 /* This function returns true if a branch is detected. End of a basic block
50615 does not have to be a branch, but here we assume only branches end a
50616 window. */
50618 static bool
50619 is_end_basic_block (enum dispatch_group group)
50621 return group == disp_branch;
50624 /* This function is called when the end of a window processing is reached. */
50626 static void
50627 process_end_window (void)
50629 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50630 if (dispatch_window_list->next)
50632 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50633 gcc_assert (dispatch_window_list->window_size
50634 + dispatch_window_list1->window_size <= 48);
50635 init_window (1);
50637 init_window (0);
50640 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50641 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50642 for 48 bytes of instructions. Note that these windows are not dispatch
50643 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50645 static dispatch_windows *
50646 allocate_next_window (int window_num)
50648 if (window_num == 0)
50650 if (dispatch_window_list->next)
50651 init_window (1);
50652 init_window (0);
50653 return dispatch_window_list;
50656 dispatch_window_list->next = dispatch_window_list1;
50657 dispatch_window_list1->prev = dispatch_window_list;
50659 return dispatch_window_list1;
50662 /* Compute number of immediate operands of an instruction. */
50664 static void
50665 find_constant (rtx in_rtx, imm_info *imm_values)
50667 if (INSN_P (in_rtx))
50668 in_rtx = PATTERN (in_rtx);
50669 subrtx_iterator::array_type array;
50670 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50671 if (const_rtx x = *iter)
50672 switch (GET_CODE (x))
50674 case CONST:
50675 case SYMBOL_REF:
50676 case CONST_INT:
50677 (imm_values->imm)++;
50678 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50679 (imm_values->imm32)++;
50680 else
50681 (imm_values->imm64)++;
50682 break;
50684 case CONST_DOUBLE:
50685 (imm_values->imm)++;
50686 (imm_values->imm64)++;
50687 break;
50689 case CODE_LABEL:
50690 if (LABEL_KIND (x) == LABEL_NORMAL)
50692 (imm_values->imm)++;
50693 (imm_values->imm32)++;
50695 break;
50697 default:
50698 break;
50702 /* Return total size of immediate operands of an instruction along with number
50703 of corresponding immediate-operands. It initializes its parameters to zero
50704 before calling FIND_CONSTANT.
50705 INSN is the input instruction. IMM is the total of immediates.
50706 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50707 bit immediates. */
50709 static int
50710 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50712 imm_info imm_values = {0, 0, 0};
50714 find_constant (insn, &imm_values);
50715 *imm = imm_values.imm;
50716 *imm32 = imm_values.imm32;
50717 *imm64 = imm_values.imm64;
50718 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
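/* Editorial example: an insn carrying one 32-bit and one 64-bit immediate
   sets *IMM = 2, *IMM32 = 1 and *IMM64 = 1, and the function returns
   1*4 + 1*8 = 12 bytes of immediate data.  */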
50721 /* This function indicates whether any operand of an instruction is an
50722 immediate. */
50724 static bool
50725 has_immediate (rtx insn)
50727 int num_imm_operand;
50728 int num_imm32_operand;
50729 int num_imm64_operand;
50731 if (insn)
50732 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50733 &num_imm64_operand);
50734 return false;
50737 /* Return single or double path for instructions. */
50739 static enum insn_path
50740 get_insn_path (rtx_insn *insn)
50742 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50744 if ((int)path == 0)
50745 return path_single;
50747 if ((int)path == 1)
50748 return path_double;
50750 return path_multi;
50753 /* Return insn dispatch group. */
50755 static enum dispatch_group
50756 get_insn_group (rtx_insn *insn)
50758 enum dispatch_group group = get_mem_group (insn);
50759 if (group)
50760 return group;
50762 if (is_branch (insn))
50763 return disp_branch;
50765 if (is_cmp (insn))
50766 return disp_cmp;
50768 if (has_immediate (insn))
50769 return disp_imm;
50771 if (is_prefetch (insn))
50772 return disp_prefetch;
50774 return disp_no_group;
50777 /* Count number of GROUP restricted instructions in a dispatch
50778 window WINDOW_LIST. */
50780 static int
50781 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50783 enum dispatch_group group = get_insn_group (insn);
50784 int imm_size;
50785 int num_imm_operand;
50786 int num_imm32_operand;
50787 int num_imm64_operand;
50789 if (group == disp_no_group)
50790 return 0;
50792 if (group == disp_imm)
50794 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50795 &num_imm64_operand);
50796 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50797 || num_imm_operand + window_list->num_imm > MAX_IMM
50798 || (num_imm32_operand > 0
50799 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50800 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50801 || (num_imm64_operand > 0
50802 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50803 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50804 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50805 && num_imm64_operand > 0
50806 && ((window_list->num_imm_64 > 0
50807 && window_list->num_insn >= 2)
50808 || window_list->num_insn >= 3)))
50809 return BIG;
50811 return 1;
50814 if ((group == disp_load_store
50815 && (window_list->num_loads >= MAX_LOAD
50816 || window_list->num_stores >= MAX_STORE))
50817 || ((group == disp_load
50818 || group == disp_prefetch)
50819 && window_list->num_loads >= MAX_LOAD)
50820 || (group == disp_store
50821 && window_list->num_stores >= MAX_STORE))
50822 return BIG;
50824 return 1;
50827 /* This function returns true if insn satisfies dispatch rules on the
50828 last window scheduled. */
50830 static bool
50831 fits_dispatch_window (rtx_insn *insn)
50833 dispatch_windows *window_list = dispatch_window_list;
50834 dispatch_windows *window_list_next = dispatch_window_list->next;
50835 unsigned int num_restrict;
50836 enum dispatch_group group = get_insn_group (insn);
50837 enum insn_path path = get_insn_path (insn);
50838 int sum;
50840 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50841 instructions should be given the lowest priority in the
50842 scheduling process in Haifa scheduler to make sure they will be
50843 scheduled in the same dispatch window as the reference to them. */
50844 if (group == disp_jcc || group == disp_cmp)
50845 return false;
50847 /* Check nonrestricted. */
50848 if (group == disp_no_group || group == disp_branch)
50849 return true;
50851 /* Get last dispatch window. */
50852 if (window_list_next)
50853 window_list = window_list_next;
50855 if (window_list->window_num == 1)
50857 sum = window_list->prev->window_size + window_list->window_size;
50859 if (sum == 32
50860 || (min_insn_size (insn) + sum) >= 48)
50861 /* Window 1 is full. Go for next window. */
50862 return true;
50865 num_restrict = count_num_restricted (insn, window_list);
50867 if (num_restrict > num_allowable_groups[group])
50868 return false;
50870 /* See if it fits in the first window. */
50871 if (window_list->window_num == 0)
50873 /* The first window should have only single and double path
50874 uops. */
50875 if (path == path_double
50876 && (window_list->num_uops + 2) > MAX_INSN)
50877 return false;
50878 else if (path != path_single)
50879 return false;
50881 return true;
50884 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50885 dispatch window WINDOW_LIST. */
50887 static void
50888 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50890 int byte_len = min_insn_size (insn);
50891 int num_insn = window_list->num_insn;
50892 int imm_size;
50893 sched_insn_info *window = window_list->window;
50894 enum dispatch_group group = get_insn_group (insn);
50895 enum insn_path path = get_insn_path (insn);
50896 int num_imm_operand;
50897 int num_imm32_operand;
50898 int num_imm64_operand;
50900 if (!window_list->violation && group != disp_cmp
50901 && !fits_dispatch_window (insn))
50902 window_list->violation = true;
50904 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50905 &num_imm64_operand);
50907 /* Initialize window with new instruction. */
50908 window[num_insn].insn = insn;
50909 window[num_insn].byte_len = byte_len;
50910 window[num_insn].group = group;
50911 window[num_insn].path = path;
50912 window[num_insn].imm_bytes = imm_size;
50914 window_list->window_size += byte_len;
50915 window_list->num_insn = num_insn + 1;
50916 window_list->num_uops = window_list->num_uops + num_uops;
50917 window_list->imm_size += imm_size;
50918 window_list->num_imm += num_imm_operand;
50919 window_list->num_imm_32 += num_imm32_operand;
50920 window_list->num_imm_64 += num_imm64_operand;
50922 if (group == disp_store)
50923 window_list->num_stores += 1;
50924 else if (group == disp_load
50925 || group == disp_prefetch)
50926 window_list->num_loads += 1;
50927 else if (group == disp_load_store)
50929 window_list->num_stores += 1;
50930 window_list->num_loads += 1;
50934 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50935 If the total bytes of instructions or the number of instructions in
50936 the window exceeds the allowable limit, it allocates a new window. */
50938 static void
50939 add_to_dispatch_window (rtx_insn *insn)
50941 int byte_len;
50942 dispatch_windows *window_list;
50943 dispatch_windows *next_list;
50944 dispatch_windows *window0_list;
50945 enum insn_path path;
50946 enum dispatch_group insn_group;
50947 bool insn_fits;
50948 int num_insn;
50949 int num_uops;
50950 int window_num;
50951 int insn_num_uops;
50952 int sum;
50954 if (INSN_CODE (insn) < 0)
50955 return;
50957 byte_len = min_insn_size (insn);
50958 window_list = dispatch_window_list;
50959 next_list = window_list->next;
50960 path = get_insn_path (insn);
50961 insn_group = get_insn_group (insn);
50963 /* Get the last dispatch window. */
50964 if (next_list)
50965 window_list = dispatch_window_list->next;
50967 if (path == path_single)
50968 insn_num_uops = 1;
50969 else if (path == path_double)
50970 insn_num_uops = 2;
50971 else
50972 insn_num_uops = (int) path;
50974 /* If current window is full, get a new window.
50975 Window number zero is full if MAX_INSN uops are scheduled in it.
50976 Window number one is full if window zero's bytes plus window
50977 one's bytes total 32, or if the bytes of the new instruction added
50978 to the total make it greater than 48, or if it already has MAX_INSN
50979 instructions in it. */
50980 num_insn = window_list->num_insn;
50981 num_uops = window_list->num_uops;
50982 window_num = window_list->window_num;
50983 insn_fits = fits_dispatch_window (insn);
50985 if (num_insn >= MAX_INSN
50986 || num_uops + insn_num_uops > MAX_INSN
50987 || !(insn_fits))
50989 window_num = ~window_num & 1;
50990 window_list = allocate_next_window (window_num);
50993 if (window_num == 0)
50995 add_insn_window (insn, window_list, insn_num_uops);
50996 if (window_list->num_insn >= MAX_INSN
50997 && insn_group == disp_branch)
50999 process_end_window ();
51000 return;
51003 else if (window_num == 1)
51005 window0_list = window_list->prev;
51006 sum = window0_list->window_size + window_list->window_size;
51007 if (sum == 32
51008 || (byte_len + sum) >= 48)
51010 process_end_window ();
51011 window_list = dispatch_window_list;
51014 add_insn_window (insn, window_list, insn_num_uops);
51016 else
51017 gcc_unreachable ();
51019 if (is_end_basic_block (insn_group))
51022 /* End of basic block is reached; do end-basic-block processing. */
51022 process_end_window ();
51023 return;
51027 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51029 DEBUG_FUNCTION static void
51030 debug_dispatch_window_file (FILE *file, int window_num)
51032 dispatch_windows *list;
51033 int i;
51035 if (window_num == 0)
51036 list = dispatch_window_list;
51037 else
51038 list = dispatch_window_list1;
51040 fprintf (file, "Window #%d:\n", list->window_num);
51041 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51042 list->num_insn, list->num_uops, list->window_size);
51043 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51044 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51046 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51047 list->num_stores);
51048 fprintf (file, " insn info:\n");
51050 for (i = 0; i < MAX_INSN; i++)
51052 if (!list->window[i].insn)
51053 break;
51054 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51055 i, group_name[list->window[i].group],
51056 i, (void *)list->window[i].insn,
51057 i, list->window[i].path,
51058 i, list->window[i].byte_len,
51059 i, list->window[i].imm_bytes);
51063 /* Print to stdout a dispatch window. */
51065 DEBUG_FUNCTION void
51066 debug_dispatch_window (int window_num)
51068 debug_dispatch_window_file (stdout, window_num);
51071 /* Print INSN dispatch information to FILE. */
51073 DEBUG_FUNCTION static void
51074 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51076 int byte_len;
51077 enum insn_path path;
51078 enum dispatch_group group;
51079 int imm_size;
51080 int num_imm_operand;
51081 int num_imm32_operand;
51082 int num_imm64_operand;
51084 if (INSN_CODE (insn) < 0)
51085 return;
51087 byte_len = min_insn_size (insn);
51088 path = get_insn_path (insn);
51089 group = get_insn_group (insn);
51090 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51091 &num_imm64_operand);
51093 fprintf (file, " insn info:\n");
51094 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51095 group_name[group], path, byte_len);
51096 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51097 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51100 /* Print to STDOUT the status of the ready list with respect to
51101 dispatch windows. */
51103 DEBUG_FUNCTION void
51104 debug_ready_dispatch (void)
51106 int i;
51107 int no_ready = number_in_ready ();
51109 fprintf (stdout, "Number of ready: %d\n", no_ready);
51111 for (i = 0; i < no_ready; i++)
51112 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51115 /* This routine is the driver of the dispatch scheduler. */
51117 static void
51118 do_dispatch (rtx_insn *insn, int mode)
51120 if (mode == DISPATCH_INIT)
51121 init_dispatch_sched ();
51122 else if (mode == ADD_TO_DISPATCH_WINDOW)
51123 add_to_dispatch_window (insn);
51126 /* Return TRUE if Dispatch Scheduling is supported. */
51128 static bool
51129 has_dispatch (rtx_insn *insn, int action)
51131 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51132 && flag_dispatch_scheduler)
51133 switch (action)
51135 default:
51136 return false;
51138 case IS_DISPATCH_ON:
51139 return true;
51140 break;
51142 case IS_CMP:
51143 return is_cmp (insn);
51145 case DISPATCH_VIOLATION:
51146 return dispatch_violation ();
51148 case FITS_DISPATCH_WINDOW:
51149 return fits_dispatch_window (insn);
51152 return false;
51155 /* Implementation of reassociation_width target hook used by
51156 reassoc phase to identify parallelism level in reassociated
51157 tree. The statement's tree_code is passed in OPC. The argument type
51158 is passed in MODE.
51160 Currently parallel reassociation is enabled for Atom
51161 processors only and we set reassociation width to be 2
51162 because Atom may issue up to 2 instructions per cycle.
51164 Return value should be fixed if parallel reassociation is
51165 enabled for other processors. */
51167 static int
51168 ix86_reassociation_width (unsigned int, machine_mode mode)
51170 /* Vector part. */
51171 if (VECTOR_MODE_P (mode))
51173 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51174 return 2;
51175 else
51176 return 1;
51179 /* Scalar part. */
51180 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51181 return 2;
51182 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51183 return 2;
51184 else
51185 return 1;
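/* Editorial illustration (not part of the original source): a
   reassociation width of 2 lets the reassoc pass turn the serial chain
       t = ((a + b) + c) + d;
   with three dependent additions into the two-wide form
       t = (a + b) + (c + d);
   whose two inner additions can issue in parallel on a 2-issue core
   such as Atom.  */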
51188 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51189 place emms and femms instructions. */
51191 static machine_mode
51192 ix86_preferred_simd_mode (machine_mode mode)
51194 if (!TARGET_SSE)
51195 return word_mode;
51197 switch (mode)
51199 case QImode:
51200 return TARGET_AVX512BW ? V64QImode :
51201 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51202 case HImode:
51203 return TARGET_AVX512BW ? V32HImode :
51204 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51205 case SImode:
51206 return TARGET_AVX512F ? V16SImode :
51207 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51208 case DImode:
51209 return TARGET_AVX512F ? V8DImode :
51210 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51212 case SFmode:
51213 if (TARGET_AVX512F)
51214 return V16SFmode;
51215 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51216 return V8SFmode;
51217 else
51218 return V4SFmode;
51220 case DFmode:
51221 if (!TARGET_VECTORIZE_DOUBLE)
51222 return word_mode;
51223 else if (TARGET_AVX512F)
51224 return V8DFmode;
51225 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51226 return V4DFmode;
51227 else if (TARGET_SSE2)
51228 return V2DFmode;
51229 /* FALLTHRU */
51231 default:
51232 return word_mode;
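/* Editorial example: on an AVX2 target without -mprefer-avx128, SImode is
   mapped to V8SImode (eight 32-bit lanes in a 256-bit vector), while a
   plain SSE2 target gets V4SImode.  */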
51236 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51237 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51238 256bit and 128bit vectors. */
51240 static unsigned int
51241 ix86_autovectorize_vector_sizes (void)
51243 return TARGET_AVX512F ? 64 | 32 | 16 :
51244 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
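/* Editorial example: with AVX512F this returns 64 | 32 | 16 = 112, with AVX
   (and no -mprefer-avx128) it returns 32 | 16 = 48, and otherwise 0, i.e.
   only the preferred SIMD mode is tried.  */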
51249 /* Return class of registers which could be used for pseudo of MODE
51250 and of class RCLASS for spilling instead of memory. Return NO_REGS
51251 if it is not possible or not profitable. */
51252 static reg_class_t
51253 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51255 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51256 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51257 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51258 return ALL_SSE_REGS;
51259 return NO_REGS;
51262 /* Implement targetm.vectorize.init_cost. */
51264 static void *
51265 ix86_init_cost (struct loop *)
51267 unsigned *cost = XNEWVEC (unsigned, 3);
51268 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51269 return cost;
51272 /* Implement targetm.vectorize.add_stmt_cost. */
51274 static unsigned
51275 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51276 struct _stmt_vec_info *stmt_info, int misalign,
51277 enum vect_cost_model_location where)
51279 unsigned *cost = (unsigned *) data;
51280 unsigned retval = 0;
51282 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51283 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51285 /* Statements in an inner loop relative to the loop being
51286 vectorized are weighted more heavily. The value here is
51287 arbitrary and could potentially be improved with analysis. */
51288 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51289 count *= 50; /* FIXME. */
51291 retval = (unsigned) (count * stmt_cost);
51293 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51294 for Silvermont as it has an out-of-order integer pipeline and can execute
51295 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51296 if (TARGET_SILVERMONT || TARGET_INTEL)
51297 if (stmt_info && stmt_info->stmt)
51299 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51300 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51301 retval = (retval * 17) / 10;
51304 cost[where] += retval;
51306 return retval;
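/* Editorial worked example (hypothetical numbers): COUNT = 2 vector
   statements of cost 3 inside an inner loop are first scaled to
   count = 100, giving retval = 300; on Silvermont with an integer
   result this becomes (300 * 17) / 10 = 510 before being added to
   cost[where].  */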
51309 /* Implement targetm.vectorize.finish_cost. */
51311 static void
51312 ix86_finish_cost (void *data, unsigned *prologue_cost,
51313 unsigned *body_cost, unsigned *epilogue_cost)
51315 unsigned *cost = (unsigned *) data;
51316 *prologue_cost = cost[vect_prologue];
51317 *body_cost = cost[vect_body];
51318 *epilogue_cost = cost[vect_epilogue];
51321 /* Implement targetm.vectorize.destroy_cost_data. */
51323 static void
51324 ix86_destroy_cost_data (void *data)
51326 free (data);
51329 /* Validate target specific memory model bits in VAL. */
51331 static unsigned HOST_WIDE_INT
51332 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51334 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51335 bool strong;
51337 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51338 |MEMMODEL_MASK)
51339 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51341 warning (OPT_Winvalid_memory_model,
51342 "Unknown architecture specific memory model");
51343 return MEMMODEL_SEQ_CST;
51345 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51346 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51348 warning (OPT_Winvalid_memory_model,
51349 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51350 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51352 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51354 warning (OPT_Winvalid_memory_model,
51355 "HLE_RELEASE not used with RELEASE or stronger memory model");
51356 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51358 return val;
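/* Editorial example: MEMMODEL_ACQUIRE | IX86_HLE_ACQUIRE passes the checks
   above and is returned unchanged, while IX86_HLE_ACQUIRE combined with a
   relaxed model triggers the warning and falls back to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */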
51361 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51362 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51363 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51364 or number of vecsize_mangle variants that should be emitted. */
51366 static int
51367 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51368 struct cgraph_simd_clone *clonei,
51369 tree base_type, int num)
51371 int ret = 1;
51373 if (clonei->simdlen
51374 && (clonei->simdlen < 2
51375 || clonei->simdlen > 16
51376 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51378 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51379 "unsupported simdlen %d", clonei->simdlen);
51380 return 0;
51383 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51384 if (TREE_CODE (ret_type) != VOID_TYPE)
51385 switch (TYPE_MODE (ret_type))
51387 case QImode:
51388 case HImode:
51389 case SImode:
51390 case DImode:
51391 case SFmode:
51392 case DFmode:
51393 /* case SCmode: */
51394 /* case DCmode: */
51395 break;
51396 default:
51397 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51398 "unsupported return type %qT for simd\n", ret_type);
51399 return 0;
51402 tree t;
51403 int i;
51405 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51406 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51407 switch (TYPE_MODE (TREE_TYPE (t)))
51409 case QImode:
51410 case HImode:
51411 case SImode:
51412 case DImode:
51413 case SFmode:
51414 case DFmode:
51415 /* case SCmode: */
51416 /* case DCmode: */
51417 break;
51418 default:
51419 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51420 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51421 return 0;
51424 if (clonei->cilk_elemental)
51426 /* Parse the processor clause here. If not present, default to 'b'. */
51427 clonei->vecsize_mangle = 'b';
51429 else if (!TREE_PUBLIC (node->decl))
51431 /* If the function isn't exported, we can pick up just one ISA
51432 for the clones. */
51433 if (TARGET_AVX2)
51434 clonei->vecsize_mangle = 'd';
51435 else if (TARGET_AVX)
51436 clonei->vecsize_mangle = 'c';
51437 else
51438 clonei->vecsize_mangle = 'b';
51439 ret = 1;
51441 else
51443 clonei->vecsize_mangle = "bcd"[num];
51444 ret = 3;
51446 switch (clonei->vecsize_mangle)
51448 case 'b':
51449 clonei->vecsize_int = 128;
51450 clonei->vecsize_float = 128;
51451 break;
51452 case 'c':
51453 clonei->vecsize_int = 128;
51454 clonei->vecsize_float = 256;
51455 break;
51456 case 'd':
51457 clonei->vecsize_int = 256;
51458 clonei->vecsize_float = 256;
51459 break;
51461 if (clonei->simdlen == 0)
51463 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51464 clonei->simdlen = clonei->vecsize_int;
51465 else
51466 clonei->simdlen = clonei->vecsize_float;
51467 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51468 if (clonei->simdlen > 16)
51469 clonei->simdlen = 16;
51471 return ret;
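/* Editorial example: a non-exported clone on an AVX2 target gets mangle
   letter 'd' (vecsize_int = vecsize_float = 256); with a float base type
   and an unspecified simdlen this yields simdlen = 256 / 32 = 8.  */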
51474 /* Add target attribute to SIMD clone NODE if needed. */
51476 static void
51477 ix86_simd_clone_adjust (struct cgraph_node *node)
51479 const char *str = NULL;
51480 gcc_assert (node->decl == cfun->decl);
51481 switch (node->simdclone->vecsize_mangle)
51483 case 'b':
51484 if (!TARGET_SSE2)
51485 str = "sse2";
51486 break;
51487 case 'c':
51488 if (!TARGET_AVX)
51489 str = "avx";
51490 break;
51491 case 'd':
51492 if (!TARGET_AVX2)
51493 str = "avx2";
51494 break;
51495 default:
51496 gcc_unreachable ();
51498 if (str == NULL)
51499 return;
51500 push_cfun (NULL);
51501 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51502 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51503 gcc_assert (ok);
51504 pop_cfun ();
51505 ix86_reset_previous_fndecl ();
51506 ix86_set_current_function (node->decl);
51509 /* If SIMD clone NODE can't be used in a vectorized loop
51510 in current function, return -1, otherwise return a badness of using it
51511 (0 if it is most desirable from vecsize_mangle point of view, 1
51512 slightly less desirable, etc.). */
51514 static int
51515 ix86_simd_clone_usable (struct cgraph_node *node)
51517 switch (node->simdclone->vecsize_mangle)
51519 case 'b':
51520 if (!TARGET_SSE2)
51521 return -1;
51522 if (!TARGET_AVX)
51523 return 0;
51524 return TARGET_AVX2 ? 2 : 1;
51525 case 'c':
51526 if (!TARGET_AVX)
51527 return -1;
51528 return TARGET_AVX2 ? 1 : 0;
51529 break;
51530 case 'd':
51531 if (!TARGET_AVX2)
51532 return -1;
51533 return 0;
51534 default:
51535 gcc_unreachable ();
51539 /* This function adjusts the unroll factor based on
51540 the hardware capabilities. For example, bdver3 has
51541 a loop buffer which makes unrolling of smaller
51542 loops less important. This function decides the
51543 unroll factor using the number of memory references
51544 (value 32 is used) as a heuristic. */
51546 static unsigned
51547 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51549 basic_block *bbs;
51550 rtx_insn *insn;
51551 unsigned i;
51552 unsigned mem_count = 0;
51554 if (!TARGET_ADJUST_UNROLL)
51555 return nunroll;
51557 /* Count the number of memory references within the loop body.
51558 This value determines the unrolling factor for bdver3 and bdver4
51559 architectures. */
51560 subrtx_iterator::array_type array;
51561 bbs = get_loop_body (loop);
51562 for (i = 0; i < loop->num_nodes; i++)
51563 FOR_BB_INSNS (bbs[i], insn)
51564 if (NONDEBUG_INSN_P (insn))
51565 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51566 if (const_rtx x = *iter)
51567 if (MEM_P (x))
51569 machine_mode mode = GET_MODE (x);
51570 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51571 if (n_words > 4)
51572 mem_count += 2;
51573 else
51574 mem_count += 1;
51576 free (bbs);
51578 if (mem_count && mem_count <= 32)
51579 return 32 / mem_count;
51581 return nunroll;
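/* Editorial example: a bdver3/bdver4 loop body with 8 word-sized memory
   references gets its unroll factor capped at 32 / 8 = 4, while a body
   with more than 32 references keeps the caller-provided factor.  */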
51585 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51587 static bool
51588 ix86_float_exceptions_rounding_supported_p (void)
51590 /* For x87 floating point with standard excess precision handling,
51591 there is no adddf3 pattern (since x87 floating point only has
51592 XFmode operations) so the default hook implementation gets this
51593 wrong. */
51594 return TARGET_80387 || TARGET_SSE_MATH;
51597 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51599 static void
51600 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51602 if (!TARGET_80387 && !TARGET_SSE_MATH)
51603 return;
51604 tree exceptions_var = create_tmp_var (integer_type_node);
51605 if (TARGET_80387)
51607 tree fenv_index_type = build_index_type (size_int (6));
51608 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51609 tree fenv_var = create_tmp_var (fenv_type);
51610 mark_addressable (fenv_var);
51611 tree fenv_ptr = build_pointer_type (fenv_type);
51612 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51613 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51614 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51615 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51616 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51617 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51618 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51619 tree hold_fnclex = build_call_expr (fnclex, 0);
51620 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51621 hold_fnclex);
51622 *clear = build_call_expr (fnclex, 0);
51623 tree sw_var = create_tmp_var (short_unsigned_type_node);
51624 tree fnstsw_call = build_call_expr (fnstsw, 0);
51625 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51626 sw_var, fnstsw_call);
51627 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51628 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51629 exceptions_var, exceptions_x87);
51630 *update = build2 (COMPOUND_EXPR, integer_type_node,
51631 sw_mod, update_mod);
51632 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51633 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51635 if (TARGET_SSE_MATH)
51637 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51638 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51639 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51640 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51641 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51642 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51643 mxcsr_orig_var, stmxcsr_hold_call);
51644 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51645 mxcsr_orig_var,
51646 build_int_cst (unsigned_type_node, 0x1f80));
51647 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51648 build_int_cst (unsigned_type_node, 0xffffffc0));
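/* Editorial note: 0x1f80 sets the six MXCSR exception-mask bits
   (bits 7-12) and the AND with 0xffffffc0 clears the six exception-flag
   bits (bits 0-5), so the modified MXCSR masks all exceptions and
   starts with no flags pending.  */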
51649 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51650 mxcsr_mod_var, hold_mod_val);
51651 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51652 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51653 hold_assign_orig, hold_assign_mod);
51654 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51655 ldmxcsr_hold_call);
51656 if (*hold)
51657 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51658 else
51659 *hold = hold_all;
51660 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51661 if (*clear)
51662 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51663 ldmxcsr_clear_call);
51664 else
51665 *clear = ldmxcsr_clear_call;
51666 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51667 tree exceptions_sse = fold_convert (integer_type_node,
51668 stxmcsr_update_call);
51669 if (*update)
51671 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51672 exceptions_var, exceptions_sse);
51673 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51674 exceptions_var, exceptions_mod);
51675 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51676 exceptions_assign);
51678 else
51679 *update = build2 (MODIFY_EXPR, integer_type_node,
51680 exceptions_var, exceptions_sse);
51681 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51682 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51683 ldmxcsr_update_call);
51685 tree atomic_feraiseexcept
51686 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51687 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51688 1, exceptions_var);
51689 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51690 atomic_feraiseexcept_call);
51693 /* Return mode to be used for bounds or VOIDmode
51694 if bounds are not supported. */
51696 static enum machine_mode
51697 ix86_mpx_bound_mode ()
51699 /* Do not support pointer checker if MPX
51700 is not enabled. */
51701 if (!TARGET_MPX)
51703 if (flag_check_pointer_bounds)
51704 warning (0, "Pointer Checker requires MPX support on this target."
51705 " Use -mmpx options to enable MPX.");
51706 return VOIDmode;
51709 return BNDmode;
51712 /* Return constant used to statically initialize constant bounds.
51714 This function is used to create special bound values. For now
51715 only INIT bounds and NONE bounds are expected. More special
51716 values may be added later. */
51718 static tree
51719 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51721 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51722 : build_zero_cst (pointer_sized_int_node);
51723 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51724 : build_minus_one_cst (pointer_sized_int_node);
51726 /* This function is supposed to be used to create INIT and
51727 NONE bounds only. */
51728 gcc_assert ((lb == 0 && ub == -1)
51729 || (lb == -1 && ub == 0));
51731 return build_complex (NULL, low, high);
51734 /* Generate a list of statements STMTS to initialize pointer bounds
51735 variable VAR with bounds LB and UB. Return the number of generated
51736 statements. */
51738 static int
51739 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51741 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51742 tree lhs, modify, var_p;
51744 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51745 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51747 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51748 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51749 append_to_statement_list (modify, stmts);
51751 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51752 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51753 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51754 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51755 append_to_statement_list (modify, stmts);
51757 return 2;
51760 /* Initialize the GCC target structure. */
51761 #undef TARGET_RETURN_IN_MEMORY
51762 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51764 #undef TARGET_LEGITIMIZE_ADDRESS
51765 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51767 #undef TARGET_ATTRIBUTE_TABLE
51768 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51769 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51770 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51771 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51772 # undef TARGET_MERGE_DECL_ATTRIBUTES
51773 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51774 #endif
51776 #undef TARGET_COMP_TYPE_ATTRIBUTES
51777 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51779 #undef TARGET_INIT_BUILTINS
51780 #define TARGET_INIT_BUILTINS ix86_init_builtins
51781 #undef TARGET_BUILTIN_DECL
51782 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51783 #undef TARGET_EXPAND_BUILTIN
51784 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51786 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51787 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51788 ix86_builtin_vectorized_function
51790 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51791 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51793 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51794 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51796 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51797 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51799 #undef TARGET_BUILTIN_RECIPROCAL
51800 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51802 #undef TARGET_ASM_FUNCTION_EPILOGUE
51803 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51805 #undef TARGET_ENCODE_SECTION_INFO
51806 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51807 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51808 #else
51809 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51810 #endif
51812 #undef TARGET_ASM_OPEN_PAREN
51813 #define TARGET_ASM_OPEN_PAREN ""
51814 #undef TARGET_ASM_CLOSE_PAREN
51815 #define TARGET_ASM_CLOSE_PAREN ""
51817 #undef TARGET_ASM_BYTE_OP
51818 #define TARGET_ASM_BYTE_OP ASM_BYTE
51820 #undef TARGET_ASM_ALIGNED_HI_OP
51821 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51822 #undef TARGET_ASM_ALIGNED_SI_OP
51823 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51824 #ifdef ASM_QUAD
51825 #undef TARGET_ASM_ALIGNED_DI_OP
51826 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51827 #endif
51829 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51830 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51832 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51833 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51835 #undef TARGET_ASM_UNALIGNED_HI_OP
51836 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51837 #undef TARGET_ASM_UNALIGNED_SI_OP
51838 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51839 #undef TARGET_ASM_UNALIGNED_DI_OP
51840 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51842 #undef TARGET_PRINT_OPERAND
51843 #define TARGET_PRINT_OPERAND ix86_print_operand
51844 #undef TARGET_PRINT_OPERAND_ADDRESS
51845 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51846 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51847 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51848 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51849 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51851 #undef TARGET_SCHED_INIT_GLOBAL
51852 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51853 #undef TARGET_SCHED_ADJUST_COST
51854 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51855 #undef TARGET_SCHED_ISSUE_RATE
51856 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51857 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51858 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51859 ia32_multipass_dfa_lookahead
51860 #undef TARGET_SCHED_MACRO_FUSION_P
51861 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51862 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51863 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51865 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51866 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51868 #undef TARGET_MEMMODEL_CHECK
51869 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51871 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51872 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51874 #ifdef HAVE_AS_TLS
51875 #undef TARGET_HAVE_TLS
51876 #define TARGET_HAVE_TLS true
51877 #endif
51878 #undef TARGET_CANNOT_FORCE_CONST_MEM
51879 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51880 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51881 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51883 #undef TARGET_DELEGITIMIZE_ADDRESS
51884 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51886 #undef TARGET_MS_BITFIELD_LAYOUT_P
51887 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51889 #if TARGET_MACHO
51890 #undef TARGET_BINDS_LOCAL_P
51891 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51892 #endif
51893 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51894 #undef TARGET_BINDS_LOCAL_P
51895 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51896 #endif
51898 #undef TARGET_ASM_OUTPUT_MI_THUNK
51899 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51900 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51901 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51903 #undef TARGET_ASM_FILE_START
51904 #define TARGET_ASM_FILE_START x86_file_start
51906 #undef TARGET_OPTION_OVERRIDE
51907 #define TARGET_OPTION_OVERRIDE ix86_option_override
51909 #undef TARGET_REGISTER_MOVE_COST
51910 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51911 #undef TARGET_MEMORY_MOVE_COST
51912 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51913 #undef TARGET_RTX_COSTS
51914 #define TARGET_RTX_COSTS ix86_rtx_costs
51915 #undef TARGET_ADDRESS_COST
51916 #define TARGET_ADDRESS_COST ix86_address_cost
51918 #undef TARGET_FIXED_CONDITION_CODE_REGS
51919 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51920 #undef TARGET_CC_MODES_COMPATIBLE
51921 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51923 #undef TARGET_MACHINE_DEPENDENT_REORG
51924 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51926 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51927 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51929 #undef TARGET_BUILD_BUILTIN_VA_LIST
51930 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51932 #undef TARGET_FOLD_BUILTIN
51933 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51935 #undef TARGET_COMPARE_VERSION_PRIORITY
51936 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51938 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51939 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51940 ix86_generate_version_dispatcher_body
51942 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51943 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51944 ix86_get_function_versions_dispatcher
51946 #undef TARGET_ENUM_VA_LIST_P
51947 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51949 #undef TARGET_FN_ABI_VA_LIST
51950 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51952 #undef TARGET_CANONICAL_VA_LIST_TYPE
51953 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51955 #undef TARGET_EXPAND_BUILTIN_VA_START
51956 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51958 #undef TARGET_MD_ASM_CLOBBERS
51959 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
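
/* Vectorizer cost model and preferred SIMD mode hooks.  */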
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
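
/* Hooks behind the "target" attribute/pragma: validate the attribute and
   save, restore and print the per-function option state.  */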
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
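
/* SIMD clone hooks, used when creating vector variants of a function
   (e.g. for OpenMP "declare simd").  */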
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
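
/* Mode-switching hooks used by the optimize-mode-switching pass.  */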
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
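
/* Pointer Bounds Checker (Intel MPX) hooks.  */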
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
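
/* Collect all of the TARGET_* overrides above (plus the defaults from
   target-def.h for hooks not overridden here) into the target hook
   vector used throughout the compiler.  */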
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"